# ============================================================================== # 10_CREATE_PER_FIELD_TIFFS_UTILS.R # ============================================================================== # UTILITY FUNCTIONS FOR SCRIPT 10: Per-Field TIFF Creation # # PURPOSE: # Extracted helper functions specific to Script 10 (per-field TIFF splitting). # These functions are domain-specific (not generic) and handle raster cropping # and batch processing of satellite imagery for per-field storage. # # FUNCTIONS: # 1. crop_tiff_to_fields() — Crop a single TIFF to field boundaries # 2. process_new_merged_tif() — Batch process all new merged TIFFs into per-field structure # # DEPENDENCIES: # - terra (raster operations) # - sf (geometry operations) # - 00_common_utils.R (logging via safe_log()) # # USAGE: # source(here::here("r_app", "10_create_per_field_tiffs_utils.R")) # # ============================================================================== library(terra) library(sf) # ============================================================================== #' Crop Single TIFF to Field Boundaries #' #' Loads a single satellite TIFF, finds overlapping field boundaries, and writes #' cropped per-field versions to output directory. Handles CRS reprojection and #' idempotent file creation (skips existing files). #' #' @param tif_path Character. Absolute path to source TIFF file (4-band or 5-band). #' @param tif_date Character. Date string (YYYY-MM-DD) for naming output files. #' @param fields sf object. GeoDataFrame of field boundaries with 'field_name' column. #' @param output_base_dir Character. Base directory where per-field TIFFs are stored #' (e.g., "field_tiles" or "field_tiles_CI"). #' #' @return List with elements: #' - created: Integer. Number of field TIFFs created #' - skipped: Integer. Number of files that already existed #' - errors: Integer. Number of fields that failed to crop #' #' @details #' **Output Structure:** #' Creates files at: {output_base_dir}/{field_name}/{tif_date}.tif #' #' **CRS Handling:** #' Automatically reprojects field boundaries to match raster CRS before cropping. #' #' **Idempotency:** #' If output file already exists, skips writing (counted as "skipped"). #' Safe to re-run without duplicate file creation. #' #' @examples #' \dontrun{ #' fields <- load_field_boundaries("path/to/pivot.geojson") #' result <- crop_tiff_to_fields( #' tif_path = "merged_tif/2024-01-15.tif", #' tif_date = "2024-01-15", #' fields = fields, #' output_base_dir = "field_tiles" #' ) #' cat(sprintf("Created: %d, Skipped: %d, Errors: %d\n", #' result$created, result$skipped, result$errors)) #' } #' crop_tiff_to_fields <- function(tif_path, tif_date, fields, output_base_dir) { created <- 0 skipped <- 0 errors <- 0 # Load raster if (!file.exists(tif_path)) { safe_log(paste("ERROR: TIFF not found:", tif_path), "ERROR") return(list(created = 0, skipped = 0, errors = 1)) } rast <- tryCatch({ rast(tif_path) }, error = function(e) { safe_log(paste("ERROR loading raster:", e$message), "ERROR") return(NULL) }) if (is.null(rast)) { return(list(created = 0, skipped = 0, errors = 1)) } # Create raster bounding box in raster CRS rast_bbox <- st_as_sfc(st_bbox(rast)) st_crs(rast_bbox) <- st_crs(rast) # Reproject fields to match raster CRS fields_reprojected <- st_transform(fields, st_crs(rast_bbox)) # Find which fields intersect this raster (CRITICAL: raster bbox first, then fields) overlapping_indices <- st_intersects(rast_bbox, fields_reprojected, sparse = TRUE) overlapping_indices <- unique(unlist(overlapping_indices)) if (length(overlapping_indices) == 0) { safe_log(paste("No fields intersect TIFF:", basename(tif_path)), "INFO") return(list(created = 0, skipped = 0, errors = 0)) } # Process each overlapping field for (field_idx in overlapping_indices) { field_name <- fields$field[field_idx] field_geom <- fields_reprojected[field_idx, ] # Create field directory field_dir <- file.path(output_base_dir, field_name) if (!dir.exists(field_dir)) { dir.create(field_dir, recursive = TRUE, showWarnings = FALSE) } # Output file path output_path <- file.path(field_dir, paste0(tif_date, ".tif")) # Check if file already exists (idempotent) if (file.exists(output_path)) { skipped <- skipped + 1 next } # Crop raster to field boundary tryCatch({ field_rast <- crop(rast, field_geom) # Skip empty tiles: cloud-masked pixels are stored as 0 in uint16 # (NaN cannot be represented in that format). A band sum of 0 means # no satellite data was captured for this field on this date. band_sums <- terra::global(field_rast, fun = "sum", na.rm = TRUE) if (sum(band_sums$sum, na.rm = TRUE) == 0) { safe_log(paste("SKIP (no data):", field_name, tif_date), "WARNING") skipped <- skipped + 1 } else { writeRaster(field_rast, output_path, overwrite = TRUE) created <- created + 1 } }, error = function(e) { safe_log(paste("ERROR cropping field", field_name, ":", e$message), "ERROR") errors <<- errors + 1 }) } return(list(created = created, skipped = skipped, errors = errors)) } # ============================================================================== #' Process New Merged TIFFs into Per-Field Structure #' #' Batch processes all new 4-band raw satellite TIFFs from merged_tif/ directory. #' Crops each TIFF to field boundaries and stores results in per-field subdirectories. #' Supports migration mode: skips dates already processed and migrated to field_tiles_CI/. #' #' @param merged_tif_dir Character. Source directory containing raw merged TIFFs #' (e.g., "merged_tif/"). #' @param field_tiles_dir Character. Target directory for per-field TIFFs #' (e.g., "field_tiles/"). #' @param fields sf object. GeoDataFrame of field boundaries with 'field_name' column. #' @param field_tiles_ci_dir Character. Optional. DEPRECATED - no longer used. #' Kept for backward compatibility but ignored. #' Script 10 now only checks its own output #' directory (field_tiles/). #' @param end_date Date. Optional. End date for processing window (YYYY-MM-DD). #' Default: NULL (process all available dates). #' @param offset Integer. Optional. Number of days to look back from end_date. #' Only used if end_date is also provided. #' Default: NULL (process all available dates). #' #' @return List with elements: #' - total_created: Integer. Total field TIFFs created across all dates #' - total_skipped: Integer. Total files that already existed #' - total_errors: Integer. Total cropping errors across all dates #' #' @details #' **Migration Logic:** #' When migration phase is active (field_tiles_CI/ contains CI-calculated TIFFs), #' this function detects which dates have been migrated and skips reprocessing them. #' This prevents redundant work during the per-field architecture transition. #' #' **Date Detection:** #' Finds all files in merged_tif_dir matching pattern: YYYY-MM-DD.tif #' #' **Output Structure:** #' Creates per-field subdirectories and stores: field_tiles/{FIELD}/{DATE}.tif #' #' @examples #' \dontrun{ #' fields <- load_field_boundaries("path/to/pivot.geojson") #' result <- process_new_merged_tif( #' merged_tif_dir = "merged_tif", #' field_tiles_dir = "field_tiles", #' fields = fields, #' field_tiles_ci_dir = "field_tiles_CI" #' ) #' cat(sprintf("Processing complete: created=%d, skipped=%d, errors=%d\n", #' result$total_created, result$total_skipped, result$total_errors)) #' } #' process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir = NULL, end_date = NULL, offset = NULL) { safe_log("\n========================================", "INFO") safe_log("PHASE 2: PROCESSING NEW DOWNLOADS", "INFO") safe_log("========================================", "INFO") # Check if download directory exists if (!dir.exists(merged_tif_dir)) { safe_log("No merged_tif/ directory found - no new data to process", "WARNING") return(list(total_created = 0, total_skipped = 0, total_errors = 0)) } # Create output directory if (!dir.exists(field_tiles_dir)) { dir.create(field_tiles_dir, recursive = TRUE, showWarnings = FALSE) } # Find all date-pattern TIFFs in root of merged_tif tiff_files <- list.files( merged_tif_dir, pattern = "^[0-9]{4}-[0-9]{2}-[0-9]{2}\\.tif$", full.names = TRUE ) # FILTER by date window if end_date and offset provided if (!is.null(end_date) && !is.null(offset)) { start_date <- end_date - offset date_range <- seq(start_date, end_date, by = "day") date_range_str <- format(date_range, "%Y-%m-%d") # Extract dates from filenames and filter tiff_dates <- gsub("\\.tif$", "", basename(tiff_files)) tiff_files <- tiff_files[tiff_dates %in% date_range_str] safe_log(sprintf("Date window filter applied: %s to %s (%d dates)", start_date, end_date, length(date_range_str)), "INFO") } safe_log(paste("Found", length(tiff_files), "TIFF(s) to process"), "INFO") if (length(tiff_files) == 0) { safe_log("No new TIFFs found - nothing to process", "WARNING") return(list(total_created = 0, total_skipped = 0, total_errors = 0)) } # Process each new TIFF total_created <- 0 total_skipped <- 0 total_errors <- 0 for (tif_path in tiff_files) { tif_date <- gsub("\\.tif$", "", basename(tif_path)) # SKIP CHECK: Only check Script 10's own output directory (field_tiles/) # Do NOT check field_tiles_CI/ — that's Script 20's responsibility. # Script 10 and Script 20 are independent; Script 10 should not depend on # Script 20's success/failure. If field_tiles/ is missing, reprocess it. if (dir.exists(field_tiles_dir)) { date_exists_in_field_tiles <- FALSE # Check if ANY field directory has this date field_dirs <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE) for (field_dir in field_dirs) { potential_file <- file.path(field_dir, paste0(tif_date, ".tif")) if (file.exists(potential_file)) { date_exists_in_field_tiles <- TRUE break } } if (date_exists_in_field_tiles) { safe_log(paste("Skipping:", tif_date, "(already exists in field_tiles/)"), "INFO") total_skipped <- total_skipped + 1 next } } safe_log(paste("Processing:", tif_date), "INFO") result <- crop_tiff_to_fields(tif_path, tif_date, fields, field_tiles_dir) total_created <- total_created + result$created total_skipped <- total_skipped + result$skipped total_errors <- total_errors + result$errors } safe_log(paste("Processing complete: created =", total_created, ", skipped =", total_skipped, ", errors =", total_errors), "INFO") return(list(total_created = total_created, total_skipped = total_skipped, total_errors = total_errors)) }