# CI_EXTRACTION_PER_FIELD.R
# =========================
# Script 20 (Refactored for Per-Field Architecture)
#
# This script reads per-field TIFFs from Script 10 output and:
#   1. Calculates Canopy Index (CI) from 4-band imagery (RGB + NIR)
#   2. Outputs 5-band TIFFs with CI as the 5th band to
#      field_tiles_CI/{FIELD}/{DATE}.tif
#   3. Outputs per-field per-date RDS files to daily_vals/{FIELD}/{DATE}.rds
#
# Key differences from legacy Script 20:
#   - Input:  field_tiles/{FIELD}/{DATE}.tif (4-band, from Script 10)
#   - Output: field_tiles_CI/{FIELD}/{DATE}.tif (5-band with CI)
#   - Output: daily_vals/{FIELD}/{DATE}.rds (per-field CI statistics)
#   - Directly extracts CI statistics per sub_field within each field
#
# Usage:
#   Rscript 20_ci_extraction_per_field.R [project_dir] [end_date] [offset]
#   Example: Rscript 20_ci_extraction_per_field.R angata 2026-01-02 7

suppressPackageStartupMessages({
  library(sf)
  library(terra)
  library(tidyverse)
  library(lubridate)
  library(here)
})

# =============================================================================
# Main Processing
# =============================================================================

#' Run the per-field CI extraction pipeline.
#'
#' Reads CLI args (project_dir, end_date, offset), sources the project
#' utilities, then for each date in the processing window:
#'   - regenerates missing RDS stats from already-written CI TIFFs, and/or
#'   - builds 5-band (RGB + NIR + CI) field TIFFs from the merged daily TIFF
#'     and extracts per-sub_field CI statistics.
#' All results are written to disk; nothing is returned.
main <- function() {
  # STEP 1: Set working directory to project root (smartcane/)
  # This ensures all relative paths resolve correctly
  if (basename(getwd()) == "r_app") {
    setwd("..")
  }

  # STEP 2: Parse command-line arguments FIRST, then source utility scripts
  args <- commandArgs(trailingOnly = TRUE)
  project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
  end_date <- if (length(args) >= 2 && args[2] != "") as.Date(args[2]) else Sys.Date()
  # Offset is a whole number of days. Coerce to integer so the later
  # sprintf("... %d days", offset) cannot fail on a fractional double, and
  # suppress the coercion warning a non-numeric arg would raise.
  offset <- if (length(args) >= 3 && !is.na(suppressWarnings(as.numeric(args[3])))) {
    as.integer(as.numeric(args[3]))
  } else {
    7L
  }

  # Make project_dir available globally for parameters_project.R
  # (required by how that script resolves project paths)
  assign("project_dir", project_dir, envir = .GlobalEnv)

  # Load parameters_project.R (provides safe_log, date_list,
  # setup_project_directories, etc.)
  tryCatch({
    source("r_app/parameters_project.R")
  }, error = function(e) {
    cat(sprintf("Error loading parameters_project.R: %s\n", e$message))
    stop(e)
  })

  # Load CI extraction utilities (calc_ci_from_raster,
  # extract_ci_by_subfield, extract_rds_from_ci_tiff)
  tryCatch({
    source("r_app/20_ci_extraction_utils.R")
  }, error = function(e) {
    cat(sprintf("Error loading 20_ci_extraction_utils.R: %s\n", e$message))
    stop(e)
  })

  # STEP 3: Now all utilities are loaded, proceed with script logic
  safe_log(sprintf("=== Script 20: CI Extraction Per-Field ==="))
  safe_log(sprintf("Project: %s | End Date: %s | Offset: %d days",
                   project_dir, format(end_date, "%Y-%m-%d"), offset))

  # Set up directory paths from parameters
  setup <- setup_project_directories(project_dir)

  # Load field boundaries directly from field_boundaries_path in setup.
  # NOTE: the assignment inside tryCatch's expr is evaluated in this frame,
  # so field_boundaries_sf is visible after the call.
  tryCatch({
    field_boundaries_sf <- st_read(setup$field_boundaries_path, quiet = TRUE)
    safe_log(sprintf("Loaded %d field/sub_field polygons from %s",
                     nrow(field_boundaries_sf), setup$field_boundaries_path))
  }, error = function(e) {
    safe_log(sprintf("Error loading field boundaries from %s: %s",
                     setup$field_boundaries_path, e$message), "ERROR")
    stop(e)
  })

  # Get list of dates to process.
  # If in migration mode, dates_to_process is provided by the pipeline runner.
  if (exists("dates_to_process") && !is.null(dates_to_process)) {
    # Migration mode: Use provided list of dates (process ALL available dates)
    dates_filter <- sort(dates_to_process)
    safe_log(sprintf("Migration mode: Processing %d specified dates",
                     length(dates_filter)))
  } else {
    # Normal mode: Use 7-day offset window
    dates <- date_list(end_date, offset)
    dates_filter <- dates$days_filter
    safe_log(sprintf("Normal mode: Processing dates: %s to %s (%d dates)",
                     dates$start_date, dates$end_date, length(dates_filter)))
  }

  safe_log(sprintf("Input directory: %s", setup$field_tiles_dir))
  safe_log(sprintf("Output TIF directory: %s", setup$field_tiles_ci_dir))
  safe_log(sprintf("Output RDS directory: %s", setup$daily_ci_vals_dir))

  # Script 10 must have run first: its per-field output directory defines
  # the set of fields we process.
  if (!dir.exists(setup$field_tiles_dir)) {
    safe_log(sprintf("Field tiles directory not found: %s",
                     setup$field_tiles_dir), "ERROR")
    stop("Script 10 output not found. Run Script 10 first.")
  }

  fields <- list.dirs(setup$field_tiles_dir, full.names = FALSE, recursive = FALSE)
  fields <- fields[fields != ""]  # Remove empty strings

  if (length(fields) == 0) {
    safe_log("No fields found in field_tiles directory", "WARNING")
    return()
  }

  safe_log(sprintf("Found %d fields to process", length(fields)))

  # DEBUG: Check what paths are available in setup
  safe_log(sprintf("[DEBUG] Available setup paths: %s",
                   paste(names(setup), collapse = ", ")))
  safe_log(sprintf("[DEBUG] field_tiles_ci_dir: %s", setup$field_tiles_ci_dir))
  safe_log(sprintf("[DEBUG] daily_ci_vals_dir: %s", setup$daily_ci_vals_dir))

  # Pre-create output subdirectories for all fields
  # (daily_ci_vals_dir is the per-field daily CI output)
  for (field_name in fields) {
    dir.create(file.path(setup$field_tiles_ci_dir, field_name),
               showWarnings = FALSE, recursive = TRUE)
    if (!is.null(setup$daily_ci_vals_dir)) {
      dir.create(file.path(setup$daily_ci_vals_dir, field_name),
                 showWarnings = FALSE, recursive = TRUE)
    }
  }

  # Process each DATE (load merged TIFF once, extract all fields from it)
  total_processed_dates <- 0
  total_skipped_dates <- 0
  total_already_complete_dates <- 0
  total_error <- 0

  for (date_str in dates_filter) {
    input_tif_merged <- file.path(setup$merged_tif_folder,
                                  sprintf("%s.tif", date_str))
    output_tifs <- file.path(setup$field_tiles_ci_dir, fields,
                             sprintf("%s.tif", date_str))
    output_rds <- file.path(setup$daily_ci_vals_dir, fields,
                            sprintf("%s.rds", date_str))
    names(output_tifs) <- fields
    names(output_rds) <- fields

    tif_exists <- file.exists(output_tifs)
    rds_exists <- file.exists(output_rds)
    # Fields whose CI TIFF exists but RDS is missing only need stats rebuilt;
    # fields with no CI TIFF need the full raster pipeline.
    fields_need_rds_only <- fields[tif_exists & !rds_exists]
    fields_need_raster <- fields[!tif_exists]

    if (length(fields_need_rds_only) == 0 && length(fields_need_raster) == 0) {
      total_already_complete_dates <- total_already_complete_dates + 1
      safe_log(sprintf(" %s: All field outputs already exist (skipping)", date_str))
      next
    }

    fields_processed_this_date <- 0
    rds_only_processed <- 0
    raster_processed_this_date <- 0

    # Case 1: regenerate missing RDS stats from already-saved CI TIFFs
    # (no need to reload the merged TIFF for these fields).
    if (length(fields_need_rds_only) > 0) {
      for (field_name in fields_need_rds_only) {
        tryCatch({
          extract_rds_from_ci_tiff(output_tifs[[field_name]],
                                   output_rds[[field_name]],
                                   field_boundaries_sf, field_name)
          fields_processed_this_date <- fields_processed_this_date + 1
          rds_only_processed <- rds_only_processed + 1
        }, error = function(e) {
          safe_log(sprintf(" Error regenerating RDS for field %s: %s",
                           field_name, e$message), "WARNING")
        })
      }
    }

    if (length(fields_need_raster) == 0) {
      total_processed_dates <- total_processed_dates + 1
      safe_log(sprintf(" %s: Regenerated %d RDS files from existing CI TIFFs",
                       date_str, rds_only_processed))
      next
    }

    if (!file.exists(input_tif_merged)) {
      safe_log(sprintf(" %s: merged_tif not found (skipping)", date_str))
      total_skipped_dates <- total_skipped_dates + 1
      next
    }

    # Case 2: at least one field still needs its CI TIFF built.
    # BUGFIX: the tryCatch now returns a status string and the counters are
    # updated in this frame afterwards. The previous version incremented
    # total_error with `<-` INSIDE the error handler function, which only
    # created a local copy -- the outer counter never changed, so load
    # failures were silently dropped from the final error count.
    date_status <- tryCatch({
      # Load the merged TIFF only when at least one field still needs a CI TIFF.
      raster_4band <- terra::rast(input_tif_merged)
      safe_log(sprintf(" %s: Loaded merged TIFF, processing %d fields...",
                       date_str, length(fields_need_raster)))

      # Calculate CI from 4-band
      ci_raster <- calc_ci_from_raster(raster_4band)

      # Create 5-band (R, G, B, NIR, CI).
      # Explicitly set band names after combining to ensure proper naming.
      five_band <- c(raster_4band, ci_raster)
      names(five_band) <- c("Red", "Green", "Blue", "NIR", "CI")

      # Now process only the fields that still need CI TIFF output for this date.
      for (field_name in fields_need_raster) {
        output_tif_path <- output_tifs[[field_name]]
        output_rds_path <- output_rds[[field_name]]

        tryCatch({
          # Crop 5-band TIFF to field boundary. `!!field_name` forces the
          # loop variable (not the sf `field` column) on the RHS of the
          # comparison inside filter's data mask.
          field_geom <- field_boundaries_sf %>% filter(field == !!field_name)
          five_band_cropped <- terra::crop(five_band, field_geom, mask = TRUE)

          # Skip empty tiles: cloud-masked pixels are stored as 0 in uint16
          # (NaN cannot be represented in that format). Sum of RGBNIR bands == 0
          # means no valid satellite data was captured for this field on this date.
          rgbnir_sum <- sum(
            terra::global(five_band_cropped[[1:4]], fun = "sum", na.rm = TRUE)$sum,
            na.rm = TRUE
          )

          if (rgbnir_sum == 0) {
            safe_log(sprintf(" SKIP (no data): field %s on %s",
                             field_name, date_str), "WARNING")
          } else {
            # Save 5-band field TIFF
            terra::writeRaster(five_band_cropped, output_tif_path,
                               overwrite = TRUE)

            # Extract CI statistics by sub_field (from cropped CI raster)
            ci_cropped <- five_band_cropped[[5]]  # 5th band is CI
            ci_stats <- extract_ci_by_subfield(ci_cropped, field_boundaries_sf,
                                               field_name)

            # Save RDS only when at least one sub_field row was produced
            if (!is.null(ci_stats) && nrow(ci_stats) > 0) {
              saveRDS(ci_stats, output_rds_path)
            }

            fields_processed_this_date <- fields_processed_this_date + 1
            raster_processed_this_date <- raster_processed_this_date + 1
          }
        }, error = function(e) {
          # Error in individual field, continue to next
          safe_log(sprintf(" Error processing field %s: %s",
                           field_name, e$message), "WARNING")
        })
      }

      if (fields_processed_this_date > 0) "ok" else "none"
    }, error = function(e) {
      safe_log(sprintf(" %s: Error loading or processing merged TIFF - %s",
                       date_str, e$message), "ERROR")
      "load_error"
    })

    # Update date-level counters here, in the frame that owns them.
    if (identical(date_status, "ok")) {
      total_processed_dates <- total_processed_dates + 1
      safe_log(sprintf(
        " %s: Processed %d fields (%d CI TIFFs created, %d RDS-only regenerated)",
        date_str, fields_processed_this_date,
        raster_processed_this_date, rds_only_processed
      ))
    } else if (identical(date_status, "none")) {
      total_error <- total_error + 1
      safe_log(sprintf(" %s: No field outputs were created; see warnings above",
                       date_str), "ERROR")
    } else {
      # Merged TIFF failed to load or process (already logged in the handler)
      total_error <- total_error + 1
    }
  }

  # Summary
  safe_log(sprintf("\n=== Processing Complete ==="))
  safe_log(sprintf("Dates processed: %d", total_processed_dates))
  safe_log(sprintf("Dates skipped (missing merged_tif): %d", total_skipped_dates))
  safe_log(sprintf("Dates already complete: %d", total_already_complete_dates))
  safe_log(sprintf("Errors encountered: %d", total_error))

  if (total_processed_dates > 0) {
    safe_log("Output files created in:")
    safe_log(sprintf(" TIFFs: %s", setup$field_tiles_ci_dir))
    safe_log(sprintf(" RDS: %s", setup$daily_ci_vals_dir))
  }
}

# Execute main if called from command line
if (sys.nframe() == 0) {
  main()
}