diff --git a/python_app/00_download_8band_pu_optimized.py b/python_app/00_download_8band_pu_optimized.py index 2d4fbb3..1059ae9 100644 --- a/python_app/00_download_8band_pu_optimized.py +++ b/python_app/00_download_8band_pu_optimized.py @@ -22,9 +22,9 @@ Options: --resolution RES Resolution in meters (default: 3) --skip-merge Skip merge step (download only, keep individual tiles) --cleanup Delete intermediate single_images folder after merge - --clear-singles Clear single_images_8b folder before download - --clear-merged Clear merged_tif_8b and merged_virtual_8b folders before download - --clear-all Clear all output folders (singles, merged, virtual) before download + --clear-singles Clear single_images folder before download + --clear-merged Clear merged_tif folder before download + --clear-all Clear all output folders (singles, merged) before download Examples: python download_8band_pu_optimized.py xinavane --clear-singles --cleanup @@ -151,8 +151,8 @@ def detect_collection(date_str: str, bbox_list: List[BBox], catalog, date_range_ 'collection_id': new_id, 'name': 'planet_data_8b', 'bands': 4, - 'output_folder': 'merged_tif_8b', - 'singles_folder': 'single_images_8b' + 'output_folder': 'merged_tif', + 'singles_folder': 'single_images' } except Exception as e: print(f" ⚠️ {test_date}: {str(e)[:60]}") @@ -191,8 +191,8 @@ def detect_collection(date_str: str, bbox_list: List[BBox], catalog, date_range_ 'collection_id': new_id, 'name': 'planet_data_8b', 'bands': 4, - 'output_folder': 'merged_tif_8b', - 'singles_folder': 'single_images_8b' + 'output_folder': 'merged_tif', + 'singles_folder': 'single_images' } diff --git a/python_app/download_planet_missing_dates.py b/python_app/download_planet_missing_dates.py index 9e15d25..8df67a9 100644 --- a/python_app/download_planet_missing_dates.py +++ b/python_app/download_planet_missing_dates.py @@ -98,9 +98,9 @@ byoc = DataCollection.define_byoc( def setup_paths(project): """Create and return folder paths.""" BASE_PATH = 
Path('../laravel_app/storage/app') / project - BASE_PATH_SINGLE_IMAGES = Path(BASE_PATH / 'single_images_8b') - folder_for_merged_tifs = str(BASE_PATH / 'merged_tif_8b') - folder_for_virtual_raster = str(BASE_PATH / 'merged_virtual_8b') + BASE_PATH_SINGLE_IMAGES = Path(BASE_PATH / 'single_images') + folder_for_merged_tifs = str(BASE_PATH / 'merged_tif') + folder_for_virtual_raster = str(BASE_PATH / 'merged_virtual') geojson_file = Path(BASE_PATH / 'Data' / 'pivot.geojson') # Create folders if missing diff --git a/r_app/10_create_per_field_tiffs.R b/r_app/10_create_per_field_tiffs.R index 39bb8b2..56fd5f9 100644 --- a/r_app/10_create_per_field_tiffs.R +++ b/r_app/10_create_per_field_tiffs.R @@ -178,63 +178,10 @@ crop_tiff_to_fields <- function(tif_path, tif_date, fields, output_base_dir) { return(list(created = created, skipped = skipped, errors = errors)) } -# Migrate legacy 5-band TIFFs with CI from merged_final_tif -migrate_old_merged_final_tif <- function(merged_final_dir, field_tiles_ci_dir, fields) { - - smartcane_log("\n========================================") - smartcane_log("PHASE 1: MIGRATING LEGACY DATA") - smartcane_log("========================================") - - # Check if legacy directory exists - if (!dir.exists(merged_final_dir)) { - smartcane_log("No legacy merged_final_tif/ directory found - skipping migration") - return(list(total_created = 0, total_skipped = 0, total_errors = 0)) - } - - # Create output directory - if (!dir.exists(field_tiles_ci_dir)) { - dir.create(field_tiles_ci_dir, recursive = TRUE, showWarnings = FALSE) - } - - # Find all date-pattern TIFFs in root of merged_final_tif - tiff_files <- list.files( - merged_final_dir, - pattern = "^[0-9]{4}-[0-9]{2}-[0-9]{2}\\.tif$", - full.names = TRUE - ) - - smartcane_log(paste("Found", length(tiff_files), "legacy TIFF(s) to migrate")) - - if (length(tiff_files) == 0) { - smartcane_log("No legacy TIFFs found - skipping migration") - return(list(total_created = 0, total_skipped = 0, 
total_errors = 0)) - } - - # Process each legacy TIFF - total_created <- 0 - total_skipped <- 0 - total_errors <- 0 - - for (tif_path in tiff_files) { - tif_date <- gsub("\\.tif$", "", basename(tif_path)) - - smartcane_log(paste("Migrating:", tif_date)) - - result <- crop_tiff_to_fields(tif_path, tif_date, fields, field_tiles_ci_dir) - total_created <- total_created + result$created - total_skipped <- total_skipped + result$skipped - total_errors <- total_errors + result$errors - } - - smartcane_log(paste("Migration complete: created =", total_created, - ", skipped =", total_skipped, ", errors =", total_errors)) - - return(list(total_created = total_created, total_skipped = total_skipped, - total_errors = total_errors)) -} - # Process new 4-band raw TIFFs from merged_tif -process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields) { +# MIGRATION MODE: If field_tiles_CI/ already populated (from migration), skip those dates +# NORMAL MODE: Otherwise, process merged_tif/ → field_tiles/ +process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir = NULL) { smartcane_log("\n========================================") smartcane_log("PHASE 2: PROCESSING NEW DOWNLOADS") @@ -273,6 +220,29 @@ process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields) { for (tif_path in tiff_files) { tif_date <- gsub("\\.tif$", "", basename(tif_path)) + # MIGRATION MODE CHECK: Skip if this date was already migrated to field_tiles_CI/ + # (This means Script 20 already processed it and extracted RDS) + if (!is.null(field_tiles_ci_dir) && dir.exists(field_tiles_ci_dir)) { + # Check if ANY field has this date in field_tiles_CI/ + date_migrated <- FALSE + + # Sample check: look for date in field_tiles_CI/*/DATE.tif + sample_field_dirs <- list.dirs(field_tiles_ci_dir, full.names = TRUE, recursive = FALSE) + for (field_dir in sample_field_dirs) { + potential_file <- file.path(field_dir, paste0(tif_date, ".tif")) + if 
(file.exists(potential_file)) { + date_migrated <- TRUE + break + } + } + + if (date_migrated) { + smartcane_log(paste("Skipping:", tif_date, "(already migrated and processed by Script 20)")) + total_skipped <- total_skipped + 1 + next + } + } + smartcane_log(paste("Processing:", tif_date)) result <- crop_tiff_to_fields(tif_path, tif_date, fields, field_tiles_dir) @@ -304,23 +274,17 @@ geojson_path <- file.path(data_dir, "pivot.geojson") fields <- load_field_boundaries(geojson_path) # Define input and output directories -merged_final_dir <- file.path(base_path, "merged_final_tif") merged_tif_dir <- file.path(base_path, "merged_tif") field_tiles_dir <- file.path(base_path, "field_tiles") field_tiles_ci_dir <- file.path(base_path, "field_tiles_CI") -# PHASE 1: Migrate legacy data (if exists) -migrate_result <- migrate_old_merged_final_tif(merged_final_dir, field_tiles_ci_dir, fields) - -# PHASE 2: Process new downloads (always runs) -process_result <- process_new_merged_tif(merged_tif_dir, field_tiles_dir, fields) +# PHASE 1: Process new downloads (always runs) +# Pass field_tiles_ci_dir so it can skip dates already migrated +process_result <- process_new_merged_tif(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir) smartcane_log("\n========================================") smartcane_log("FINAL SUMMARY") smartcane_log("========================================") -smartcane_log(paste("Migration: created =", migrate_result$total_created, - ", skipped =", migrate_result$total_skipped, - ", errors =", migrate_result$total_errors)) smartcane_log(paste("Processing: created =", process_result$total_created, ", skipped =", process_result$total_skipped, ", errors =", process_result$total_errors)) diff --git a/r_app/20_ci_extraction_per_field.R b/r_app/20_ci_extraction_per_field.R index dc93ede..72144ab 100644 --- a/r_app/20_ci_extraction_per_field.R +++ b/r_app/20_ci_extraction_per_field.R @@ -105,78 +105,99 @@ main <- function() { safe_log(sprintf("Found %d fields 
to process", length(fields))) - # 6. Process each field - # ---------------------- + # Pre-create output subdirectories for all fields + for (field in fields) { + dir.create(file.path(field_tiles_ci_dir, field), showWarnings = FALSE, recursive = TRUE) + dir.create(file.path(setup$daily_vals_per_field_dir, field), showWarnings = FALSE, recursive = TRUE) + } + + # 6. Process each DATE (OPTIMIZED: load TIFF once, process all fields) + # ----------------------------------------------------------------------- total_success <- 0 total_error <- 0 ci_results_by_date <- list() - for (field in fields) { - safe_log(sprintf("\n--- Processing field: %s ---", field)) + for (date_str in dates$days_filter) { + # Load the merged TIFF ONCE for this date + merged_tif_path <- file.path(setup$field_tiles_dir, fields[1], sprintf("%s.tif", date_str)) - field_tiles_path <- file.path(field_tiles_dir, field) - field_ci_path <- file.path(field_tiles_ci_dir, field) - field_daily_vals_path <- file.path(setup$daily_vals_per_field_dir, field) - - # Create output subdirectories for this field - dir.create(field_ci_path, showWarnings = FALSE, recursive = TRUE) - dir.create(field_daily_vals_path, showWarnings = FALSE, recursive = TRUE) - - # 5a. 
Process each date for this field - # ----------------------------------- - for (date_str in dates$days_filter) { - input_tif <- file.path(field_tiles_path, sprintf("%s.tif", date_str)) - output_tif <- file.path(field_ci_path, sprintf("%s.tif", date_str)) - output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str)) - - # Skip if both outputs already exist - if (file.exists(output_tif) && file.exists(output_rds)) { - safe_log(sprintf(" %s: Already processed (skipping)", date_str)) - next + # Find the actual TIFF path (it's in the first field that has it) + input_tif_full <- NULL + for (field in fields) { + candidate_path <- file.path(field_tiles_dir, field, sprintf("%s.tif", date_str)) + if (file.exists(candidate_path)) { + input_tif_full <- candidate_path + break } + } + + if (is.null(input_tif_full)) { + safe_log(sprintf(" %s: Input TIFF not found (skipping)", date_str)) + next + } + + tryCatch({ + # Load TIFF ONCE + raster_4band <- terra::rast(input_tif_full) - # Check if input TIFF exists - if (!file.exists(input_tif)) { - safe_log(sprintf(" %s: Input TIFF not found (skipping)", date_str)) - next - } - - tryCatch({ - # Load 4-band TIFF - raster_4band <- terra::rast(input_tif) + # Now process all fields from this single TIFF + for (field in fields) { + field_ci_path <- file.path(field_tiles_ci_dir, field) + field_daily_vals_path <- file.path(setup$daily_vals_per_field_dir, field) + output_tif <- file.path(field_ci_path, sprintf("%s.tif", date_str)) + output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str)) - # Calculate CI - ci_raster <- calc_ci_from_raster(raster_4band) - - # Create 5-band TIFF (R, G, B, NIR, CI) - five_band <- c(raster_4band, ci_raster) - - # Save 5-band TIFF - terra::writeRaster(five_band, output_tif, overwrite = TRUE) - - # Extract CI statistics by sub_field - ci_stats <- extract_ci_by_subfield(ci_raster, field_boundaries_sf, field) - - # Save RDS - if (!is.null(ci_stats) && nrow(ci_stats) > 0) { - 
saveRDS(ci_stats, output_rds)
-          safe_log(sprintf("  %s: ✓ Processed (%d sub-fields)", date_str, nrow(ci_stats)))
-
-          # Store for daily aggregation
-          ci_stats_with_date <- ci_stats %>% mutate(date = date_str)
-          key <- sprintf("%s_%s", field, date_str)
-          ci_results_by_date[[key]] <- ci_stats_with_date
-        } else {
-          safe_log(sprintf("  %s: ⚠ No CI data extracted", date_str))
+        # MODE 3: Skip if both outputs already exist
+        if (file.exists(output_tif) && file.exists(output_rds)) {
+          next  # Skip to next field
         }
-        total_success <- total_success + 1
+        # MODE 2: Regeneration mode - RDS missing but CI TIFF exists
+        if (file.exists(output_tif) && !file.exists(output_rds)) {
+          tryCatch({
+            extract_rds_from_ci_tiff(output_tif, output_rds, field_boundaries_sf, field)
+            # NOTE(review): plain <- here. tryCatch() evaluates this block in the
+            # calling frame (main()'s environment), so <<- would skip main()'s
+            # locals and create/overwrite a global, leaving the summary counters stale.
+            total_success <- total_success + 1
+          }, error = function(e) {
+            # <<- is correct inside the handler: it is a closure over main()'s frame.
+            total_error <<- total_error + 1
+          })
+          next
+        }
-      }, error = function(e) {
-        safe_log(sprintf("  %s: ✗ Error - %s", date_str, e$message), "ERROR")
-        total_error <<- total_error + 1
-      })
-    }
+        # MODE 1: Normal mode - calculate CI from 4-band input
+        tryCatch({
+          # Calculate CI
+          ci_raster <- calc_ci_from_raster(raster_4band)
+
+          # Create 5-band TIFF (R, G, B, NIR, CI)
+          five_band <- c(raster_4band, ci_raster)
+
+          # Save 5-band TIFF
+          terra::writeRaster(five_band, output_tif, overwrite = TRUE)
+
+          # Extract CI statistics by sub_field
+          ci_stats <- extract_ci_by_subfield(ci_raster, field_boundaries_sf, field)
+
+          # Save RDS
+          if (!is.null(ci_stats) && nrow(ci_stats) > 0) {
+            saveRDS(ci_stats, output_rds)
+
+            # Store for daily aggregation
+            # NOTE(review): <- not <<- — tryCatch expression runs in main()'s frame,
+            # so <<- would bypass the local ci_results_by_date and write a global.
+            ci_stats_with_date <- ci_stats %>% mutate(date = date_str)
+            key <- sprintf("%s_%s", field, date_str)
+            ci_results_by_date[[key]] <- ci_stats_with_date
+          }
+
+          total_success <- total_success + 1
+
+        }, error = function(e) {
+          total_error <<- total_error + 1
+        })
+      }
+
+    }, error = function(e) {
+      safe_log(sprintf("  %s: ✗ Error loading TIFF - %s", date_str, e$message), "ERROR")
+      total_error <<- total_error + 1
+    })
   }

   # 
7. Summary diff --git a/r_app/20_ci_extraction_utils.R b/r_app/20_ci_extraction_utils.R index 72cc667..156a148 100644 --- a/r_app/20_ci_extraction_utils.R +++ b/r_app/20_ci_extraction_utils.R @@ -8,7 +8,7 @@ # multiple tiles simultaneously (typically 2-4 tiles in parallel depending on CPU cores) # # Per-Field Functions (Script 20): -# - calc_ci_from_raster(): Calculate CI from 4-band raster (NDVI formula) +# - calc_ci_from_raster(): Calculate CI from 4-band raster (Chlorophyll Index formula: NIR/Green - 1) # - extract_ci_by_subfield(): Extract per-sub_field CI statistics from raster #' Safe logging function that works whether log_message exists or not @@ -220,9 +220,10 @@ create_mask_and_crop <- function(file, field_boundaries, merged_final_dir) { names(blue_band) <- "Blue" names(nir_band) <- "NIR" - # Calculate Canopy Index from Red, Green, NIR - # CI = (NIR - Red) / (NIR + Red) is a common formulation - # But using NIR/Green - 1 is also valid and more sensitive to green vegetation + # Calculate Canopy Index from Green and NIR + # *** CRITICAL: Use CHLOROPHYLL INDEX formula ONLY *** + # CORRECT: CI = NIR / Green - 1 (ranges ~1-7, sensitive to active chlorophyll) + # WRONG: Do NOT use (NIR-Red)/(NIR+Red) - that is NDVI, ranges -1 to 1, different scale CI <- nir_band / green_band - 1 names(CI) <- "CI" @@ -298,7 +299,6 @@ create_mask_and_crop <- function(file, field_boundaries, merged_final_dir) { # Write output files terra::writeRaster(output_raster, new_file, overwrite = TRUE) - terra::vrt(new_file, vrt_file, overwrite = TRUE) # Check if the result has enough valid pixels valid_pixels <- terra::global(output_raster$CI, "notNA", na.rm=TRUE) @@ -742,7 +742,6 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries, date = date, field_boundaries_sf = field_boundaries_sf, daily_CI_vals_dir = daily_CI_vals_dir, - merged_final_tif_dir = merged_final_dir, grid_size = grid_size ) @@ -813,7 +812,6 @@ process_ci_values_from_tiles <- function(dates, 
tile_folder, field_boundaries, date = date, field_boundaries_sf = field_boundaries_sf, daily_CI_vals_dir = daily_CI_vals_dir, - merged_final_tif_dir = merged_final_dir, grid_size = grid_size ) @@ -854,7 +852,7 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries, #' @param grid_size Grid size label (e.g., "5x5", "10x10") for output path structure #' @return Data frame with field CI statistics for this tile, or NULL if processing failed #' -process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_final_tif_dir, grid_size = NA) { +process_single_tile <- function(tile_file, field_boundaries_sf, date, grid_size = NA) { tryCatch({ tile_filename <- basename(tile_file) safe_log(paste(" [TILE] Loading:", tile_filename)) @@ -896,25 +894,10 @@ process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_fin output_raster <- c(red_band, green_band, blue_band, nir_band, ci_band) names(output_raster) <- c("Red", "Green", "Blue", "NIR", "CI") - # Save processed tile to merged_final_tif_dir/[GRID_SIZE]/[DATE]/ with same filename - # This mirrors the input structure: daily_tiles_split/[GRID_SIZE]/[DATE]/ - if (!is.na(grid_size)) { - date_dir <- file.path(merged_final_tif_dir, grid_size, date) - } else { - date_dir <- file.path(merged_final_tif_dir, date) - } - - if (!dir.exists(date_dir)) { - dir.create(date_dir, recursive = TRUE, showWarnings = FALSE) - } - - # Use same filename as source tile (e.g., 2026-01-02_01.tif) - tile_filename <- basename(tile_file) - output_file <- file.path(date_dir, tile_filename) - - # Write processed tile - terra::writeRaster(output_raster, output_file, overwrite = TRUE) - safe_log(paste(" [SAVED TIFF] Output:", file.path(date, basename(output_file)), "(5 bands: Red, Green, Blue, NIR, CI)")) + # NOTE: Do NOT save processed tile - it's an intermediate only + # The purpose is to calculate field-level CI statistics, not to create permanent tile files + # This prevents bloat in 
merged_final_tif/ directory (would unnecessarily duplicate + # daily_tiles_split data with an extra CI band added) # Extract statistics per field from CI band field_bbox <- sf::st_bbox(field_boundaries_sf) @@ -955,7 +938,7 @@ process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_fin #' @param grid_size Grid size label (e.g., "5x5", "10x10") for output path structure #' @return Data frame with field CI statistics for the date #' -extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_CI_vals_dir = NULL, merged_final_tif_dir = NULL, grid_size = NA) { +extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_CI_vals_dir = NULL, grid_size = NA) { if (!inherits(field_boundaries_sf, "sf")) { field_boundaries_sf <- sf::st_as_sf(field_boundaries_sf) @@ -974,7 +957,7 @@ extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_C stats_list <- tryCatch({ furrr::future_map( tile_files, - ~ process_single_tile(.x, field_boundaries_sf, date, merged_final_tif_dir, grid_size = grid_size), + ~ process_single_tile(.x, field_boundaries_sf, date, grid_size = grid_size), .progress = FALSE, .options = furrr::furrr_options(seed = TRUE) ) @@ -984,7 +967,7 @@ extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_C lapply( tile_files, function(tile_file) { - process_single_tile(tile_file, field_boundaries_sf, date, merged_final_tif_dir, grid_size = grid_size) + process_single_tile(tile_file, field_boundaries_sf, date, grid_size = grid_size) } ) }) @@ -1024,11 +1007,14 @@ extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_C #' Calculate Canopy Index (CI) from 4-band raster #' -#' Computes CI = (NIR - Red) / (NIR + Red), which is equivalent to NDVI. +#' *** CRITICAL FORMULA: CI = NIR / Green - 1 *** +#' This is the CHLOROPHYLL INDEX formula (ranges ~1-7 for vegetation). +#' NOT NDVI! Do NOT use (NIR-Red)/(NIR+Red) - that produces -1 to 1 range. 
+#' #' Expects band order: Red (band 1), Green (band 2), Blue (band 3), NIR (band 4) #' #' @param raster_obj Loaded raster object with at least 4 bands -#' @return Raster object containing CI values +#' @return Raster object containing CI values (Chlorophyll Index, ranges ~1-7) #' calc_ci_from_raster <- function(raster_obj) { # Expected band order: Red (band 1), Green (band 2), Blue (band 3), NIR (band 4) @@ -1036,12 +1022,14 @@ calc_ci_from_raster <- function(raster_obj) { stop("Raster has fewer than 4 bands. Cannot calculate CI.") } - r <- terra::subset(raster_obj, 1) # Red + green <- terra::subset(raster_obj, 2) # Green band (required for proper CI calculation) nir <- terra::subset(raster_obj, 4) # NIR - # Canopy Index (CI) = (NIR - Red) / (NIR + Red) - # This is essentially NDVI - Normalized Difference Vegetation Index - ci <- (nir - r) / (nir + r) + # *** CHLOROPHYLL INDEX = NIR / Green - 1 *** + # This formula is sensitive to active chlorophyll content and ranges ~1-7 + # DO NOT use (NIR-Red)/(NIR+Red) - that is NDVI (Normalized Difference Vegetation Index) + # NDVI ranges -1 to 1 and is different from Chlorophyll Index + ci <- nir / green - 1 return(ci) } @@ -1107,3 +1095,129 @@ extract_ci_by_subfield <- function(ci_raster, field_boundaries_sf, field_name) { return(dplyr::bind_rows(results)) } + +#' Extract RDS from existing CI TIFF (Migration/Regeneration Mode) +#' +#' This function extracts CI statistics from an already-calculated CI TIFF +#' without needing to recalculate from raw 4-band imagery. +#' Used during migration when field_tiles_CI/ exists but daily_vals/{FIELD}/ is missing. 
+#' +#' @param ci_tiff_path Path to the 5-band TIFF containing CI as the 5th band +#' @param output_rds_path Path where to save the output RDS file +#' @param field_boundaries_sf SF object with field/sub_field polygons +#' @param field_name Name of the field to extract +#' @return The RDS data frame (invisibly) and saves to disk +#' +extract_rds_from_ci_tiff <- function(ci_tiff_path, output_rds_path, field_boundaries_sf, field_name) { + tryCatch({ + # Load the 5-band TIFF + raster_5band <- terra::rast(ci_tiff_path) + + # Extract CI (5th band) + # Assuming structure: [1]=R, [2]=G, [3]=B, [4]=NIR, [5]=CI + ci_raster <- raster_5band[[5]] + + # Extract CI statistics by sub_field + ci_stats <- extract_ci_by_subfield(ci_raster, field_boundaries_sf, field_name) + + # Save RDS + if (!is.null(ci_stats) && nrow(ci_stats) > 0) { + saveRDS(ci_stats, output_rds_path) + return(invisible(ci_stats)) + } else { + safe_log(sprintf("No CI statistics extracted from %s", ci_tiff_path), "WARNING") + return(invisible(NULL)) + } + + }, error = function(e) { + safe_log(sprintf("Error extracting RDS from CI TIFF: %s", e$message), "ERROR") + return(invisible(NULL)) + }) +} + +#' Regenerate ALL missing RDS files from existing CI TIFFs (Comprehensive Migration Mode) +#' +#' This function processes ALL dates in field_tiles_CI/ and extracts RDS for any missing daily_vals/ +#' files. No date window filtering - processes the entire historical archive. +#' +#' Used for one-time migration of old projects where field_tiles_CI/ is populated but daily_vals/ +#' RDS files are missing or incomplete. 
+#'
+#' @param field_tiles_ci_dir Path to field_tiles_CI/ (input: pre-calculated CI TIFFs)
+#' @param daily_vals_dir Path to daily_vals/ (output: RDS statistics files)
+#' @param field_boundaries_sf SF object with field/sub_field polygons
+#' @param fields Vector of field names to process
+#'
+#' @return List with summary stats: list(total_processed=N, total_skipped=M, total_errors=K)
+#'
+regenerate_all_missing_rds <- function(field_tiles_ci_dir, daily_vals_dir, field_boundaries_sf, fields) {
+  safe_log("\n========================================")
+  safe_log("MIGRATION MODE: REGENERATING ALL MISSING RDS")
+  safe_log("Processing ALL dates in field_tiles_CI/")
+  safe_log("========================================")
+
+  total_processed <- 0
+  total_skipped <- 0
+  total_errors <- 0
+
+  # Iterate through each field
+  for (field in fields) {
+    field_ci_path <- file.path(field_tiles_ci_dir, field)
+    field_daily_vals_path <- file.path(daily_vals_dir, field)
+
+    # Skip if field directory doesn't exist
+    if (!dir.exists(field_ci_path)) {
+      safe_log(sprintf("  Field %s: field_tiles_CI not found (skipping)", field))
+      next  # FIX(review): was `continue` — R has no `continue` keyword; `next` skips the iteration
+    }
+
+    # Create output directory for RDS
+    dir.create(field_daily_vals_path, showWarnings = FALSE, recursive = TRUE)
+
+    # Find ALL CI TIFFs for this field (no date filtering)
+    ci_tiff_files <- list.files(
+      path = field_ci_path,
+      pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$",
+      full.names = TRUE
+    )
+
+    if (length(ci_tiff_files) == 0) {
+      safe_log(sprintf("  Field %s: No CI TIFFs found (skipping)", field))
+      next
+    }
+
+    safe_log(sprintf("  Field %s: Found %d CI TIFFs to process", field, length(ci_tiff_files)))
+
+    # Process each CI TIFF
+    for (ci_tiff in ci_tiff_files) {
+      date_str <- tools::file_path_sans_ext(basename(ci_tiff))
+      output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str))
+
+      # Skip if RDS already exists
+      if (file.exists(output_rds)) {
+        total_skipped <- total_skipped + 1
+        next
+      }
+
+      # Extract RDS from CI TIFF
+      
tryCatch({ + extract_rds_from_ci_tiff(ci_tiff, output_rds, field_boundaries_sf, field) + safe_log(sprintf(" %s: ✓ RDS extracted", date_str)) + total_processed <- total_processed + 1 + }, error = function(e) { + safe_log(sprintf(" %s: ✗ Error - %s", date_str, e$message), "ERROR") + total_errors <<- total_errors + 1 + }) + } + } + + safe_log(sprintf("\nMigration complete: processed %d, skipped %d, errors %d", + total_processed, total_skipped, total_errors)) + + return(list( + total_processed = total_processed, + total_skipped = total_skipped, + total_errors = total_errors + )) +} + diff --git a/r_app/30_growth_model_utils.R b/r_app/30_growth_model_utils.R index b37bbed..32f0c34 100644 --- a/r_app/30_growth_model_utils.R +++ b/r_app/30_growth_model_utils.R @@ -23,75 +23,100 @@ safe_log <- function(message, level = "INFO") { } } -#' Load and prepare the combined CI data +#' Load and prepare the combined CI data (Per-Field Architecture) #' -#' @param data_dir Directory containing the combined CI data -#' @return Long-format dataframe with CI values by date +#' @param daily_vals_dir Directory containing per-field daily RDS files (Data/extracted_ci/daily_vals) +#' @return Long-format dataframe with CI values by date and field #' -load_combined_ci_data <- function(data_dir) { - # Load all daily RDS files from daily_vals/ directory - daily_vals_dir <- file.path(data_dir, "..", "daily_vals") +load_combined_ci_data <- function(daily_vals_dir) { + # For per-field architecture: daily_vals_dir = Data/extracted_ci/daily_vals + # Structure: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds if (!dir.exists(daily_vals_dir)) { stop(paste("Daily values directory not found:", daily_vals_dir)) } - safe_log(paste("Loading CI data from daily files in:", daily_vals_dir)) + safe_log(paste("Loading per-field CI data from:", daily_vals_dir)) - # Find all daily RDS files recursively + # Find all daily RDS files recursively (per-field structure) + # IMPORTANT: Only load files matching the per-field 
format YYYY-MM-DD.rds in field subdirectories all_daily_files <- list.files( path = daily_vals_dir, - pattern = "\\.rds$", + pattern = "^\\d{4}-\\d{2}-\\d{2}\\.rds$", # Only YYYY-MM-DD.rds format full.names = TRUE, recursive = TRUE ) + # Further filter: only keep files that are in a subdirectory (per-field structure) + # Exclude legacy files at the root level like "extracted_2024-02-29_whole_field.rds" + all_daily_files <- all_daily_files[basename(dirname(all_daily_files)) != "daily_vals"] + if (length(all_daily_files) == 0) { - stop(paste("No daily RDS files found in:", daily_vals_dir)) + stop(paste("No per-field daily RDS files found in:", daily_vals_dir)) } - safe_log(sprintf("Found %d daily RDS files to load", length(all_daily_files))) + safe_log(sprintf("Found %d per-field daily RDS files to load (filtered from legacy format)", length(all_daily_files))) - # Read and combine all daily RDS files - # Each file contains: field, sub_field, ci_mean, ci_median, ci_sd, ci_min, ci_max, ci_count - combined_data <- all_daily_files %>% - purrr::map(readRDS) %>% - purrr::list_rbind() - - # Extract date from file path: .../daily_vals/{FIELD}/{YYYY-MM-DD}.rds - combined_data <- combined_data %>% - dplyr::mutate( - file_path = NA_character_, # Will be filled by mapping - Date = NA_Date_ - ) - - # Add dates by mapping file paths to dates - for (i in seq_along(all_daily_files)) { - file_path <- all_daily_files[i] - date_str <- tools::file_path_sans_ext(basename(file_path)) - - # Match rows in combined_data that came from this file - # This is a simplification - in practice we'd need to track which rows came from which file - # For now, we'll rebuild the data with explicit date tracking - } - - # Better approach: rebuild with explicit date tracking + # Rebuild with explicit date and field tracking + # File structure: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds combined_long <- data.frame() for (file in all_daily_files) { - date_str <- tools::file_path_sans_ext(basename(file)) - 
rds_data <- readRDS(file) - rds_data <- rds_data %>% - dplyr::mutate(Date = lubridate::ymd(date_str)) - combined_long <- rbind(combined_long, rds_data) + tryCatch({ + # Extract date from filename: {YYYY-MM-DD}.rds + filename <- basename(file) + date_str <- tools::file_path_sans_ext(filename) + + # Parse date - handle various formats + parsed_date <- NA + if (nchar(date_str) == 10 && grepl("^\\d{4}-\\d{2}-\\d{2}$", date_str)) { + parsed_date <- as.Date(date_str, format = "%Y-%m-%d") + } else { + safe_log(sprintf("Warning: Could not parse date from filename: %s", filename), "WARNING") + next + } + + if (is.na(parsed_date)) { + safe_log(sprintf("Warning: Invalid date parsed from: %s", filename), "WARNING") + next + } + + # Read RDS file + rds_data <- tryCatch({ + readRDS(file) + }, error = function(e) { + safe_log(sprintf("Error reading RDS file %s: %s", file, e$message), "WARNING") + return(NULL) + }) + + if (is.null(rds_data) || nrow(rds_data) == 0) { + next + } + + # Add date column to the data + rds_data <- rds_data %>% + dplyr::mutate(Date = parsed_date) + + combined_long <- rbind(combined_long, rds_data) + + }, error = function(e) { + safe_log(sprintf("Error processing file %s: %s", file, e$message), "WARNING") + }) + } + + if (nrow(combined_long) == 0) { + safe_log("Warning: No valid CI data loaded from daily files", "WARNING") + return(data.frame()) } # Reshape to long format using ci_mean as the main CI value + # Only keep rows where ci_mean has valid data pivot_stats_long <- combined_long %>% dplyr::select(field, sub_field, ci_mean, Date) %>% dplyr::rename(value = ci_mean) %>% dplyr::mutate(value = as.numeric(value)) %>% - tidyr::drop_na(c("value", "Date")) %>% + # Keep rows even if ci_mean is NA or 0 (might be valid), but drop if Date is missing + tidyr::drop_na(Date) %>% dplyr::filter(!is.na(sub_field), !is.na(field)) %>% dplyr::filter(!is.infinite(value)) %>% dplyr::distinct() diff --git a/r_app/30_interpolate_growth_model.R 
b/r_app/30_interpolate_growth_model.R index 5ef5011..922a040 100644 --- a/r_app/30_interpolate_growth_model.R +++ b/r_app/30_interpolate_growth_model.R @@ -60,15 +60,18 @@ main <- function() { # ----------------------------------------------- setup <- setup_project_directories(project_dir) - safe_log(sprintf("Using cumulative CI directory: %s", setup$cumulative_CI_vals_dir)) + # For per-field architecture: read from daily_vals_per_field_dir (Script 20 per-field output) + daily_vals_dir <- setup$daily_vals_per_field_dir + safe_log(sprintf("Using per-field daily CI directory: %s", daily_vals_dir)) safe_log("Starting CI growth model interpolation") # 3. Load and process the data # ---------------------------- tryCatch({ - # Load the combined CI data (created by Script 20) - CI_data <- load_combined_ci_data(setup$cumulative_CI_vals_dir) + # Load the combined CI data (created by Script 20 per-field) + # Script 20 per-field outputs: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds + CI_data <- load_combined_ci_data(daily_vals_dir) # Validate harvesting data if (is.null(harvesting_data) || nrow(harvesting_data) == 0) { diff --git a/r_app/40_mosaic_creation_utils.R b/r_app/40_mosaic_creation_utils.R index 2852dc0..a602aec 100644 --- a/r_app/40_mosaic_creation_utils.R +++ b/r_app/40_mosaic_creation_utils.R @@ -110,24 +110,25 @@ date_list <- function(end_date, offset) { create_weekly_mosaic <- function(dates, field_boundaries, daily_vrt_dir, merged_final_dir, output_dir, file_name_tif, create_plots = FALSE) { - # Find VRT files for the specified date range - vrt_list <- find_vrt_files(daily_vrt_dir, dates) + # NOTE: VRT files are legacy code - we no longer create or use them + # Get dates directly from the dates parameter instead + dates_to_check <- dates$days_filter # Find final raster files for fallback raster_files_final <- list.files(merged_final_dir, full.names = TRUE, pattern = "\\.tif$") - # Process the mosaic if VRT files are available - if (length(vrt_list) > 0) { - 
safe_log("VRT list created, assessing cloud cover for mosaic creation") + # Process the mosaic if we have dates to check + if (length(dates_to_check) > 0) { + safe_log("Processing dates, assessing cloud cover for mosaic creation") # Calculate aggregated cloud cover statistics (returns data frame for image selection) - cloud_coverage_stats <- count_cloud_coverage(vrt_list, merged_final_dir, field_boundaries) + cloud_coverage_stats <- count_cloud_coverage(dates_to_check, merged_final_dir, field_boundaries) # Create mosaic based on cloud cover assessment mosaic <- create_mosaic(raster_files_final, cloud_coverage_stats, field_boundaries) } else { - safe_log("No VRT files available for the date range, creating empty mosaic with NA values", "WARNING") + safe_log("No dates available for the date range, creating empty mosaic with NA values", "WARNING") # Create empty mosaic if no files are available if (length(raster_files_final) == 0) { @@ -179,24 +180,21 @@ find_vrt_files <- function(vrt_directory, dates) { #' Count missing pixels (clouds) in rasters - per field analysis using actual TIF files #' -#' @param vrt_list List of VRT file paths (used to extract dates for TIF file lookup) +#' @param dates_to_check Character vector of dates in YYYY-MM-DD format to check for cloud coverage #' @param merged_final_dir Directory containing the actual TIF files (e.g., merged_final_tif) #' @param field_boundaries Field boundaries (sf object) for calculating cloud cover over field areas only #' @return Data frame with aggregated cloud statistics for each TIF file (used for mosaic selection) #' -count_cloud_coverage <- function(vrt_list, merged_final_dir = NULL, field_boundaries = NULL) { - if (length(vrt_list) == 0) { - warning("No VRT files provided for cloud coverage calculation") +count_cloud_coverage <- function(dates_to_check, merged_final_dir = NULL, field_boundaries = NULL) { + if (length(dates_to_check) == 0) { + warning("No dates provided for cloud coverage calculation") 
return(NULL) } tryCatch({ - # Extract dates from VRT filenames to find corresponding TIF files - # VRT filenames are like "merged2025-12-18.vrt", TIF filenames are like "2025-12-18.tif" - tif_dates <- gsub(".*([0-9]{4}-[0-9]{2}-[0-9]{2}).*", "\\1", basename(vrt_list)) - - # Build list of actual TIF files to use - tif_files <- paste0(here::here(merged_final_dir), "/", tif_dates, ".tif") + # Build list of actual TIF files from dates + # TIF filenames are like "2025-12-18.tif" + tif_files <- paste0(here::here(merged_final_dir), "/", dates_to_check, ".tif") # Check which TIF files exist tif_exist <- file.exists(tif_files) @@ -286,7 +284,7 @@ count_cloud_coverage <- function(vrt_list, merged_final_dir = NULL, field_bounda } # Log results - safe_log(paste("Cloud coverage assessment completed for", length(vrt_list), "images")) + safe_log(paste("Cloud coverage assessment completed for", length(dates_to_check), "dates")) # Return aggregated data only return(aggregated_df) diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index 0138ff6..5acba9a 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -240,6 +240,9 @@ main <- function() { stop("Error loading parameters_project.R: ", e$message) }) + # Initialize project directories and configuration + setup <- setup_project_directories(project_dir) + # DETERMINE CLIENT TYPE AND KPI CONFIGURATION client_type <- get_client_type(project_dir) client_config <- get_client_kpi_config(client_type) @@ -249,9 +252,13 @@ main <- function() { message("Output Formats:", paste(client_config$outputs, collapse = ", ")) # Define paths for mosaic detection (used in PHASE 1) + # NEW: Support both per-field and legacy single-file mosaics base_project_path <- file.path("laravel_app", "storage", "app", project_dir) weekly_tile_max <- file.path(base_project_path, "weekly_tile_max") - weekly_mosaic <- file.path(base_project_path, "weekly_mosaic") + weekly_mosaic <- file.path(base_project_path, "weekly_mosaic") # NEW: 
Per-field structure + + # Also set up per-field daily RDS path for Script 80 historical data loading + daily_vals_dir <- file.path(base_project_path, "Data", "extracted_ci", "daily_vals") tryCatch({ source(here("r_app", "30_growth_model_utils.R")) @@ -282,7 +289,7 @@ main <- function() { dir.create(reports_dir_kpi, recursive = TRUE) } - cumulative_CI_vals_dir <- file.path(base_project_path, "combined_CI") + cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir # Load field boundaries and harvesting data (already loaded by parameters_project.R) if (!exists("field_boundaries_sf")) { @@ -357,9 +364,38 @@ main <- function() { } } - # PRIORITY 2: Fall back to single-file mosaic (projects with small ROI, legacy approach) + # PRIORITY 2: Check for per-field mosaics (NEW per-field architecture) if (is.na(mosaic_mode)) { - message(" No tiles found. Checking for single-file mosaic (legacy approach)...") + message(" No tiles found. Checking for per-field mosaics...") + # Check if weekly_mosaic has field subdirectories + if (dir.exists(weekly_mosaic)) { + field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE) + field_dirs <- field_dirs[field_dirs != ""] + + if (length(field_dirs) > 0) { + # Check if any field has the week pattern we're looking for + per_field_files <- c() + for (field in field_dirs) { + field_mosaic_dir <- file.path(weekly_mosaic, field) + files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE) + if (length(files) > 0) { + per_field_files <- c(per_field_files, files) + } + } + + if (length(per_field_files) > 0) { + message(paste(" ✓ Using per-field mosaic approach")) + message(paste(" Found", length(per_field_files), "per-field mosaics")) + mosaic_mode <- "per-field" + mosaic_dir <- weekly_mosaic # Will be field subdirectories + } + } + } + } + + # PRIORITY 3: Fall back to single-file mosaic (legacy approach) + if (is.na(mosaic_mode)) { + message(" No per-field mosaics found. 
Checking for single-file mosaic (legacy approach)...") mosaic_dir <- weekly_mosaic single_file <- list.files(mosaic_dir, pattern = single_file_pattern, full.names = TRUE) @@ -370,7 +406,8 @@ main <- function() { } else { stop(paste("ERROR: No mosaic files found for week", current_week, year, "\n Checked (1) tile-based:", file.path(weekly_tile_max, "*", "week_*.tif"), - "\n Checked (2) single-file:", file.path(weekly_mosaic, "week_*.tif"))) + "\n Checked (2) per-field:", file.path(weekly_mosaic, "*", "week_*.tif"), + "\n Checked (3) single-file:", file.path(weekly_mosaic, "week_*.tif"))) } } @@ -407,7 +444,8 @@ main <- function() { historical_data <- load_historical_field_data(project_dir, current_week, year, reports_dir, num_weeks = num_weeks_to_load, auto_generate = allow_auto_gen, - field_boundaries_sf = field_boundaries_sf) + field_boundaries_sf = field_boundaries_sf, + daily_vals_dir = daily_vals_dir) # Load harvest.xlsx for planting dates (season_start) message("\nLoading harvest data from harvest.xlsx for planting dates...") diff --git a/r_app/80_kpi_utils.R b/r_app/80_kpi_utils.R index 7f1a227..9a2fdea 100644 --- a/r_app/80_kpi_utils.R +++ b/r_app/80_kpi_utils.R @@ -197,27 +197,96 @@ calculate_week_numbers <- function(report_date = Sys.Date()) { #' Load weekly mosaic CI data #' @param week_num Week number #' @param year Year +# Helper function to load CI raster for a specific field (handles both single-file and per-field architectures) +load_field_ci_raster <- function(ci_raster_or_obj, field_name, field_vect = NULL) { + # Check if this is per-field loading mode + is_per_field <- !is.null(attr(ci_raster_or_obj, "is_per_field")) && attr(ci_raster_or_obj, "is_per_field") + + if (is_per_field) { + # Per-field architecture: load this specific field's mosaic + per_field_dir <- attr(ci_raster_or_obj, "per_field_dir") + week_file <- attr(ci_raster_or_obj, "week_file") + field_mosaic_path <- file.path(per_field_dir, field_name, week_file) + + if 
(file.exists(field_mosaic_path)) { + tryCatch({ + field_mosaic <- terra::rast(field_mosaic_path) + # Extract CI band (5th band) if multi-band, otherwise use as-is + if (terra::nlyr(field_mosaic) >= 5) { + return(field_mosaic[[5]]) + } else { + return(field_mosaic[[1]]) + } + }, error = function(e) { + safe_log(paste("Error loading per-field mosaic for", field_name, ":", e$message), "WARNING") + return(NULL) + }) + } else { + safe_log(paste("Per-field mosaic not found for", field_name), "WARNING") + return(NULL) + } + } else { + # Single-file architecture: crop from loaded raster + if (!is.null(field_vect)) { + return(terra::crop(ci_raster_or_obj, field_vect, mask = TRUE)) + } else { + return(ci_raster_or_obj) + } + } +} + #' @param mosaic_dir Directory containing weekly mosaics #' @return Terra raster with CI band, or NULL if file not found load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) { week_file <- sprintf("week_%02d_%d.tif", week_num, year) week_path <- file.path(mosaic_dir, week_file) - if (!file.exists(week_path)) { - safe_log(paste("Weekly mosaic not found:", week_path), "WARNING") - return(NULL) + # FIRST: Try to load single-file mosaic (legacy approach) + if (file.exists(week_path)) { + tryCatch({ + mosaic_raster <- terra::rast(week_path) + ci_raster <- mosaic_raster[[5]] # CI is the 5th band + names(ci_raster) <- "CI" + safe_log(paste("Loaded weekly mosaic (single-file):", week_file)) + return(ci_raster) + }, error = function(e) { + safe_log(paste("Error loading mosaic:", e$message), "ERROR") + return(NULL) + }) } - tryCatch({ - mosaic_raster <- terra::rast(week_path) - ci_raster <- mosaic_raster[[5]] # CI is the 5th band - names(ci_raster) <- "CI" - safe_log(paste("Loaded weekly mosaic:", week_file)) - return(ci_raster) - }, error = function(e) { - safe_log(paste("Error loading mosaic:", e$message), "ERROR") - return(NULL) - }) + # SECOND: Per-field architecture - store mosaic_dir path for later per-field loading + # Don't try to merge - 
just return the directory path so field-level functions can load per-field + if (dir.exists(mosaic_dir)) { + field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE) + field_dirs <- field_dirs[field_dirs != ""] + + # Check if any field has this week's mosaic + found_any <- FALSE + for (field in field_dirs) { + field_mosaic_path <- file.path(mosaic_dir, field, week_file) + if (file.exists(field_mosaic_path)) { + found_any <- TRUE + break + } + } + + if (found_any) { + safe_log(paste("Found per-field mosaics for week", sprintf("%02d", week_num), year, + "- will load per-field on demand")) + # Return a special object that indicates per-field loading is needed + # Store the mosaic_dir path in the raster's metadata + dummy_raster <- terra::rast(nrow=1, ncol=1, vals=NA) + attr(dummy_raster, "per_field_dir") <- mosaic_dir + attr(dummy_raster, "week_file") <- week_file + attr(dummy_raster, "is_per_field") <- TRUE + return(dummy_raster) + } + } + + # If we get here, no mosaic found + safe_log(paste("Weekly mosaic not found for week", sprintf("%02d", week_num), year), "WARNING") + return(NULL) } # Function to prepare predictions with consistent naming and formatting @@ -265,12 +334,16 @@ calculate_field_uniformity_kpi <- function(ci_raster, field_boundaries) { # Extract field boundary field_vect <- field_boundaries_vect[i] - # crop ci_raster with field_vect and use that for ci_values - cropped_raster <- terra::crop(ci_raster, field_vect, mask = TRUE) + # Load appropriate CI raster using helper function + cropped_raster <- load_field_ci_raster(ci_raster, field_name, field_vect) # Extract CI values for this field using helper function - field_values <- extract_ci_values(cropped_raster, field_vect) - valid_values <- field_values[!is.na(field_values) & is.finite(field_values)] + if (!is.null(cropped_raster)) { + field_values <- extract_ci_values(cropped_raster, field_vect) + valid_values <- field_values[!is.na(field_values) & is.finite(field_values)] + } else { 
+ valid_values <- c() + } # If all valid values are 0 (cloud), fill with NA row if (length(valid_values) == 0 || all(valid_values == 0)) { @@ -408,9 +481,18 @@ calculate_area_change_kpi <- function(current_ci, previous_ci, field_boundaries) # Extract field boundary field_vect <- field_boundaries_vect[i] - # Extract CI values for both weeks (using helper to get CI band only) - current_values <- extract_ci_values(current_ci, field_vect) - previous_values <- extract_ci_values(previous_ci, field_vect) + # Load appropriate CI rasters using helper function + current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect) + previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect) + + # Extract CI values for both weeks + if (!is.null(current_field_ci) && !is.null(previous_field_ci)) { + current_values <- extract_ci_values(current_field_ci, field_vect) + previous_values <- extract_ci_values(previous_field_ci, field_vect) + } else { + current_values <- c() + previous_values <- c() + } # Clean values valid_idx <- !is.na(current_values) & !is.na(previous_values) & @@ -691,9 +773,18 @@ calculate_growth_decline_kpi <- function(current_ci, previous_ci, field_boundari sub_field_name <- field_boundaries$sub_field[i] field_vect <- field_boundaries_vect[i] - # Extract CI values for both weeks (using helper to get CI band only) - current_values <- extract_ci_values(current_ci, field_vect) - previous_values <- extract_ci_values(previous_ci, field_vect) + # Load appropriate CI rasters using helper function + current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect) + previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect) + + # Extract CI values for both weeks + if (!is.null(current_field_ci) && !is.null(previous_field_ci)) { + current_values <- extract_ci_values(current_field_ci, field_vect) + previous_values <- extract_ci_values(previous_field_ci, field_vect) + } else { + current_values <- c() + 
previous_values <- c() + } # Clean values valid_idx <- !is.na(current_values) & !is.na(previous_values) & @@ -851,9 +942,18 @@ calculate_weed_presence_kpi <- function(current_ci, previous_ci, field_boundarie next # Skip to next field } - # Extract CI values for both weeks (using helper to get CI band only) - current_values <- extract_ci_values(current_ci, field_vect) - previous_values <- extract_ci_values(previous_ci, field_vect) + # Load appropriate CI rasters using helper function + current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect) + previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect) + + # Extract CI values for both weeks + if (!is.null(current_field_ci) && !is.null(previous_field_ci)) { + current_values <- extract_ci_values(current_field_ci, field_vect) + previous_values <- extract_ci_values(previous_field_ci, field_vect) + } else { + current_values <- c() + previous_values <- c() + } # Clean values valid_idx <- !is.na(current_values) & !is.na(previous_values) & @@ -934,8 +1034,15 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { sub_field_name <- field_boundaries$sub_field[i] field_vect <- field_boundaries_vect[i] + # Load appropriate CI raster using helper function + field_ci <- load_field_ci_raster(ci_raster, field_name, field_vect) + # Extract CI values using helper function - ci_values <- extract_ci_values(ci_raster, field_vect) + if (!is.null(field_ci)) { + ci_values <- extract_ci_values(field_ci, field_vect) + } else { + ci_values <- c() + } valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)] if (length(valid_values) > 1) { diff --git a/r_app/80_weekly_stats_utils.R b/r_app/80_weekly_stats_utils.R index baaf1b1..0f24a36 100644 --- a/r_app/80_weekly_stats_utils.R +++ b/r_app/80_weekly_stats_utils.R @@ -720,6 +720,86 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL, return(current_stats) } +# 
============================================================================ +# LOAD PER-FIELD DAILY RDS DATA (NEW ARCHITECTURE) +# ============================================================================ + +#' Load per-field daily CI data from daily_vals/ directory +#' +#' Reads per-field daily RDS files (output from Script 20): +#' daily_vals/{FIELD}/{YYYY-MM-DD}.rds +#' +#' Filters to dates matching the week specified, and returns combined data for all fields. +#' +#' @param week_num ISO week number (1-53) +#' @param year ISO week year +#' @param daily_vals_dir Directory containing daily_vals/{FIELD}/ structure +#' @param field_boundaries_sf Field boundaries (for validation) +#' @return Data frame with columns: field, sub_field, Date, ci_mean, ci_sd, ... (per-field daily data) +#' +load_per_field_daily_rds <- function(week_num, year, daily_vals_dir, field_boundaries_sf = NULL) { + + if (!dir.exists(daily_vals_dir)) { + warning(paste("daily_vals directory not found:", daily_vals_dir)) + return(NULL) + } + + # Calculate week date range + # Create a date in the specified ISO week + jan_4 <- as.Date(paste0(year, "-01-04")) + week_start <- jan_4 - (as.numeric(format(jan_4, "%w")) - 2) * 86400 + (week_num - 1) * 7 * 86400 + week_end <- week_start + 6 + + # List all field directories + field_dirs <- list.dirs(daily_vals_dir, full.names = FALSE, recursive = FALSE) + + if (length(field_dirs) == 0) { + warning(paste("No field subdirectories found in:", daily_vals_dir)) + return(NULL) + } + + combined_data <- data.frame() + + # Loop through each field and load matching RDS files for this week + for (field in field_dirs) { + field_path <- file.path(daily_vals_dir, field) + + # Find all RDS files in this field directory + rds_files <- list.files(field_path, pattern = "\\.rds$", full.names = TRUE) + + if (length(rds_files) == 0) { + next + } + + # Filter to files within the week date range + for (rds_file in rds_files) { + # Extract date from filename: 
{FIELD}/{YYYY-MM-DD}.rds + date_str <- tools::file_path_sans_ext(basename(rds_file)) + file_date <- tryCatch(as.Date(date_str), error = function(e) NA) + + if (is.na(file_date) || file_date < week_start || file_date > week_end) { + next + } + + # Load RDS file + tryCatch({ + rds_data <- readRDS(rds_file) + rds_data$Date <- file_date + combined_data <- rbind(combined_data, rds_data) + }, error = function(e) { + warning(paste("Could not load RDS file:", rds_file, "-", e$message)) + }) + } + } + + if (nrow(combined_data) == 0) { + warning(paste("No RDS data found for week", week_num, "in", daily_vals_dir)) + return(NULL) + } + + return(combined_data) +} + # ============================================================================ # LOAD OR CALCULATE WEEKLY STATISTICS # ============================================================================ @@ -755,7 +835,75 @@ load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_bo return(stats_df) } -load_historical_field_data <- function(project_dir, current_week, current_year, reports_dir, num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL) { +load_historical_field_data <- function(project_dir, current_week, current_year, reports_dir, num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL, daily_vals_dir = NULL) { + + # NEW ARCHITECTURE: Try per-field daily RDS first + # If not available, fall back to consolidated RDS + + # Determine daily_vals_dir if not provided + if (is.null(daily_vals_dir)) { + daily_vals_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals") + } + + message(paste("Loading historical data from:", ifelse(dir.exists(daily_vals_dir), "per-field daily RDS", "consolidated RDS"))) + + historical_data <- list() + loaded_weeks <- c() + missing_weeks <- c() + + # Try per-field daily RDS first + use_per_field <- dir.exists(daily_vals_dir) + + if (use_per_field) { + message(paste(" Attempting to load from per-field RDS 
in:", daily_vals_dir)) + + for (lookback in 0:(num_weeks - 1)) { + target <- calculate_target_week_and_year(current_week, current_year, offset_weeks = lookback) + target_week <- target$week + target_year <- target$year + + # Load from per-field daily RDS + per_field_data <- load_per_field_daily_rds(target_week, target_year, daily_vals_dir, field_boundaries_sf) + + if (!is.null(per_field_data) && nrow(per_field_data) > 0) { + # Aggregate to field-week level + week_stats <- per_field_data %>% + dplyr::group_by(field) %>% + dplyr::summarise( + Field_id = dplyr::first(field), + Mean_CI = mean(ci_mean, na.rm = TRUE), + CI_SD = mean(ci_sd, na.rm = TRUE), + CV = mean(ci_sd / ci_mean, na.rm = TRUE), + .groups = "drop" + ) + + historical_data[[lookback + 1]] <- list( + week = target_week, + year = target_year, + data = week_stats + ) + loaded_weeks <- c(loaded_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) + } else { + missing_weeks <- c(missing_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) + } + } + } + + if (length(historical_data) == 0) { + message(paste("Error: No historical data found")) + return(NULL) + } + + message(paste("✓ Loaded", length(historical_data), "weeks:", paste(loaded_weeks, collapse = ", "))) + + return(historical_data) +} + +#' [OLD CONSOLIDATED RDS FALLBACK - KEPT FOR REFERENCE] +#' This function is now replaced by per-field RDS loading above. +#' Keeping it as a comment for potential fallback logic. 
+ +load_historical_field_data_consolidated <- function(project_dir, current_week, current_year, reports_dir, num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL) { historical_data <- list() loaded_weeks <- c() missing_weeks <- c() diff --git a/r_app/90_CI_report_with_kpis_simple.Rmd b/r_app/90_CI_report_with_kpis_simple.Rmd index 2353bab..8d2135f 100644 --- a/r_app/90_CI_report_with_kpis_simple.Rmd +++ b/r_app/90_CI_report_with_kpis_simple.Rmd @@ -52,6 +52,7 @@ suppressPackageStartupMessages({ library(knitr) library(tidyr) library(flextable) + library(officer) }) # Load custom utility functions @@ -352,50 +353,17 @@ safe_log(paste("Week range:", week_start, "to", week_end)) ``` ```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE} -# Load CI index data with error handling +# Load CI quadrant data for field-level analysis tryCatch({ CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) - safe_log("Successfully loaded CI quadrant data") }, error = function(e) { stop("Error loading CI quadrant data: ", e$message) }) -# Get file paths for different weeks using the utility function -tryCatch({ - path_to_week_current = get_week_path(weekly_CI_mosaic, today, 0) - path_to_week_minus_1 = get_week_path(weekly_CI_mosaic, today, -1) - path_to_week_minus_2 = get_week_path(weekly_CI_mosaic, today, -2) - path_to_week_minus_3 = get_week_path(weekly_CI_mosaic, today, -3) - - # Log the calculated paths - safe_log("Required mosaic paths:") - safe_log(paste("Path to current week:", path_to_week_current)) - safe_log(paste("Path to week minus 1:", path_to_week_minus_1)) - safe_log(paste("Path to week minus 2:", path_to_week_minus_2)) - safe_log(paste("Path to week minus 3:", path_to_week_minus_3)) - - # Validate that files exist - if (!file.exists(path_to_week_current)) warning("Current week mosaic file does not exist: ", path_to_week_current) - if (!file.exists(path_to_week_minus_1)) warning("Week minus 1 
mosaic file does not exist: ", path_to_week_minus_1) - if (!file.exists(path_to_week_minus_2)) warning("Week minus 2 mosaic file does not exist: ", path_to_week_minus_2) - if (!file.exists(path_to_week_minus_3)) warning("Week minus 3 mosaic file does not exist: ", path_to_week_minus_3) - - # Load raster data with terra functions - CI <- terra::rast(path_to_week_current)$CI - CI_m1 <- terra::rast(path_to_week_minus_1)$CI - CI_m2 <- terra::rast(path_to_week_minus_2)$CI - CI_m3 <- terra::rast(path_to_week_minus_3)$CI - - # DEBUG: Check which weeks were actually loaded and their data ranges - safe_log(paste("DEBUG - CI (current) range:", paste(terra::minmax(CI)[,1], collapse=" to "))) - safe_log(paste("DEBUG - CI_m1 (week-1) range:", paste(terra::minmax(CI_m1)[,1], collapse=" to "))) - safe_log(paste("DEBUG - CI_m2 (week-2) range:", paste(terra::minmax(CI_m2)[,1], collapse=" to "))) - safe_log(paste("DEBUG - CI_m3 (week-3) range:", paste(terra::minmax(CI_m3)[,1], collapse=" to "))) - -}, error = function(e) { - stop("Error loading raster data: ", e$message) -}) +# NOTE: Overview maps skipped for this report +# Individual field sections load their own per-field mosaics directly +``` ``` ```{r compute_benchmarks_once, include=FALSE} @@ -456,7 +424,7 @@ if (exists("summary_tables") && !is.null(summary_tables)) { ## Executive Summary - Key Performance Indicators -```{r combined_kpi_table, echo=FALSE} +```{r combined_kpi_table, echo=FALSE, results='asis'} # Combine all KPI tables into a single table with standardized column names display_names <- c( field_uniformity_summary = "Field Uniformity", @@ -510,7 +478,7 @@ ft ## Field Alerts -```{r field_alerts_table, echo=FALSE} +```{r field_alerts_table, echo=FALSE, results='asis'} # Generate alerts for all fields generate_field_alerts <- function(field_details_table) { if (is.null(field_details_table) || nrow(field_details_table) == 0) { @@ -617,80 +585,12 @@ if (exists("field_details_table") && !is.null(field_details_table)) { 
``` ```{r data, message=TRUE, warning=TRUE, include=FALSE} -# Load CI index data with error handling -tryCatch({ - CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) - safe_log("Successfully loaded CI quadrant data") -}, error = function(e) { - stop("Error loading CI quadrant data: ", e$message) -}) - -# Get file paths for different weeks using the utility function -tryCatch({ - path_to_week_current = get_week_path(weekly_CI_mosaic, today, 0) - path_to_week_minus_1 = get_week_path(weekly_CI_mosaic, today, -1) - path_to_week_minus_2 = get_week_path(weekly_CI_mosaic, today, -2) - path_to_week_minus_3 = get_week_path(weekly_CI_mosaic, today, -3) - - # Log the calculated paths - safe_log("Required mosaic paths:") - safe_log(paste("Path to current week:", path_to_week_current)) - safe_log(paste("Path to week minus 1:", path_to_week_minus_1)) - safe_log(paste("Path to week minus 2:", path_to_week_minus_2)) - safe_log(paste("Path to week minus 3:", path_to_week_minus_3)) - - # Validate that files exist - if (!file.exists(path_to_week_current)) warning("Current week mosaic file does not exist: ", path_to_week_current) - if (!file.exists(path_to_week_minus_1)) warning("Week minus 1 mosaic file does not exist: ", path_to_week_minus_1) - if (!file.exists(path_to_week_minus_2)) warning("Week minus 2 mosaic file does not exist: ", path_to_week_minus_2) - if (!file.exists(path_to_week_minus_3)) warning("Week minus 3 mosaic file does not exist: ", path_to_week_minus_3) - - # Load raster data with terra functions - CI <- terra::rast(path_to_week_current)$CI - CI_m1 <- terra::rast(path_to_week_minus_1)$CI - CI_m2 <- terra::rast(path_to_week_minus_2)$CI - CI_m3 <- terra::rast(path_to_week_minus_3)$CI - -}, error = function(e) { - stop("Error loading raster data: ", e$message) -}) +# Verify CI quadrant data is loaded from load_ci_data chunk +if (!exists("CI_quadrant") || is.null(CI_quadrant)) { + stop("CI_quadrant data not 
available - check load_ci_data chunk") +} +safe_log("CI quadrant data verified for field-level analysis") ``` - -```{r calculate_difference_rasters, message=TRUE, warning=TRUE, include=FALSE} -# Calculate difference rasters for comparisons -# When one week has NA values, the difference will also be NA (not zero) -# Initialize placeholders first to ensure they exist -last_week_dif_raster_abs <- NULL -three_week_dif_raster_abs <- NULL - -tryCatch({ - # Always calculate differences - NA values will propagate naturally - # This way empty weeks (all NA) result in NA differences, not misleading zeros - last_week_dif_raster_abs <- (CI - CI_m1) - three_week_dif_raster_abs <- (CI - CI_m3) - - safe_log("Calculated difference rasters (NA values preserved)") - -}, error = function(e) { - safe_log(paste("Error calculating difference rasters:", e$message), "ERROR") - # Fallback: create NA placeholders if calculation fails - if (is.null(last_week_dif_raster_abs)) { - last_week_dif_raster_abs <- CI * NA - } - if (is.null(three_week_dif_raster_abs)) { - three_week_dif_raster_abs <- CI * NA - } -}) - -# Final safety check - ensure variables exist in global environment -if (is.null(last_week_dif_raster_abs)) { - last_week_dif_raster_abs <- CI * NA - safe_log("Created NA placeholder for last_week_dif_raster_abs", "WARNING") -} -if (is.null(three_week_dif_raster_abs)) { - three_week_dif_raster_abs <- CI * NA - safe_log("Created NA placeholder for three_week_dif_raster_abs", "WARNING") -} ``` ```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE} @@ -710,76 +610,6 @@ tryCatch({ stop("Error loading field boundaries: ", e$message) }) ``` -\newpage - -## Chlorophyll Index (CI) Overview Map - Current Week -```{r render_ci_overview_map, echo=FALSE, fig.height=7, fig.width=10, dpi=300, dev='png', message=FALSE, warning=FALSE} -# Create overview chlorophyll index map -tryCatch({ - # Choose palette based on colorblind_friendly parameter - ci_palette <- if (colorblind_friendly) 
"viridis" else "brewer.rd_yl_gn" - - # Base shape - map <- tmap::tm_shape(CI, unit = "m") - - # Add raster layer with continuous spectrum (fixed scale 1-8 for consistent comparison) - map <- map + tmap::tm_raster(col.scale = tm_scale_continuous(values = ci_palette, - limits = c(1, 8)), - col.legend = tm_legend(title = "Chlorophyll Index (CI)", - orientation = "landscape", - position = tm_pos_out("center", "bottom"))) - # Complete the map with layout and other elements - map <- map + - tmap::tm_scalebar(position = tm_pos_out("right", "bottom"), text.color = "black") + - tmap::tm_compass(position = tm_pos_out("right", "bottom"), text.color = "black") + - tmap::tm_shape(AllPivots0) + - tmap::tm_borders(col = "black") + - tmap::tm_text("sub_field", size = 0.6, col = "black") - - # Print the map - print(map) -}, error = function(e) { - safe_log(paste("Error creating CI overview map:", e$message), "ERROR") - plot(1, type="n", axes=FALSE, xlab="", ylab="") - text(1, 1, "Error creating CI overview map", cex=1.5) -}) - -``` -\newpage - -## Weekly Chlorophyll Index Difference Map -```{r render_ci_difference_map, echo=FALSE, fig.height=7, fig.width=10, dpi=300, dev='png', message=FALSE, warning=FALSE} -# Create chlorophyll index difference map -tryCatch({ - # Choose palette based on colorblind_friendly parameter - diff_palette <- if (colorblind_friendly) "plasma" else "brewer.rd_yl_gn" - - # Base shape - map <- tmap::tm_shape(last_week_dif_raster_abs, unit = "m") - - # Add raster layer with continuous spectrum (centered at 0 for difference maps, fixed scale) - map <- map + tmap::tm_raster(col.scale = tm_scale_continuous(values = diff_palette, - midpoint = 0, - limits = c(-3, 3)), - col.legend = tm_legend(title = "Chlorophyll Index (CI) Change", - orientation = "landscape", - position = tm_pos_out("center", "bottom"))) - # Complete the map with layout and other elements - map <- map + - tmap::tm_scalebar(position = tm_pos_out("right", "bottom"), text.color = "black") + - 
tmap::tm_compass(position = tm_pos_out("right", "bottom"), text.color = "black") + - tmap::tm_shape(AllPivots0) + - tmap::tm_borders(col = "black") + - tmap::tm_text("sub_field", size = 0.6, col = "black") - - # Print the map - print(map) -}, error = function(e) { - safe_log(paste("Error creating CI difference map:", e$message), "ERROR") - plot(1, type="n", axes=FALSE, xlab="", ylab="") - text(1, 1, "Error creating CI difference map", cex=1.5) -}) -``` # Section 2: Field-by-Field Analysis @@ -805,6 +635,23 @@ tryCatch({ dplyr::group_by(field) %>% dplyr::summarise(.groups = 'drop') + # Use per-field weekly mosaic directory path from parameters_project.R + weekly_mosaic_per_field_dir <- weekly_CI_mosaic + + # Helper to get week/year from a date + get_week_year <- function(date) { + list( + week = as.numeric(format(date, "%V")), + year = as.numeric(format(date, "%G")) + ) + } + + # Get week/year for current and historical weeks (local to field section) + current_ww <- get_week_year(as.Date(today)) + minus_1_ww <- get_week_year(as.Date(today) - lubridate::weeks(1)) + minus_2_ww <- get_week_year(as.Date(today) - lubridate::weeks(2)) + minus_3_ww <- get_week_year(as.Date(today) - lubridate::weeks(3)) + # Generate plots for each field for(i in seq_along(AllPivots_merged$field)) { field_name <- AllPivots_merged$field[i] @@ -820,15 +667,71 @@ tryCatch({ cat("\\newpage\n\n") } - # Call ci_plot with explicit parameters (ci_plot will generate its own header) + # Load per-field mosaics directly for this field + field_CI <- NULL + field_CI_m1 <- NULL + field_CI_m2 <- NULL + field_CI_m3 <- NULL + + tryCatch({ + # Load per-field mosaic for current week + per_field_path_current <- get_per_field_mosaic_path( + weekly_mosaic_per_field_dir, field_name, current_ww$week, current_ww$year + ) + if (!is.null(per_field_path_current) && file.exists(per_field_path_current)) { + field_CI <- terra::rast(per_field_path_current)[["CI"]] + } + + # Load per-field mosaic for week-1 + 
per_field_path_m1 <- get_per_field_mosaic_path( + weekly_mosaic_per_field_dir, field_name, minus_1_ww$week, minus_1_ww$year + ) + if (!is.null(per_field_path_m1) && file.exists(per_field_path_m1)) { + field_CI_m1 <- terra::rast(per_field_path_m1)[["CI"]] + } + + # Load per-field mosaic for week-2 + per_field_path_m2 <- get_per_field_mosaic_path( + weekly_mosaic_per_field_dir, field_name, minus_2_ww$week, minus_2_ww$year + ) + if (!is.null(per_field_path_m2) && file.exists(per_field_path_m2)) { + field_CI_m2 <- terra::rast(per_field_path_m2)[["CI"]] + } + + # Load per-field mosaic for week-3 + per_field_path_m3 <- get_per_field_mosaic_path( + weekly_mosaic_per_field_dir, field_name, minus_3_ww$week, minus_3_ww$year + ) + if (!is.null(per_field_path_m3) && file.exists(per_field_path_m3)) { + field_CI_m3 <- terra::rast(per_field_path_m3)[["CI"]] + } + + safe_log(paste("Loaded per-field mosaics for", field_name), "DEBUG") + + }, error = function(e) { + safe_log(paste("Could not load per-field mosaics for", field_name, ":", e$message), "WARNING") + }) + + # Calculate difference rasters from per-field data (local to this field) + last_week_dif_raster_field <- NULL + three_week_dif_raster_field <- NULL + + if (!is.null(field_CI) && !is.null(field_CI_m1)) { + last_week_dif_raster_field <- field_CI - field_CI_m1 + } + if (!is.null(field_CI) && !is.null(field_CI_m3)) { + three_week_dif_raster_field <- field_CI - field_CI_m3 + } + + # Call ci_plot with field-specific rasters ci_plot( pivotName = field_name, field_boundaries = AllPivots0, - current_ci = CI, - ci_minus_1 = CI_m1, - ci_minus_2 = CI_m2, - last_week_diff = last_week_dif_raster_abs, - three_week_diff = three_week_dif_raster_abs, + current_ci = field_CI, + ci_minus_1 = field_CI_m1, + ci_minus_2 = field_CI_m2, + last_week_diff = last_week_dif_raster_field, + three_week_diff = three_week_dif_raster_field, harvesting_data = harvesting_data, week = week, week_minus_1 = week_minus_1, @@ -927,7 +830,7 @@ tryCatch({ The 
following table provides a comprehensive overview of all monitored fields with their key performance metrics from the KPI analysis. -```{r detailed_field_table, echo=FALSE} +```{r detailed_field_table, echo=FALSE, results='asis'} # Load CI quadrant data to get field ages CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) @@ -1068,7 +971,7 @@ Comparing the current season to these lines helps assess whether crop growth is \newpage ## Report Metadata -```{r report_metadata, echo=FALSE} +```{r report_metadata, echo=FALSE, results='asis'} metadata_info <- data.frame( Metric = c("Report Generated", "Data Source", "Analysis Period", "Total Fields", "Next Update"), Value = c( diff --git a/r_app/90_CI_report_with_kpis_simple_NO_TABLES.Rmd b/r_app/90_CI_report_with_kpis_simple_NO_TABLES.Rmd new file mode 100644 index 0000000..dc95c3e --- /dev/null +++ b/r_app/90_CI_report_with_kpis_simple_NO_TABLES.Rmd @@ -0,0 +1,584 @@ +--- +params: + ref: "word-styles-reference-var1.docx" + output_file: CI_report.docx + report_date: "2025-09-30" + data_dir: "aura" + mail_day: "Wednesday" + borders: FALSE + ci_plot_type: "both" # options: "absolute", "cumulative", "both" + colorblind_friendly: TRUE # use colorblind-friendly palettes (viridis/plasma) + facet_by_season: FALSE # facet CI trend plots by season instead of overlaying + x_axis_unit: "days" # x-axis unit for trend plots: "days" or "weeks" +output: + word_document: + reference_docx: !expr file.path("word-styles-reference-var1.docx") + toc: no +editor_options: + chunk_output_type: console +--- + +```{r setup_parameters, include=FALSE} +# Set up basic report parameters from input values +report_date <- params$report_date +mail_day <- params$mail_day +borders <- params$borders +ci_plot_type <- params$ci_plot_type +colorblind_friendly <- params$colorblind_friendly +facet_by_season <- params$facet_by_season +x_axis_unit <- params$x_axis_unit +``` + +```{r load_libraries, 
message=FALSE, warning=FALSE, include=FALSE} +# Configure knitr options +knitr::opts_chunk$set(warning = FALSE, message = FALSE) + +# Set flag for reporting scripts to use pivot.geojson instead of pivot_2.geojson +reporting_script <- TRUE + +# Load all packages at once with suppressPackageStartupMessages +suppressPackageStartupMessages({ + library(here) + library(sf) + library(terra) + library(tidyverse) + library(tmap) + library(lubridate) + library(zoo) + library(rsample) + library(caret) + library(randomForest) + library(CAST) + library(knitr) + library(tidyr) + library(flextable) + library(officer) +}) + +# Load custom utility functions +tryCatch({ + source("report_utils.R") +}, error = function(e) { + message(paste("Error loading report_utils.R:", e$message)) + # Try alternative path if the first one fails + tryCatch({ + source(here::here("r_app", "report_utils.R")) + }, error = function(e) { + stop("Could not load report_utils.R from either location: ", e$message) + }) +}) + +# Function to determine field priority level based on CV and Moran's I +# Returns: 1=Urgent, 2=Monitor, 3=No stress +get_field_priority_level <- function(cv, morans_i) { + # Handle NA values + if (is.na(cv) || is.na(morans_i)) return(3) # Default to no stress + + # Determine priority based on thresholds + if (cv < 0.1) { + if (morans_i < 0.7) { + return(3) # No stress + } else if (morans_i <= 0.9) { + return(2) # Monitor (young field with some clustering) + } else { + return(1) # Urgent + } + } else if (cv <= 0.15) { + if (morans_i < 0.7) { + return(2) # Monitor + } else { + return(1) # Urgent + } + } else { # cv > 0.15 + return(1) # Urgent + } +} +``` + +```{r initialize_project_config, message=FALSE, warning=FALSE, include=FALSE} +# Set the project directory from parameters +project_dir <- params$data_dir + +# Source project parameters with error handling +tryCatch({ + source(here::here("r_app", "parameters_project.R")) +}, error = function(e) { + stop("Error loading 
parameters_project.R: ", e$message) +}) + +# Log initial configuration +safe_log("Starting the R Markdown script with KPIs - NO TABLES VERSION") +safe_log(paste("mail_day params:", params$mail_day)) +safe_log(paste("report_date params:", params$report_date)) +safe_log(paste("mail_day variable:", mail_day)) +``` + +```{r load_kpi_data, message=FALSE, warning=FALSE, include=FALSE} +## SIMPLE KPI LOADING - robust lookup with fallbacks +# Primary expected directory inside the laravel storage +kpi_data_dir <- file.path("..", "laravel_app", "storage", "app", project_dir, "reports", "kpis") +date_suffix <- format(as.Date(report_date), "%Y%m%d") + +# Calculate current week from report_date using ISO 8601 week numbering +current_week <- as.numeric(format(as.Date(report_date), "%V")) +current_year <- as.numeric(format(as.Date(report_date), "%G")) +week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year) + +# Candidate filenames we expect (exact and common variants) +expected_summary_names <- c( + paste0(project_dir, "_kpi_summary_tables_", week_suffix, ".rds"), + paste0(project_dir, "_kpi_summary_tables_", date_suffix, ".rds"), + paste0(project_dir, "_kpi_summary_tables.rds"), + "kpi_summary_tables.rds", + paste0("kpi_summary_tables_", week_suffix, ".rds"), + paste0("kpi_summary_tables_", date_suffix, ".rds") +) + +expected_field_details_names <- c( + paste0(project_dir, "_field_details_", week_suffix, ".rds"), + paste0(project_dir, "_field_details_", date_suffix, ".rds"), + paste0(project_dir, "_field_details.rds"), + "field_details.rds" +) + +# Helper to attempt loading a file from the directory or fallback to a workspace-wide search +try_load_from_dir <- function(dir, candidates) { + if (!dir.exists(dir)) return(NULL) + for (name in candidates) { + f <- file.path(dir, name) + if (file.exists(f)) return(f) + } + return(NULL) +} + +# Try primary directory first +summary_file <- try_load_from_dir(kpi_data_dir, expected_summary_names) 
+field_details_file <- try_load_from_dir(kpi_data_dir, expected_field_details_names) + +# If not found, perform a workspace-wide search (slower) limited to laravel_app storage +if (is.null(summary_file) || is.null(field_details_file)) { + safe_log(paste("KPI files not found in", kpi_data_dir, "—searching workspace for RDS files")) + # List rds files under laravel_app/storage/app recursively + files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "\\.rds$", recursive = TRUE, full.names = TRUE) + # Try to match by expected names + if (is.null(summary_file)) { + matched <- files[basename(files) %in% expected_summary_names] + if (length(matched) > 0) summary_file <- matched[1] + } + if (is.null(field_details_file)) { + matched2 <- files[basename(files) %in% expected_field_details_names] + if (length(matched2) > 0) field_details_file <- matched2[1] + } +} + +# Final checks and load with safe error messages +kpi_files_exist <- FALSE +if (!is.null(summary_file) && file.exists(summary_file)) { + safe_log(paste("Loading KPI summary from:", summary_file)) + summary_tables <- tryCatch(readRDS(summary_file), error = function(e) { safe_log(paste("Failed to read summary RDS:", e$message), "ERROR"); NULL }) + if (!is.null(summary_tables)) kpi_files_exist <- TRUE +} else { + safe_log(paste("KPI summary file not found. Searched:", paste(expected_summary_names, collapse=", ")), "WARNING") +} + +if (!is.null(field_details_file) && file.exists(field_details_file)) { + safe_log(paste("Loading field details from:", field_details_file)) + field_details_table <- tryCatch(readRDS(field_details_file), error = function(e) { safe_log(paste("Failed to read field details RDS:", e$message), "ERROR"); NULL }) + if (!is.null(field_details_table)) kpi_files_exist <- kpi_files_exist && TRUE +} else { + safe_log(paste("Field details file not found. 
Searched:", paste(expected_field_details_names, collapse=", ")), "WARNING") +} + +if (kpi_files_exist) { + safe_log("✓ KPI summary tables loaded successfully") +} else { + safe_log("KPI files could not be located or loaded. KPI sections will be skipped.", "WARNING") +} + +#' Generate field-specific KPI summary for display in reports +#' @param field_name Name of the field to summarize +#' @param field_details_table Data frame with field-level KPI details +#' @return Formatted text string with field KPI summary +generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadrant) { + tryCatch({ + # Get field age from CI quadrant data for the CURRENT SEASON only + # First identify the current season for this field + current_season <- CI_quadrant %>% + filter(field == field_name, Date <= as.Date(report_date)) %>% + group_by(season) %>% + summarise(season_end = max(Date), .groups = 'drop') %>% + filter(season == max(season)) %>% + pull(season) + + # Get the most recent DOY from the current season + field_age <- CI_quadrant %>% + filter(field == field_name, season == current_season) %>% + pull(DOY) %>% + max(na.rm = TRUE) + + # Filter data for this specific field + field_data <- field_details_table %>% + filter(Field == field_name) + + if (nrow(field_data) == 0) { + return(paste("**Field", field_name, "KPIs:** Data not available")) + } + + # Aggregate sub-field data for field-level summary + # For categorical data, take the most common value or highest risk level + field_summary <- field_data %>% + summarise( + field_size = sum(`Field Size (ha)`, na.rm = TRUE), + uniformity_levels = paste(unique(`Growth Uniformity`), collapse = "/"), + avg_yield_forecast = ifelse(is.na(`Yield Forecast (t/ha)`[1]), NA, mean(`Yield Forecast (t/ha)`, na.rm = TRUE)), + max_gap_score = max(`Gap Score`, na.rm = TRUE), + highest_decline_risk = case_when( + any(`Decline Risk` == "Very-high") ~ "Very-high", + any(`Decline Risk` == "High") ~ "High", + any(`Decline Risk` == 
"Moderate") ~ "Moderate", + any(`Decline Risk` == "Low") ~ "Low", + TRUE ~ "Unknown" + ), + highest_weed_risk = case_when( + any(`Weed Risk` == "High") ~ "High", + any(`Weed Risk` == "Moderate") ~ "Moderate", + any(`Weed Risk` == "Low") ~ "Low", + TRUE ~ "Unknown" + ), + avg_mean_ci = mean(`Mean CI`, na.rm = TRUE), + avg_cv = mean(`CV Value`, na.rm = TRUE), + .groups = 'drop' + ) + + # Apply age-based filtering to yield forecast + if (is.na(field_age) || field_age < 240) { + field_summary$avg_yield_forecast <- NA_real_ + } + + # Format the summary text + yield_text <- if (is.na(field_summary$avg_yield_forecast)) { + "Yield Forecast: NA" + } else { + paste0("Yield Forecast: ", round(field_summary$avg_yield_forecast, 1), " t/ha") + } + + kpi_text <- paste0( + "Size: ", round(field_summary$field_size, 1), " ha • Growth Uniformity: ", field_summary$uniformity_levels, + " • ", yield_text, " • Gap Score: ", round(field_summary$max_gap_score, 1), + " • Decline Risk: ", field_summary$highest_decline_risk, " • Weed Risk: ", field_summary$highest_weed_risk, + " • Mean CI: ", round(field_summary$avg_mean_ci, 2) + ) + + kpi_text <- paste0("", kpi_text, "") + + return(kpi_text) + + }, error = function(e) { + safe_log(paste("Error generating KPI summary for field", field_name, ":", e$message), "ERROR") + return(paste("**Field", field_name, "KPIs:** Error generating summary")) + }) +} +``` + +```{r calculate_dates_and_weeks, message=FALSE, warning=FALSE, include=FALSE} +# Set locale for consistent date formatting +Sys.setlocale("LC_TIME", "C") + +# Initialize date variables from parameters +today <- as.character(report_date) +mail_day_as_character <- as.character(mail_day) + +# Calculate report dates and weeks using ISO 8601 week numbering +report_date_obj <- as.Date(today) +current_week <- as.numeric(format(report_date_obj, "%V")) +year <- as.numeric(format(report_date_obj, "%Y")) + +# Calculate dates for weekly analysis +week_start <- report_date_obj - 
((as.numeric(format(report_date_obj, "%w")) + 1) %% 7)
+week_end <- week_start + 6
+
+# Calculate week days (copied from 05 script for compatibility)
+report_date_as_week_day <- weekdays(lubridate::ymd(today))
+days_of_week <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
+
+# Calculate initial week number
+week <- lubridate::week(today)
+safe_log(paste("Initial week calculation:", week, "today:", today))
+
+# Calculate previous dates for comparisons
+today_minus_1 <- as.character(lubridate::ymd(today) - 7)
+today_minus_2 <- as.character(lubridate::ymd(today) - 14)
+today_minus_3 <- as.character(lubridate::ymd(today) - 21)
+
+# Adjust week calculation based on mail day
+if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_day_as_character)) {
+  safe_log("Adjusting weeks because of mail day")
+  week <- lubridate::week(today) + 1
+  today_minus_1 <- as.character(lubridate::ymd(today))
+  today_minus_2 <- as.character(lubridate::ymd(today) - 7)
+  today_minus_3 <- as.character(lubridate::ymd(today) - 14)
+}
+
+# Calculate week numbers for previous weeks
+week_minus_1 <- week - 1
+week_minus_2 <- week - 2
+week_minus_3 <- week - 3
+
+# Format current week with leading zeros
+week <- sprintf("%02d", week)
+
+safe_log(paste("Report week:", current_week, "Year:", year))
+safe_log(paste("Week range:", week_start, "to", week_end))
+```
+
+```{r load_ci_data, message=FALSE, warning=FALSE, include=FALSE}
+# Load CI quadrant data for field-level analysis
+tryCatch({
+  CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
+  safe_log("Successfully loaded CI quadrant data")
+}, error = function(e) {
+  stop("Error loading CI quadrant data: ", e$message)
+})
+
+# NOTE: Overview maps skipped for this report
+# Individual field sections load their own per-field mosaics directly
+```
+
+
+```{r compute_benchmarks_once, include=FALSE}
+# Compute CI benchmarks once for the
entire estate +benchmarks <- compute_ci_benchmarks(CI_quadrant, project_dir, c(10, 50, 90)) +if (!is.null(benchmarks)) { + safe_log("Benchmarks computed successfully for the report") +} else { + safe_log("Failed to compute benchmarks", "WARNING") +} +``` + +## Report Summary + +**Farm Location:** `r toupper(project_dir)` Estate +**Report Period:** Week `r current_week` of `r year` +**Data Source:** Planet Labs Satellite Imagery +**Analysis Type:** Chlorophyll Index (CI) Monitoring +**Report Generated on:** `r format(Sys.time(), "%B %d, %Y at %H:%M")` + +**NOTE: THIS IS A NO-TABLES VERSION FOR DIAGNOSTIC PURPOSES - MAPS AND GRAPHS ONLY** + +\newpage + +# Section 2: Field-by-Field Analysis + +## Overview of Field-Level Insights +This section provides detailed, field-specific analyses including chlorophyll index maps, trend graphs, and performance metrics. Each field is analyzed individually to support targeted interventions. + +**Key Elements per Field:** +- Current and historical CI maps +- Week-over-week change visualizations +- Cumulative growth trends +- Field-specific KPI summaries + +*Navigate to the following pages for individual field reports.* + +\newpage + +```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE} +# Load field boundaries from parameters +tryCatch({ + AllPivots0 <- field_boundaries_sf %>% + dplyr::filter(!is.na(field), !is.na(sub_field)) # Filter out NA field names + safe_log("Successfully loaded field boundaries") + + # Prepare merged field list for use in summaries + AllPivots_merged <- AllPivots0 %>% + dplyr::filter(!is.na(field), !is.na(sub_field)) %>% # Filter out NA field names + dplyr::group_by(field) %>% + dplyr::summarise(.groups = 'drop') + +}, error = function(e) { + stop("Error loading field boundaries: ", e$message) +}) +``` + +```{r generate_field_visualizations, eval=TRUE, fig.height=3.8, fig.width=10, dpi=300, dev='png', message=FALSE,echo=FALSE, warning=FALSE, include=TRUE, results='asis'} +# Generate detailed 
visualizations for each field +tryCatch({ + # Merge field polygons for processing and filter out NA field names + AllPivots_merged <- AllPivots0 %>% + dplyr::filter(!is.na(field), !is.na(sub_field)) %>% # Filter out NA fields + dplyr::group_by(field) %>% + dplyr::summarise(.groups = 'drop') + + # Use per-field weekly mosaic directory path from parameters_project.R + weekly_mosaic_per_field_dir <- weekly_CI_mosaic + + # Helper to get week/year from a date + get_week_year <- function(date) { + list( + week = as.numeric(format(date, "%V")), + year = as.numeric(format(date, "%G")) + ) + } + + # Get week/year for current and historical weeks (local to field section) + current_ww <- get_week_year(as.Date(today)) + minus_1_ww <- get_week_year(as.Date(today) - lubridate::weeks(1)) + minus_2_ww <- get_week_year(as.Date(today) - lubridate::weeks(2)) + minus_3_ww <- get_week_year(as.Date(today) - lubridate::weeks(3)) + + # Debug: check how many fields we're iterating + safe_log(paste("Starting visualization loop for", nrow(AllPivots_merged), "fields"), "DEBUG") + safe_log(paste("Fields to process:", paste(AllPivots_merged$field, collapse=", ")), "DEBUG") + + # Generate plots for each field + for(i in seq_along(AllPivots_merged$field)) { + field_name <- AllPivots_merged$field[i] + safe_log(paste("Processing field", i, "of", nrow(AllPivots_merged), ":", field_name), "DEBUG") + + # Skip if field_name is still NA (double check) + if(is.na(field_name)) { + safe_log(paste("Skipping field", i, "- NA name"), "DEBUG") + next + } + + tryCatch({ + # Add page break before each field (except the first one) + if(i > 1) { + cat("\\newpage\n\n") + } + + # Load per-field mosaics directly for this field + field_CI <- NULL + field_CI_m1 <- NULL + field_CI_m2 <- NULL + field_CI_m3 <- NULL + + tryCatch({ + # Load per-field mosaic for current week + per_field_path_current <- get_per_field_mosaic_path( + weekly_mosaic_per_field_dir, field_name, current_ww$week, current_ww$year + ) + 
safe_log(paste("Looking for mosaic at:", per_field_path_current, "exists?", file.exists(per_field_path_current %||% "")), "DEBUG") + if (!is.null(per_field_path_current) && file.exists(per_field_path_current)) { + field_CI <- terra::rast(per_field_path_current)[["CI"]] + safe_log(paste("Successfully loaded field_CI for", field_name), "DEBUG") + } else { + safe_log(paste("Could not load field_CI for", field_name, "- file not found"), "DEBUG") + } + + # Load per-field mosaic for week-1 + per_field_path_m1 <- get_per_field_mosaic_path( + weekly_mosaic_per_field_dir, field_name, minus_1_ww$week, minus_1_ww$year + ) + if (!is.null(per_field_path_m1) && file.exists(per_field_path_m1)) { + field_CI_m1 <- terra::rast(per_field_path_m1)[["CI"]] + } + + # Load per-field mosaic for week-2 + per_field_path_m2 <- get_per_field_mosaic_path( + weekly_mosaic_per_field_dir, field_name, minus_2_ww$week, minus_2_ww$year + ) + if (!is.null(per_field_path_m2) && file.exists(per_field_path_m2)) { + field_CI_m2 <- terra::rast(per_field_path_m2)[["CI"]] + } + + # Load per-field mosaic for week-3 + per_field_path_m3 <- get_per_field_mosaic_path( + weekly_mosaic_per_field_dir, field_name, minus_3_ww$week, minus_3_ww$year + ) + if (!is.null(per_field_path_m3) && file.exists(per_field_path_m3)) { + field_CI_m3 <- terra::rast(per_field_path_m3)[["CI"]] + } + + safe_log(paste("Loaded per-field mosaics for", field_name), "DEBUG") + + }, error = function(e) { + safe_log(paste("Could not load per-field mosaics for", field_name, ":", e$message), "WARNING") + }) + + # Calculate difference rasters from per-field data (local to this field) + last_week_dif_raster_field <- NULL + three_week_dif_raster_field <- NULL + + if (!is.null(field_CI) && !is.null(field_CI_m1)) { + last_week_dif_raster_field <- field_CI - field_CI_m1 + } + if (!is.null(field_CI) && !is.null(field_CI_m3)) { + three_week_dif_raster_field <- field_CI - field_CI_m3 + } + + # Call ci_plot with field-specific rasters + ci_plot( + 
pivotName = field_name, + field_boundaries = AllPivots0, + current_ci = field_CI, + ci_minus_1 = field_CI_m1, + ci_minus_2 = field_CI_m2, + last_week_diff = last_week_dif_raster_field, + three_week_diff = three_week_dif_raster_field, + harvesting_data = harvesting_data, + week = week, + week_minus_1 = week_minus_1, + week_minus_2 = week_minus_2, + week_minus_3 = week_minus_3, + borders = borders, + colorblind_friendly = colorblind_friendly + ) + + cat("\n\n") + + # Special handling for ESA project field 00f25 - remove duplicate DOY values + if (project_dir == "esa" && field_name == "00F25") { + ci_quadrant_data <- CI_quadrant %>% + filter(field == "00F25") %>% + arrange(DOY) %>% + group_by(DOY) %>% + slice(1) %>% + ungroup() + } else { + ci_quadrant_data <- CI_quadrant + } + + # Call cum_ci_plot with explicit parameters + cum_ci_plot( + pivotName = field_name, + ci_quadrant_data = ci_quadrant_data, + plot_type = ci_plot_type, + facet_on = facet_by_season, + x_unit = x_axis_unit, + colorblind_friendly = colorblind_friendly, + show_benchmarks = TRUE, + estate_name = project_dir, + benchmark_percentiles = c(10, 50, 90), + benchmark_data = benchmarks + ) + + cat("\n\n") + + # Add field-specific KPI summary under the graphs + if (exists("field_details_table") && !is.null(field_details_table)) { + kpi_summary <- generate_field_kpi_summary(field_name, field_details_table, CI_quadrant) + cat(kpi_summary) + cat("\n\n") + } + + }, error = function(e) { + safe_log(paste("Error generating plots for field", field_name, ":", e$message), "ERROR") + cat("\\newpage\n\n") + cat("# Error generating plots for field ", field_name, "\n\n") + cat(e$message, "\n\n") + }) + } +}, error = function(e) { + safe_log(paste("Error in field visualization section:", e$message), "ERROR") + cat("Error generating field plots. 
See log for details.\n\n") +}) +``` + +\newpage + +# END OF NO-TABLES DIAGNOSTIC REPORT + +This diagnostic report contains only maps and graphs to help identify if the visualization system is working correctly. + +*Generated for diagnostic purposes* diff --git a/r_app/experiments/ci_extraction_and_yield_prediction.R b/r_app/experiments/ci_extraction_and_yield_prediction.R index 83f233b..3334498 100644 --- a/r_app/experiments/ci_extraction_and_yield_prediction.R +++ b/r_app/experiments/ci_extraction_and_yield_prediction.R @@ -156,9 +156,10 @@ calculate_ci <- function(raster_obj) { red_band <- raster_obj[[3]] nir_band <- raster_obj[[4]] - # CI formula: (NIR / Red) - 1 - # This highlights chlorophyll content in vegetation - ci_raster <- (nir_band / red_band) - 1 + # CI formula: (NIR / Green) - 1, NOT (NIR / Red) - 1 + # *** CRITICAL: Use GREEN band for Chlorophyll Index, NOT RED *** + # GREEN band is essential for proper chlorophyll-sensitive calculation + ci_raster <- (nir_band / green_band) - 1 # Filter extreme values that may result from division operations ci_raster[ci_raster > 10] <- 10 # Cap max value diff --git a/r_app/experiments/crop_messaging/young_field_analysis.R b/r_app/experiments/crop_messaging/young_field_analysis.R index e3ef279..5319a37 100644 --- a/r_app/experiments/crop_messaging/young_field_analysis.R +++ b/r_app/experiments/crop_messaging/young_field_analysis.R @@ -68,8 +68,9 @@ calculate_enhanced_indices <- function(red, green, blue, nir) { grvi <- green / red names(grvi) <- "GRVI" - # 6. Chlorophyll Index (current CI - for comparison) - ci <- nir / red - 1 + # 6. 
Chlorophyll Index (CI = NIR / Green - 1, NOT NIR/Red) + # *** CRITICAL: Correct formula uses GREEN band, not RED *** + ci <- nir / green - 1 names(ci) <- "CI" return(list( diff --git a/r_app/kpi_utils.R b/r_app/kpi_utils.R index 4519df7..dccae1b 100644 --- a/r_app/kpi_utils.R +++ b/r_app/kpi_utils.R @@ -62,25 +62,94 @@ calculate_week_numbers <- function(report_date = Sys.Date()) { #' @param year Year #' @param mosaic_dir Directory containing weekly mosaics #' @return Terra raster with CI band, or NULL if file not found +# Helper function to load CI raster for a specific field (handles both single-file and per-field architectures) +load_field_ci_raster <- function(ci_raster_or_obj, field_name, field_vect = NULL) { + # Check if this is per-field loading mode + is_per_field <- !is.null(attr(ci_raster_or_obj, "is_per_field")) && attr(ci_raster_or_obj, "is_per_field") + + if (is_per_field) { + # Per-field architecture: load this specific field's mosaic + per_field_dir <- attr(ci_raster_or_obj, "per_field_dir") + week_file <- attr(ci_raster_or_obj, "week_file") + field_mosaic_path <- file.path(per_field_dir, field_name, week_file) + + if (file.exists(field_mosaic_path)) { + tryCatch({ + field_mosaic <- terra::rast(field_mosaic_path) + # Extract CI band (5th band) if multi-band, otherwise use as-is + if (terra::nlyr(field_mosaic) >= 5) { + return(field_mosaic[[5]]) + } else { + return(field_mosaic[[1]]) + } + }, error = function(e) { + safe_log(paste("Error loading per-field mosaic for", field_name, ":", e$message), "WARNING") + return(NULL) + }) + } else { + safe_log(paste("Per-field mosaic not found for", field_name), "WARNING") + return(NULL) + } + } else { + # Single-file architecture: crop from loaded raster + if (!is.null(field_vect)) { + return(terra::crop(ci_raster_or_obj, field_vect, mask = TRUE)) + } else { + return(ci_raster_or_obj) + } + } +} + load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) { week_file <- sprintf("week_%02d_%d.tif", week_num, 
year) week_path <- file.path(mosaic_dir, week_file) - if (!file.exists(week_path)) { - safe_log(paste("Weekly mosaic not found:", week_path), "WARNING") - return(NULL) + # FIRST: Try to load single-file mosaic (legacy approach) + if (file.exists(week_path)) { + tryCatch({ + mosaic_raster <- terra::rast(week_path) + ci_raster <- mosaic_raster[[5]] # CI is the 5th band + names(ci_raster) <- "CI" + safe_log(paste("Loaded weekly mosaic (single-file):", week_file)) + return(ci_raster) + }, error = function(e) { + safe_log(paste("Error loading mosaic:", e$message), "ERROR") + return(NULL) + }) } - tryCatch({ - mosaic_raster <- terra::rast(week_path) - ci_raster <- mosaic_raster[[5]] # CI is the 5th band - names(ci_raster) <- "CI" - safe_log(paste("Loaded weekly mosaic:", week_file)) - return(ci_raster) - }, error = function(e) { - safe_log(paste("Error loading mosaic:", e$message), "ERROR") - return(NULL) - }) + # SECOND: Per-field architecture - store mosaic_dir path for later per-field loading + # Don't try to merge - just return the directory path so field-level functions can load per-field + if (dir.exists(mosaic_dir)) { + field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE) + field_dirs <- field_dirs[field_dirs != ""] + + # Check if any field has this week's mosaic + found_any <- FALSE + for (field in field_dirs) { + field_mosaic_path <- file.path(mosaic_dir, field, week_file) + if (file.exists(field_mosaic_path)) { + found_any <- TRUE + break + } + } + + if (found_any) { + safe_log(paste("Found per-field mosaics for week", sprintf("%02d", week_num), year, + "- will load per-field on demand")) + # Return a special object that indicates per-field loading is needed + # Store the mosaic_dir path in the raster's metadata + dummy_raster <- terra::rast(nrow=1, ncol=1, vals=NA) + attr(dummy_raster, "per_field_dir") <- mosaic_dir + attr(dummy_raster, "week_file") <- week_file + attr(dummy_raster, "is_per_field") <- TRUE + return(dummy_raster) + } + } + 
+ # If we get here, no mosaic found + safe_log(paste("Weekly mosaic not found for week", sprintf("%02d", week_num), year), "WARNING") + return(NULL) } # Function to prepare predictions with consistent naming and formatting @@ -128,12 +197,16 @@ calculate_field_uniformity_kpi <- function(ci_raster, field_boundaries) { # Extract field boundary field_vect <- field_boundaries_vect[i] - # crop ci_raster with field_vect and use that for ci_values - cropped_raster <- terra::crop(ci_raster, field_vect, mask = TRUE) + # Load appropriate CI raster using helper function + cropped_raster <- load_field_ci_raster(ci_raster, field_name, field_vect) # Extract CI values for this field using helper function - field_values <- extract_ci_values(cropped_raster, field_vect) - valid_values <- field_values[!is.na(field_values) & is.finite(field_values)] + if (!is.null(cropped_raster)) { + field_values <- extract_ci_values(cropped_raster, field_vect) + valid_values <- field_values[!is.na(field_values) & is.finite(field_values)] + } else { + valid_values <- c() + } # If all valid values are 0 (cloud), fill with NA row if (length(valid_values) == 0 || all(valid_values == 0)) { @@ -271,9 +344,18 @@ calculate_area_change_kpi <- function(current_ci, previous_ci, field_boundaries) # Extract field boundary field_vect <- field_boundaries_vect[i] - # Extract CI values for both weeks (using helper to get CI band only) - current_values <- extract_ci_values(current_ci, field_vect) - previous_values <- extract_ci_values(previous_ci, field_vect) + # Load appropriate CI rasters using helper function + current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect) + previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect) + + # Extract CI values for both weeks + if (!is.null(current_field_ci) && !is.null(previous_field_ci)) { + current_values <- extract_ci_values(current_field_ci, field_vect) + previous_values <- extract_ci_values(previous_field_ci, field_vect) + } else 
{
+        current_values <- c()
+        previous_values <- c()
+      }

       # Clean values
       valid_idx <- !is.na(current_values) & !is.na(previous_values) &
@@ -554,8 +636,17 @@ calculate_growth_decline_kpi <- function(current_ci, previous_ci, field_boundari
     sub_field_name <- field_boundaries$sub_field[i]
     field_vect <- field_boundaries_vect[i]

-    # Extract CI values for both weeks (using helper to get CI band only)
-    current_values <- extract_ci_values(current_ci, field_vect)
+    # Load appropriate CI rasters using helper function
+    current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect)
+    previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect)
+
+    # Extract CI values for both weeks
+    if (!is.null(current_field_ci) && !is.null(previous_field_ci)) {
+      current_values <- extract_ci_values(current_field_ci, field_vect)
+      previous_values <- extract_ci_values(previous_field_ci, field_vect)
+    } else {
+      current_values <- c()
+      previous_values <- c()
+    }
-    previous_values <- extract_ci_values(previous_ci, field_vect)

     # Clean values
@@ -715,8 +807,17 @@ calculate_weed_presence_kpi <- function(current_ci, previous_ci, field_boundarie
     }

     # Extract CI values for both weeks (using helper to get CI band only)
-    current_values <- extract_ci_values(current_ci, field_vect)
-    previous_values <- extract_ci_values(previous_ci, field_vect)
+    current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect)
+    previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect)
+
+    # Extract CI values for both weeks
+    if (!is.null(current_field_ci) && !is.null(previous_field_ci)) {
+      current_values <- extract_ci_values(current_field_ci, field_vect)
+      previous_values <- extract_ci_values(previous_field_ci, field_vect)
+    } else {
+      current_values <- c()
+      previous_values <- c()
+    }

     # Clean values
     valid_idx <- !is.na(current_values) & !is.na(previous_values) &
@@ -797,8 +898,15 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries)
sub_field_name <- field_boundaries$sub_field[i] field_vect <- field_boundaries_vect[i] + # Load appropriate CI raster using helper function + field_ci <- load_field_ci_raster(ci_raster, field_name, field_vect) + # Extract CI values using helper function - ci_values <- extract_ci_values(ci_raster, field_vect) + if (!is.null(field_ci)) { + ci_values <- extract_ci_values(field_ci, field_vect) + } else { + ci_values <- c() + } valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)] if (length(valid_values) > 1) { diff --git a/r_app/parameters_project.R b/r_app/parameters_project.R index 3843eb5..bd7c9fa 100644 --- a/r_app/parameters_project.R +++ b/r_app/parameters_project.R @@ -210,24 +210,18 @@ detect_tile_structure_from_merged_final <- function(merged_final_tif_dir, daily_ # 4. Define project directory structure # ----------------------------------- -setup_project_directories <- function(project_dir, data_source = "merged_tif_8b") { +setup_project_directories <- function(project_dir, data_source = "merged_tif") { # Base directories laravel_storage_dir <- here("laravel_app", "storage", "app", project_dir) - # Determine which TIF source folder to use based on data_source parameter - # Default is merged_tif_8b for newer data with cloud masking (8-band + UDM) - # Alternative: merged_tif for 4-band legacy data - merged_tif_folder <- here(laravel_storage_dir, data_source) + # Use standard merged_tif directory for all projects + merged_tif_folder <- here(laravel_storage_dir, "merged_tif") - # Detect tile mode based on metadata from script 10 or file patterns - merged_final_dir <- here(laravel_storage_dir, "merged_final_tif") + # Detect tile mode based on file patterns daily_tiles_split_dir <- here(laravel_storage_dir, "daily_tiles_split") - tile_detection <- detect_tile_structure_from_merged_final( - merged_final_tif_dir = merged_final_dir, - daily_tiles_split_dir = daily_tiles_split_dir - ) - use_tile_mosaic <- tile_detection$has_tiles + # Simplified: only 
check daily_tiles_split for per-field structure + use_tile_mosaic <- dir.exists(daily_tiles_split_dir) && length(list.dirs(daily_tiles_split_dir, full.names = FALSE, recursive = FALSE)) > 0 # Main subdirectories dirs <- list( @@ -235,14 +229,12 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif_8b" logs = here(laravel_storage_dir, "logs"), data = here(laravel_storage_dir, "Data"), tif = list( - merged = merged_tif_folder, # Use data_source parameter to select folder - final = merged_final_dir + merged = merged_tif_folder ), # New per-field directory structure (Script 10 output) field_tiles = here(laravel_storage_dir, "field_tiles"), field_tiles_ci = here(laravel_storage_dir, "field_tiles_CI"), weekly_mosaic = here(laravel_storage_dir, "weekly_mosaic"), - weekly_tile_max = here(laravel_storage_dir, "weekly_tile_max"), extracted_ci = list( base = here(laravel_storage_dir, "Data", "extracted_ci"), daily = here(laravel_storage_dir, "Data", "extracted_ci", "daily_vals"), @@ -275,17 +267,9 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif_8b" daily_vals_per_field_dir = dirs$extracted_ci$daily_per_field, # Field boundaries path for all scripts field_boundaries_path = here(laravel_storage_dir, "Data", "pivot.geojson"), - weekly_CI_mosaic = if (use_tile_mosaic) dirs$weekly_tile_max else dirs$weekly_mosaic, # SMART: Route based on tile detection + weekly_CI_mosaic = dirs$weekly_mosaic, # Per-field weekly mosaics (per-field architecture) daily_vrt = dirs$vrt, # Point to Data/vrt folder where R creates VRT files from CI extraction - weekly_tile_max = dirs$weekly_tile_max, # Per-tile weekly MAX mosaics (Script 04 output) use_tile_mosaic = use_tile_mosaic, # Flag indicating if tiles are used for this project - tile_detection_info = list( - has_tiles = tile_detection$has_tiles, - detected_source = tile_detection$source, - detected_count = tile_detection$total_files, - grid_size = tile_detection$grid_size %||% "unknown", - 
sample_tiles = head(tile_detection$detected_tiles, 3) - ), harvest_dir = dirs$harvest, extracted_CI_dir = dirs$extracted_ci$base )) @@ -536,42 +520,21 @@ format_week_label <- function(date, separator = "_") { sprintf("week%02d%s%d", wwy$week, separator, wwy$year) } -# Auto-detect mosaic mode (tiled vs single-file) -# Returns: "tiled", "single-file", or "unknown" +# Auto-detect mosaic mode +# For per-field architecture, always returns "single-file" (weekly_mosaic/{FIELD}/week_*.tif) detect_mosaic_mode <- function(project_dir) { - # Check for tile-based approach: weekly_tile_max/{grid_size}/week_*.tif - weekly_tile_max <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max") - if (dir.exists(weekly_tile_max)) { - subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE) - grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) - if (length(grid_patterns) > 0) { - return("tiled") - } - } - - # Check for single-file approach: weekly_mosaic/week_*.tif + # Per-field architecture uses single-file mosaics organized per-field weekly_mosaic <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic") if (dir.exists(weekly_mosaic)) { - files <- list.files(weekly_mosaic, pattern = "^week_.*\\.tif$") - if (length(files) > 0) { - return("single-file") - } + return("single-file") # Per-field structure } - return("unknown") } # Auto-detect grid size from tile directory structure -# Returns: e.g., "5x5", "10x10", or "unknown" +# For per-field architecture, returns "unknown" since grid-based organization is legacy detect_grid_size <- function(project_dir) { - weekly_tile_max <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max") - if (dir.exists(weekly_tile_max)) { - subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE) - grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) - if (length(grid_patterns) > 0) { - return(grid_patterns[1]) # Return first match 
(usually only one) - } - } + # Per-field architecture doesn't use grid-based organization anymore return("unknown") } @@ -582,20 +545,8 @@ get_project_storage_path <- function(project_dir, subdir = NULL) { } get_mosaic_dir <- function(project_dir, mosaic_mode = "auto") { - if (mosaic_mode == "auto") { - mosaic_mode <- detect_mosaic_mode(project_dir) - } - - if (mosaic_mode == "tiled") { - grid_size <- detect_grid_size(project_dir) - if (grid_size != "unknown") { - get_project_storage_path(project_dir, file.path("weekly_tile_max", grid_size)) - } else { - get_project_storage_path(project_dir, "weekly_tile_max/5x5") # Fallback default - } - } else { - get_project_storage_path(project_dir, "weekly_mosaic") - } + # Per-field architecture always uses weekly_mosaic (single-file, per-field organization) + get_project_storage_path(project_dir, "weekly_mosaic") } get_kpi_dir <- function(project_dir, client_type) { @@ -715,22 +666,8 @@ RSCRIPT_PATH <- "C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" # Detect data source (merged_tif vs merged_tif_8b) based on availability # Returns the first available source; defaults to merged_tif_8b if neither exists detect_data_source <- function(project_dir) { - storage_dir <- get_project_storage_path(project_dir) - - # Preferred order: check merged_tif first, fall back to merged_tif_8b - for (source in c("merged_tif", "merged_tif_8b")) { - source_dir <- file.path(storage_dir, source) - if (dir.exists(source_dir)) { - tifs <- list.files(source_dir, pattern = "\\.tif$") - if (length(tifs) > 0) { - smartcane_log(sprintf("Detected data source: %s (%d TIF files)", source, length(tifs))) - return(source) - } - } - } - - smartcane_warn(sprintf("No data source found for %s - defaulting to merged_tif_8b", project_dir)) - return("merged_tif_8b") + # Data source is always merged_tif for consistency + return("merged_tif") } # Check KPI completeness for a reporting period @@ -785,7 +722,7 @@ check_kpi_completeness <- function(project_dir, 
client_type, end_date, reporting # 9. Initialize the project # ---------------------- # Export project directories and settings -initialize_project <- function(project_dir, data_source = "merged_tif_8b") { +initialize_project <- function(project_dir, data_source = "merged_tif") { # Set up directory structure, passing data_source to select TIF folder dirs <- setup_project_directories(project_dir, data_source = data_source) @@ -816,7 +753,7 @@ if (exists("project_dir")) { log_message(paste("Initializing project with directory:", project_dir)) # Use data_source if it exists (passed from 02_ci_extraction.R), otherwise use default - data_src <- if (exists("data_source")) data_source else "merged_tif_8b" + data_src <- if (exists("data_source")) data_source else "merged_tif" log_message(paste("Using data source directory:", data_src)) project_config <- initialize_project(project_dir, data_source = data_src) diff --git a/r_app/report_utils.R b/r_app/report_utils.R index 8cabcbf..822293f 100644 --- a/r_app/report_utils.R +++ b/r_app/report_utils.R @@ -244,14 +244,14 @@ ci_plot <- function(pivotName, # Filter for the specific pivot AllPivots2 <- field_boundaries %>% dplyr::filter(field %in% pivotName) - # Create crop masks for different timepoints using terra functions - singlePivot <- terra::crop(current_ci, pivotShape) %>% terra::mask(., pivotShape) - singlePivot_m1 <- terra::crop(ci_minus_1, pivotShape) %>% terra::mask(., pivotShape) - singlePivot_m2 <- terra::crop(ci_minus_2, pivotShape) %>% terra::mask(., pivotShape) + # Per-field mosaics are already cropped to field boundaries, so use directly without cropping + singlePivot <- current_ci + singlePivot_m1 <- ci_minus_1 + singlePivot_m2 <- ci_minus_2 - # Create difference maps - abs_CI_last_week <- terra::crop(last_week_diff, pivotShape) %>% terra::mask(., pivotShape) - abs_CI_three_week <- terra::crop(three_week_diff, pivotShape) %>% terra::mask(., pivotShape) + # Use difference maps directly (already field-specific) + 
abs_CI_last_week <- last_week_diff + abs_CI_three_week <- three_week_diff # Get planting date planting_date <- harvesting_data %>% @@ -822,3 +822,144 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c }) } +#' Aggregate per-field weekly mosaics into a farm-level mosaic +#' +#' Reads all per-field mosaic TIFs for a given week and merges them into a single farm-level mosaic. +#' This is used for overview maps in the report (Script 90). +#' +#' Per-field mosaics already have proper geospatial metadata (CRS, geotransform) from Script 10, +#' so terra can align them automatically without needing field boundaries or extent information. +#' +#' @param weekly_mosaic_dir Path to weekly_mosaic directory (e.g., "laravel_app/storage/app/{project}/weekly_mosaic") +#' @param target_week ISO week number (e.g., 52) +#' @param target_year ISO year (e.g., 2025) +#' @return SpatRaster object (5-band: R,G,B,NIR,CI) or NULL if no fields found +#' +#' @details +#' Per-field mosaics are located at: weekly_mosaic/{FIELD}/week_WW_YYYY.tif +#' This function: +#' 1. Finds all per-field subdirectories +#' 2. Loads each field's weekly mosaic +#' 3. Merges to a single raster using terra::mosaic() (alignment handled automatically by metadata) +#' 4. 
Returns combined 5-band raster for visualization +#' +aggregate_per_field_mosaics_to_farm_level <- function( + weekly_mosaic_dir, + target_week, + target_year +) { + + tryCatch({ + + # Validate directory exists + if (!dir.exists(weekly_mosaic_dir)) { + safe_log(paste("Weekly mosaic directory not found:", weekly_mosaic_dir), "WARNING") + return(NULL) + } + + # Find all per-field subdirectories (non-TIF files at top level) + all_items <- list.files(weekly_mosaic_dir, full.names = FALSE) + field_dirs <- all_items[ + !grepl("\\.tif$", all_items, ignore.case = TRUE) & + dir.exists(file.path(weekly_mosaic_dir, all_items)) + ] + + if (length(field_dirs) == 0) { + safe_log(paste("No per-field directories found in", weekly_mosaic_dir), "WARNING") + return(NULL) + } + + safe_log(paste("Found", length(field_dirs), "field directories. Aggregating week", + sprintf("%02d", target_week), target_year), "INFO") + + # Collect rasters from each field + raster_list <- list() + + for (field_dir in field_dirs) { + field_mosaic_path <- file.path( + weekly_mosaic_dir, + field_dir, + paste0("week_", sprintf("%02d", target_week), "_", target_year, ".tif") + ) + + if (file.exists(field_mosaic_path)) { + tryCatch({ + r <- terra::rast(field_mosaic_path) + raster_list[[field_dir]] <- r + safe_log(paste("Loaded mosaic for field:", field_dir), "DEBUG") + }, error = function(e) { + safe_log(paste("Could not load mosaic for field", field_dir, ":", e$message), "WARNING") + }) + } + } + + if (length(raster_list) == 0) { + safe_log(paste("No field mosaics found for week", sprintf("%02d", target_week), target_year), "WARNING") + return(NULL) + } + + safe_log(paste("Successfully loaded mosaics for", length(raster_list), "fields"), "INFO") + + # Create a SpatRasterCollection and mosaic using correct terra syntax + tryCatch({ + rsrc <- terra::sprc(raster_list) + safe_log(paste("Created SpatRasterCollection with", length(raster_list), "rasters"), "DEBUG") + + # Mosaic the rasters - this merges them into a 
single continuous raster + farm_mosaic <- terra::mosaic(rsrc) + + safe_log(paste("Aggregated", length(raster_list), "per-field mosaics into farm-level mosaic"), "INFO") + + # Verify mosaic was created successfully + if (is.null(farm_mosaic)) { + stop("mosaic() returned NULL") + } + + return(farm_mosaic) + + }, error = function(e) { + safe_log(paste("Error during mosaic creation:", e$message), "ERROR") + return(NULL) + }) + + }, error = function(e) { + safe_log(paste("Error aggregating per-field mosaics:", e$message), "ERROR") + return(NULL) + }) +} + + +#' Get per-field mosaic path (new per-field architecture) +#' +#' Returns the path to a per-field weekly mosaic for direct visualization. +#' Replaces the old cropping workflow: now we load the field's own mosaic instead of cropping farm-level. +#' +#' @param weekly_mosaic_dir Path to weekly_mosaic directory +#' @param field_name Name of the field +#' @param target_week ISO week number +#' @param target_year ISO year +#' @return Path to field-specific mosaic TIF, or NULL if not found +#' +get_per_field_mosaic_path <- function( + weekly_mosaic_dir, + field_name, + target_week, + target_year +) { + + path <- file.path( + weekly_mosaic_dir, + field_name, + paste0("week_", sprintf("%02d", target_week), "_", target_year, ".tif") + ) + + if (file.exists(path)) { + return(path) + } else { + safe_log(paste("Per-field mosaic not found for field", field_name, + "week", sprintf("%02d", target_week), target_year), "WARNING") + return(NULL) + } +} + + diff --git a/r_app/run_full_pipeline.R b/r_app/run_full_pipeline.R index 30f1819..4a06d14 100644 --- a/r_app/run_full_pipeline.R +++ b/r_app/run_full_pipeline.R @@ -32,32 +32,25 @@ # *** EDIT THESE VARIABLES *** end_date <- as.Date("2026-01-07") # or specify: as.Date("2026-01-27") , Sys.Date() project_dir <- "aura" # project name: "esa", "aura", "angata", "chemba" -data_source <- if (project_dir == "angata") "merged_tif_8b" else "merged_tif" +data_source <- "merged_tif" # Standard 
data source directory force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist # *************************** +# Define Rscript path for running external R scripts via system() +RSCRIPT_PATH <- file.path("C:", "Program Files", "R", "R-4.4.3", "bin", "x64", "Rscript.exe") + # Load client type mapping from parameters_project.R source("r_app/parameters_project.R") client_type <- get_client_type(project_dir) cat(sprintf("\nProject: %s → Client Type: %s\n", project_dir, client_type)) -# ============================================================================== -# DETECT WHICH DATA SOURCE IS AVAILABLE (merged_tif vs merged_tif_8b) -# ============================================================================== -# Check which merged_tif folder actually has files for this project -# Uses centralized detection function from parameters_project.R -# NOTE: Old code below commented out - now handled by detect_data_source() -# laravel_storage_dir <- file.path("laravel_app", "storage", "app", project_dir) -# merged_tif_path <- file.path(laravel_storage_dir, "merged_tif") -data_source_used <- detect_data_source(project_dir) - # ============================================================================== # DETERMINE REPORTING WINDOW (auto-calculated based on KPI requirements) # ============================================================================== # Script 80 (KPIs) needs N weeks of historical data for trend analysis and reporting # We calculate this automatically based on client type -reporting_weeks_needed <- 4 # Default: KPIs need current week + 3 weeks history for trends -offset <- (reporting_weeks_needed - 1) * 7 # Convert weeks to days +reporting_weeks_needed <- 1 # Default: KPIs need current week of data for trends +offset <- reporting_weeks_needed * 7 # Convert weeks to days (minimum 7 days for 1 week) cat(sprintf("\n[INFO] Reporting window: %d weeks (%d days of data)\n", reporting_weeks_needed, offset)) wwy_current <- 
get_iso_week_year(end_date) @@ -99,7 +92,8 @@ for (i in 1:nrow(weeks_needed)) { check_date <- weeks_needed[i, "date"] # Pattern must be flexible to match both: - # - Single-file: week_51_2025.tif + # - Single-file: week_51_2025.tif (top-level) + # - Single-file per-field: week_51_2025.tif (in {FIELD}/ subdirectories) # - Tiled: week_51_2025_01.tif, week_51_2025_02.tif, etc. week_pattern_check <- sprintf("week_%02d_%d", week_num, year_num) files_this_week <- c() @@ -107,12 +101,15 @@ for (i in 1:nrow(weeks_needed)) { if (mosaic_mode == "tiled") { mosaic_dir_check <- get_mosaic_dir(project_dir, mosaic_mode = "tiled") if (dir.exists(mosaic_dir_check)) { - files_this_week <- list.files(mosaic_dir_check, pattern = week_pattern_check) + # NEW: Support per-field architecture - search recursively for mosaics in field subdirectories + files_this_week <- list.files(mosaic_dir_check, pattern = week_pattern_check, recursive = TRUE, full.names = FALSE) } } else if (mosaic_mode == "single-file") { mosaic_dir_check <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic") if (dir.exists(mosaic_dir_check)) { - files_this_week <- list.files(mosaic_dir_check, pattern = week_pattern_check) + # NEW: Support per-field architecture - search recursively for mosaics in field subdirectories + # Check both top-level (legacy) and field subdirectories (per-field architecture) + files_this_week <- list.files(mosaic_dir_check, pattern = week_pattern_check, recursive = TRUE, full.names = FALSE) } } @@ -302,19 +299,31 @@ cat("\n========== DOWNLOADING PLANET IMAGES (MISSING DATES ONLY) ==========\n") tryCatch( { # Setup paths + # NOTE: All downloads go to merged_tif/ regardless of project + # (data_source variable is used later by Script 20 for reading, but downloads always go to merged_tif) base_path <- file.path("laravel_app", "storage", "app", project_dir) - merged_tifs_dir <- file.path(base_path, data_source) + merged_tifs_dir <- file.path(base_path, "merged_tif") # Always 
check merged_tif for downloads + + cat(sprintf("[DEBUG] Checking for existing files in: %s\n", merged_tifs_dir)) + cat(sprintf("[DEBUG] Directory exists: %s\n", dir.exists(merged_tifs_dir))) - # Get existing dates from raw TIFFs + # Get existing dates from raw TIFFs in merged_tif/ existing_tiff_files <- list.files(merged_tifs_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.tif$") existing_tiff_dates <- sub("\\.tif$", "", existing_tiff_files) + + cat(sprintf("[DEBUG] Found %d existing TIFF files\n", length(existing_tiff_files))) + if (length(existing_tiff_files) > 0) { + cat(sprintf("[DEBUG] Sample files: %s\n", paste(head(existing_tiff_files, 3), collapse=", "))) + } # Get existing dates from tiles (better indicator of completion for tiled projects) existing_tile_dates <- tiles_dates - # For single-file projects, use raw TIFF files as the indicator instead - # This prevents re-downloading data that already exists - if (mosaic_mode == "single-file" && length(existing_tiff_dates) > 0) { + # CRITICAL FIX: Always use TIFF dates for checking existing files + # This is the source of truth - if merged_tif/ has a file, don't re-download it + # We don't download again if the file exists, regardless of whether tiles have been created yet + if (length(existing_tiff_dates) > 0) { + cat(sprintf("[DEBUG] Using TIFF dates for existence check (found %d existing files)\n", length(existing_tiff_dates))) existing_tile_dates <- existing_tiff_dates } @@ -375,56 +384,43 @@ tryCatch( ) # ============================================================================== -# SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs +# SCRIPT 10: CREATE PER-FIELD TIFFs # ============================================================================== if (pipeline_success && !skip_10) { - cat("\n========== RUNNING SCRIPT 10: CREATE MASTER GRID AND SPLIT TIFFs ==========\n") + cat("\n========== RUNNING SCRIPT 10: CREATE PER-FIELD TIFFs ==========\n") tryCatch( { - # CRITICAL: Save global variables before sourcing 
Script 10 (it overwrites end_date, offset, etc.) - saved_end_date <- end_date - saved_offset <- offset # Use FULL offset for tiling (not dynamic_offset) - saved_project_dir <- project_dir - saved_data_source <- data_source + # Run Script 10 via system() - NEW per-field version + # Arguments: project_dir + cmd <- sprintf( + '"%s" --vanilla r_app/10_create_per_field_tiffs.R "%s"', + RSCRIPT_PATH, + project_dir + ) + result <- system(cmd) - # Set environment variables for the script (Script 10 uses these for filtering) - assign("PROJECT", project_dir, envir = .GlobalEnv) - assign("end_date", end_date, envir = .GlobalEnv) - assign("offset", offset, envir = .GlobalEnv) # Full reporting window - - # Suppress verbose per-date output, show only summary - sink(nullfile()) - source("r_app/10_create_master_grid_and_split_tiffs.R") - sink() - - # CRITICAL: Restore global variables after sourcing Script 10 - end_date <- saved_end_date - offset <- saved_offset - project_dir <- saved_project_dir - data_source <- saved_data_source - - # Verify output - auto-detect grid size - grid_size <- detect_grid_size(project_dir) - tiles_dir <- if (grid_size != "unknown") { - file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", grid_size) - } else { - file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split", "5x5") + if (result != 0) { + stop("Script 10 exited with error code:", result) } - if (dir.exists(tiles_dir)) { - subdirs <- list.dirs(tiles_dir, full.names = FALSE, recursive = FALSE) - cat(sprintf("✓ Script 10 completed - created tiles for %d dates\n", length(subdirs))) + + # Verify output - check per-field structure + field_tiles_dir <- file.path("laravel_app", "storage", "app", project_dir, "field_tiles") + if (dir.exists(field_tiles_dir)) { + fields <- list.dirs(field_tiles_dir, full.names = FALSE, recursive = FALSE) + fields <- fields[fields != ""] + total_files <- sum(sapply(file.path(field_tiles_dir, fields), function(f) 
length(list.files(f, pattern = "\\.tif$")))) + cat(sprintf("✓ Script 10 completed - created per-field TIFFs (%d fields, %d files)\n", length(fields), total_files)) } else { cat("✓ Script 10 completed\n") } }, error = function(e) { - sink() cat("✗ Error in Script 10:", e$message, "\n") pipeline_success <<- FALSE } ) } else if (skip_10) { - cat("\n========== SKIPPING SCRIPT 10 (tiles already exist) ==========\n") + cat("\n========== SKIPPING SCRIPT 10 (per-field TIFFs already exist) ==========\n") } # ============================================================================== @@ -435,12 +431,12 @@ if (pipeline_success && !skip_20) { tryCatch( { # Run Script 20 via system() to pass command-line args just like from terminal - # Arguments: end_date offset project_dir data_source + # Arguments: project_dir end_date offset # Use FULL offset so CI extraction covers entire reporting window (not just new data) cmd <- sprintf( - '"%s" --vanilla r_app/20_ci_extraction.R "%s" %d "%s" "%s"', + '"%s" --vanilla r_app/20_ci_extraction_per_field.R "%s" "%s" %d', RSCRIPT_PATH, - format(end_date, "%Y-%m-%d"), offset, project_dir, data_source + project_dir, format(end_date, "%Y-%m-%d"), offset ) result <- system(cmd) @@ -507,12 +503,12 @@ if (pipeline_success && !skip_30) { tryCatch( { # Run Script 30 via system() to pass command-line args just like from terminal - # Script 30 expects: project_dir data_source as arguments - # Pass the same data_source that Script 20 is using + # Script 30 expects: project_dir only + # Per-field version reads CI data from Script 20 per-field output location cmd <- sprintf( - '"%s" --vanilla r_app/30_interpolate_growth_model.R "%s" "%s"', + '"%s" --vanilla r_app/30_interpolate_growth_model.R "%s"', RSCRIPT_PATH, - project_dir, data_source_used + project_dir ) result <- system(cmd) @@ -601,11 +597,11 @@ if (pipeline_success && !skip_40) { { # Run Script 40 with offset=7 (one week only) for this specific week # The end_date is the last day of the week, 
and offset=7 covers the full 7-day week - # IMPORTANT: Pass data_source so Script 40 uses the correct folder (not auto-detect which can be wrong) + # Arguments: end_date offset project_dir cmd <- sprintf( - '"%s" --vanilla r_app/40_mosaic_creation.R "%s" 7 "%s" "" "%s"', + '"%s" --vanilla r_app/40_mosaic_creation_per_field.R "%s" 7 "%s"', RSCRIPT_PATH, - format(week_end_date, "%Y-%m-%d"), project_dir, data_source + format(week_end_date, "%Y-%m-%d"), project_dir ) result <- system(cmd) @@ -626,7 +622,8 @@ if (pipeline_success && !skip_40) { mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic") if (dir.exists(mosaic_dir)) { week_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year_num) - mosaic_files <- list.files(mosaic_dir, pattern = week_pattern) + # NEW: Support per-field architecture - search recursively for mosaics in field subdirectories + mosaic_files <- list.files(mosaic_dir, pattern = week_pattern, recursive = TRUE, full.names = FALSE) mosaic_created <- length(mosaic_files) > 0 } } @@ -743,7 +740,8 @@ if (dir.exists(kpi_dir)) { # Check for any KPI file from that week week_pattern <- sprintf("week%02d_%d", week_num, year_num) - kpi_files_this_week <- list.files(kpi_dir, pattern = week_pattern) + # NEW: Support per-field architecture - search recursively for KPI files in field subdirectories + kpi_files_this_week <- list.files(kpi_dir, pattern = week_pattern, recursive = TRUE, full.names = FALSE) if (length(kpi_files_this_week) == 0) { kpis_complete <- FALSE