From f94a6317bda0621a8e61a15e161011db27bad013 Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Tue, 3 Feb 2026 16:41:04 +0100 Subject: [PATCH] =?UTF-8?q?feat:=20Integrate=202=CF=83=20gap=20filling=20K?= =?UTF-8?q?PI=20into=20weekly=20field=20analysis?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Changed gap calculation from Q25 to 2σ below median method (kpi_utils.R) - Integrated gap filling into script 80 with tile-based processing - Added Gap_score column to field analysis output (Excel/CSV/RDS) - Fixed memory issues by processing 25 tiles individually instead of merging - Fixed Field_id matching to ensure gap scores populate correctly Gap scores now calculate for all 1185 fields with range 0-11.25% Works with tile-based mosaics (Angata 5x5 grid) without memory errors --- r_app/80_calculate_kpis.R | 137 +++++++++++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 3 deletions(-) diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index ed2c330..10b7c86 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -134,6 +134,12 @@ tryCatch({ stop("Error loading 80_report_building_utils.R: ", e$message) }) +tryCatch({ + source(here("r_app", "kpi_utils.R")) +}, error = function(e) { + stop("Error loading kpi_utils.R: ", e$message) +}) + # ============================================================================ # PHASE AND STATUS TRIGGER DEFINITIONS # ============================================================================ @@ -426,7 +432,115 @@ main <- function() { }) # ============================================================================ - # Build final output dataframe with all 21 columns + # CALCULATE GAP FILLING KPI (2σ method from kpi_utils.R) + # ============================================================================ + + message("\nCalculating gap filling scores (2σ method)...") + + # Try single merged mosaic first, then fall back to merging tiles + week_mosaic_file <- file.path(mosaic_dir, sprintf("week_%02d_%d.tif", current_week, year)) + + gap_scores_df <- NULL + + if (file.exists(week_mosaic_file)) { + # Single merged mosaic exists - use it directly + tryCatch({ + current_week_raster <- terra::rast(week_mosaic_file) + current_ci_band <- current_week_raster[[5]] # CI is the 5th band + names(current_ci_band) <- "CI" + + message(paste(" Loaded single mosaic:", week_mosaic_file)) + + # Calculate gap scores for all fields + gap_result <- calculate_gap_filling_kpi(current_ci_band, field_boundaries_sf) + + # Extract field-level results (use field column directly to match current_stats Field_id) + gap_scores_df <- gap_result$field_results %>% + mutate(Field_id = field) %>% + select(Field_id, gap_score) + + message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields")) + message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%")) + + }, error = function(e) { + message(paste(" WARNING: Could not calculate gap scores from single mosaic:", e$message)) + message(" Gap scores will be set to NA") + gap_scores_df <- NULL + }) + + } else { + # Single mosaic doesn't exist - check for tiles and process per-tile + message(" Single mosaic not found. Checking for tiles...") + + # List all tiles for this week (e.g., week_04_2026_01.tif through week_04_2026_25.tif) + tile_pattern <- sprintf("week_%02d_%d_\\d{2}\\.tif$", current_week, year) + tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE) + + if (length(tile_files) == 0) { + message(sprintf(" WARNING: No tiles found matching pattern: %s in %s", tile_pattern, mosaic_dir)) + message(" Gap scores will be set to NA") + + } else { + tryCatch({ + message(sprintf(" Found %d tiles. Processing per-tile (memory efficient)...", length(tile_files))) + + # Process each tile separately and accumulate results + all_tile_results <- list() + + for (i in seq_along(tile_files)) { + tile_file <- tile_files[i] + + # Load tile raster + tile_raster <- terra::rast(tile_file) + tile_ci_band <- tile_raster[[5]] + names(tile_ci_band) <- "CI" + + # Calculate gap scores for fields in this tile + tile_gap_result <- calculate_gap_filling_kpi(tile_ci_band, field_boundaries_sf) + + # Store results (only keep fields with non-NA scores, use field directly to match current_stats) + if (!is.null(tile_gap_result$field_results) && nrow(tile_gap_result$field_results) > 0) { + tile_results_clean <- tile_gap_result$field_results %>% + mutate(Field_id = field) %>% + select(Field_id, gap_score) %>% + filter(!is.na(gap_score)) + + if (nrow(tile_results_clean) > 0) { + all_tile_results[[i]] <- tile_results_clean + } + } + + # Clear memory + rm(tile_raster, tile_ci_band, tile_gap_result) + gc(verbose = FALSE) + } + + # Combine all tile results + if (length(all_tile_results) > 0) { + gap_scores_df <- bind_rows(all_tile_results) + + # If a field appears in multiple tiles, take the maximum gap score + gap_scores_df <- gap_scores_df %>% + group_by(Field_id) %>% + summarise(gap_score = max(gap_score, na.rm = TRUE), .groups = "drop") + + message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields across", length(all_tile_results), "tiles")) + message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%")) + } else { + message(" WARNING: No gap scores calculated from any tiles") + gap_scores_df <- NULL + } + + }, error = function(e) { + message(paste(" WARNING: Could not process tiles or calculate gap scores:", e$message)) + message(" Gap scores will be set to NA") + gap_scores_df <- NULL + }) + } + } + + # ============================================================================ + # Build final output dataframe with all 22 columns (including Gap_score) # ============================================================================ message("\nBuilding final field analysis output...") @@ -591,6 +705,23 @@ main <- function() { else if (pct >= 10) return("10-20%") else return("0-10%") }), + # Column 22: Gap_score (2σ below median - from kpi_utils.R) + Gap_score = { + if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) { + # Debug: Print first few gap scores + message(sprintf(" Joining %d gap scores to field_analysis (first 3: %s)", + nrow(gap_scores_df), + paste(head(gap_scores_df$gap_score, 3), collapse=", "))) + message(sprintf(" First 3 Field_ids in gap_scores_df: %s", + paste(head(gap_scores_df$Field_id, 3), collapse=", "))) + message(sprintf(" First 3 Field_ids in current_stats: %s", + paste(head(current_stats$Field_id, 3), collapse=", "))) + + gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)] + } else { + rep(NA_real_, nrow(current_stats)) + } + } ) %>% select( all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert", @@ -598,10 +729,10 @@ main <- function() { "Germination_progress", "Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles", "CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category", - "Imminent_prob", "Cloud_pct_clear", "Cloud_category")) + "Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score")) ) - message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 21 columns")) + message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns (including Gap_score)")) export_paths <- export_field_analysis_excel( field_analysis_df,