From f94a6317bda0621a8e61a15e161011db27bad013 Mon Sep 17 00:00:00 2001
From: DimitraVeropoulou <dimitra_ver@yahoo.com>
Date: Tue, 3 Feb 2026 16:41:04 +0100
Subject: [PATCH] =?UTF-8?q?feat:=20Integrate=202=CF=83=20gap=20filling=20K?=
 =?UTF-8?q?PI=20into=20weekly=20field=20analysis?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Changed gap calculation from Q25 to 2σ below median method (kpi_utils.R)
- Integrated gap filling into script 80 with tile-based processing
- Added Gap_score column to field analysis output (Excel/CSV/RDS)
- Fixed memory issues by processing 25 tiles individually instead of merging
- Fixed Field_id matching to ensure gap scores populate correctly

Gap scores now calculate for all 1185 fields with range 0-11.25%
Works with tile-based mosaics (Angata 5x5 grid) without memory errors
---
 r_app/80_calculate_kpis.R | 137 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 134 insertions(+), 3 deletions(-)

diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R
index ed2c330..10b7c86 100644
--- a/r_app/80_calculate_kpis.R
+++ b/r_app/80_calculate_kpis.R
@@ -134,6 +134,12 @@ tryCatch({
   stop("Error loading 80_report_building_utils.R: ", e$message)
 })
 
+tryCatch({
+  source(here("r_app", "kpi_utils.R"))
+}, error = function(e) {
+  stop("Error loading kpi_utils.R: ", e$message)
+})
+
 # ============================================================================
 # PHASE AND STATUS TRIGGER DEFINITIONS
 # ============================================================================
@@ -426,7 +432,115 @@ main <- function() {
   })
   
   # ============================================================================
-  # Build final output dataframe with all 21 columns
+  # CALCULATE GAP FILLING KPI (2σ method from kpi_utils.R)
+  # ============================================================================
+  
+  message("\nCalculating gap filling scores (2σ method)...")
+  
+  # Try single merged mosaic first, then fall back to merging tiles
+  week_mosaic_file <- file.path(mosaic_dir, sprintf("week_%02d_%d.tif", current_week, year))
+  
+  gap_scores_df <- NULL
+  
+  if (file.exists(week_mosaic_file)) {
+    # Single merged mosaic exists - use it directly
+    tryCatch({
+      current_week_raster <- terra::rast(week_mosaic_file)
+      current_ci_band <- current_week_raster[[5]]  # CI is the 5th band
+      names(current_ci_band) <- "CI"
+      
+      message(paste("  Loaded single mosaic:", week_mosaic_file))
+      
+      # Calculate gap scores for all fields
+      gap_result <- calculate_gap_filling_kpi(current_ci_band, field_boundaries_sf)
+      
+      # Extract field-level results (use field column directly to match current_stats Field_id)
+      gap_scores_df <- gap_result$field_results %>%
+        mutate(Field_id = field) %>%
+        select(Field_id, gap_score)
+      
+      message(paste("  ✓ Calculated gap scores for", nrow(gap_scores_df), "fields"))
+      message(paste("    Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%"))
+      
+    }, error = function(e) {
+      message(paste("  WARNING: Could not calculate gap scores from single mosaic:", e$message))
+      message("  Gap scores will be set to NA")
+      gap_scores_df <- NULL
+    })
+    
+  } else {
+    # Single mosaic doesn't exist - check for tiles and process per-tile
+    message("  Single mosaic not found. Checking for tiles...")
+    
+    # List all tiles for this week (e.g., week_04_2026_01.tif through week_04_2026_25.tif)
+    tile_pattern <- sprintf("week_%02d_%d_\\d{2}\\.tif$", current_week, year)
+    tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
+    
+    if (length(tile_files) == 0) {
+      message(sprintf("  WARNING: No tiles found matching pattern: %s in %s", tile_pattern, mosaic_dir))
+      message("  Gap scores will be set to NA")
+      
+    } else {
+      tryCatch({
+        message(sprintf("  Found %d tiles. Processing per-tile (memory efficient)...", length(tile_files)))
+        
+        # Process each tile separately and accumulate results
+        all_tile_results <- list()
+        
+        for (i in seq_along(tile_files)) {
+          tile_file <- tile_files[i]
+          
+          # Load tile raster
+          tile_raster <- terra::rast(tile_file)
+          tile_ci_band <- tile_raster[[5]]
+          names(tile_ci_band) <- "CI"
+          
+          # Calculate gap scores for fields in this tile
+          tile_gap_result <- calculate_gap_filling_kpi(tile_ci_band, field_boundaries_sf)
+          
+          # Store results (only keep fields with non-NA scores, use field directly to match current_stats)
+          if (!is.null(tile_gap_result$field_results) && nrow(tile_gap_result$field_results) > 0) {
+            tile_results_clean <- tile_gap_result$field_results %>%
+              mutate(Field_id = field) %>%
+              select(Field_id, gap_score) %>%
+              filter(!is.na(gap_score))
+            
+            if (nrow(tile_results_clean) > 0) {
+              all_tile_results[[i]] <- tile_results_clean
+            }
+          }
+          
+          # Clear memory
+          rm(tile_raster, tile_ci_band, tile_gap_result)
+          gc(verbose = FALSE)
+        }
+        
+        # Combine all tile results
+        if (length(all_tile_results) > 0) {
+          gap_scores_df <- bind_rows(all_tile_results)
+          
+          # If a field appears in multiple tiles, take the maximum gap score
+          gap_scores_df <- gap_scores_df %>%
+            group_by(Field_id) %>%
+            summarise(gap_score = max(gap_score, na.rm = TRUE), .groups = "drop")
+          
+          message(paste("  ✓ Calculated gap scores for", nrow(gap_scores_df), "fields across", length(all_tile_results), "tiles"))
+          message(paste("    Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%"))
+        } else {
+          message("  WARNING: No gap scores calculated from any tiles")
+          gap_scores_df <- NULL
+        }
+        
+      }, error = function(e) {
+        message(paste("  WARNING: Could not process tiles or calculate gap scores:", e$message))
+        message("  Gap scores will be set to NA")
+        gap_scores_df <- NULL
+      })
+    }
+  }
+  
+  # ============================================================================
+  # Build final output dataframe with all 22 columns (including Gap_score)
   # ============================================================================
   
   message("\nBuilding final field analysis output...")
@@ -591,6 +705,23 @@ main <- function() {
         else if (pct >= 10) return("10-20%")
         else return("0-10%")
       }),
+      # Column 22: Gap_score (2σ below median - from kpi_utils.R)
+      Gap_score = {
+        if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
+          # Debug: Print first few gap scores
+          message(sprintf("  Joining %d gap scores to field_analysis (first 3: %s)", 
+                         nrow(gap_scores_df),
+                         paste(head(gap_scores_df$gap_score, 3), collapse=", ")))
+          message(sprintf("  First 3 Field_ids in gap_scores_df: %s", 
+                         paste(head(gap_scores_df$Field_id, 3), collapse=", ")))
+          message(sprintf("  First 3 Field_ids in current_stats: %s", 
+                         paste(head(current_stats$Field_id, 3), collapse=", ")))
+          
+          gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)]
+        } else {
+          rep(NA_real_, nrow(current_stats))
+        }
+      }
     ) %>%
     select(
       all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert",
@@ -598,10 +729,10 @@ main <- function() {
                "Germination_progress",
                "Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles",
                "CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
-               "Imminent_prob", "Cloud_pct_clear", "Cloud_category"))
+               "Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score"))
     )
   
-  message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 21 columns"))
+  message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns (including Gap_score)"))
   
   export_paths <- export_field_analysis_excel(
     field_analysis_df,