# SmartCane/r_app/80_calculate_kpis.R

# ============================================================================
# SCRIPT 80: Key Performance Indicator (KPI) Calculation
# ============================================================================
# PURPOSE:
# Calculate per-field and farm-level KPIs from weekly CI mosaics. Computes
# field uniformity, growth trends (4-week and 8-week), area change detection,
# TCH forecasts, stress identification, and weed presence. Generates
# comprehensive Excel/CSV/RDS exports for dashboards and stakeholder reporting.
#
# INPUT DATA:
# - Source 1: laravel_app/storage/app/{project}/weekly_mosaic/{FIELD}/week_*.tif
# - Source 2: Field boundaries (pivot.geojson) and harvest data (harvest.xlsx)
# - Source 3: Historical field stats (RDS from previous weeks)
#
# OUTPUT DATA:
# - Destination: laravel_app/storage/app/{project}/output/
# - Format: Excel (.xlsx), CSV (.csv), RDS (.rds)
# - Files: {project}_field_analysis_week{WW}_{YYYY}.xlsx + metadata
#
# USAGE:
# Rscript 80_calculate_kpis.R [end_date] [project_dir] [offset_days]
#
# Example (Windows PowerShell):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-01-12 angata 7
#
# PARAMETERS:
# - end_date: Report end date, YYYY-MM-DD (default: today via Sys.Date())
# - project_dir: Project name (character) - angata, chemba, xinavane, esa, simba (default: angata)
# - offset_days: Look-back offset in days (numeric, default 7; kept for backward compatibility)
#
# CLIENT TYPES:
# - cane_supply (ANGATA): Yes - uses 80_utils_cane_supply.R (placeholder)
# - agronomic_support (AURA): Yes - uses 80_utils_agronomic_support.R (6 KPI funcs)
#
# DEPENDENCIES:
# - Packages: terra, sf, tidyverse, lubridate, writexl, spdep
# - Utils files: parameters_project.R, 00_common_utils.R, 80_utils_agronomic_support.R, 80_utils_cane_supply.R
# - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx)
# - Input data: Weekly mosaic TIFFs (Script 40 output)
# - Data directories: weekly_mosaic/, output/ (created if missing)
#
# NOTES:
# - KPIs calculated: Field Uniformity (CV), Area Change (pixel %), TCH Forecast,
# Growth/Decline Trend, Weed Presence (spatial autocorrelation), Gap Filling %
# - Client-aware: Conditional sourcing based on client_config$script_90_compatible
# - Exports: 22-column output with field-level metrics, phase, status, alerts
# - Supports 4-week and 8-week trend analysis from historical RDS cache
# - Critical dependency for Scripts 90/91 (reporting/dashboards)
# - Uses Moran's I for spatial clustering detection (weed/stress patterns)
#
#
# ============================================================================
# [✓] Extract planting dates per field
# [✓] Calculate Age_week = difftime(report_date, planting_date, units="weeks")
#
# COMMAND-LINE USAGE:
# Option 1: Rscript 80_calculate_kpis.R 2026-01-14 angata
# Arguments: [end_date] [project_dir]
#
# Option 2: Rscript 80_calculate_kpis.R 2026-01-14 angata 7
# Arguments: [end_date] [project_dir] [offset_days]
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-01-12 angata 7
#
# Usage in run_full_pipeline.R:
# source("r_app/80_calculate_kpis.R")
# main()
# ============================================================================
# NEXT INTEGRATIONS (See Linear issues for detailed requirements)
# ============================================================================
# 1. [✓] Load imminent_prob from script 31 (week_WW_YYYY.csv)
# 2. [✓] Load planting_date from harvest.xlsx for field-specific age calculation
# 3. [ ] Improve Status_trigger logic to use actual imminent_prob values
# ============================================================================
# ============================================================================
# CONFIGURATION
# ============================================================================
# FOUR-WEEK CI TREND THRESHOLDS
# Band edges for classifying the 4-week CI slope (names imply: strong growth /
# growth / no growth / decline / strong decline). The classification itself is
# performed by the trend utilities, not in this file — confirm usage there.
FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5
FOUR_WEEK_TREND_GROWTH_MIN <- 0.1
FOUR_WEEK_TREND_GROWTH_MAX <- 0.5
FOUR_WEEK_TREND_NO_GROWTH_RANGE <- 0.1
FOUR_WEEK_TREND_DECLINE_MAX <- -0.1
FOUR_WEEK_TREND_DECLINE_MIN <- -0.5
FOUR_WEEK_TREND_STRONG_DECLINE_MAX <- -0.5
# CV TREND THRESHOLDS
# Presumably the minimum |CV change| treated as a significant short-term
# uniformity shift — consumer code is not in this file; confirm in utils.
CV_TREND_THRESHOLD_SIGNIFICANT <- 0.05
# CV_TREND_LONG_TERM (8-WEEK SLOPE) INTERPRETATION THRESHOLDS
# Interpretation: Slope of CV over 8 weeks indicates field uniformity trend
# Negative slope = CV decreasing = field becoming MORE uniform = GOOD
# Positive slope = CV increasing = field becoming MORE patchy = BAD
# Near zero = Homogeneous growth (all crops progressing equally)
# NOTE(review): the 3-class CV_Trend_Long_Term_Category computed in main()
# hard-codes +/-0.01 rather than referencing these constants — consider
# consolidating so the thresholds cannot drift apart.
CV_SLOPE_STRONG_IMPROVEMENT_MIN <- -0.03 # CV decreasing rapidly (>3% drop over 8 weeks)
CV_SLOPE_IMPROVEMENT_MIN <- -0.02 # CV decreasing (2-3% drop over 8 weeks)
CV_SLOPE_IMPROVEMENT_MAX <- -0.01 # Gradual improvement (1-2% drop over 8 weeks)
CV_SLOPE_HOMOGENOUS_MIN <- -0.01 # Essentially stable (small natural variation)
CV_SLOPE_HOMOGENOUS_MAX <- 0.01 # No change in uniformity (within ±1% over 8 weeks)
CV_SLOPE_PATCHINESS_MIN <- 0.01 # Minor divergence (1-2% increase over 8 weeks)
CV_SLOPE_PATCHINESS_MAX <- 0.02 # Growing patchiness (2-3% increase over 8 weeks)
CV_SLOPE_SEVERE_MIN <- 0.02 # Severe fragmentation (>2% increase over 8 weeks)
# PERCENTILE CALCULATIONS
# Quantile levels — presumably feed the CI_Percentiles output column
# (10th / 90th percentile of per-field CI); confirm in the stats utilities.
CI_PERCENTILE_LOW <- 0.10
CI_PERCENTILE_HIGH <- 0.90
# GERMINATION THRESHOLD (for germination_progress calculation)
# A pixel with CI >= this value counts as germinated (cf. Pct_pixels_CI_gte_2
# used for the Germination_progress binning in main()).
GERMINATION_CI_THRESHOLD <- 2.0
# PLANTING DATE & AGE CONFIGURATION
# Load from harvest.xlsx (scripts 22 & 23) - no fallback to uniform dates
# HISTORICAL DATA LOOKBACK
# Weeks of cached history per trend window; main() loads
# max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG) = 8 weeks.
WEEKS_FOR_FOUR_WEEK_TREND <- 4
WEEKS_FOR_CV_TREND_SHORT <- 2
WEEKS_FOR_CV_TREND_LONG <- 8
# ============================================================================
# 1. Load required libraries
# ============================================================================
# NOTE(review): attach order affects name masking (packages attached later win,
# e.g. dplyr after terra/sf) — keep the order stable. torch is optional: its
# load is wrapped in tryCatch so a missing install degrades gracefully instead
# of aborting the whole pipeline.
suppressPackageStartupMessages({
  # File path handling
  library(here)      # For relative path resolution (platform-independent file paths)
  # Spatial data handling
  library(sf)        # For reading/manipulating field boundaries (GeoJSON)
  library(terra)     # For raster operations (reading mosaic TIFFs)
  # Data manipulation
  library(dplyr)     # For data wrangling (filter, mutate, group_by, summarize)
  library(tidyr)     # For data reshaping (pivot_longer, pivot_wider, gather)
  library(lubridate) # For date/time operations (week extraction, date arithmetic)
  # File I/O
  library(readr)     # For reading CSV files (harvest predictions from Python)
  library(readxl)    # For reading harvest.xlsx (harvest dates for field mapping)
  library(writexl)   # For writing Excel outputs (KPI summary tables)
  library(progress)  # For progress bars during field processing
  # ML/Analysis (optional - only for harvest model inference)
  tryCatch({
    library(torch)   # For PyTorch model inference (harvest readiness prediction)
  }, error = function(e) {
    message("Note: torch package not available - harvest model inference will be skipped")
  })
})
# ============================================================================
# LOAD CONFIGURATION - MUST BE DONE FIRST
# ============================================================================
# parameters_project.R must be sourced before anything else: it presumably
# defines setup_project_directories(), get_client_type() and
# get_client_kpi_config() used below and in main() — confirm, those
# definitions are not visible in this file.
tryCatch({
  source(here("r_app", "parameters_project.R"))
}, error = function(e) {
  stop("Error loading parameters_project.R: ", e$message)
})
# Get client configuration from global project setup
# NOTE: This cannot be done until parameters_project.R is sourced
# We determine client_type from the current project_dir (if running in main() context)
# For now, set a placeholder that will be overridden in main() — main()
# recomputes client_type/client_config from the actual CLI project_dir and
# assigns the result to the global environment.
if (exists("project_dir", envir = .GlobalEnv)) {
  temp_client_type <- get_client_type(get("project_dir", envir = .GlobalEnv))
} else {
  temp_client_type <- "cane_supply" # Safe default
}
temp_client_config <- get_client_kpi_config(temp_client_type)
# ============================================================================
# LOAD UTILITY FUNCTIONS FROM SEPARATED MODULES
# ============================================================================
# All clients use the common utilities (shared statistical functions, reporting)
tryCatch({
  source(here("r_app", "80_utils_common.R"))
}, error = function(e) {
  stop("Error loading 80_utils_common.R: ", e$message)
})
# Load BOTH client-specific utility modules unconditionally so the functions
# for either workflow exist before main() runs. This avoids needing to
# determine client type at startup time; main() later selects the workflow
# via client_config$script_90_compatible / script_91_compatible.
message("Loading client-specific utilities (80_utils_agronomic_support.R and 80_utils_cane_supply.R)...")
tryCatch({
  source(here("r_app", "80_utils_agronomic_support.R"))
}, error = function(e) {
  stop("Error loading 80_utils_agronomic_support.R: ", e$message)
})
tryCatch({
  source(here("r_app", "80_utils_cane_supply.R"))
}, error = function(e) {
  stop("Error loading 80_utils_cane_supply.R: ", e$message)
})
# ============================================================================
# PHASE AND STATUS TRIGGER DEFINITIONS
# ============================================================================
# Crop growth-phase lookup table: one row per phase with its age window in
# weeks since planting.
# NOTE(review): Germination (0-6) and Tillering (4-16) overlap by two weeks,
# and the inline Phase assignment in main() uses a different Germination
# cut-off (age < 4) — confirm which definition is authoritative.
PHASE_DEFINITIONS <- local({
  phase_names <- c("Germination", "Tillering", "Grand Growth", "Maturation")
  week_starts <- c(0, 4, 17, 39)
  week_ends <- c(6, 16, 39, 200)
  data.frame(
    phase = phase_names,
    age_start = week_starts,
    age_end = week_ends,
    stringsAsFactors = FALSE
  )
})
# Status-trigger reference table: each row names a trigger, the crop-age
# window (weeks) in which it can fire (NA = age-independent), and a short
# human-readable description of the firing condition.
# NOTE(review): the descriptions are documentation only — the actual trigger
# evaluation happens elsewhere (see the "Improve Status_trigger logic" TODO
# in the file header).
STATUS_TRIGGERS <- local({
  trigger_names <- c(
    "germination_started",
    "germination_complete",
    "stress_detected_whole_field",
    "strong_recovery",
    "growth_on_track",
    "maturation_progressing",
    "harvest_ready"
  )
  trigger_descriptions <- c(
    "10% of field CI > 2",
    "70% of field CI >= 2",
    "CI decline > -1.5 + low CV",
    "CI increase > +1.5",
    "CI increasing consistently",
    "High CI, stable/declining",
    "Age 45+ weeks (ready to harvest)"
  )
  data.frame(
    trigger = trigger_names,
    age_min = c(0, 0, NA, NA, 4, 39, 45),
    age_max = c(6, 6, NA, NA, 39, 200, 200),
    description = trigger_descriptions,
    stringsAsFactors = FALSE
  )
})
main <- function() {
# Parse command-line arguments
args <- commandArgs(trailingOnly = TRUE)
# end_date (arg 1)
# Priority: 1) Command-line arg, 2) Global end_date variable (for recursive calls), 3) Global end_date_str, 4) Sys.Date()
end_date <- if (length(args) >= 1 && !is.na(args[1])) {
# Parse date explicitly in YYYY-MM-DD format from command line
as.Date(args[1], format = "%Y-%m-%d")
} else if (exists("end_date", envir = .GlobalEnv)) {
global_date <- get("end_date", envir = .GlobalEnv)
# Check if it's a valid Date with length > 0
if (is.Date(global_date) && length(global_date) > 0 && !is.na(global_date)) {
global_date
} else if (exists("end_date_str", envir = .GlobalEnv)) {
as.Date(get("end_date_str", envir = .GlobalEnv))
} else {
Sys.Date()
}
} else if (exists("end_date_str", envir = .GlobalEnv)) {
as.Date(get("end_date_str", envir = .GlobalEnv))
} else {
Sys.Date()
}
# project_dir (arg 2)
project_dir <- if (length(args) >= 2 && !is.na(args[2])) {
as.character(args[2])
} else if (exists("project_dir", envir = .GlobalEnv)) {
get("project_dir", envir = .GlobalEnv)
} else {
"angata"
}
# offset (arg 3) - for backward compatibility with old 09
offset <- if (length(args) >= 3 && !is.na(args[3])) {
as.numeric(args[3])
} else {
7
}
# Validate end_date is a proper Date object
if (is.null(end_date) || length(end_date) == 0 || !inherits(end_date, "Date")) {
stop("ERROR: end_date is not valid. Got: ", class(end_date), " with length ", length(end_date))
}
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("end_date_str", format(end_date, "%Y-%m-%d"), envir = .GlobalEnv)
message("\n", strrep("=", 70))
message("80_CALCULATE_KPIs.R - CONSOLIDATED KPI CALCULATION")
message(strrep("=", 70))
message("Date:", format(end_date, "%Y-%m-%d"))
message("Project:", project_dir)
message("Mode: Conditional KPI calculation based on client type")
message("")
# Load configuration and utilities
# source(here("r_app", "crop_messaging_utils.R"))
tryCatch({
source(here("r_app", "parameters_project.R"))
}, error = function(e) {
stop("Error loading parameters_project.R: ", e$message)
})
# Initialize project directories and configuration
setup <- setup_project_directories(project_dir)
# DETERMINE CLIENT TYPE AND KPI CONFIGURATION
client_type <- get_client_type(project_dir)
client_config <- get_client_kpi_config(client_type)
# Assign to global environment so utilities and downstream scripts can access it
assign("client_config", client_config, envir = .GlobalEnv)
message("Client Type:", client_type)
message("KPI Calculations:", paste(client_config$kpi_calculations, collapse = ", "))
message("Output Formats:", paste(client_config$outputs, collapse = ", "))
# Use centralized paths from setup object (no need for file.path calls)
weekly_mosaic <- setup$weekly_mosaic_dir
daily_vals_dir <- setup$daily_ci_vals_dir
tryCatch({
source(here("r_app", "30_growth_model_utils.R"))
}, error = function(e) {
warning("30_growth_model_utils.R not found - yield prediction KPI will use placeholder data")
})
# CONDITIONAL EXECUTION BASED ON CLIENT TYPE
# ============================================
if (client_config$script_90_compatible && "kpi_summary_tables" %in% client_config$outputs) {
# AURA WORKFLOW: Run 6 farm-level KPIs for Script 90 compatibility
message("\n", strrep("=", 70))
message("AURA WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)")
message(strrep("=", 70))
# Prepare inputs for KPI calculation (already created by setup_project_directories)
reports_dir_kpi <- setup$kpi_reports_dir
cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir
# Load field boundaries for AURA workflow (use data_dir from setup)
message("\nLoading field boundaries for AURA KPI calculation...")
tryCatch({
boundaries_result <- load_field_boundaries(setup$data_dir)
if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
field_boundaries_sf <- boundaries_result$field_boundaries_sf
} else {
field_boundaries_sf <- boundaries_result
}
if (nrow(field_boundaries_sf) == 0) {
stop("No fields loaded from boundaries")
}
message(paste(" ✓ Loaded", nrow(field_boundaries_sf), "fields"))
}, error = function(e) {
stop("ERROR loading field boundaries: ", e$message)
})
# Load harvesting data
if (!exists("harvesting_data")) {
warning("harvesting_data not loaded. TCH KPI will use placeholder values.")
harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric())
}
# Extract current week/year from end_date
current_week <- as.numeric(format(end_date, "%V"))
current_year <- as.numeric(format(end_date, "%G"))
# Call with correct signature
kpi_results <- calculate_all_kpis(
report_date = end_date,
output_dir = reports_dir_kpi,
field_boundaries_sf = field_boundaries_sf,
harvesting_data = harvesting_data,
cumulative_CI_vals_dir = cumulative_CI_vals_dir,
weekly_CI_mosaic = setup$weekly_mosaic_dir,
reports_dir = reports_dir_kpi,
project_dir = project_dir
)
cat("\n=== AURA KPI CALCULATION COMPLETE ===\n")
cat("Summary tables saved for Script 90 integration\n")
cat("Output directory:", reports_dir_kpi, "\n\n")
} else if (client_config$script_91_compatible && "field_analysis_excel" %in% client_config$outputs) {
# CANE_SUPPLY WORKFLOW: Run per-field analysis with phase assignment
message("\n", strrep("=", 70))
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
message(strrep("=", 70))
# Set reports_dir for CANE_SUPPLY workflow (used by export functions)
reports_dir <- setup$kpi_reports_dir
data_dir <- setup$data_dir
# Continue with existing per-field analysis code below
message("\n", strrep("-", 70))
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ")
message(strrep("-", 70))
weeks <- calculate_week_numbers(end_date)
current_week <- weeks$current_week
current_year <- weeks$current_year
previous_week <- weeks$previous_week
previous_year <- weeks$previous_year
message(paste("Week:", current_week, "/ Year (ISO 8601):", current_year))
# Find per-field weekly mosaics
message("Finding per-field weekly mosaics...")
if (!dir.exists(weekly_mosaic)) {
stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic,
"\nScript 40 (mosaic creation) must be run first."))
}
field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE)
field_dirs <- field_dirs[field_dirs != ""]
if (length(field_dirs) == 0) {
stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic,
"\nScript 40 must create weekly_mosaic/{FIELD}/ structure."))
}
# Verify we have mosaics for this week
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year)
per_field_files <- c()
for (field in field_dirs) {
field_mosaic_dir <- file.path(weekly_mosaic, field)
files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
if (length(files) > 0) {
per_field_files <- c(per_field_files, files)
}
}
if (length(per_field_files) == 0) {
stop(paste("ERROR: No mosaics found for week", current_week, "year", current_year,
"\nExpected pattern:", single_file_pattern,
"\nChecked:", weekly_mosaic))
}
message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics"))
mosaic_mode <- "per-field"
mosaic_dir <- weekly_mosaic
# Load field boundaries
tryCatch({
boundaries_result <- load_field_boundaries(data_dir)
if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
field_boundaries_sf <- boundaries_result$field_boundaries_sf
} else {
field_boundaries_sf <- boundaries_result
}
if (nrow(field_boundaries_sf) == 0) {
stop("No fields loaded from boundaries")
}
message(paste(" Loaded", nrow(field_boundaries_sf), "fields"))
}, error = function(e) {
stop("ERROR loading field boundaries: ", e$message)
})
message("Loading historical field data for trend calculations...")
# Load up to 8 weeks (max of 4-week and 8-week trend requirements)
# Function gracefully handles missing weeks and loads whatever exists
num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG) # Always 8
message(paste(" Attempting to load up to", num_weeks_to_load, "weeks of historical data..."))
# Only auto-generate on first call (not in recursive calls from within load_historical_field_data)
allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv)
historical_data <- load_historical_field_data(project_dir, current_week, current_year, reports_dir,
num_weeks = num_weeks_to_load,
auto_generate = allow_auto_gen,
field_boundaries_sf = field_boundaries_sf,
daily_vals_dir = daily_vals_dir)
# Load harvest.xlsx for planting dates (season_start)
message("\nLoading harvest data from harvest.xlsx for planting dates...")
harvest_file_path <- file.path(data_dir, "harvest.xlsx")
harvesting_data <- tryCatch({
if (file.exists(harvest_file_path)) {
harvest_raw <- readxl::read_excel(harvest_file_path)
harvest_raw$season_start <- as.Date(harvest_raw$season_start)
harvest_raw$season_end <- as.Date(harvest_raw$season_end)
message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows"))
harvest_raw
} else {
message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path))
NULL
}
}, error = function(e) {
message(paste(" ERROR loading harvest.xlsx:", e$message))
NULL
})
planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
# Validate planting_dates
if (is.null(planting_dates) || nrow(planting_dates) == 0) {
message("WARNING: No planting dates available. Using NA for all fields.")
planting_dates <- data.frame(
field_id = field_boundaries_sf$field,
planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)),
stringsAsFactors = FALSE
)
}
# Build per-field configuration
message("\nPreparing mosaic configuration for statistics calculation...")
message(" ✓ Using per-field mosaic architecture (1 TIFF per field)")
# Per-field mode: each field has its own TIFF in weekly_mosaic/{FIELD}/week_*.tif
field_grid <- list(
mosaic_dir = mosaic_dir,
mode = "per-field"
)
message("\nUsing modular RDS-based approach for weekly statistics...")
# Load/calculate CURRENT week stats (from tiles or RDS cache)
message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...")
current_stats <- load_or_calculate_weekly_stats(
week_num = current_week,
year = current_year,
project_dir = project_dir,
field_boundaries_sf = field_boundaries_sf,
mosaic_dir = field_grid$mosaic_dir,
reports_dir = reports_dir,
report_date = end_date
)
message(paste(" ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week"))
# Load/calculate PREVIOUS week stats (from RDS cache or tiles)
message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...")
# Calculate report date for previous week (7 days before current)
prev_report_date <- end_date - 7
prev_stats <- load_or_calculate_weekly_stats(
week_num = previous_week,
year = previous_year,
project_dir = project_dir,
field_boundaries_sf = field_boundaries_sf,
mosaic_dir = field_grid$mosaic_dir,
reports_dir = reports_dir,
report_date = prev_report_date
)
message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week"))
message(paste(" Columns in prev_stats:", paste(names(prev_stats), collapse = ", ")))
message(paste(" CV column non-NA values in prev_stats:", sum(!is.na(prev_stats$CV))))
# Apply trend calculations (requires both weeks)
message("\n3. Calculating trend columns...")
current_stats <- calculate_kpi_trends(current_stats, prev_stats,
project_dir = project_dir,
reports_dir = reports_dir,
current_week = current_week,
year = current_year)
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
# Load weekly harvest probabilities from script 31 (if available)
# Note: Script 31 saves to reports/kpis/field_stats/ (not field_level)
message("\n4. Loading harvest probabilities from script 31...")
harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats")
harvest_prob_file <- file.path(harvest_prob_dir,
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year))
message(paste(" Looking for:", harvest_prob_file))
imminent_prob_data <- tryCatch({
if (file.exists(harvest_prob_file)) {
prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE,
col_types = readr::cols(.default = readr::col_character()))
message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields"))
prob_df %>%
select(field, imminent_prob, detected_prob) %>%
rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob)
} else {
message(paste(" INFO: Harvest probabilities not available (script 31 not run)"))
NULL
}
}, error = function(e) {
message(paste(" WARNING: Could not load harvest probabilities:", e$message))
NULL
})
# ============================================================================
# CALCULATE GAP FILLING KPI (2σ method from kpi_utils.R)
# ============================================================================
message("\nCalculating gap filling scores (2σ method)...")
# Process per-field mosaics
message(paste(" Using per-field mosaics for", length(per_field_files), "fields"))
field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field)
process_gap_for_field <- function(field_file) {
field_id <- basename(dirname(field_file))
field_bounds <- field_boundaries_by_id[[field_id]]
if (is.null(field_bounds) || nrow(field_bounds) == 0) {
return(data.frame(Field_id = field_id, gap_score = NA_real_))
}
tryCatch({
field_raster <- terra::rast(field_file)
ci_band_name <- "CI"
if (!(ci_band_name %in% names(field_raster))) {
return(data.frame(Field_id = field_id, gap_score = NA_real_))
}
field_ci_band <- field_raster[[ci_band_name]]
names(field_ci_band) <- "CI"
gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds)
if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) {
return(data.frame(Field_id = field_id, gap_score = NA_real_))
}
gap_scores <- gap_result$field_results
gap_scores$Field_id <- gap_scores$field
gap_scores <- gap_scores[, c("Field_id", "gap_score")]
stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE))
}, error = function(e) {
message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message))
data.frame(Field_id = field_id, gap_score = NA_real_)
})
}
# Process fields sequentially with progress bar
message(" Processing gap scores for ", length(per_field_files), " fields...")
pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50)
results_list <- lapply(seq_along(per_field_files), function(idx) {
result <- process_gap_for_field(per_field_files[[idx]])
utils::setTxtProgressBar(pb, idx)
result
})
close(pb)
gap_scores_df <- dplyr::bind_rows(results_list)
if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
gap_scores_df <- gap_scores_df %>%
dplyr::group_by(Field_id) %>%
dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop")
message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields"))
message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%"))
} else {
message(" WARNING: No gap scores calculated from per-field mosaics")
gap_scores_df <- NULL
}
# ============================================================================
# Build final output dataframe with all 22 columns (including Gap_score)
# ============================================================================
message("\nBuilding final field analysis output...")
# Pre-calculate acreages with geometry validation
# This avoids geometry errors during field_analysis construction
acreage_lookup <- tryCatch({
lookup_df <- field_boundaries_sf %>%
sf::st_drop_geometry() %>%
as.data.frame() %>%
mutate(
geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) {
tryCatch({
sf::st_is_valid(field_boundaries_sf[idx, ])
}, error = function(e) FALSE)
}),
area_ha = 0
)
# Calculate area for valid geometries
for (idx in which(lookup_df$geometry_valid)) {
tryCatch({
area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ]))
lookup_df$area_ha[idx] <- area_m2 / 10000
}, error = function(e) {
lookup_df$area_ha[idx] <<- NA_real_
})
}
# Convert hectares to acres
lookup_df %>%
mutate(acreage = area_ha / 0.404686) %>%
select(field, acreage)
}, error = function(e) {
message(paste("Warning: Could not calculate acreages from geometries -", e$message))
data.frame(field = character(0), acreage = numeric(0))
})
field_analysis_df <- current_stats %>%
mutate(
# Column 2: Farm_Section (user fills)
Farm_Section = NA_character_,
# Column 3: Field_name (from GeoJSON - already have Field_id, can look up)
Field_name = Field_id,
# Column 4: Acreage (calculate from geometry)
Acreage = {
acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)]
if_else(is.na(acreages_vec), 0, acreages_vec)
},
# Columns 5-6: Already in current_stats (Mean_CI, Weekly_ci_change)
# Column 7: Four_week_trend (from current_stats)
# Column 8: Last_harvest_or_planting_date (from harvest.xlsx - season_start)
Last_harvest_or_planting_date = {
planting_dates$planting_date[match(Field_id, planting_dates$field_id)]
},
# Column 9: Age_week (calculated from report date and planting date)
Age_week = {
sapply(seq_len(nrow(current_stats)), function(idx) {
planting_dt <- Last_harvest_or_planting_date[idx]
if (is.na(planting_dt)) {
return(NA_real_)
}
round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 0)
})
},
# Column 10: Phase (recalculate based on updated Age_week)
Phase = {
sapply(Age_week, function(age) {
if (is.na(age)) return(NA_character_)
if (age >= 0 & age < 4) return("Germination")
if (age >= 4 & age < 17) return("Tillering")
if (age >= 17 & age < 39) return("Grand Growth")
if (age >= 39) return("Maturation")
NA_character_
})
},
# Column 11: nmr_of_weeks_analysed (already in current_stats from calculate_kpi_trends)
# Column 12: Germination_progress (calculated here from CI values)
# Bin Pct_pixels_CI_gte_2 into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100%
Germination_progress = sapply(Pct_pixels_CI_gte_2, function(pct) {
if (is.na(pct)) return(NA_character_)
if (pct >= 95) return("95-100%")
else if (pct >= 90) return("90-95%")
else if (pct >= 80) return("80-90%")
else if (pct >= 70) return("70-80%")
else if (pct >= 60) return("60-70%")
else if (pct >= 50) return("50-60%")
else if (pct >= 40) return("40-50%")
else if (pct >= 30) return("30-40%")
else if (pct >= 20) return("20-30%")
else if (pct >= 10) return("10-20%")
else return("0-10%")
}),
# Column 13: Imminent_prob (from script 31 or NA if not available)
Imminent_prob = {
if (!is.null(imminent_prob_data)) {
imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)]
} else {
rep(NA_real_, nrow(current_stats))
}
},
# Column 14: Status_Alert (based on harvest probability + crop health status)
# Priority order: Ready for harvest-check → Strong decline → Harvested/bare → NA
Status_Alert = {
sapply(seq_len(nrow(current_stats)), function(idx) {
imminent_prob <- Imminent_prob[idx]
age_w <- Age_week[idx]
weekly_ci_chg <- Weekly_ci_change[idx]
mean_ci_val <- Mean_CI[idx]
# Priority 1: Ready for harvest-check (imminent + mature cane ≥12 months)
if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_w) && age_w >= 52) {
return("Ready for harvest-check")
}
# Priority 2: Strong decline in crop health (drop ≥2 points but still >1.5)
if (!is.na(weekly_ci_chg) && weekly_ci_chg <= -2.0 && !is.na(mean_ci_val) && mean_ci_val > 1.5) {
return("Strong decline in crop health")
}
# Priority 3: Harvested/bare (Mean CI < 1.5)
if (!is.na(mean_ci_val) && mean_ci_val < 1.5) {
return("Harvested/bare")
}
# Fallback: no alert
NA_character_
})
},
# (Continuation of the pipeline that builds field_analysis_df — this excerpt
# contains the remaining derived columns of the mutate() call, followed by the
# final select() that fixes the exported column order.)
# Columns 15-16: CI-based columns already in current_stats (CI_range, CI_Percentiles)
# Column 17: Already in current_stats (CV)
# Column 18: Already in current_stats (CV_Trend_Short_Term)
# Column 19: CV_Trend_Long_Term (from current_stats - raw slope value)
# Column 19b: CV_Trend_Long_Term_Category (categorical interpretation of slope)
# 3 classes: More uniform (slope < -0.01), Stable uniformity (-0.01 to 0.01), Less uniform (slope > 0.01)
CV_Trend_Long_Term_Category = {
# Map each long-term CV slope to one of three labels; NA slope -> NA label.
sapply(current_stats$CV_Trend_Long_Term, function(slope) {
if (is.na(slope)) {
return(NA_character_)
} else if (slope < -0.01) {
return("More uniform")
} else if (slope > 0.01) {
return("Less uniform")
} else {
return("Stable uniformity")
}
})
},
# Columns 20-21: Already in current_stats (Cloud_pct_clear, Cloud_category)
# Bin Cloud_pct_clear into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100%
# NOTE(review): this overwrites the numeric Cloud_pct_clear column in place
# with a character bin label (finer 5% bins above 90%), so downstream
# consumers must expect character values, not percentages.
Cloud_pct_clear = sapply(Cloud_pct_clear, function(pct) {
if (is.na(pct)) return(NA_character_)
if (pct >= 95) return("95-100%")
else if (pct >= 90) return("90-95%")
else if (pct >= 80) return("80-90%")
else if (pct >= 70) return("70-80%")
else if (pct >= 60) return("60-70%")
else if (pct >= 50) return("50-60%")
else if (pct >= 40) return("40-50%")
else if (pct >= 30) return("30-40%")
else if (pct >= 20) return("20-30%")
else if (pct >= 10) return("10-20%")
else return("0-10%")
}),
# Column 22: Gap_score (2σ below median - from kpi_utils.R)
# match() performs a left-join by Field_id: fields not present in
# gap_scores_df receive NA; when gap_scores_df is NULL/empty the whole
# column is NA.
Gap_score = {
if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
# Debug: Print first few gap scores
message(sprintf(" Joining %d gap scores to field_analysis (first 3: %s)",
nrow(gap_scores_df),
paste(head(gap_scores_df$gap_score, 3), collapse=", ")))
message(sprintf(" First 3 Field_ids in gap_scores_df: %s",
paste(head(gap_scores_df$Field_id, 3), collapse=", ")))
message(sprintf(" First 3 Field_ids in current_stats: %s",
paste(head(current_stats$Field_id, 3), collapse=", ")))
gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)]
} else {
rep(NA_real_, nrow(current_stats))
}
}
) %>%
# Lock the final 22-column set and order consumed by the Excel/CSV/RDS
# exporters and the dashboard.
select(
all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert",
"Last_harvest_or_planting_date", "Age_week", "Phase",
"Germination_progress",
"Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles",
"CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
"Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score"))
)
# Announce the finished table, write the weekly exports (Excel/CSV/RDS via
# export_field_analysis_excel), then print a short console preview.
message(sprintf("✓ Built final output with %d fields and 22 columns (including Gap_score)",
                nrow(field_analysis_df)))

export_paths <- export_field_analysis_excel(
  field_analysis_df,
  NULL,  # NOTE(review): second argument unused here — confirm its purpose with the exporter
  project_dir,
  current_week,
  current_year,
  reports_dir
)

# Preview: up to 10 rows of the headline columns that actually exist.
cat("\n--- Per-field Results (first 10) ---\n")
preview_cols <- intersect(
  c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend",
    "Status_Alert", "Cloud_category"),
  names(field_analysis_df)
)
if (length(preview_cols) > 0) {
  print(head(field_analysis_df[, preview_cols], 10))
}
# ========== FARM-LEVEL KPI AGGREGATION ==========
# Aggregate the per-field analysis into farm-level summary statistics.
# Fix: the original wrapped the summary in a duplicated, nested
# `if (nrow(field_data) > 0)` with two identical NULL else-branches;
# a single check is sufficient.
cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n")

# Keep only fields with usable data: a measured CI and a positive acreage.
field_data <- field_analysis_df %>%
  filter(!is.na(Mean_CI), !is.na(Acreage), Acreage > 0)

if (nrow(field_data) > 0) {
  # Helper: field count and total acreage per category of one grouping
  # column. `group_col` is a bare column name (tidy-eval embracing; the
  # file already depends on modern dplyr/tidyselect — see all_of() above).
  summarise_distribution <- function(df, group_col) {
    df %>%
      group_by({{ group_col }}) %>%
      summarise(
        num_fields = n(),
        acreage = sum(Acreage, na.rm = TRUE),
        .groups = "drop"
      ) %>%
      rename(Category = {{ group_col }})
  }

  farm_summary <- list()

  # 1. PHASE DISTRIBUTION
  phase_dist <- summarise_distribution(field_data, Phase)
  farm_summary$phase_distribution <- phase_dist

  # 2. STATUS ALERT DISTRIBUTION
  status_dist <- summarise_distribution(field_data, Status_Alert)
  farm_summary$status_distribution <- status_dist

  # 3. CLOUD COVERAGE DISTRIBUTION
  cloud_dist <- summarise_distribution(field_data, Cloud_category)
  farm_summary$cloud_distribution <- cloud_dist

  # 4. OVERALL STATISTICS (means are across fields, not acreage-weighted)
  farm_summary$overall_stats <- data.frame(
    total_fields = nrow(field_data),
    total_acreage = sum(field_data$Acreage, na.rm = TRUE),
    mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2),
    median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2),
    mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4),
    week = current_week,
    year = current_year,
    date = as.character(end_date)
  )

  # Print summaries
  cat("\n--- PHASE DISTRIBUTION ---\n")
  print(phase_dist)
  cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n")
  print(status_dist)
  cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n")
  print(cloud_dist)
  cat("\n--- OVERALL FARM STATISTICS ---\n")
  print(farm_summary$overall_stats)

  farm_kpi_results <- farm_summary
} else {
  # No field had both a valid CI and a positive acreage; the final summary
  # below reports the farm-level KPIs as SKIPPED when this is NULL.
  farm_kpi_results <- NULL
}
# ========== FINAL SUMMARY ==========
# Console recap of everything produced this run: row count, export paths,
# and whether farm-level KPIs were computed.
cat("\n", strrep("=", 70), "\n")
cat("80_CALCULATE_KPIs.R - COMPLETION SUMMARY\n")
cat(strrep("=", 70), "\n")
cat("Per-field analysis fields analyzed:", nrow(field_analysis_df), "\n")
cat("Excel export:", export_paths$excel, "\n")
cat("RDS export:", export_paths$rds, "\n")
cat("CSV export:", export_paths$csv, "\n")
# farm_kpi_results is NULL when no field passed the CI/acreage filter above.
if (!is.null(farm_kpi_results)) {
cat("\nFarm-level KPIs: CALCULATED\n")
} else {
cat("\nFarm-level KPIs: SKIPPED (no valid tile data extracted)\n")
}
cat("\n✓ Consolidated KPI calculation complete!\n")
cat(" - Per-field data exported\n")
cat(" - Farm-level KPIs calculated\n")
cat(" - All outputs in:", reports_dir, "\n\n")
} else {
# Unknown client type - log warning and exit
# (this else pairs with the client-type dispatch earlier in main()).
warning(sprintf("Unknown client type: %s - no workflow matched", client_type))
cat("\n⚠ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "")
cat("Expected: 'agronomic_support' (aura) or 'cane_supply' (angata, etc.)\n")
cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n")
}
}
# Entry-point guard: invoke main() only when this file is executed directly
# (e.g. via Rscript), not when it is source()'d from another script, in
# which case the call stack is non-empty.
if (identical(sys.nframe(), 0L)) {
  main()
}