# ============================================================================ # SCRIPT 80: Key Performance Indicator (KPI) Calculation # ============================================================================ # PURPOSE: # Calculate per-field and farm-level KPIs from weekly CI mosaics. Computes # field uniformity, growth trends (4-week and 8-week), area change detection, # TCH forecasts, stress identification, and weed presence. Generates # comprehensive Excel/CSV/RDS exports for dashboards and stakeholder reporting. # # INPUT DATA: # - Source 1: laravel_app/storage/app/{project}/weekly_mosaic/{FIELD}/week_*.tif # - Source 2: Field boundaries (pivot.geojson) and harvest data (harvest.xlsx) # - Source 3: Historical field stats (RDS from previous weeks) # # OUTPUT DATA: # - Destination: laravel_app/storage/app/{project}/output/ # - Format: Excel (.xlsx), CSV (.csv), RDS (.rds) # - Files: {project}_field_analysis_week{WW}_{YYYY}.xlsx + metadata # # USAGE: # Rscript 80_calculate_kpis.R [project] [week] [year] # # Example (Windows PowerShell): # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata 5 2026 # # PARAMETERS: # - project: Project name (character) - angata, chemba, xinavane, esa, simba # - week: ISO week number (numeric, 1-53, default current week) # - year: ISO year (numeric, default current year) # # CLIENT TYPES: # - cane_supply: Yes - uses 80_utils_cane_supply.R (placeholder) # - agronomic_support: Yes - uses 80_utils_agronomic_support.R (6 KPI funcs) # # DEPENDENCIES: # - Packages: terra, sf, tidyverse, lubridate, writexl, spdep # - Utils files: parameters_project.R, 00_common_utils.R, 80_utils_agronomic_support.R, 80_utils_cane_supply.R # - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx) # - Input data: Weekly mosaic TIFFs (Script 40 output) # - Data directories: weekly_mosaic/, output/ (created if missing) # # NOTES: # - KPIs calculated: Field Uniformity (CV), Area Change (pixel %), TCH Forecast, # Growth/Decline Trend, Weed Presence (spatial autocorrelation), Gap Filling % # - Client-aware: Conditional sourcing based on client_config$script_90_compatible # - Exports: 21-column output with field-level metrics, phase, status, alerts # - Supports 4-week and 8-week trend analysis from historical RDS cache # - Critical dependency for Scripts 90/91 (reporting/dashboards) # - Uses Moran's I for spatial clustering detection (weed/stress patterns) # # # ============================================================================ # [✓] Extract planting dates per field # [✓] Calculate Age_week = difftime(report_date, planting_date, units="weeks") # # COMMAND-LINE USAGE: # Option 1: Rscript 80_calculate_kpis.R 2026-01-14 angata # Arguments: [end_date] [project_dir] # # Option 2: Rscript 80_calculate_kpis.R 2026-01-14 angata 7 # Arguments: [end_date] [project_dir] [offset_days] # # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-01-12 angata 7 # # Usage in run_full_pipeline.R: # source("r_app/80_calculate_kpis.R") # main() # ============================================================================ # NEXT INTEGRATIONS (See Linear issues for detailed requirements) # ============================================================================ # 1. [✓] Load imminent_prob from script 31 (week_WW_YYYY.csv) # 2. [✓] Load planting_date from harvest.xlsx for field-specific age calculation # 3. [ ] Improve Status_trigger logic to use actual imminent_prob values # ============================================================================ # ============================================================================ # CONFIGURATION # ============================================================================ FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5 FOUR_WEEK_TREND_GROWTH_MIN <- 0.1 FOUR_WEEK_TREND_GROWTH_MAX <- 0.5 FOUR_WEEK_TREND_NO_GROWTH_RANGE <- 0.1 FOUR_WEEK_TREND_DECLINE_MAX <- -0.1 FOUR_WEEK_TREND_DECLINE_MIN <- -0.5 FOUR_WEEK_TREND_STRONG_DECLINE_MAX <- -0.5 # CV TREND THRESHOLDS CV_TREND_THRESHOLD_SIGNIFICANT <- 0.05 # CV_TREND_LONG_TERM (8-WEEK SLOPE) INTERPRETATION THRESHOLDS # Interpretation: Slope of CV over 8 weeks indicates field uniformity trend # Negative slope = CV decreasing = field becoming MORE uniform = GOOD # Positive slope = CV increasing = field becoming MORE patchy = BAD # Near zero = Homogenous growth (all crops progressing equally) CV_SLOPE_STRONG_IMPROVEMENT_MIN <- -0.03 # CV decreasing rapidly (>3% drop over 8 weeks) CV_SLOPE_IMPROVEMENT_MIN <- -0.02 # CV decreasing (2-3% drop over 8 weeks) CV_SLOPE_IMPROVEMENT_MAX <- -0.01 # Gradual improvement (1-2% drop over 8 weeks) CV_SLOPE_HOMOGENOUS_MIN <- -0.01 # Essentially stable (small natural variation) CV_SLOPE_HOMOGENOUS_MAX <- 0.01 # No change in uniformity (within ±1% over 8 weeks) CV_SLOPE_PATCHINESS_MIN <- 0.01 # Minor divergence (1-2% increase over 8 weeks) CV_SLOPE_PATCHINESS_MAX <- 0.02 # Growing patchiness (2-3% increase over 8 weeks) CV_SLOPE_SEVERE_MIN <- 0.02 # Severe fragmentation (>3% increase over 8 weeks) # PERCENTILE CALCULATIONS CI_PERCENTILE_LOW <- 0.10 CI_PERCENTILE_HIGH <- 0.90 # GERMINATION THRESHOLD (for germination_progress calculation) GERMINATION_CI_THRESHOLD <- 2.0 # PLANTING DATE & AGE CONFIGURATION # Load from harvest.xlsx (scripts 22 & 23) - no fallback to uniform dates # HISTORICAL DATA LOOKBACK WEEKS_FOR_FOUR_WEEK_TREND <- 4 WEEKS_FOR_CV_TREND_SHORT <- 2 WEEKS_FOR_CV_TREND_LONG <- 8 # ============================================================================ # 1. Load required libraries # ============================================================================ suppressPackageStartupMessages({ # File path handling library(here) # For relative path resolution (platform-independent file paths) # Spatial data handling library(sf) # For reading/manipulating field boundaries (GeoJSON) library(terra) # For raster operations (reading mosaic TIFFs) # Data manipulation library(dplyr) # For data wrangling (filter, mutate, group_by, summarize) library(tidyr) # For data reshaping (pivot_longer, pivot_wider, gather) library(lubridate) # For date/time operations (week extraction, date arithmetic) # File I/O library(readr) # For reading CSV files (harvest predictions from Python) library(readxl) # For reading harvest.xlsx (harvest dates for field mapping) library(writexl) # For writing Excel outputs (KPI summary tables) library(progress) # For progress bars during field processing # ML models (for yield prediction KPI) library(caret) # For training Random Forest with cross-validation library(CAST) # For Forward Feature Selection in caret models # ML/Analysis (optional - only for harvest model inference) tryCatch({ library(torch) # For PyTorch model inference (harvest readiness prediction) }, error = function(e) { message("Note: torch package not available - harvest model inference will be skipped") }) }) # ============================================================================ # LOAD CONFIGURATION - MUST BE DONE FIRST # ============================================================================ # Parameters must be loaded early to determine client type and paths tryCatch({ source(here("r_app", "parameters_project.R")) }, error = function(e) { stop("Error loading parameters_project.R: ", e$message) }) # Get client configuration from global project setup # NOTE: This cannot be done until parameters_project.R is sourced # We determine client_type from the current project_dir (if running in main() context) # For now, set a placeholder that will be overridden in main() if (exists("project_dir", envir = .GlobalEnv)) { temp_client_type <- get_client_type(get("project_dir", envir = .GlobalEnv)) } else { temp_client_type <- "cane_supply" # Safe default } temp_client_config <- get_client_kpi_config(temp_client_type) # ============================================================================ # LOAD UTILITY FUNCTIONS FROM SEPARATED MODULES # ============================================================================ # All clients use the common utilities (shared statistical functions, reporting) tryCatch({ source(here("r_app", "80_utils_common.R")) }, error = function(e) { stop("Error loading 80_utils_common.R: ", e$message) }) # Load both client-specific utilities (functions will be available for both workflows) # This avoids needing to determine client type at startup time message("Loading client-specific utilities (80_utils_agronomic_support.R and 80_utils_cane_supply.R)...") tryCatch({ source(here("r_app", "80_utils_agronomic_support.R")) }, error = function(e) { stop("Error loading 80_utils_agronomic_support.R: ", e$message) }) tryCatch({ source(here("r_app", "80_utils_cane_supply.R")) }, error = function(e) { stop("Error loading 80_utils_cane_supply.R: ", e$message) }) # ============================================================================ # PHASE AND STATUS TRIGGER DEFINITIONS # ============================================================================ PHASE_DEFINITIONS <- data.frame( phase = c("Germination", "Tillering", "Grand Growth", "Maturation"), age_start = c(0, 4, 17, 39), age_end = c(6, 16, 39, 200), stringsAsFactors = FALSE ) STATUS_TRIGGERS <- data.frame( trigger = c( "germination_started", "germination_complete", "stress_detected_whole_field", "strong_recovery", "growth_on_track", "maturation_progressing", "harvest_ready" ), age_min = c(0, 0, NA, NA, 4, 39, 45), age_max = c(6, 6, NA, NA, 39, 200, 200), description = c( "10% of field CI > 2", "70% of field CI >= 2", "CI decline > -1.5 + low CV", "CI increase > +1.5", "CI increasing consistently", "High CI, stable/declining", "Age 45+ weeks (ready to harvest)" ), stringsAsFactors = FALSE ) main <- function() { # Parse command-line arguments args <- commandArgs(trailingOnly = TRUE) # end_date (arg 1) # Priority: 1) Command-line arg, 2) Global end_date variable (for recursive calls), 3) Global end_date_str, 4) Sys.Date() end_date <- if (length(args) >= 1 && !is.na(args[1])) { # Parse date explicitly in YYYY-MM-DD format from command line as.Date(args[1], format = "%Y-%m-%d") } else if (exists("end_date", envir = .GlobalEnv)) { global_date <- get("end_date", envir = .GlobalEnv) # Check if it's a valid Date with length > 0 if (is.Date(global_date) && length(global_date) > 0 && !is.na(global_date)) { global_date } else if (exists("end_date_str", envir = .GlobalEnv)) { as.Date(get("end_date_str", envir = .GlobalEnv)) } else { Sys.Date() } } else if (exists("end_date_str", envir = .GlobalEnv)) { as.Date(get("end_date_str", envir = .GlobalEnv)) } else { Sys.Date() } # project_dir (arg 2) project_dir <- if (length(args) >= 2 && !is.na(args[2])) { as.character(args[2]) } else if (exists("project_dir", envir = .GlobalEnv)) { get("project_dir", envir = .GlobalEnv) } else { "angata" } # offset (arg 3) - for backward compatibility with old 09 offset <- if (length(args) >= 3 && !is.na(args[3])) { as.numeric(args[3]) } else { 7 } # Validate end_date is a proper Date object if (is.null(end_date) || length(end_date) == 0 || !inherits(end_date, "Date")) { stop("ERROR: end_date is not valid. Got: ", class(end_date), " with length ", length(end_date)) } assign("project_dir", project_dir, envir = .GlobalEnv) assign("end_date_str", format(end_date, "%Y-%m-%d"), envir = .GlobalEnv) message("\n", strrep("=", 70)) message("80_CALCULATE_KPIs.R - CONSOLIDATED KPI CALCULATION") message(strrep("=", 70)) message("Date:", format(end_date, "%Y-%m-%d")) message("Project:", project_dir) message("Mode: Conditional KPI calculation based on client type") message("") # Load configuration and utilities # source(here("r_app", "crop_messaging_utils.R")) tryCatch({ source(here("r_app", "parameters_project.R")) }, error = function(e) { stop("Error loading parameters_project.R: ", e$message) }) # Initialize project directories and configuration setup <- setup_project_directories(project_dir) # DETERMINE CLIENT TYPE AND KPI CONFIGURATION client_type <- get_client_type(project_dir) client_config <- get_client_kpi_config(client_type) # Assign to global environment so utilities and downstream scripts can access it assign("client_config", client_config, envir = .GlobalEnv) message("Client Type:", client_type) message("KPI Calculations:", paste(client_config$kpi_calculations, collapse = ", ")) message("Output Formats:", paste(client_config$outputs, collapse = ", ")) # Use centralized paths from setup object (no need for file.path calls) weekly_mosaic <- setup$weekly_mosaic_dir daily_vals_dir <- setup$daily_ci_vals_dir reports_dir <- setup$kpi_reports_dir data_dir <- setup$data_dir tryCatch({ source(here("r_app", "30_growth_model_utils.R")) }, error = function(e) { warning("30_growth_model_utils.R not found - yield prediction KPI will use placeholder data") }) # CONDITIONAL EXECUTION BASED ON CLIENT TYPE # ============================================ if (client_config$script_90_compatible && "kpi_summary_tables" %in% client_config$outputs) { # WORKFLOW: Run 6 farm-level KPIs for Script 90 compatibility message("\n", strrep("=", 70)) message("WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)") message(strrep("=", 70)) # Prepare outputs and inputs for KPI calculation (already created by setup_project_directories) reports_dir_kpi <- file.path(setup$reports_dir, "kpis") cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir # Load field boundaries for workflow (use data_dir from setup) message("\nLoading field boundaries for KPI calculation...") tryCatch({ boundaries_result <- load_field_boundaries(setup$data_dir) if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) { field_boundaries_sf <- boundaries_result$field_boundaries_sf } else { field_boundaries_sf <- boundaries_result } if (nrow(field_boundaries_sf) == 0) { stop("No fields loaded from boundaries") } message(paste(" ✓ Loaded", nrow(field_boundaries_sf), "fields")) }, error = function(e) { stop("ERROR loading field boundaries: ", e$message) }) # Load harvesting data for yield prediction (using common helper function) harvesting_data <- load_harvest_data(setup$data_dir) # Extract current week/year from end_date current_week <- as.numeric(format(end_date, "%V")) current_year <- as.numeric(format(end_date, "%G")) # Call with correct signature kpi_results <- calculate_all_field_analysis_agronomic_support( field_boundaries_sf = field_boundaries_sf, current_week = current_week, current_year = current_year, current_mosaic_dir = setup$weekly_mosaic_dir, previous_mosaic_dir = NULL, ci_rds_path = NULL, harvesting_data = harvesting_data, output_dir = reports_dir_kpi, data_dir = setup$data_dir, project_dir = project_dir ) # Extract results field_analysis_df <- kpi_results$field_analysis_df export_paths <- kpi_results$export_paths cat("\n=== KPI CALCULATION COMPLETE ===\n") cat("Summary tables saved for Script 90 integration\n") cat("Output directory:", reports_dir_kpi, "\n\n") } else if (client_config$script_91_compatible && "field_analysis_excel" %in% client_config$outputs) { # CANE_SUPPLY WORKFLOW: Run per-field analysis with phase assignment message("\n", strrep("=", 70)) message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)") message(strrep("=", 70)) # Define variables needed for workflow functions data_dir <- setup$data_dir # Load field boundaries for workflow (use data_dir from setup) message("\nLoading field boundaries for KPI calculation...") tryCatch({ boundaries_result <- load_field_boundaries(setup$data_dir) if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) { field_boundaries_sf <- boundaries_result$field_boundaries_sf } else { field_boundaries_sf <- boundaries_result } if (nrow(field_boundaries_sf) == 0) { stop("No fields loaded from boundaries") } message(paste(" ✓ Loaded", nrow(field_boundaries_sf), "fields")) }, error = function(e) { stop("ERROR loading field boundaries: ", e$message) }) # Load harvesting data for yield prediction (using common helper function) harvesting_data <- load_harvest_data(setup$data_dir) # Extract current week/year from end_date current_week <- as.numeric(format(end_date, "%V")) current_year <- as.numeric(format(end_date, "%G")) # Call the main orchestrator function from kpi_calculation_utils.R workflow_results <- calculate_field_analysis_cane_supply( setup = setup, client_config = client_config, end_date = end_date, project_dir = project_dir, weekly_mosaic = weekly_mosaic, daily_vals_dir = daily_vals_dir, field_boundaries_sf = field_boundaries_sf, data_dir = data_dir ) # Extract results field_analysis_df <- workflow_results$field_analysis_df farm_kpi_results <- workflow_results$farm_kpi_results export_paths <- workflow_results$export_paths } else { # Unknown client type - log warning and exit warning(sprintf("Unknown client type: %s - no workflow matched", client_type)) cat("\n⚠️ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "") cat("Expected: 'agronomic_support' (aura) or 'cane_supply' (angata, etc.)\n") cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n") } } if (sys.nframe() == 0) { main() }