# ============================================================================
# SCRIPT 80: Key Performance Indicator (KPI) Calculation
# ============================================================================
# PURPOSE:
#   Calculate per-field and farm-level KPIs from weekly CI mosaics. Computes
#   field uniformity, growth trends (4-week and 8-week), area change
#   detection, TCH forecasts, stress identification, and weed presence.
#   Generates comprehensive Excel/CSV/RDS exports for dashboards and
#   stakeholder reporting.
#
# INPUT DATA:
#   - Source 1: laravel_app/storage/app/{project}/weekly_mosaic/{FIELD}/week_*.tif
#   - Source 2: Field boundaries (pivot.geojson) and harvest data (harvest.xlsx)
#   - Source 3: Historical field stats (RDS from previous weeks)
#
# OUTPUT DATA:
#   - Destination: laravel_app/storage/app/{project}/output/
#   - Format: Excel (.xlsx), CSV (.csv), RDS (.rds)
#   - Files: {project}_field_analysis_week{WW}_{YYYY}.xlsx + metadata
#
# COMMAND-LINE USAGE (matches the argument parsing in main() below):
#   Option 1: Rscript 80_calculate_kpis.R 2026-01-14 angata
#             Arguments: [end_date] [project_dir]
#   Option 2: Rscript 80_calculate_kpis.R 2026-01-14 angata 7
#             Arguments: [end_date] [project_dir] [offset_days]
#
#   Windows PowerShell example:
#   & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-01-12 angata 7
#
#   Usage in run_full_pipeline.R:
#     source("r_app/80_calculate_kpis.R")
#     main()
#
#   NOTE(review): an earlier revision of this header documented the arguments
#   as "[project] [week] [year]"; main() actually parses
#   [end_date] [project_dir] [offset_days] as shown above.
#
# PARAMETERS:
#   - end_date: Report date, YYYY-MM-DD (default: global end_date /
#     end_date_str if set, otherwise Sys.Date())
#   - project_dir: Project name (character) - angata, chemba, xinavane,
#     esa, simba (default "angata")
#   - offset_days: Numeric offset kept for backward compatibility with the
#     old script 09 interface (default 7)
#
# CLIENT TYPES:
#   - cane_supply: Yes - uses 80_utils_cane_supply.R (placeholder)
#   - agronomic_support: Yes - uses 80_utils_agronomic_support.R (6 KPI funcs)
#
# DEPENDENCIES:
#   - Packages: terra, sf, tidyverse, lubridate, writexl, spdep
#   - Utils files: parameters_project.R, 00_common_utils.R,
#     80_utils_agronomic_support.R, 80_utils_cane_supply.R
#   - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx)
#   - Input data: Weekly mosaic TIFFs (Script 40 output)
#   - Data directories: weekly_mosaic/, output/ (created if missing)
#
# NOTES:
#   - KPIs calculated: Field Uniformity (CV), Area Change (pixel %),
#     TCH Forecast, Growth/Decline Trend, Weed Presence (spatial
#     autocorrelation), Gap Filling %
#   - Client-aware: Conditional sourcing based on
#     client_config$script_90_compatible
#   - Exports: 21-column output with field-level metrics, phase, status,
#     alerts (the cane_supply workflow below builds 22 columns incl.
#     Gap_score)
#   - Supports 4-week and 8-week trend analysis from historical RDS cache
#   - Critical dependency for Scripts 90/91 (reporting/dashboards)
#   - Uses Moran's I for spatial clustering detection (weed/stress patterns)
#
# DONE:
#   [x] Extract planting dates per field
#   [x] Calculate Age_week = difftime(report_date, planting_date, units="weeks")
#
# NEXT INTEGRATIONS (See Linear issues for detailed requirements):
#   1. [x] Load imminent_prob from script 31 (week_WW_YYYY.csv)
#   2. [x] Load planting_date from harvest.xlsx for field-specific age calc
#   3. [ ] Improve Status_trigger logic to use actual imminent_prob values
# ============================================================================

# ============================================================================
# CONFIGURATION
# ============================================================================

# FOUR-WEEK CI TREND THRESHOLDS (CI change over 4 weeks).
# Bands: strong growth >= 0.5; growth 0.1..0.5; |change| < 0.1 = no growth;
# decline -0.5..-0.1; strong decline <= -0.5.
FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5
FOUR_WEEK_TREND_GROWTH_MIN <- 0.1
FOUR_WEEK_TREND_GROWTH_MAX <- 0.5
FOUR_WEEK_TREND_NO_GROWTH_RANGE <- 0.1
FOUR_WEEK_TREND_DECLINE_MAX <- -0.1
FOUR_WEEK_TREND_DECLINE_MIN <- -0.5
FOUR_WEEK_TREND_STRONG_DECLINE_MAX <- -0.5

# CV TREND THRESHOLDS
CV_TREND_THRESHOLD_SIGNIFICANT <- 0.05

# CV_TREND_LONG_TERM (8-WEEK SLOPE) INTERPRETATION THRESHOLDS
# Interpretation: slope of CV over 8 weeks indicates field uniformity trend.
#   Negative slope = CV decreasing = field becoming MORE uniform = GOOD
#   Positive slope = CV increasing = field becoming MORE patchy  = BAD
#   Near zero     = homogenous growth (all crops progressing equally)
# NOTE(review): the original inline descriptions (e.g. ">3% drop" next to
# -0.03, ">3% increase" next to 0.02) do not line up with the numeric
# breakpoints; confirm the intended percentage bands before relying on the
# prose. The values themselves are preserved unchanged here.
CV_SLOPE_STRONG_IMPROVEMENT_MIN <- -0.03  # CV decreasing rapidly over 8 weeks
CV_SLOPE_IMPROVEMENT_MIN <- -0.02         # CV decreasing
CV_SLOPE_IMPROVEMENT_MAX <- -0.01         # gradual improvement
CV_SLOPE_HOMOGENOUS_MIN <- -0.01          # essentially stable (lower bound)
CV_SLOPE_HOMOGENOUS_MAX <- 0.01           # essentially stable (upper bound)
CV_SLOPE_PATCHINESS_MIN <- 0.01           # minor divergence
CV_SLOPE_PATCHINESS_MAX <- 0.02           # growing patchiness
CV_SLOPE_SEVERE_MIN <- 0.02               # severe fragmentation

# PERCENTILE CALCULATIONS (quantile probabilities used for CI percentiles)
CI_PERCENTILE_LOW <- 0.10
CI_PERCENTILE_HIGH <- 0.90

# GERMINATION THRESHOLD (for germination_progress calculation)
GERMINATION_CI_THRESHOLD <- 2.0

# PLANTING DATE & AGE CONFIGURATION
# Load from harvest.xlsx (scripts 22 & 23) - no fallback to uniform dates.

# HISTORICAL DATA LOOKBACK (weeks of RDS history pulled for trend KPIs)
WEEKS_FOR_FOUR_WEEK_TREND <- 4
WEEKS_FOR_CV_TREND_SHORT <- 2
WEEKS_FOR_CV_TREND_LONG <- 8

# ============================================================================
# 1. Load required libraries
# ============================================================================
suppressPackageStartupMessages({
  # File path handling
  library(here)       # Relative path resolution (platform-independent)

  # Spatial data handling
  library(sf)         # Field boundaries (GeoJSON) reading/manipulation
  library(terra)      # Raster operations (reading mosaic TIFFs)

  # Data manipulation
  library(dplyr)      # Data wrangling (filter, mutate, group_by, summarise)
  library(tidyr)      # Data reshaping (pivot_longer, pivot_wider)
  library(lubridate)  # Date/time operations (is.Date used in main())

  # File I/O
  library(readr)      # CSV reading (harvest probabilities from script 31)
  library(readxl)     # harvest.xlsx reading (planting/harvest dates)
  library(writexl)    # Excel outputs (KPI summary tables)
  library(progress)   # Progress bars during field processing

  # Note: ML model inference is handled via Python/PyTorch (conda
  # pytorch_gpu). No R torch package needed.
})

# ============================================================================
# LOAD CONFIGURATION - MUST BE DONE FIRST
# ============================================================================
# Parameters must be loaded early: they define get_client_type() and
# get_client_kpi_config(), used immediately below.
tryCatch({
  source(here("r_app", "parameters_project.R"))
}, error = function(e) {
  stop("Error loading parameters_project.R: ", e$message)
})

# Get client configuration from global project setup.
# NOTE: this cannot run until parameters_project.R is sourced. We determine
# client_type from the current project_dir (if running in main() context);
# otherwise a placeholder is set here and overridden in main().
if (exists("project_dir", envir = .GlobalEnv)) {
  temp_client_type <- get_client_type(get("project_dir", envir = .GlobalEnv))
} else {
  temp_client_type <- "cane_supply" # Safe default
}
temp_client_config <- get_client_kpi_config(temp_client_type)

# ============================================================================
# LOAD UTILITY FUNCTIONS FROM SEPARATED MODULES
# ============================================================================
# All clients use the common utilities (shared stats functions, reporting).
tryCatch({
  source(here("r_app", "80_utils_common.R"))
}, error = function(e) {
  stop("Error loading 80_utils_common.R: ", e$message)
})

# Load BOTH client-specific utility files so either workflow can run without
# having to determine the client type at startup time.
message("Loading client-specific utilities (80_utils_agronomic_support.R and 80_utils_cane_supply.R)...")
tryCatch({
  source(here("r_app", "80_utils_agronomic_support.R"))
}, error = function(e) {
  stop("Error loading 80_utils_agronomic_support.R: ", e$message)
})
tryCatch({
  source(here("r_app", "80_utils_cane_supply.R"))
}, error = function(e) {
  stop("Error loading 80_utils_cane_supply.R: ", e$message)
})

# ============================================================================
# PHASE AND STATUS TRIGGER DEFINITIONS
# ============================================================================
# Crop phase lookup by age (weeks). NOTE(review): ranges overlap (Germination
# 0-6 vs Tillering 4-16) and differ from the hard-coded Phase cutoffs used in
# main() ([0,4) Germination, [4,17) Tillering) — confirm which is the source
# of truth.
PHASE_DEFINITIONS <- data.frame(
  phase = c("Germination", "Tillering", "Grand Growth", "Maturation"),
  age_start = c(0, 4, 17, 39),
  age_end = c(6, 16, 39, 200),
  stringsAsFactors = FALSE
)

# Reference table of status triggers with the age windows and human-readable
# rules they represent (NA age bounds = trigger not age-gated).
STATUS_TRIGGERS <- data.frame(
  trigger = c(
    "germination_started", "germination_complete",
    "stress_detected_whole_field", "strong_recovery",
    "growth_on_track", "maturation_progressing", "harvest_ready"
  ),
  age_min = c(0, 0, NA, NA, 4, 39, 45),
  age_max = c(6, 6, NA, NA, 39, 200, 200),
  description = c(
    "10% of field CI > 2",
    "70% of field CI >= 2",
    "CI decline > -1.5 + low CV",
    "CI increase > +1.5",
    "CI increasing consistently",
    "High CI, stable/declining",
    "Age 45+ weeks (ready to harvest)"
  ),
  stringsAsFactors = FALSE
  # (closing parenthesis of this data.frame() call follows)
)  # closes the STATUS_TRIGGERS data.frame() started above

# ============================================================================
# MAIN ENTRY POINT
# ============================================================================
# Orchestrates one weekly KPI run for a single project:
#   1. Parse CLI args / global fallbacks: end_date, project_dir, offset.
#   2. Source project parameters, set up directories, resolve client config.
#   3. Dispatch on client_config:
#        - script_90_compatible  -> 6 farm-level KPIs via calculate_all_kpis()
#        - script_91_compatible  -> per-field 22-column analysis + farm summary
#        - otherwise             -> warn and exit.
# Side effects: assigns project_dir, end_date_str and client_config into the
# global environment (read by sourced utilities and downstream scripts), and
# writes Excel/CSV/RDS outputs via the export helpers.
main <- function() {
  # Parse command-line arguments
  args <- commandArgs(trailingOnly = TRUE)

  # end_date (arg 1)
  # Priority: 1) command-line arg, 2) global end_date (for recursive calls),
  #           3) global end_date_str, 4) Sys.Date()
  end_date <- if (length(args) >= 1 && !is.na(args[1])) {
    # Parse date explicitly in YYYY-MM-DD format from command line
    as.Date(args[1], format = "%Y-%m-%d")
  } else if (exists("end_date", envir = .GlobalEnv)) {
    global_date <- get("end_date", envir = .GlobalEnv)
    # Check if it's a valid Date with length > 0 (is.Date is from lubridate)
    if (is.Date(global_date) && length(global_date) > 0 && !is.na(global_date)) {
      global_date
    } else if (exists("end_date_str", envir = .GlobalEnv)) {
      as.Date(get("end_date_str", envir = .GlobalEnv))
    } else {
      Sys.Date()
    }
  } else if (exists("end_date_str", envir = .GlobalEnv)) {
    as.Date(get("end_date_str", envir = .GlobalEnv))
  } else {
    Sys.Date()
  }

  # project_dir (arg 2)
  project_dir <- if (length(args) >= 2 && !is.na(args[2])) {
    as.character(args[2])
  } else if (exists("project_dir", envir = .GlobalEnv)) {
    get("project_dir", envir = .GlobalEnv)
  } else {
    "angata"
  }

  # offset (arg 3) - for backward compatibility with old 09.
  # NOTE(review): parsed but never referenced later in this function.
  offset <- if (length(args) >= 3 && !is.na(args[3])) {
    as.numeric(args[3])
  } else {
    7
  }

  # Validate end_date is a proper Date object (as.Date with a bad string
  # yields NA, which still inherits "Date", so the message can fire on
  # length-0 / non-Date cases only — NA dates pass this check).
  if (is.null(end_date) || length(end_date) == 0 || !inherits(end_date, "Date")) {
    stop("ERROR: end_date is not valid. Got: ", class(end_date), " with length ", length(end_date))
  }

  # Publish run context globally for sourced utilities / downstream scripts.
  assign("project_dir", project_dir, envir = .GlobalEnv)
  assign("end_date_str", format(end_date, "%Y-%m-%d"), envir = .GlobalEnv)

  message("\n", strrep("=", 70))
  message("80_CALCULATE_KPIs.R - CONSOLIDATED KPI CALCULATION")
  message(strrep("=", 70))
  message("Date:", format(end_date, "%Y-%m-%d"))
  message("Project:", project_dir)
  message("Mode: Conditional KPI calculation based on client type")
  message("")

  # Load configuration and utilities (re-sourced here so main() also works
  # when called directly from run_full_pipeline.R).
  # source(here("r_app", "crop_messaging_utils.R"))
  tryCatch({
    source(here("r_app", "parameters_project.R"))
  }, error = function(e) {
    stop("Error loading parameters_project.R: ", e$message)
  })

  # Initialize project directories and configuration
  setup <- setup_project_directories(project_dir)

  # DETERMINE CLIENT TYPE AND KPI CONFIGURATION
  client_type <- get_client_type(project_dir)
  client_config <- get_client_kpi_config(client_type)
  # Assign globally so utilities and downstream scripts can access it
  assign("client_config", client_config, envir = .GlobalEnv)

  message("Client Type:", client_type)
  message("KPI Calculations:", paste(client_config$kpi_calculations, collapse = ", "))
  message("Output Formats:", paste(client_config$outputs, collapse = ", "))

  # Use centralized paths from the setup object
  weekly_mosaic <- setup$weekly_mosaic_dir
  daily_vals_dir <- setup$daily_ci_vals_dir

  # Optional growth-model utilities (yield prediction KPI degrades gracefully)
  tryCatch({
    source(here("r_app", "30_growth_model_utils.R"))
  }, error = function(e) {
    warning("30_growth_model_utils.R not found - yield prediction KPI will use placeholder data")
  })

  # ============================================
  # CONDITIONAL EXECUTION BASED ON CLIENT TYPE
  # ============================================
  if (client_config$script_90_compatible && "kpi_summary_tables" %in% client_config$outputs) {
    # ------------------------------------------------------------------
    # WORKFLOW A: 6 farm-level KPIs for Script 90 compatibility
    # ------------------------------------------------------------------
    message("\n", strrep("=", 70))
    message("WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)")
    message(strrep("=", 70))

    # Prepare inputs for KPI calculation (dirs created by setup_project_directories)
    reports_dir_kpi <- setup$kpi_reports_dir
    cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir

    # Load field boundaries (helper may return either an sf object or a list
    # wrapping one under $field_boundaries_sf)
    message("\nLoading field boundaries for KPI calculation...")
    tryCatch({
      boundaries_result <- load_field_boundaries(setup$data_dir)
      if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
        field_boundaries_sf <- boundaries_result$field_boundaries_sf
      } else {
        field_boundaries_sf <- boundaries_result
      }
      if (nrow(field_boundaries_sf) == 0) {
        stop("No fields loaded from boundaries")
      }
      message(paste(" ✓ Loaded", nrow(field_boundaries_sf), "fields"))
    }, error = function(e) {
      stop("ERROR loading field boundaries: ", e$message)
    })

    # Load harvesting data (fallback: empty frame so TCH KPI still runs)
    if (!exists("harvesting_data")) {
      warning("harvesting_data not loaded. TCH KPI will use placeholder values.")
      harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric())
    }

    # Extract current ISO week/year from end_date (%V = ISO week, %G = ISO year)
    current_week <- as.numeric(format(end_date, "%V"))
    current_year <- as.numeric(format(end_date, "%G"))

    # Call with correct signature.
    # NOTE(review): kpi_results is assigned but not used afterwards here.
    kpi_results <- calculate_all_kpis(
      field_boundaries_sf = field_boundaries_sf,
      current_week = current_week,
      current_year = current_year,
      current_mosaic_dir = setup$weekly_mosaic_dir,
      previous_mosaic_dir = NULL,
      ci_rds_path = NULL,
      harvesting_data = harvesting_data,
      output_dir = reports_dir_kpi,
      project_dir = project_dir
    )

    cat("\n=== KPI CALCULATION COMPLETE ===\n")
    cat("Summary tables saved for Script 90 integration\n")
    cat("Output directory:", reports_dir_kpi, "\n\n")

  } else if (client_config$script_91_compatible && "field_analysis_excel" %in% client_config$outputs) {
    # ------------------------------------------------------------------
    # WORKFLOW B (CANE_SUPPLY): per-field analysis with phase assignment
    # ------------------------------------------------------------------
    message("\n", strrep("=", 70))
    message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
    message(strrep("=", 70))

    # Set reports_dir for CANE_SUPPLY workflow (used by export functions)
    reports_dir <- setup$kpi_reports_dir
    data_dir <- setup$data_dir

    message("\n", strrep("-", 70))
    message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ")
    message(strrep("-", 70))

    # Week numbering delegated to project utility (ISO 8601 semantics)
    weeks <- calculate_week_numbers(end_date)
    current_week <- weeks$current_week
    current_year <- weeks$current_year
    previous_week <- weeks$previous_week
    previous_year <- weeks$previous_year
    message(paste("Week:", current_week, "/ Year (ISO 8601):", current_year))

    # Find per-field weekly mosaics (Script 40 output)
    message("Finding per-field weekly mosaics...")
    if (!dir.exists(weekly_mosaic)) {
      stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic,
                 "\nScript 40 (mosaic creation) must be run first."))
    }
    field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE)
    field_dirs <- field_dirs[field_dirs != ""]
    if (length(field_dirs) == 0) {
      stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic,
                 "\nScript 40 must create weekly_mosaic/{FIELD}/ structure."))
    }

    # Verify we have mosaics for this week (one TIFF per field directory)
    single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year)
    per_field_files <- c()
    for (field in field_dirs) {
      field_mosaic_dir <- file.path(weekly_mosaic, field)
      files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
      if (length(files) > 0) {
        per_field_files <- c(per_field_files, files)
      }
    }
    if (length(per_field_files) == 0) {
      stop(paste("ERROR: No mosaics found for week", current_week, "year", current_year,
                 "\nExpected pattern:", single_file_pattern,
                 "\nChecked:", weekly_mosaic))
    }
    message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics"))
    mosaic_mode <- "per-field"
    mosaic_dir <- weekly_mosaic

    # Load field boundaries (same list-or-sf unwrapping as Workflow A)
    tryCatch({
      boundaries_result <- load_field_boundaries(data_dir)
      if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
        field_boundaries_sf <- boundaries_result$field_boundaries_sf
      } else {
        field_boundaries_sf <- boundaries_result
      }
      if (nrow(field_boundaries_sf) == 0) {
        stop("No fields loaded from boundaries")
      }
      message(paste(" Loaded", nrow(field_boundaries_sf), "fields"))
    }, error = function(e) {
      stop("ERROR loading field boundaries: ", e$message)
    })

    message("Loading historical field data for trend calculations...")
    # Load up to 8 weeks (max of 4-week and 8-week trend requirements).
    # Function gracefully handles missing weeks and loads whatever exists.
    num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG) # Always 8
    message(paste(" Attempting to load up to", num_weeks_to_load, "weeks of historical data..."))
    # Only auto-generate on the first call (not in recursive calls from
    # within load_historical_field_data); the sentinel is a global flag.
    allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv)
    # NOTE(review): historical_data is not referenced again in this function;
    # presumably load_historical_field_data caches to RDS as a side effect
    # that calculate_kpi_trends later reads — confirm.
    historical_data <- load_historical_field_data(project_dir, current_week, current_year,
                                                  reports_dir,
                                                  num_weeks = num_weeks_to_load,
                                                  auto_generate = allow_auto_gen,
                                                  field_boundaries_sf = field_boundaries_sf,
                                                  daily_vals_dir = daily_vals_dir)

    # Load harvest.xlsx for planting dates (season_start)
    message("\nLoading harvest data from harvest.xlsx for planting dates...")
    harvest_file_path <- file.path(data_dir, "harvest.xlsx")
    harvesting_data <- tryCatch({
      if (file.exists(harvest_file_path)) {
        harvest_raw <- readxl::read_excel(harvest_file_path)
        harvest_raw$season_start <- as.Date(harvest_raw$season_start)
        harvest_raw$season_end <- as.Date(harvest_raw$season_end)
        message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows"))
        harvest_raw
      } else {
        message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path))
        NULL
      }
    }, error = function(e) {
      message(paste(" ERROR loading harvest.xlsx:", e$message))
      NULL
    })

    planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
    # Validate planting_dates: fall back to an all-NA frame keyed by field id
    if (is.null(planting_dates) || nrow(planting_dates) == 0) {
      message("WARNING: No planting dates available. Using NA for all fields.")
      planting_dates <- data.frame(
        field_id = field_boundaries_sf$field,
        planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)),
        stringsAsFactors = FALSE
      )
    }

    # Build per-field configuration
    message("\nPreparing mosaic configuration for statistics calculation...")
    message(" ✓ Using per-field mosaic architecture (1 TIFF per field)")
    # Per-field mode: each field has its own TIFF in weekly_mosaic/{FIELD}/week_*.tif
    field_grid <- list(
      mosaic_dir = mosaic_dir,
      mode = "per-field"
    )

    message("\nUsing modular RDS-based approach for weekly statistics...")

    # Load/calculate CURRENT week stats (from tiles or RDS cache)
    message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...")
    current_stats <- load_or_calculate_weekly_stats(
      week_num = current_week,
      year = current_year,
      project_dir = project_dir,
      field_boundaries_sf = field_boundaries_sf,
      mosaic_dir = field_grid$mosaic_dir,
      reports_dir = reports_dir,
      report_date = end_date
    )
    message(paste(" ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week"))

    # Load/calculate PREVIOUS week stats (from RDS cache or tiles)
    message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...")
    # Report date for previous week (7 days before current)
    prev_report_date <- end_date - 7
    prev_stats <- load_or_calculate_weekly_stats(
      week_num = previous_week,
      year = previous_year,
      project_dir = project_dir,
      field_boundaries_sf = field_boundaries_sf,
      mosaic_dir = field_grid$mosaic_dir,
      reports_dir = reports_dir,
      report_date = prev_report_date
    )
    message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week"))
    message(paste(" Columns in prev_stats:", paste(names(prev_stats), collapse = ", ")))
    message(paste(" CV column non-NA values in prev_stats:", sum(!is.na(prev_stats$CV))))

    # Apply trend calculations (requires both weeks)
    message("\n3. Calculating trend columns...")
    current_stats <- calculate_kpi_trends(current_stats, prev_stats,
                                          project_dir = project_dir,
                                          reports_dir = reports_dir,
                                          current_week = current_week,
                                          year = current_year)
    message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))

    # Load weekly harvest probabilities from script 31 (if available).
    # Note: Script 31 saves to reports/kpis/field_stats/ (not field_level).
    message("\n4. Loading harvest probabilities from script 31...")
    harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats")
    harvest_prob_file <- file.path(harvest_prob_dir,
                                   sprintf("%s_harvest_imminent_week_%02d_%d.csv",
                                           project_dir, current_week, current_year))
    message(paste(" Looking for:", harvest_prob_file))
    # NOTE(review): all columns are read as character (col_character default),
    # yet Imminent_prob is later compared with `> 0.5` in Status_Alert — that
    # comparison coerces 0.5 to "0.5" and compares lexicographically. Verify,
    # or convert with as.numeric on load. Detected_prob is selected but unused.
    imminent_prob_data <- tryCatch({
      if (file.exists(harvest_prob_file)) {
        prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE,
                                   col_types = readr::cols(.default = readr::col_character()))
        message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields"))
        prob_df %>%
          select(field, imminent_prob, detected_prob) %>%
          rename(Field_id = field,
                 Imminent_prob_actual = imminent_prob,
                 Detected_prob = detected_prob)
      } else {
        message(paste(" INFO: Harvest probabilities not available (script 31 not run)"))
        NULL
      }
    }, error = function(e) {
      message(paste(" WARNING: Could not load harvest probabilities:", e$message))
      NULL
    })

    # ========================================================================
    # CALCULATE GAP FILLING KPI (2σ method from kpi_utils.R)
    # ========================================================================
    message("\nCalculating gap filling scores (2σ method)...")
    message(paste(" Using per-field mosaics for", length(per_field_files), "fields"))
    field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field)

    # Compute the mean gap score for one per-field mosaic TIFF; field id is
    # taken from the parent directory name. Returns a 1-row data.frame with
    # NA gap_score on any failure (missing boundary, missing CI band, error).
    process_gap_for_field <- function(field_file) {
      field_id <- basename(dirname(field_file))
      field_bounds <- field_boundaries_by_id[[field_id]]
      if (is.null(field_bounds) || nrow(field_bounds) == 0) {
        return(data.frame(Field_id = field_id, gap_score = NA_real_))
      }
      tryCatch({
        field_raster <- terra::rast(field_file)
        ci_band_name <- "CI"
        if (!(ci_band_name %in% names(field_raster))) {
          return(data.frame(Field_id = field_id, gap_score = NA_real_))
        }
        field_ci_band <- field_raster[[ci_band_name]]
        names(field_ci_band) <- "CI"
        gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds)
        if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) {
          return(data.frame(Field_id = field_id, gap_score = NA_real_))
        }
        gap_scores <- gap_result$field_results
        gap_scores$Field_id <- gap_scores$field
        gap_scores <- gap_scores[, c("Field_id", "gap_score")]
        # Collapse multiple polygons per field to a single mean score
        stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE))
      }, error = function(e) {
        message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message))
        data.frame(Field_id = field_id, gap_score = NA_real_)
      })
    }

    # Process fields sequentially with progress bar
    message(" Processing gap scores for ", length(per_field_files), " fields...")
    pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50)
    results_list <- lapply(seq_along(per_field_files), function(idx) {
      result <- process_gap_for_field(per_field_files[[idx]])
      utils::setTxtProgressBar(pb, idx)
      result
    })
    close(pb)
    gap_scores_df <- dplyr::bind_rows(results_list)

    if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
      # De-duplicate by field id in case a field produced multiple rows
      gap_scores_df <- gap_scores_df %>%
        dplyr::group_by(Field_id) %>%
        dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop")
      message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields"))
      message(paste(" Gap score range:",
                    round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-",
                    round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%"))
    } else {
      message(" WARNING: No gap scores calculated from per-field mosaics")
      gap_scores_df <- NULL
    }

    # ========================================================================
    # Build final output dataframe with all 22 columns (including Gap_score)
    # ========================================================================
    message("\nBuilding final field analysis output...")

    # Pre-calculate acreages with geometry validation, avoiding geometry
    # errors during field_analysis construction. Row-by-row so one invalid
    # geometry cannot poison the whole lookup.
    acreage_lookup <- tryCatch({
      lookup_df <- field_boundaries_sf %>%
        sf::st_drop_geometry() %>%
        as.data.frame() %>%
        mutate(
          geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) {
            tryCatch({
              sf::st_is_valid(field_boundaries_sf[idx, ])
            }, error = function(e) FALSE)
          }),
          area_ha = 0
        )
      # Calculate area for valid geometries (st_area returns m²)
      for (idx in which(lookup_df$geometry_valid)) {
        tryCatch({
          area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ]))
          lookup_df$area_ha[idx] <- area_m2 / 10000
        }, error = function(e) {
          # <<- reaches lookup_df in the enclosing frame from the handler
          lookup_df$area_ha[idx] <<- NA_real_
        })
      }
      # Convert hectares to acres (1 acre = 0.404686 ha)
      lookup_df %>%
        mutate(acreage = area_ha / 0.404686) %>%
        select(field, acreage)
    }, error = function(e) {
      message(paste("Warning: Could not calculate acreages from geometries -", e$message))
      data.frame(field = character(0), acreage = numeric(0))
    })

    field_analysis_df <- current_stats %>%
      mutate(
        # Column 2: Farm_Section (user fills)
        Farm_Section = NA_character_,
        # Column 3: Field_name (from GeoJSON - currently mirrors Field_id)
        Field_name = Field_id,
        # Column 4: Acreage (from geometry lookup; 0 when unknown)
        Acreage = {
          acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)]
          if_else(is.na(acreages_vec), 0, acreages_vec)
        },
        # Columns 5-6 already in current_stats (Mean_CI, Weekly_ci_change)
        # Column 7: Four_week_trend (from current_stats)
        # Column 8: Last_harvest_or_planting_date (harvest.xlsx season_start)
        Last_harvest_or_planting_date = {
          planting_dates$planting_date[match(Field_id, planting_dates$field_id)]
        },
        # Column 9: Age_week (whole weeks between planting date and report date)
        Age_week = {
          sapply(seq_len(nrow(current_stats)), function(idx) {
            planting_dt <- Last_harvest_or_planting_date[idx]
            if (is.na(planting_dt)) {
              return(NA_real_)
            }
            round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 0)
          })
        },
        # Column 10: Phase from Age_week.
        # NOTE(review): cutoffs ([0,4) Germination) differ from the
        # PHASE_DEFINITIONS table above (Germination 0-6) — confirm intent.
        Phase = {
          sapply(Age_week, function(age) {
            if (is.na(age)) return(NA_character_)
            if (age >= 0 & age < 4) return("Germination")
            if (age >= 4 & age < 17) return("Tillering")
            if (age >= 17 & age < 39) return("Grand Growth")
            if (age >= 39) return("Maturation")
            NA_character_  # negative ages fall through to NA
          })
        },
        # Column 11: nmr_of_weeks_analysed (from calculate_kpi_trends)
        # Column 12: Germination_progress — bin Pct_pixels_CI_gte_2 into
        # 10% intervals: 0-10%, ..., 80-90%, then finer 90-95%, 95-100%
        Germination_progress = sapply(Pct_pixels_CI_gte_2, function(pct) {
          if (is.na(pct)) return(NA_character_)
          if (pct >= 95) return("95-100%")
          else if (pct >= 90) return("90-95%")
          else if (pct >= 80) return("80-90%")
          else if (pct >= 70) return("70-80%")
          else if (pct >= 60) return("60-70%")
          else if (pct >= 50) return("50-60%")
          else if (pct >= 40) return("40-50%")
          else if (pct >= 30) return("30-40%")
          else if (pct >= 20) return("20-30%")
          else if (pct >= 10) return("10-20%")
          else return("0-10%")
        }),
        # Column 13: Imminent_prob (script 31, or NA when unavailable).
        # NOTE(review): values are character when loaded (see above) but the
        # fallback is NA_real_ — column type varies with availability.
        Imminent_prob = {
          if (!is.null(imminent_prob_data)) {
            imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)]
          } else {
            rep(NA_real_, nrow(current_stats))
          }
        },
        # Column 14: Status_Alert (harvest probability + crop health).
        # Priority: Ready for harvest-check → Strong decline → Harvested/bare → NA
        Status_Alert = {
          sapply(seq_len(nrow(current_stats)), function(idx) {
            imminent_prob <- Imminent_prob[idx]
            age_w <- Age_week[idx]
            weekly_ci_chg <- Weekly_ci_change[idx]
            mean_ci_val <- Mean_CI[idx]
            # Priority 1: Ready for harvest-check (imminent + cane >= 52 weeks)
            if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_w) && age_w >= 52) {
              return("Ready for harvest-check")
            }
            # Priority 2: Strong decline in crop health (drop >= 2 CI points
            # while mean CI is still above the bare-ground threshold)
            if (!is.na(weekly_ci_chg) && weekly_ci_chg <= -2.0 && !is.na(mean_ci_val) && mean_ci_val > 1.5) {
              return("Strong decline in crop health")
            }
            # Priority 3: Harvested/bare (Mean CI < 1.5)
            if (!is.na(mean_ci_val) && mean_ci_val < 1.5) {
              return("Harvested/bare")
            }
            # Fallback: no alert
            NA_character_
          })
        },
        # Columns 15-18 already in current_stats (CI_range, CI_Percentiles,
        # CV, CV_Trend_Short_Term); column 19 is CV_Trend_Long_Term (raw slope)
        # Column 19b: categorical interpretation of the 8-week CV slope.
        # 3 classes: More uniform (< -0.01), Stable uniformity, Less uniform (> 0.01)
        CV_Trend_Long_Term_Category = {
          sapply(current_stats$CV_Trend_Long_Term, function(slope) {
            if (is.na(slope)) {
              return(NA_character_)
            } else if (slope < -0.01) {
              return("More uniform")
            } else if (slope > 0.01) {
              return("Less uniform")
            } else {
              return("Stable uniformity")
            }
          })
        },
        # Columns 20-21: Cloud_pct_clear, Cloud_category.
        # NOTE(review): this OVERWRITES the numeric Cloud_pct_clear from
        # current_stats with character bins — downstream numeric use breaks.
        Cloud_pct_clear = sapply(Cloud_pct_clear, function(pct) {
          if (is.na(pct)) return(NA_character_)
          if (pct >= 95) return("95-100%")
          else if (pct >= 90) return("90-95%")
          else if (pct >= 80) return("80-90%")
          else if (pct >= 70) return("70-80%")
          else if (pct >= 60) return("60-70%")
          else if (pct >= 50) return("50-60%")
          else if (pct >= 40) return("40-50%")
          else if (pct >= 30) return("30-40%")
          else if (pct >= 20) return("20-30%")
          else if (pct >= 10) return("10-20%")
          else return("0-10%")
        }),
        # Column 22: Gap_score (2σ below median - from kpi_utils.R).
        # Debug messages fire inside mutate() as a side effect.
        Gap_score = {
          if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
            message(sprintf(" Joining %d gap scores to field_analysis (first 3: %s)",
                            nrow(gap_scores_df), paste(head(gap_scores_df$gap_score, 3), collapse=", ")))
            message(sprintf(" First 3 Field_ids in gap_scores_df: %s",
                            paste(head(gap_scores_df$Field_id, 3), collapse=", ")))
            message(sprintf(" First 3 Field_ids in current_stats: %s",
                            paste(head(current_stats$Field_id, 3), collapse=", ")))
            gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)]
          } else {
            rep(NA_real_, nrow(current_stats))
          }
        }
      ) %>%
      select(
        all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert",
                 "Last_harvest_or_planting_date", "Age_week", "Phase", "Germination_progress",
                 "Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles",
                 "CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
                 "Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score"))
      )
    message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns (including Gap_score)"))

    # Export Excel/CSV/RDS via client utility; paths echoed in the summary
    export_paths <- export_field_analysis_excel(
      field_analysis_df, NULL, project_dir, current_week, current_year, reports_dir
    )

    cat("\n--- Per-field Results (first 10) ---\n")
    available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category")
    available_cols <- available_cols[available_cols %in% names(field_analysis_df)]
    if (length(available_cols) > 0) {
      print(head(field_analysis_df[, available_cols], 10))
    }

    # ========== FARM-LEVEL KPI AGGREGATION ==========
    # Aggregate the per-field analysis into farm-level summary statistics
    cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n")
    # Keep only fields with actual data (non-NA CI and positive acreage)
    field_data <- field_analysis_df %>%
      filter(!is.na(Mean_CI) & !is.na(Acreage)) %>%
      filter(Acreage > 0)

    # NOTE(review): the inner if duplicates the outer condition exactly and
    # its else branch is unreachable dead code.
    if (nrow(field_data) > 0) {
      if (nrow(field_data) > 0) {
        # Create summary statistics
        farm_summary <- list()

        # 1. PHASE DISTRIBUTION (field count + acreage per phase)
        phase_dist <- field_data %>%
          group_by(Phase) %>%
          summarise(
            num_fields = n(),
            acreage = sum(Acreage, na.rm = TRUE),
            .groups = 'drop'
          ) %>%
          rename(Category = Phase)
        farm_summary$phase_distribution <- phase_dist

        # 2. STATUS ALERT DISTRIBUTION
        status_dist <- field_data %>%
          group_by(Status_Alert) %>%
          summarise(
            num_fields = n(),
            acreage = sum(Acreage, na.rm = TRUE),
            .groups = 'drop'
          ) %>%
          rename(Category = Status_Alert)
        farm_summary$status_distribution <- status_dist

        # 3. CLOUD COVERAGE DISTRIBUTION
        cloud_dist <- field_data %>%
          group_by(Cloud_category) %>%
          summarise(
            num_fields = n(),
            acreage = sum(Acreage, na.rm = TRUE),
            .groups = 'drop'
          ) %>%
          rename(Category = Cloud_category)
        farm_summary$cloud_distribution <- cloud_dist

        # 4. OVERALL STATISTICS (single-row farm roll-up)
        farm_summary$overall_stats <- data.frame(
          total_fields = nrow(field_data),
          total_acreage = sum(field_data$Acreage, na.rm = TRUE),
          mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2),
          median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2),
          mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4),
          week = current_week,
          year = current_year,
          date = as.character(end_date)
        )

        # Print summaries
        cat("\n--- PHASE DISTRIBUTION ---\n")
        print(phase_dist)
        cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n")
        print(status_dist)
        cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n")
        print(cloud_dist)
        cat("\n--- OVERALL FARM STATISTICS ---\n")
        print(farm_summary$overall_stats)

        farm_kpi_results <- farm_summary
      } else {
        farm_kpi_results <- NULL
      }
    } else {
      farm_kpi_results <- NULL
    }

    # ========== FINAL SUMMARY ==========
    cat("\n", strrep("=", 70), "\n")
    cat("80_CALCULATE_KPIs.R - COMPLETION SUMMARY\n")
    cat(strrep("=", 70), "\n")
    cat("Per-field analysis fields analyzed:", nrow(field_analysis_df), "\n")
    cat("Excel export:", export_paths$excel, "\n")
    cat("RDS export:", export_paths$rds, "\n")
    cat("CSV export:", export_paths$csv, "\n")
    if (!is.null(farm_kpi_results)) {
      cat("\nFarm-level KPIs: CALCULATED\n")
    } else {
      cat("\nFarm-level KPIs: SKIPPED (no valid tile data extracted)\n")
    }
    cat("\n✓ Consolidated KPI calculation complete!\n")
    cat(" - Per-field data exported\n")
    cat(" - Farm-level KPIs calculated\n")
    cat(" - All outputs in:", reports_dir, "\n\n")

  } else {
    # Unknown client type - log warning and exit
    warning(sprintf("Unknown client type: %s - no workflow matched", client_type))
    cat("\n⚠️ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "")
    cat("Expected: 'agronomic_support' (aura) or 'cane_supply' (angata, etc.)\n")
    cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n")
  }
}

# Run main() only when this file is executed directly via Rscript
# (sys.nframe() == 0 at top level); sourcing the file leaves main() callable.
if (sys.nframe() == 0) {
  main()
}