460 lines
18 KiB
R
460 lines
18 KiB
R
# ============================================================================
|
||
# SCRIPT 80: Key Performance Indicator (KPI) Calculation
|
||
# ============================================================================
|
||
# PURPOSE:
|
||
# Calculate per-field and farm-level KPIs from weekly CI mosaics. Computes
|
||
# field uniformity, growth trends (4-week and 8-week), area change detection,
|
||
# TCH forecasts, stress identification, and weed presence. Generates
|
||
# comprehensive Excel/CSV/RDS exports for dashboards and stakeholder reporting.
|
||
#
|
||
# INPUT DATA:
|
||
# - Source 1: laravel_app/storage/app/{project}/weekly_mosaic/{FIELD}/week_*.tif
|
||
# - Source 2: Field boundaries (pivot.geojson) and harvest data (harvest.xlsx)
|
||
# - Source 3: Historical field stats (RDS from previous weeks)
|
||
#
|
||
# OUTPUT DATA:
|
||
# - Destination: laravel_app/storage/app/{project}/output/
|
||
# - Format: Excel (.xlsx), CSV (.csv), RDS (.rds)
|
||
# - Files: {project}_field_analysis_week{WW}_{YYYY}.xlsx + metadata
|
||
#
|
||
# USAGE:
|
||
# Rscript 80_calculate_kpis.R [project] [week] [year]
|
||
#
|
||
# Example (Windows PowerShell):
|
||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata 5 2026
|
||
#
|
||
# PARAMETERS:
|
||
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
|
||
# - week: ISO week number (numeric, 1-53, default current week)
|
||
# - year: ISO year (numeric, default current year)
|
||
#
|
||
# CLIENT TYPES:
|
||
# - cane_supply: Yes - uses 80_utils_cane_supply.R (placeholder)
|
||
# - agronomic_support: Yes - uses 80_utils_agronomic_support.R (6 KPI funcs)
|
||
#
|
||
# DEPENDENCIES:
|
||
# - Packages: terra, sf, tidyverse, lubridate, writexl, spdep
|
||
# - Utils files: parameters_project.R, 00_common_utils.R, 80_utils_agronomic_support.R, 80_utils_cane_supply.R
|
||
# - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx)
|
||
# - Input data: Weekly mosaic TIFFs (Script 40 output)
|
||
# - Data directories: weekly_mosaic/, output/ (created if missing)
|
||
#
|
||
# NOTES:
|
||
# - KPIs calculated: Field Uniformity (CV), Area Change (pixel %), TCH Forecast,
|
||
# Growth/Decline Trend, Weed Presence (spatial autocorrelation), Gap Filling %
|
||
# - Client-aware: Conditional sourcing based on client_config$script_90_compatible
|
||
# - Exports: 21-column output with field-level metrics, phase, status, alerts
|
||
# - Supports 4-week and 8-week trend analysis from historical RDS cache
|
||
# - Critical dependency for Scripts 90/91 (reporting/dashboards)
|
||
# - Uses Moran's I for spatial clustering detection (weed/stress patterns)
|
||
#
|
||
#
|
||
# ============================================================================
|
||
# [✓] Extract planting dates per field
|
||
# [✓] Calculate Age_week = difftime(report_date, planting_date, units="weeks")
|
||
#
|
||
# COMMAND-LINE USAGE:
|
||
# Option 1: Rscript 80_calculate_kpis.R 2026-01-14 angata
|
||
# Arguments: [end_date] [project_dir]
|
||
#
|
||
# Option 2: Rscript 80_calculate_kpis.R 2026-01-14 angata 7
|
||
# Arguments: [end_date] [project_dir] [offset_days]
|
||
#
|
||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-01-12 angata 7
|
||
#
|
||
# Usage in run_full_pipeline.R:
|
||
# source("r_app/80_calculate_kpis.R")
|
||
# main()
|
||
|
||
# ============================================================================
|
||
# NEXT INTEGRATIONS (See Linear issues for detailed requirements)
|
||
# ============================================================================
|
||
# 1. [✓] Load imminent_prob from script 31 (week_WW_YYYY.csv)
|
||
# 2. [✓] Load planting_date from harvest.xlsx for field-specific age calculation
|
||
# 3. [ ] Improve Status_trigger logic to use actual imminent_prob values
|
||
# ============================================================================
|
||
|
||
# ============================================================================
|
||
# CONFIGURATION
|
||
# ============================================================================
|
||
FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5
|
||
FOUR_WEEK_TREND_GROWTH_MIN <- 0.1
|
||
FOUR_WEEK_TREND_GROWTH_MAX <- 0.5
|
||
FOUR_WEEK_TREND_NO_GROWTH_RANGE <- 0.1
|
||
FOUR_WEEK_TREND_DECLINE_MAX <- -0.1
|
||
FOUR_WEEK_TREND_DECLINE_MIN <- -0.5
|
||
FOUR_WEEK_TREND_STRONG_DECLINE_MAX <- -0.5
|
||
|
||
# CV TREND THRESHOLDS
|
||
CV_TREND_THRESHOLD_SIGNIFICANT <- 0.05
|
||
|
||
# CV_TREND_LONG_TERM (8-WEEK SLOPE) INTERPRETATION THRESHOLDS
|
||
# Interpretation: Slope of CV over 8 weeks indicates field uniformity trend
|
||
# Negative slope = CV decreasing = field becoming MORE uniform = GOOD
|
||
# Positive slope = CV increasing = field becoming MORE patchy = BAD
|
||
# Near zero = Homogenous growth (all crops progressing equally)
|
||
CV_SLOPE_STRONG_IMPROVEMENT_MIN <- -0.03 # CV decreasing rapidly (>3% drop over 8 weeks)
|
||
CV_SLOPE_IMPROVEMENT_MIN <- -0.02 # CV decreasing (2-3% drop over 8 weeks)
|
||
CV_SLOPE_IMPROVEMENT_MAX <- -0.01 # Gradual improvement (1-2% drop over 8 weeks)
|
||
CV_SLOPE_HOMOGENOUS_MIN <- -0.01 # Essentially stable (small natural variation)
|
||
CV_SLOPE_HOMOGENOUS_MAX <- 0.01 # No change in uniformity (within ±1% over 8 weeks)
|
||
CV_SLOPE_PATCHINESS_MIN <- 0.01 # Minor divergence (1-2% increase over 8 weeks)
|
||
CV_SLOPE_PATCHINESS_MAX <- 0.02 # Growing patchiness (2-3% increase over 8 weeks)
|
||
CV_SLOPE_SEVERE_MIN <- 0.02 # Severe fragmentation (>3% increase over 8 weeks)
|
||
|
||
|
||
# PERCENTILE CALCULATIONS
|
||
CI_PERCENTILE_LOW <- 0.10
|
||
CI_PERCENTILE_HIGH <- 0.90
|
||
|
||
# GERMINATION THRESHOLD (for germination_progress calculation)
|
||
GERMINATION_CI_THRESHOLD <- 2.0
|
||
|
||
# PLANTING DATE & AGE CONFIGURATION
|
||
# Load from harvest.xlsx (scripts 22 & 23) - no fallback to uniform dates
|
||
|
||
# HISTORICAL DATA LOOKBACK
|
||
WEEKS_FOR_FOUR_WEEK_TREND <- 4
|
||
WEEKS_FOR_CV_TREND_SHORT <- 2
|
||
WEEKS_FOR_CV_TREND_LONG <- 8
|
||
|
||
# ============================================================================
|
||
# 1. Load required libraries
|
||
# ============================================================================
|
||
|
||
suppressPackageStartupMessages({
|
||
# File path handling
|
||
library(here) # For relative path resolution (platform-independent file paths)
|
||
|
||
# Spatial data handling
|
||
library(sf) # For reading/manipulating field boundaries (GeoJSON)
|
||
library(terra) # For raster operations (reading mosaic TIFFs)
|
||
|
||
# Data manipulation
|
||
library(dplyr) # For data wrangling (filter, mutate, group_by, summarize)
|
||
library(tidyr) # For data reshaping (pivot_longer, pivot_wider, gather)
|
||
library(lubridate) # For date/time operations (week extraction, date arithmetic)
|
||
|
||
# File I/O
|
||
library(readr) # For reading CSV files (harvest predictions from Python)
|
||
library(readxl) # For reading harvest.xlsx (harvest dates for field mapping)
|
||
library(writexl) # For writing Excel outputs (KPI summary tables)
|
||
library(progress) # For progress bars during field processing
|
||
|
||
# ML models (for yield prediction KPI)
|
||
library(caret) # For training Random Forest with cross-validation
|
||
library(CAST) # For Forward Feature Selection in caret models
|
||
|
||
# ML models (for yield prediction KPI)
|
||
library(caret) # For training Random Forest with cross-validation
|
||
library(CAST) # For Forward Feature Selection in caret models
|
||
|
||
|
||
})
|
||
|
||
# ============================================================================
|
||
# LOAD CONFIGURATION - MUST BE DONE FIRST
|
||
# ============================================================================
|
||
# Parameters must be loaded early to determine client type and paths
|
||
|
||
tryCatch({
|
||
source(here("r_app", "parameters_project.R"))
|
||
}, error = function(e) {
|
||
stop("Error loading parameters_project.R: ", e$message)
|
||
})
|
||
|
||
# Get client configuration from global project setup
|
||
# NOTE: This cannot be done until parameters_project.R is sourced
|
||
# We determine client_type from the current project_dir (if running in main() context)
|
||
# For now, set a placeholder that will be overridden in main()
|
||
if (exists("project_dir", envir = .GlobalEnv)) {
|
||
temp_client_type <- get_client_type(get("project_dir", envir = .GlobalEnv))
|
||
} else {
|
||
temp_client_type <- "cane_supply" # Safe default
|
||
}
|
||
temp_client_config <- get_client_kpi_config(temp_client_type)
|
||
|
||
# ============================================================================
|
||
# LOAD UTILITY FUNCTIONS FROM SEPARATED MODULES
|
||
# ============================================================================
|
||
# All clients use the common utilities (shared statistical functions, reporting)
|
||
tryCatch({
|
||
source(here("r_app", "80_utils_common.R"))
|
||
}, error = function(e) {
|
||
stop("Error loading 80_utils_common.R: ", e$message)
|
||
})
|
||
|
||
# Load both client-specific utilities (functions will be available for both workflows)
|
||
# This avoids needing to determine client type at startup time
|
||
message("Loading client-specific utilities (80_utils_agronomic_support.R and 80_utils_cane_supply.R)...")
|
||
tryCatch({
|
||
source(here("r_app", "80_utils_agronomic_support.R"))
|
||
}, error = function(e) {
|
||
stop("Error loading 80_utils_agronomic_support.R: ", e$message)
|
||
})
|
||
|
||
tryCatch({
|
||
source(here("r_app", "80_utils_cane_supply.R"))
|
||
}, error = function(e) {
|
||
stop("Error loading 80_utils_cane_supply.R: ", e$message)
|
||
})
|
||
|
||
# ============================================================================
|
||
# PHASE AND STATUS TRIGGER DEFINITIONS
|
||
# ============================================================================
|
||
|
||
PHASE_DEFINITIONS <- data.frame(
|
||
phase = c("Germination", "Tillering", "Grand Growth", "Maturation"),
|
||
age_start = c(0, 4, 17, 39),
|
||
age_end = c(6, 16, 39, 200),
|
||
stringsAsFactors = FALSE
|
||
)
|
||
|
||
STATUS_TRIGGERS <- data.frame(
|
||
trigger = c(
|
||
"germination_started",
|
||
"germination_complete",
|
||
"stress_detected_whole_field",
|
||
"strong_recovery",
|
||
"growth_on_track",
|
||
"maturation_progressing",
|
||
"harvest_ready"
|
||
),
|
||
age_min = c(0, 0, NA, NA, 4, 39, 45),
|
||
age_max = c(6, 6, NA, NA, 39, 200, 200),
|
||
description = c(
|
||
"10% of field CI > 2",
|
||
"70% of field CI >= 2",
|
||
"CI decline > -1.5 + low CV",
|
||
"CI increase > +1.5",
|
||
"CI increasing consistently",
|
||
"High CI, stable/declining",
|
||
"Age 45+ weeks (ready to harvest)"
|
||
),
|
||
stringsAsFactors = FALSE
|
||
)
|
||
|
||
|
||
|
||
main <- function() {
|
||
# Parse command-line arguments
|
||
args <- commandArgs(trailingOnly = TRUE)
|
||
|
||
# end_date (arg 1)
|
||
# Priority: 1) Command-line arg, 2) Global end_date variable (for recursive calls), 3) Global end_date_str, 4) Sys.Date()
|
||
end_date <- if (length(args) >= 1 && !is.na(args[1])) {
|
||
# Parse date explicitly in YYYY-MM-DD format from command line
|
||
as.Date(args[1], format = "%Y-%m-%d")
|
||
} else if (exists("end_date", envir = .GlobalEnv)) {
|
||
global_date <- get("end_date", envir = .GlobalEnv)
|
||
# Check if it's a valid Date with length > 0
|
||
if (is.Date(global_date) && length(global_date) > 0 && !is.na(global_date)) {
|
||
global_date
|
||
} else if (exists("end_date_str", envir = .GlobalEnv)) {
|
||
as.Date(get("end_date_str", envir = .GlobalEnv))
|
||
} else {
|
||
Sys.Date()
|
||
}
|
||
} else if (exists("end_date_str", envir = .GlobalEnv)) {
|
||
as.Date(get("end_date_str", envir = .GlobalEnv))
|
||
} else {
|
||
Sys.Date()
|
||
}
|
||
|
||
# project_dir (arg 2)
|
||
project_dir <- if (length(args) >= 2 && !is.na(args[2])) {
|
||
as.character(args[2])
|
||
} else if (exists("project_dir", envir = .GlobalEnv)) {
|
||
get("project_dir", envir = .GlobalEnv)
|
||
} else {
|
||
"angata"
|
||
}
|
||
|
||
# offset (arg 3) - for backward compatibility with old 09
|
||
offset <- if (length(args) >= 3 && !is.na(args[3])) {
|
||
as.numeric(args[3])
|
||
} else {
|
||
7
|
||
}
|
||
|
||
# Validate end_date is a proper Date object
|
||
if (is.null(end_date) || length(end_date) == 0 || !inherits(end_date, "Date")) {
|
||
stop("ERROR: end_date is not valid. Got: ", class(end_date), " with length ", length(end_date))
|
||
}
|
||
|
||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||
assign("end_date_str", format(end_date, "%Y-%m-%d"), envir = .GlobalEnv)
|
||
|
||
message("\n", strrep("=", 70))
|
||
message("80_CALCULATE_KPIs.R - CONSOLIDATED KPI CALCULATION")
|
||
message(strrep("=", 70))
|
||
message("Date:", format(end_date, "%Y-%m-%d"))
|
||
message("Project:", project_dir)
|
||
message("Mode: Conditional KPI calculation based on client type")
|
||
message("")
|
||
|
||
# Load configuration and utilities
|
||
# source(here("r_app", "crop_messaging_utils.R"))
|
||
|
||
tryCatch({
|
||
source(here("r_app", "parameters_project.R"))
|
||
}, error = function(e) {
|
||
stop("Error loading parameters_project.R: ", e$message)
|
||
})
|
||
|
||
# Initialize project directories and configuration
|
||
setup <- setup_project_directories(project_dir)
|
||
|
||
# DETERMINE CLIENT TYPE AND KPI CONFIGURATION
|
||
client_type <- get_client_type(project_dir)
|
||
client_config <- get_client_kpi_config(client_type)
|
||
|
||
# Assign to global environment so utilities and downstream scripts can access it
|
||
assign("client_config", client_config, envir = .GlobalEnv)
|
||
|
||
message("Client Type:", client_type)
|
||
message("KPI Calculations:", paste(client_config$kpi_calculations, collapse = ", "))
|
||
message("Output Formats:", paste(client_config$outputs, collapse = ", "))
|
||
|
||
# Use centralized paths from setup object (no need for file.path calls)
|
||
weekly_mosaic <- setup$weekly_mosaic_dir
|
||
daily_vals_dir <- setup$daily_ci_vals_dir
|
||
reports_dir <- setup$kpi_reports_dir
|
||
data_dir <- setup$data_dir
|
||
|
||
|
||
tryCatch({
|
||
source(here("r_app", "30_growth_model_utils.R"))
|
||
}, error = function(e) {
|
||
warning("30_growth_model_utils.R not found - yield prediction KPI will use placeholder data")
|
||
})
|
||
|
||
# CONDITIONAL EXECUTION BASED ON CLIENT TYPE
|
||
# ============================================
|
||
|
||
if (client_config$script_90_compatible && "kpi_summary_tables" %in% client_config$outputs) {
|
||
# WORKFLOW: Run 6 farm-level KPIs for Script 90 compatibility
|
||
message("\n", strrep("=", 70))
|
||
message("WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)")
|
||
message(strrep("=", 70))
|
||
|
||
# Prepare outputs and inputs for KPI calculation (already created by setup_project_directories)
|
||
reports_dir_kpi <- file.path(setup$reports_dir, "kpis")
|
||
cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir
|
||
|
||
# Load field boundaries for workflow (use data_dir from setup)
|
||
message("\nLoading field boundaries for KPI calculation...")
|
||
tryCatch({
|
||
boundaries_result <- load_field_boundaries(setup$data_dir)
|
||
|
||
if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
|
||
field_boundaries_sf <- boundaries_result$field_boundaries_sf
|
||
} else {
|
||
field_boundaries_sf <- boundaries_result
|
||
}
|
||
|
||
if (nrow(field_boundaries_sf) == 0) {
|
||
stop("No fields loaded from boundaries")
|
||
}
|
||
|
||
message(paste(" ✓ Loaded", nrow(field_boundaries_sf), "fields"))
|
||
}, error = function(e) {
|
||
stop("ERROR loading field boundaries: ", e$message)
|
||
})
|
||
|
||
# Load harvesting data for yield prediction (using common helper function)
|
||
harvesting_data <- load_harvest_data(setup$data_dir)
|
||
|
||
# Extract current week/year from end_date
|
||
current_week <- as.numeric(format(end_date, "%V"))
|
||
current_year <- as.numeric(format(end_date, "%G"))
|
||
|
||
# Call with correct signature
|
||
kpi_results <- calculate_all_field_analysis_agronomic_support(
|
||
field_boundaries_sf = field_boundaries_sf,
|
||
current_week = current_week,
|
||
current_year = current_year,
|
||
current_mosaic_dir = setup$weekly_mosaic_dir,
|
||
previous_mosaic_dir = NULL,
|
||
ci_rds_path = NULL,
|
||
harvesting_data = harvesting_data,
|
||
output_dir = reports_dir_kpi,
|
||
data_dir = setup$data_dir,
|
||
project_dir = project_dir
|
||
)
|
||
|
||
# Extract results
|
||
field_analysis_df <- kpi_results$field_analysis_df
|
||
export_paths <- kpi_results$export_paths
|
||
|
||
cat("\n=== KPI CALCULATION COMPLETE ===\n")
|
||
cat("Summary tables saved for Script 90 integration\n")
|
||
cat("Output directory:", reports_dir_kpi, "\n\n")
|
||
|
||
} else if (client_config$script_91_compatible && "field_analysis_excel" %in% client_config$outputs) {
|
||
# CANE_SUPPLY WORKFLOW: Run per-field analysis with phase assignment
|
||
message("\n", strrep("=", 70))
|
||
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
|
||
message(strrep("=", 70))
|
||
|
||
# Define variables needed for workflow functions
|
||
data_dir <- setup$data_dir
|
||
|
||
# Load field boundaries for workflow (use data_dir from setup)
|
||
message("\nLoading field boundaries for KPI calculation...")
|
||
tryCatch({
|
||
boundaries_result <- load_field_boundaries(setup$data_dir)
|
||
|
||
if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
|
||
field_boundaries_sf <- boundaries_result$field_boundaries_sf
|
||
} else {
|
||
field_boundaries_sf <- boundaries_result
|
||
}
|
||
|
||
if (nrow(field_boundaries_sf) == 0) {
|
||
stop("No fields loaded from boundaries")
|
||
}
|
||
|
||
message(paste(" ✓ Loaded", nrow(field_boundaries_sf), "fields"))
|
||
}, error = function(e) {
|
||
stop("ERROR loading field boundaries: ", e$message)
|
||
})
|
||
|
||
# Load harvesting data for yield prediction (using common helper function)
|
||
harvesting_data <- load_harvest_data(setup$data_dir)
|
||
|
||
# Extract current week/year from end_date
|
||
current_week <- as.numeric(format(end_date, "%V"))
|
||
current_year <- as.numeric(format(end_date, "%G"))
|
||
|
||
|
||
# Call the main orchestrator function from kpi_calculation_utils.R
|
||
workflow_results <- calculate_field_analysis_cane_supply(
|
||
setup = setup,
|
||
client_config = client_config,
|
||
end_date = end_date,
|
||
project_dir = project_dir,
|
||
weekly_mosaic = weekly_mosaic,
|
||
daily_vals_dir = daily_vals_dir,
|
||
field_boundaries_sf = field_boundaries_sf,
|
||
data_dir = data_dir
|
||
)
|
||
|
||
# Extract results
|
||
field_analysis_df <- workflow_results$field_analysis_df
|
||
farm_kpi_results <- workflow_results$farm_kpi_results
|
||
export_paths <- workflow_results$export_paths
|
||
|
||
} else {
|
||
# Unknown client type - log warning and exit
|
||
warning(sprintf("Unknown client type: %s - no workflow matched", client_type))
|
||
cat("\n⚠️ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "")
|
||
cat("Expected: 'agronomic_support' (aura) or 'cane_supply' (angata, etc.)\n")
|
||
cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n")
|
||
}
|
||
}
|
||
|
||
if (sys.nframe() == 0) {
|
||
main()
|
||
}
|