SmartCane/r_app/80_calculate_kpis.R

460 lines
18 KiB
R
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# ============================================================================
# SCRIPT 80: Key Performance Indicator (KPI) Calculation
# ============================================================================
# PURPOSE:
# Calculate per-field and farm-level KPIs from weekly CI mosaics. Computes
# field uniformity, growth trends (4-week and 8-week), area change detection,
# TCH forecasts, stress identification, and weed presence. Generates
# comprehensive Excel/CSV/RDS exports for dashboards and stakeholder reporting.
#
# INPUT DATA:
# - Source 1: laravel_app/storage/app/{project}/weekly_mosaic/{FIELD}/week_*.tif
# - Source 2: Field boundaries (pivot.geojson) and harvest data (harvest.xlsx)
# - Source 3: Historical field stats (RDS from previous weeks)
#
# OUTPUT DATA:
# - Destination: laravel_app/storage/app/{project}/output/
# - Format: Excel (.xlsx), CSV (.csv), RDS (.rds)
# - Files: {project}_field_analysis_week{WW}_{YYYY}.xlsx + metadata
#
# USAGE:
# Rscript 80_calculate_kpis.R [project] [week] [year]
#
# Example (Windows PowerShell):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata 5 2026
#
# PARAMETERS:
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
# - week: ISO week number (numeric, 1-53, default current week)
# - year: ISO year (numeric, default current year)
#
# CLIENT TYPES:
# - cane_supply: Yes - uses 80_utils_cane_supply.R (placeholder)
# - agronomic_support: Yes - uses 80_utils_agronomic_support.R (6 KPI funcs)
#
# DEPENDENCIES:
# - Packages: terra, sf, tidyverse, lubridate, writexl, spdep
# - Utils files: parameters_project.R, 00_common_utils.R, 80_utils_agronomic_support.R, 80_utils_cane_supply.R
# - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx)
# - Input data: Weekly mosaic TIFFs (Script 40 output)
# - Data directories: weekly_mosaic/, output/ (created if missing)
#
# NOTES:
# - KPIs calculated: Field Uniformity (CV), Area Change (pixel %), TCH Forecast,
# Growth/Decline Trend, Weed Presence (spatial autocorrelation), Gap Filling %
# - Client-aware: Conditional sourcing based on client_config$script_90_compatible
# - Exports: 21-column output with field-level metrics, phase, status, alerts
# - Supports 4-week and 8-week trend analysis from historical RDS cache
# - Critical dependency for Scripts 90/91 (reporting/dashboards)
# - Uses Moran's I for spatial clustering detection (weed/stress patterns)
#
#
# ============================================================================
# [✓] Extract planting dates per field
# [✓] Calculate Age_week = difftime(report_date, planting_date, units="weeks")
#
# COMMAND-LINE USAGE:
# Option 1: Rscript 80_calculate_kpis.R 2026-01-14 angata
# Arguments: [end_date] [project_dir]
#
# Option 2: Rscript 80_calculate_kpis.R 2026-01-14 angata 7
# Arguments: [end_date] [project_dir] [offset_days]
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-01-12 angata 7
#
# Usage in run_full_pipeline.R:
# source("r_app/80_calculate_kpis.R")
# main()
# ============================================================================
# NEXT INTEGRATIONS (See Linear issues for detailed requirements)
# ============================================================================
# 1. [✓] Load imminent_prob from script 31 (week_WW_YYYY.csv)
# 2. [✓] Load planting_date from harvest.xlsx for field-specific age calculation
# 3. [ ] Improve Status_trigger logic to use actual imminent_prob values
# ============================================================================
# ============================================================================
# CONFIGURATION
# ============================================================================
FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5
FOUR_WEEK_TREND_GROWTH_MIN <- 0.1
FOUR_WEEK_TREND_GROWTH_MAX <- 0.5
FOUR_WEEK_TREND_NO_GROWTH_RANGE <- 0.1
FOUR_WEEK_TREND_DECLINE_MAX <- -0.1
FOUR_WEEK_TREND_DECLINE_MIN <- -0.5
FOUR_WEEK_TREND_STRONG_DECLINE_MAX <- -0.5
# CV TREND THRESHOLDS
CV_TREND_THRESHOLD_SIGNIFICANT <- 0.05
# CV_TREND_LONG_TERM (8-WEEK SLOPE) INTERPRETATION THRESHOLDS
# Interpretation: Slope of CV over 8 weeks indicates field uniformity trend
# Negative slope = CV decreasing = field becoming MORE uniform = GOOD
# Positive slope = CV increasing = field becoming MORE patchy = BAD
# Near zero = Homogenous growth (all crops progressing equally)
CV_SLOPE_STRONG_IMPROVEMENT_MIN <- -0.03 # CV decreasing rapidly (>3% drop over 8 weeks)
CV_SLOPE_IMPROVEMENT_MIN <- -0.02 # CV decreasing (2-3% drop over 8 weeks)
CV_SLOPE_IMPROVEMENT_MAX <- -0.01 # Gradual improvement (1-2% drop over 8 weeks)
CV_SLOPE_HOMOGENOUS_MIN <- -0.01 # Essentially stable (small natural variation)
CV_SLOPE_HOMOGENOUS_MAX <- 0.01 # No change in uniformity (within ±1% over 8 weeks)
CV_SLOPE_PATCHINESS_MIN <- 0.01 # Minor divergence (1-2% increase over 8 weeks)
CV_SLOPE_PATCHINESS_MAX <- 0.02 # Growing patchiness (2-3% increase over 8 weeks)
CV_SLOPE_SEVERE_MIN <- 0.02 # Severe fragmentation (>3% increase over 8 weeks)
# PERCENTILE CALCULATIONS
CI_PERCENTILE_LOW <- 0.10
CI_PERCENTILE_HIGH <- 0.90
# GERMINATION THRESHOLD (for germination_progress calculation)
GERMINATION_CI_THRESHOLD <- 2.0
# PLANTING DATE & AGE CONFIGURATION
# Load from harvest.xlsx (scripts 22 & 23) - no fallback to uniform dates
# HISTORICAL DATA LOOKBACK
WEEKS_FOR_FOUR_WEEK_TREND <- 4
WEEKS_FOR_CV_TREND_SHORT <- 2
WEEKS_FOR_CV_TREND_LONG <- 8
# ============================================================================
# 1. Load required libraries
# ============================================================================
suppressPackageStartupMessages({
# File path handling
library(here) # For relative path resolution (platform-independent file paths)
# Spatial data handling
library(sf) # For reading/manipulating field boundaries (GeoJSON)
library(terra) # For raster operations (reading mosaic TIFFs)
# Data manipulation
library(dplyr) # For data wrangling (filter, mutate, group_by, summarize)
library(tidyr) # For data reshaping (pivot_longer, pivot_wider, gather)
library(lubridate) # For date/time operations (week extraction, date arithmetic)
# File I/O
library(readr) # For reading CSV files (harvest predictions from Python)
library(readxl) # For reading harvest.xlsx (harvest dates for field mapping)
library(writexl) # For writing Excel outputs (KPI summary tables)
library(progress) # For progress bars during field processing
# ML/Analysis (optional - only for harvest model inference)
tryCatch({
library(torch) # For PyTorch model inference (harvest readiness prediction)
}, error = function(e) {
message("Note: torch package not available - harvest model inference will be skipped")
})
})
# ============================================================================
# LOAD CONFIGURATION - MUST BE DONE FIRST
# ============================================================================
# Parameters must be loaded early to determine client type and paths
tryCatch({
source(here("r_app", "parameters_project.R"))
}, error = function(e) {
stop("Error loading parameters_project.R: ", e$message)
})
# Get client configuration from global project setup
# NOTE: This cannot be done until parameters_project.R is sourced
# We determine client_type from the current project_dir (if running in main() context)
# For now, set a placeholder that will be overridden in main()
if (exists("project_dir", envir = .GlobalEnv)) {
temp_client_type <- get_client_type(get("project_dir", envir = .GlobalEnv))
} else {
temp_client_type <- "cane_supply" # Safe default
}
temp_client_config <- get_client_kpi_config(temp_client_type)
# ============================================================================
# LOAD UTILITY FUNCTIONS FROM SEPARATED MODULES
# ============================================================================
# All clients use the common utilities (shared statistical functions, reporting)
tryCatch({
source(here("r_app", "80_utils_common.R"))
}, error = function(e) {
stop("Error loading 80_utils_common.R: ", e$message)
})
# Load both client-specific utilities (functions will be available for both workflows)
# This avoids needing to determine client type at startup time
message("Loading client-specific utilities (80_utils_agronomic_support.R and 80_utils_cane_supply.R)...")
tryCatch({
source(here("r_app", "80_utils_agronomic_support.R"))
}, error = function(e) {
stop("Error loading 80_utils_agronomic_support.R: ", e$message)
})
tryCatch({
source(here("r_app", "80_utils_cane_supply.R"))
}, error = function(e) {
stop("Error loading 80_utils_cane_supply.R: ", e$message)
})
# ============================================================================
# PHASE AND STATUS TRIGGER DEFINITIONS
# ============================================================================
PHASE_DEFINITIONS <- data.frame(
phase = c("Germination", "Tillering", "Grand Growth", "Maturation"),
age_start = c(0, 4, 17, 39),
age_end = c(6, 16, 39, 200),
stringsAsFactors = FALSE
)
STATUS_TRIGGERS <- data.frame(
trigger = c(
"germination_started",
"germination_complete",
"stress_detected_whole_field",
"strong_recovery",
"growth_on_track",
"maturation_progressing",
"harvest_ready"
),
age_min = c(0, 0, NA, NA, 4, 39, 45),
age_max = c(6, 6, NA, NA, 39, 200, 200),
description = c(
"10% of field CI > 2",
"70% of field CI >= 2",
"CI decline > -1.5 + low CV",
"CI increase > +1.5",
"CI increasing consistently",
"High CI, stable/declining",
"Age 45+ weeks (ready to harvest)"
),
stringsAsFactors = FALSE
)
main <- function() {
# Parse command-line arguments
args <- commandArgs(trailingOnly = TRUE)
# end_date (arg 1)
# Priority: 1) Command-line arg, 2) Global end_date variable (for recursive calls), 3) Global end_date_str, 4) Sys.Date()
end_date <- if (length(args) >= 1 && !is.na(args[1])) {
# Parse date explicitly in YYYY-MM-DD format from command line
as.Date(args[1], format = "%Y-%m-%d")
} else if (exists("end_date", envir = .GlobalEnv)) {
global_date <- get("end_date", envir = .GlobalEnv)
# Check if it's a valid Date with length > 0
if (is.Date(global_date) && length(global_date) > 0 && !is.na(global_date)) {
global_date
} else if (exists("end_date_str", envir = .GlobalEnv)) {
as.Date(get("end_date_str", envir = .GlobalEnv))
} else {
Sys.Date()
}
} else if (exists("end_date_str", envir = .GlobalEnv)) {
as.Date(get("end_date_str", envir = .GlobalEnv))
} else {
Sys.Date()
}
# project_dir (arg 2)
project_dir <- if (length(args) >= 2 && !is.na(args[2])) {
as.character(args[2])
} else if (exists("project_dir", envir = .GlobalEnv)) {
get("project_dir", envir = .GlobalEnv)
} else {
"angata"
}
# offset (arg 3) - for backward compatibility with old 09
offset <- if (length(args) >= 3 && !is.na(args[3])) {
as.numeric(args[3])
} else {
7
}
# Validate end_date is a proper Date object
if (is.null(end_date) || length(end_date) == 0 || !inherits(end_date, "Date")) {
stop("ERROR: end_date is not valid. Got: ", class(end_date), " with length ", length(end_date))
}
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("end_date_str", format(end_date, "%Y-%m-%d"), envir = .GlobalEnv)
message("\n", strrep("=", 70))
message("80_CALCULATE_KPIs.R - CONSOLIDATED KPI CALCULATION")
message(strrep("=", 70))
message("Date:", format(end_date, "%Y-%m-%d"))
message("Project:", project_dir)
message("Mode: Conditional KPI calculation based on client type")
message("")
# Load configuration and utilities
# source(here("r_app", "crop_messaging_utils.R"))
tryCatch({
source(here("r_app", "parameters_project.R"))
}, error = function(e) {
stop("Error loading parameters_project.R: ", e$message)
})
# Initialize project directories and configuration
setup <- setup_project_directories(project_dir)
# DETERMINE CLIENT TYPE AND KPI CONFIGURATION
client_type <- get_client_type(project_dir)
client_config <- get_client_kpi_config(client_type)
# Assign to global environment so utilities and downstream scripts can access it
assign("client_config", client_config, envir = .GlobalEnv)
message("Client Type:", client_type)
message("KPI Calculations:", paste(client_config$kpi_calculations, collapse = ", "))
message("Output Formats:", paste(client_config$outputs, collapse = ", "))
# Use centralized paths from setup object (no need for file.path calls)
weekly_mosaic <- setup$weekly_mosaic_dir
daily_vals_dir <- setup$daily_ci_vals_dir
reports_dir <- setup$kpi_reports_dir
data_dir <- setup$data_dir
tryCatch({
source(here("r_app", "30_growth_model_utils.R"))
}, error = function(e) {
warning("30_growth_model_utils.R not found - yield prediction KPI will use placeholder data")
})
# CONDITIONAL EXECUTION BASED ON CLIENT TYPE
# ============================================
if (client_config$script_90_compatible && "kpi_summary_tables" %in% client_config$outputs) {
# WORKFLOW: Run 6 farm-level KPIs for Script 90 compatibility
message("\n", strrep("=", 70))
message("WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)")
message(strrep("=", 70))
# Prepare inputs for KPI calculation (already created by setup_project_directories)
reports_dir_kpi <- setup$kpi_reports_dir
cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir
# Load field boundaries for workflow (use data_dir from setup)
message("\nLoading field boundaries for KPI calculation...")
tryCatch({
boundaries_result <- load_field_boundaries(setup$data_dir)
if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
field_boundaries_sf <- boundaries_result$field_boundaries_sf
} else {
field_boundaries_sf <- boundaries_result
}
if (nrow(field_boundaries_sf) == 0) {
stop("No fields loaded from boundaries")
}
message(paste(" ✓ Loaded", nrow(field_boundaries_sf), "fields"))
}, error = function(e) {
stop("ERROR loading field boundaries: ", e$message)
})
# Load harvesting data
if (!exists("harvesting_data")) {
warning("harvesting_data not loaded. TCH KPI will use placeholder values.")
harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric())
}
# Extract current week/year from end_date
current_week <- as.numeric(format(end_date, "%V"))
current_year <- as.numeric(format(end_date, "%G"))
# Call with correct signature
kpi_results <- calculate_all_field_analysis_agronomic_support(
field_boundaries_sf = field_boundaries_sf,
current_week = current_week,
current_year = current_year,
current_mosaic_dir = setup$weekly_mosaic_dir,
previous_mosaic_dir = NULL,
ci_rds_path = NULL,
harvesting_data = harvesting_data,
output_dir = reports_dir_kpi,
project_dir = project_dir
)
cat("\n=== KPI CALCULATION COMPLETE ===\n")
cat("Summary tables saved for Script 90 integration\n")
cat("Output directory:", reports_dir_kpi, "\n\n")
} else if (client_config$script_91_compatible && "field_analysis_excel" %in% client_config$outputs) {
# CANE_SUPPLY WORKFLOW: Run per-field analysis with phase assignment
message("\n", strrep("=", 70))
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
message(strrep("=", 70))
# Define variables needed for workflow functions
reports_dir <- setup$kpi_reports_dir
data_dir <- setup$data_dir
# Load field boundaries for workflow (use data_dir from setup)
message("\nLoading field boundaries for KPI calculation...")
tryCatch({
boundaries_result <- load_field_boundaries(setup$data_dir)
if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
field_boundaries_sf <- boundaries_result$field_boundaries_sf
} else {
field_boundaries_sf <- boundaries_result
}
if (nrow(field_boundaries_sf) == 0) {
stop("No fields loaded from boundaries")
}
message(paste(" ✓ Loaded", nrow(field_boundaries_sf), "fields"))
}, error = function(e) {
stop("ERROR loading field boundaries: ", e$message)
})
# Load harvesting data
if (!exists("harvesting_data")) {
warning("harvesting_data not loaded. TCH KPI will use placeholder values.")
harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric())
}
# Extract current week/year from end_date
current_week <- as.numeric(format(end_date, "%V"))
current_year <- as.numeric(format(end_date, "%G"))
# Call the main orchestrator function from kpi_calculation_utils.R
workflow_results <- calculate_field_analysis_cane_supply(
setup = setup,
client_config = client_config,
end_date = end_date,
project_dir = project_dir,
weekly_mosaic = weekly_mosaic,
daily_vals_dir = daily_vals_dir,
field_boundaries_sf = field_boundaries_sf,
data_dir = data_dir
)
# Extract results
field_analysis_df <- workflow_results$field_analysis_df
farm_kpi_results <- workflow_results$farm_kpi_results
export_paths <- workflow_results$export_paths
} else {
# Unknown client type - log warning and exit
warning(sprintf("Unknown client type: %s - no workflow matched", client_type))
cat("\n⚠ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "")
cat("Expected: 'agronomic_support' (aura) or 'cane_supply' (angata, etc.)\n")
cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n")
}
}
if (sys.nframe() == 0) {
main()
}