# File: SmartCane/r_app/80_calculate_kpis.R

# ============================================================================
# SCRIPT 80: Key Performance Indicator (KPI) Calculation
# ============================================================================
# PURPOSE:
# Calculate per-field and farm-level KPIs from weekly CI mosaics. Computes
# field uniformity, growth trends (4-week and 8-week), area change detection,
# TCH forecasts, stress identification, and weed presence. Generates
# comprehensive Excel/CSV/RDS exports for dashboards and stakeholder reporting.
#
# INPUT DATA:
# - Source 1: laravel_app/storage/app/{project}/weekly_mosaic/{FIELD}/week_*.tif
# - Source 2: Field boundaries (pivot.geojson) and harvest data (harvest.xlsx)
# - Source 3: Historical field stats (RDS from previous weeks)
#
# OUTPUT DATA:
# - Destination: laravel_app/storage/app/{project}/output/
# - Format: Excel (.xlsx), CSV (.csv), RDS (.rds)
# - Files: {project}_field_analysis_week{WW}_{YYYY}.xlsx + metadata
#
# USAGE:
# Rscript 80_calculate_kpis.R [end_date] [project_dir] [offset_days]
#
# Example (Windows PowerShell):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-01-12 angata 7
#
# PARAMETERS:
# - end_date: Report end date in YYYY-MM-DD format (default: current date)
# - project_dir: Project name (character) - angata, chemba, xinavane, esa, simba
# - offset_days: Lookback window in days (numeric, default 7; kept for backward compatibility)
#
# CLIENT TYPES:
# - cane_supply (ANGATA): Yes - uses 80_utils_cane_supply.R (placeholder)
# - agronomic_support (AURA): Yes - uses 80_utils_agronomic_support.R (6 KPI funcs)
#
# DEPENDENCIES:
# - Packages: terra, sf, tidyverse, lubridate, writexl, spdep
# - Utils files: parameters_project.R, 00_common_utils.R, 80_utils_agronomic_support.R, 80_utils_cane_supply.R
# - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx)
# - Input data: Weekly mosaic TIFFs (Script 40 output)
# - Data directories: weekly_mosaic/, output/ (created if missing)
#
# NOTES:
# - KPIs calculated: Field Uniformity (CV), Area Change (pixel %), TCH Forecast,
# Growth/Decline Trend, Weed Presence (spatial autocorrelation), Gap Filling %
# - Client-aware: Conditional sourcing based on client_config$script_90_compatible
# - Exports: 21-column output with field-level metrics, phase, status, alerts
# - Supports 4-week and 8-week trend analysis from historical RDS cache
# - Critical dependency for Scripts 90/91 (reporting/dashboards)
# - Uses Moran's I for spatial clustering detection (weed/stress patterns)
#
# RELATED ISSUES:
# SC-112: Script 80 utilities restructuring (common + client-aware modules)
# SC-108: Core pipeline improvements
# SC-100: KPI definition and formula documentation
#
# ============================================================================
# [✓] Extract planting dates per field
# [✓] Calculate Age_week = difftime(report_date, planting_date, units="weeks")
#
# COMMAND-LINE USAGE:
# Option 1: Rscript 80_calculate_kpis.R 2026-01-14 angata
# Arguments: [end_date] [project_dir]
#
# Option 2: Rscript 80_calculate_kpis.R 2026-01-14 angata 7
# Arguments: [end_date] [project_dir] [offset_days]
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-01-12 angata 7
#
# Usage in run_full_pipeline.R:
# source("r_app/80_calculate_kpis.R")
# main()
# ============================================================================
# NEXT INTEGRATIONS (See Linear issues for detailed requirements)
# ============================================================================
# 1. [✓] Load imminent_prob from script 31 (week_WW_YYYY.csv)
# 2. [✓] Load planting_date from harvest.xlsx for field-specific age calculation
# 3. [ ] Improve Status_trigger logic to use actual imminent_prob values
# ============================================================================
# ============================================================================
# CONFIGURATION
# ============================================================================
# FOUR-WEEK TREND THRESHOLDS
# Slope of Mean_CI over the last 4 weeks (CI units per week), classified as:
# Strong growth / Growth / No growth / Decline / Strong decline.
FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5
FOUR_WEEK_TREND_GROWTH_MIN <- 0.1
FOUR_WEEK_TREND_GROWTH_MAX <- 0.5
FOUR_WEEK_TREND_NO_GROWTH_RANGE <- 0.1
FOUR_WEEK_TREND_DECLINE_MAX <- -0.1
FOUR_WEEK_TREND_DECLINE_MIN <- -0.5
FOUR_WEEK_TREND_STRONG_DECLINE_MAX <- -0.5
# CV TREND THRESHOLDS
# Minimum absolute week-over-week CV change considered significant.
CV_TREND_THRESHOLD_SIGNIFICANT <- 0.05
# CV_TREND_LONG_TERM (8-WEEK SLOPE) INTERPRETATION THRESHOLDS
# Interpretation: slope of CV over 8 weeks indicates the field uniformity trend
# Negative slope = CV decreasing = field becoming MORE uniform = GOOD
# Positive slope = CV increasing = field becoming MORE patchy = BAD
# Near zero = homogenous growth (all crops progressing equally)
CV_SLOPE_STRONG_IMPROVEMENT_MIN <- -0.03 # CV decreasing rapidly (>3% drop over 8 weeks)
CV_SLOPE_IMPROVEMENT_MIN <- -0.02 # CV decreasing (2-3% drop over 8 weeks)
CV_SLOPE_IMPROVEMENT_MAX <- -0.01 # Gradual improvement (1-2% drop over 8 weeks)
CV_SLOPE_HOMOGENOUS_MIN <- -0.01 # Essentially stable (small natural variation)
CV_SLOPE_HOMOGENOUS_MAX <- 0.01 # No change in uniformity (within ±1% over 8 weeks)
CV_SLOPE_PATCHINESS_MIN <- 0.01 # Minor divergence (1-2% increase over 8 weeks)
CV_SLOPE_PATCHINESS_MAX <- 0.02 # Growing patchiness (2-3% increase over 8 weeks)
CV_SLOPE_SEVERE_MIN <- 0.02 # Severe fragmentation (>2% increase over 8 weeks)
# PERCENTILE CALCULATIONS (quantiles reported per field as CI_Percentiles)
CI_PERCENTILE_LOW <- 0.10
CI_PERCENTILE_HIGH <- 0.90
# GERMINATION THRESHOLD: a pixel counts as germinated when CI exceeds this
# value (drives the Germination_progress KPI via Pct_pixels_CI_gte_2).
GERMINATION_CI_THRESHOLD <- 2.0
# PLANTING DATE & AGE CONFIGURATION
# Planting dates come from harvest.xlsx (scripts 22 & 23). Deliberately no
# fallback to uniform dates: fields without a date get Age_week = NA.
# HISTORICAL DATA LOOKBACK (number of past weekly RDS snapshots to load)
WEEKS_FOR_FOUR_WEEK_TREND <- 4
WEEKS_FOR_CV_TREND_SHORT <- 2
WEEKS_FOR_CV_TREND_LONG <- 8
# ============================================================================
# 1. Load required libraries
# ============================================================================
# All packages load quietly so startup banners do not pollute pipeline logs.
# Load order matters for search-path masking (e.g. dplyr after sf/terra), so
# do not reorder these calls casually.
suppressPackageStartupMessages({
library(here)
library(sf)
library(terra)
library(dplyr)
library(tidyr)
library(lubridate)
library(readr)
library(readxl)
library(writexl)
library(purrr)
library(furrr)
library(future)
library(caret)
library(CAST)
library(randomForest)
# torch is optional: it is only needed for harvest model inference, so a
# missing installation degrades gracefully instead of stopping the pipeline.
tryCatch({
library(torch)
}, error = function(e) {
message("Note: torch package not available - harvest model inference will be skipped")
})
})
# ============================================================================
# LOAD UTILITY FUNCTIONS FROM SEPARATED MODULES
# ============================================================================
# parameters_project.R provides project configuration and (presumably) the
# client_config object used below — TODO confirm it defines client_config,
# since this file references it at top level before main() runs.
tryCatch({
source(here("r_app", "parameters_project.R"))
}, error = function(e) {
stop("Error loading parameters_project.R: ", e$message)
})
tryCatch({
source(here("r_app", "00_common_utils.R"))
}, error = function(e) {
stop("Error loading 00_common_utils.R: ", e$message)
})
# ============================================================================
# LOAD CLIENT-AWARE UTILITIES
# ============================================================================
# All clients use the common utilities (shared statistical functions, reporting)
tryCatch({
source(here("r_app", "80_utils_common.R"))
}, error = function(e) {
stop("Error loading 80_utils_common.R: ", e$message)
})
# Client-specific utilities based on client_config$script_90_compatible:
# script_90_compatible = TRUE -> AURA workflow (6 KPIs)
# script_90_compatible = FALSE -> CANE_SUPPLY workflow (weekly stats + basic reporting)
# NOTE(review): failures here are fatal by design — the downstream workflow
# cannot run without its client module.
if (client_config$script_90_compatible) {
message("Loading AURA client utilities (80_utils_agronomic_support.R)...")
tryCatch({
source(here("r_app", "80_utils_agronomic_support.R"))
}, error = function(e) {
stop("Error loading 80_utils_agronomic_support.R: ", e$message)
})
} else {
message("Loading CANE_SUPPLY client utilities (80_utils_cane_supply.R)...")
tryCatch({
source(here("r_app", "80_utils_cane_supply.R"))
}, error = function(e) {
stop("Error loading 80_utils_cane_supply.R: ", e$message)
})
}
# ============================================================================
# PHASE AND STATUS TRIGGER DEFINITIONS
# ============================================================================
# Crop phases by age (weeks since planting). Boundaries overlap intentionally
# at the edges (e.g. Tillering ends at 16, Grand Growth starts at 17) — the
# classification in main() resolves ties with half-open intervals.
PHASE_DEFINITIONS <- local({
  phase_names <- c("Germination", "Tillering", "Grand Growth", "Maturation")
  data.frame(
    phase = phase_names,
    age_start = c(0, 4, 17, 39),
    age_end = c(6, 16, 39, 200),
    stringsAsFactors = FALSE
  )
})
# Lookup table of status triggers: trigger id, applicable age window (weeks,
# NA = age-independent), and a human-readable rule description.
STATUS_TRIGGERS <- local({
  trigger_ids <- c(
    "germination_started",
    "germination_complete",
    "stress_detected_whole_field",
    "strong_recovery",
    "growth_on_track",
    "maturation_progressing",
    "harvest_ready"
  )
  rule_descriptions <- c(
    "10% of field CI > 2",
    "70% of field CI >= 2",
    "CI decline > -1.5 + low CV",
    "CI increase > +1.5",
    "CI increasing consistently",
    "High CI, stable/declining",
    "Age 45+ weeks (ready to harvest)"
  )
  data.frame(
    trigger = trigger_ids,
    age_min = c(0, 0, NA, NA, 4, 39, 45),
    age_max = c(6, 6, NA, NA, 39, 200, 200),
    description = rule_descriptions,
    stringsAsFactors = FALSE
  )
})
# ============================================================================
# MAIN
# ============================================================================
main <- function() {
  # --------------------------------------------------------------------------
  # Consolidated KPI calculation entry point.
  #
  # Command-line arguments:
  #   1. end_date    - report date, YYYY-MM-DD (default: global end_date /
  #                    end_date_str if set, otherwise Sys.Date())
  #   2. project_dir - project name (default "angata")
  #   3. offset_days - lookback window in days (default 7; kept for backward
  #                    compatibility with the old script 09)
  #
  # Side effects: assigns project_dir and end_date_str into .GlobalEnv (read
  # by sourced utility scripts) and writes Excel/CSV/RDS exports.
  #
  # NOTE(review): data_dir and reports_dir are read but never defined in this
  # function — presumably globals created by parameters_project.R /
  # setup_project_directories. Confirm before calling main() in isolation.
  # --------------------------------------------------------------------------
  args <- commandArgs(trailingOnly = TRUE)
  # end_date (arg 1)
  # Priority: 1) CLI arg, 2) global end_date (for recursive calls),
  # 3) global end_date_str, 4) Sys.Date()
  end_date <- if (length(args) >= 1 && !is.na(args[1])) {
    # Parse date explicitly in YYYY-MM-DD format from the command line
    as.Date(args[1], format = "%Y-%m-%d")
  } else if (exists("end_date", envir = .GlobalEnv)) {
    global_date <- get("end_date", envir = .GlobalEnv)
    # Only trust the global if it is a non-empty, non-NA Date
    if (is.Date(global_date) && length(global_date) > 0 && !is.na(global_date)) {
      global_date
    } else if (exists("end_date_str", envir = .GlobalEnv)) {
      as.Date(get("end_date_str", envir = .GlobalEnv))
    } else {
      Sys.Date()
    }
  } else if (exists("end_date_str", envir = .GlobalEnv)) {
    as.Date(get("end_date_str", envir = .GlobalEnv))
  } else {
    Sys.Date()
  }
  # project_dir (arg 2)
  project_dir <- if (length(args) >= 2 && !is.na(args[2])) {
    as.character(args[2])
  } else if (exists("project_dir", envir = .GlobalEnv)) {
    get("project_dir", envir = .GlobalEnv)
  } else {
    "angata"
  }
  # offset (arg 3) - for backward compatibility with old script 09
  offset <- if (length(args) >= 3 && !is.na(args[3])) {
    as.numeric(args[3])
  } else {
    7
  }
  # Validate end_date is a proper Date object before going further
  if (is.null(end_date) || length(end_date) == 0 || !inherits(end_date, "Date")) {
    stop("ERROR: end_date is not valid. Got: ", class(end_date), " with length ", length(end_date))
  }
  # Publish run parameters for sourced utility scripts that read globals
  assign("project_dir", project_dir, envir = .GlobalEnv)
  assign("end_date_str", format(end_date, "%Y-%m-%d"), envir = .GlobalEnv)
  message("\n", strrep("=", 70))
  message("80_CALCULATE_KPIs.R - CONSOLIDATED KPI CALCULATION")
  message(strrep("=", 70))
  message("Date: ", format(end_date, "%Y-%m-%d"))
  message("Project: ", project_dir)
  message("Mode: Conditional KPI calculation based on client type")
  message("")
  # Re-source project parameters so project_dir-dependent globals are
  # refreshed for this run (main() may be invoked repeatedly in a session).
  tryCatch({
    source(here("r_app", "parameters_project.R"))
  }, error = function(e) {
    stop("Error loading parameters_project.R: ", e$message)
  })
  # Initialize project directories and configuration
  setup <- setup_project_directories(project_dir)
  # DETERMINE CLIENT TYPE AND KPI CONFIGURATION
  client_type <- get_client_type(project_dir)
  client_config <- get_client_kpi_config(client_type)
  message("Client Type: ", client_type)
  message("KPI Calculations: ", paste(client_config$kpi_calculations, collapse = ", "))
  message("Output Formats: ", paste(client_config$outputs, collapse = ", "))
  # Centralized paths from the setup object
  weekly_tile_max <- setup$weekly_tile_max_dir
  weekly_mosaic <- setup$weekly_mosaic_dir
  daily_vals_dir <- setup$daily_ci_vals_dir
  # Growth model utilities are optional; the yield prediction KPI degrades to
  # placeholder data when they are missing.
  tryCatch({
    source(here("r_app", "30_growth_model_utils.R"))
  }, error = function(e) {
    warning("30_growth_model_utils.R not found - yield prediction KPI will use placeholder data")
  })
  # Shared helper: bin a percentage vector into labelled intervals
  # 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100% (used for germination
  # progress and cloud-free percentage). Extracted to avoid duplicating the
  # same 11-branch chain twice.
  bin_percentage <- function(pct_values) {
    vapply(pct_values, function(pct) {
      if (is.na(pct)) return(NA_character_)
      if (pct >= 95) "95-100%"
      else if (pct >= 90) "90-95%"
      else if (pct >= 80) "80-90%"
      else if (pct >= 70) "70-80%"
      else if (pct >= 60) "60-70%"
      else if (pct >= 50) "50-60%"
      else if (pct >= 40) "40-50%"
      else if (pct >= 30) "30-40%"
      else if (pct >= 20) "20-30%"
      else if (pct >= 10) "10-20%"
      else "0-10%"
    }, character(1))
  }
  # CONDITIONAL EXECUTION BASED ON CLIENT TYPE
  # ============================================
  if (client_config$script_90_compatible && "kpi_summary_tables" %in% client_config$outputs) {
    # AURA WORKFLOW: Run 6 farm-level KPIs for Script 90 compatibility
    message("\n", strrep("=", 70))
    message("AURA WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)")
    message(strrep("=", 70))
    # Load 80_kpi_utils.R with all 6 KPI functions
    # (80_kpi_utils.R includes the helpers formerly in crop_messaging_utils.R)
    tryCatch({
      source(here("r_app", "80_kpi_utils.R"))
    }, error = function(e) {
      stop("Error loading 80_kpi_utils.R: ", e$message)
    })
    # Prepare inputs (directories already created by setup_project_directories)
    reports_dir_kpi <- setup$kpi_reports_dir
    cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir
    # Field boundaries and harvest data are loaded by parameters_project.R
    if (!exists("field_boundaries_sf")) {
      stop("field_boundaries_sf not loaded. Check parameters_project.R initialization.")
    }
    if (!exists("harvesting_data")) {
      warning("harvesting_data not loaded. TCH KPI will use placeholder values.")
      harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric())
    }
    # Calculate all 6 KPIs
    kpi_results <- calculate_all_kpis(
      report_date = end_date,
      output_dir = reports_dir_kpi,
      field_boundaries_sf = field_boundaries_sf,
      harvesting_data = harvesting_data,
      cumulative_CI_vals_dir = cumulative_CI_vals_dir,
      weekly_CI_mosaic = weekly_mosaic,
      reports_dir = reports_dir_kpi,
      project_dir = project_dir
    )
    cat("\n=== AURA KPI CALCULATION COMPLETE ===\n")
    cat("Summary tables saved for Script 90 integration\n")
    cat("Output directory:", reports_dir_kpi, "\n\n")
  } else if (client_config$script_91_compatible && "field_analysis_excel" %in% client_config$outputs) {
    # CANE_SUPPLY WORKFLOW: per-field analysis with phase assignment
    message("\n", strrep("=", 70))
    message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
    message(strrep("=", 70))
    message("\n", strrep("-", 70))
    message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)")
    message(strrep("-", 70))
    current_week <- as.numeric(format(end_date, "%V")) # ISO week number (1-53)
    year <- as.numeric(format(end_date, "%G")) # ISO week year (%G) matches Script 40's mosaic naming
    # Previous week via the authoritative helper (handles year boundaries).
    # FIX: use here() for a working-directory-independent path, consistent
    # with every other source() call in this file.
    source(here("r_app", "80_weekly_stats_utils.R"))
    previous_info <- calculate_target_week_and_year(current_week, year, offset_weeks = 1)
    previous_week <- previous_info$week
    previous_year <- previous_info$year
    message(paste("Week:", current_week, "/ Year (ISO):", year))
    # Find mosaic files - support tile-based AND single-file approaches
    message("Finding mosaic files...")
    tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", current_week, year)
    single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year)
    # PRIORITY 1: tile-based mosaics (projects with large ROI)
    detected_grid_size <- NA
    mosaic_dir <- NA
    mosaic_mode <- NA
    if (dir.exists(weekly_tile_max)) {
      subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE)
      grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
      if (length(grid_patterns) > 0) {
        detected_grid_size <- grid_patterns[1]
        mosaic_dir <- file.path(weekly_tile_max, detected_grid_size)
        tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
        if (length(tile_files) > 0) {
          message(paste(" ✓ Using tile-based approach (grid-size:", detected_grid_size, ")"))
          message(paste(" Found", length(tile_files), "tiles"))
          mosaic_mode <- "tiled"
        }
      }
    }
    # PRIORITY 2: per-field mosaics (NEW per-field architecture)
    if (is.na(mosaic_mode)) {
      message(" No tiles found. Checking for per-field mosaics...")
      if (dir.exists(weekly_mosaic)) {
        field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE)
        field_dirs <- field_dirs[field_dirs != ""]
        if (length(field_dirs) > 0) {
          # Collect any field subdirectory containing this week's mosaic
          per_field_files <- c()
          for (field in field_dirs) {
            field_mosaic_dir <- file.path(weekly_mosaic, field)
            files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
            if (length(files) > 0) {
              per_field_files <- c(per_field_files, files)
            }
          }
          if (length(per_field_files) > 0) {
            message(paste(" ✓ Using per-field mosaic approach"))
            message(paste(" Found", length(per_field_files), "per-field mosaics"))
            mosaic_mode <- "per-field"
            mosaic_dir <- weekly_mosaic # Will be field subdirectories
          }
        }
      }
    }
    # PRIORITY 3: single-file mosaic (legacy approach)
    if (is.na(mosaic_mode)) {
      message(" No per-field mosaics found. Checking for single-file mosaic (legacy approach)...")
      mosaic_dir <- weekly_mosaic
      single_file <- list.files(mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
      if (length(single_file) > 0) {
        message(paste(" ✓ Using single-file approach"))
        message(paste(" Found 1 mosaic file:", basename(single_file[1])))
        mosaic_mode <- "single-file"
      } else {
        stop(paste("ERROR: No mosaic files found for week", current_week, year,
                   "\n Checked (1) tile-based:", file.path(weekly_tile_max, "*", "week_*.tif"),
                   "\n Checked (2) per-field:", file.path(weekly_mosaic, "*", "week_*.tif"),
                   "\n Checked (3) single-file:", file.path(weekly_mosaic, "week_*.tif")))
      }
    }
    message(paste(" Using mosaic mode:", mosaic_mode))
    # Load field boundaries (data_dir is a global from parameters_project.R)
    tryCatch({
      boundaries_result <- load_field_boundaries(data_dir)
      if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
        field_boundaries_sf <- boundaries_result$field_boundaries_sf
      } else {
        field_boundaries_sf <- boundaries_result
      }
      if (nrow(field_boundaries_sf) == 0) {
        stop("No fields loaded from boundaries")
      }
      message(paste(" Loaded", nrow(field_boundaries_sf), "fields"))
    }, error = function(e) {
      stop("ERROR loading field boundaries: ", e$message)
    })
    message("Loading historical field data for trend calculations...")
    # Load up to 8 weeks (max of 4-week and 8-week trend requirements).
    # The loader gracefully handles missing weeks and loads whatever exists.
    num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG) # Always 8
    message(paste(" Attempting to load up to", num_weeks_to_load, "weeks of historical data..."))
    # Only auto-generate on the first call (not in recursive calls made from
    # within load_historical_field_data itself)
    allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv)
    historical_data <- load_historical_field_data(project_dir, current_week, year, reports_dir,
                                                  num_weeks = num_weeks_to_load,
                                                  auto_generate = allow_auto_gen,
                                                  field_boundaries_sf = field_boundaries_sf,
                                                  daily_vals_dir = daily_vals_dir)
    # Load harvest.xlsx for planting dates (season_start)
    message("\nLoading harvest data from harvest.xlsx for planting dates...")
    harvest_file_path <- file.path(data_dir, "harvest.xlsx")
    harvesting_data <- tryCatch({
      if (file.exists(harvest_file_path)) {
        harvest_raw <- readxl::read_excel(harvest_file_path)
        harvest_raw$season_start <- as.Date(harvest_raw$season_start)
        harvest_raw$season_end <- as.Date(harvest_raw$season_end)
        message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows"))
        harvest_raw
      } else {
        message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path))
        NULL
      }
    }, error = function(e) {
      message(paste(" ERROR loading harvest.xlsx:", e$message))
      NULL
    })
    planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
    # Validate planting_dates; fall back to all-NA dates when unavailable.
    if (is.null(planting_dates) || nrow(planting_dates) == 0) {
      message("WARNING: No planting dates available. Using NA for all fields.")
      # FIX: fallback column must be named planting_date (it was `date`,
      # which the lookup below never reads, yielding NULL).
      planting_dates <- data.frame(
        field_id = field_boundaries_sf$field,
        planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)),
        stringsAsFactors = FALSE
      )
    }
    # ========================================================================
    # MODULAR APPROACH: Load/Calculate weekly stats, apply trends
    # ========================================================================
    message("\nPreparing mosaic configuration for statistics calculation...")
    # Tile-based mosaics need the grid mapping; single-file mode fakes a
    # one-"tile" grid covering the whole mosaic.
    if (mosaic_mode == "tiled") {
      tile_grid <- build_tile_grid(mosaic_dir, current_week, year)
      message(paste(" ✓ Built tile grid with", nrow(tile_grid), "tiles"))
    } else {
      message(" ✓ Using single-file mosaic (no tile grid needed)")
      single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, year)
      single_file <- list.files(mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
      if (length(single_file) == 0) {
        stop("ERROR: Single-file mosaic not found in ", mosaic_dir)
      }
      # Minimal tile_grid with one "tile" (id 0) representing the full extent
      tile_grid <- list(
        mosaic_dir = mosaic_dir,
        data = data.frame(
          id = 0,
          xmin = NA_real_,
          xmax = NA_real_,
          ymin = NA_real_,
          ymax = NA_real_,
          stringsAsFactors = FALSE
        ),
        mode = "single-file",
        file = single_file[1]
      )
    }
    message("\nUsing modular RDS-based approach for weekly statistics...")
    # 1. CURRENT week stats (from tiles or RDS cache)
    message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...")
    current_stats <- load_or_calculate_weekly_stats(
      week_num = current_week,
      year = year,
      project_dir = project_dir,
      field_boundaries_sf = field_boundaries_sf,
      mosaic_dir = tile_grid$mosaic_dir,
      reports_dir = reports_dir,
      report_date = end_date
    )
    message(paste(" ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week"))
    # 2. PREVIOUS week stats (from RDS cache or tiles)
    message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...")
    prev_report_date <- end_date - 7 # report date for previous week
    prev_stats <- load_or_calculate_weekly_stats(
      week_num = previous_week,
      year = previous_year,
      project_dir = project_dir,
      field_boundaries_sf = field_boundaries_sf,
      mosaic_dir = tile_grid$mosaic_dir,
      reports_dir = reports_dir,
      report_date = prev_report_date
    )
    message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week"))
    message(paste(" Columns in prev_stats:", paste(names(prev_stats), collapse = ", ")))
    message(paste(" CV column non-NA values in prev_stats:", sum(!is.na(prev_stats$CV))))
    # 3. Trend calculations (require both weeks)
    message("\n3. Calculating trend columns...")
    current_stats <- calculate_kpi_trends(current_stats, prev_stats,
                                          project_dir = project_dir,
                                          reports_dir = reports_dir,
                                          current_week = current_week,
                                          year = year)
    message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
    # 4. Weekly harvest probabilities from script 31 (if available)
    message("\n4. Loading harvest probabilities from script 31...")
    harvest_prob_file <- file.path(reports_dir, "kpis", "field_stats",
                                   sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, year))
    message(paste(" Looking for:", harvest_prob_file))
    imminent_prob_data <- tryCatch({
      if (file.exists(harvest_prob_file)) {
        prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE)
        message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields"))
        prob_df %>%
          select(field, imminent_prob, detected_prob) %>%
          rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob)
      } else {
        message(paste(" INFO: Harvest probabilities not available (script 31 not run)"))
        NULL
      }
    }, error = function(e) {
      message(paste(" WARNING: Could not load harvest probabilities:", e$message))
      NULL
    })
    # ========================================================================
    # Build final output dataframe with all 21 columns
    # ========================================================================
    message("\nBuilding final field analysis output...")
    # Pre-calculate acreages with per-row geometry validation so a single bad
    # geometry cannot abort the whole field_analysis construction.
    # Invalid geometry -> 0 ha; area computation error -> NA.
    acreage_lookup <- tryCatch({
      area_ha <- vapply(seq_len(nrow(field_boundaries_sf)), function(idx) {
        geometry_ok <- tryCatch({
          isTRUE(sf::st_is_valid(field_boundaries_sf[idx, ]))
        }, error = function(e) FALSE)
        if (!geometry_ok) {
          return(0)
        }
        tryCatch({
          as.numeric(sf::st_area(field_boundaries_sf[idx, ])) / 10000
        }, error = function(e) NA_real_)
      }, numeric(1))
      # Convert hectares to acres (1 acre = 0.404686 ha)
      data.frame(
        field = sf::st_drop_geometry(field_boundaries_sf)$field,
        acreage = area_ha / 0.404686,
        stringsAsFactors = FALSE
      )
    }, error = function(e) {
      message(paste("Warning: Could not calculate acreages from geometries -", e$message))
      data.frame(field = character(0), acreage = numeric(0))
    })
    field_analysis_df <- current_stats %>%
      mutate(
        # Column 2: Farm_Section (user fills)
        Farm_Section = NA_character_,
        # Column 3: Field_name (from GeoJSON - mirrors Field_id)
        Field_name = Field_id,
        # Column 4: Acreage (from geometry; 0 when unknown)
        Acreage = {
          acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)]
          if_else(is.na(acreages_vec), 0, acreages_vec)
        },
        # Columns 5-7 already in current_stats (Mean_CI, Weekly_ci_change,
        # Four_week_trend)
        # Column 8: Last_harvest_or_planting_date (harvest.xlsx season_start)
        Last_harvest_or_planting_date = {
          planting_dates$planting_date[match(Field_id, planting_dates$field_id)]
        },
        # Column 9: Age_week - weeks between report date and planting date
        # (vectorized; NA planting dates propagate to NA automatically)
        Age_week = round(as.numeric(difftime(end_date, Last_harvest_or_planting_date, units = "weeks")), 0),
        # Column 10: Phase classification from Age_week (half-open intervals)
        Phase = case_when(
          is.na(Age_week) ~ NA_character_,
          Age_week >= 39 ~ "Maturation",
          Age_week >= 17 ~ "Grand Growth",
          Age_week >= 4 ~ "Tillering",
          Age_week >= 0 ~ "Germination",
          TRUE ~ NA_character_ # negative ages (planting in the future)
        ),
        # Column 11: nmr_of_weeks_analysed already in current_stats
        # Column 12: Germination_progress - binned Pct_pixels_CI_gte_2
        Germination_progress = bin_percentage(Pct_pixels_CI_gte_2),
        # Column 13: Imminent_prob (from script 31 or NA if not available)
        Imminent_prob = {
          if (!is.null(imminent_prob_data)) {
            imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)]
          } else {
            rep(NA_real_, nrow(current_stats))
          }
        },
        # Column 14: Status_Alert (harvest probability + crop health status)
        # Priority order: Ready for harvest-check → Strong decline →
        # Harvested/bare → NA
        Status_Alert = {
          sapply(seq_len(nrow(current_stats)), function(idx) {
            imminent_prob <- Imminent_prob[idx]
            age_w <- Age_week[idx]
            weekly_ci_chg <- Weekly_ci_change[idx]
            mean_ci_val <- Mean_CI[idx]
            # Priority 1: imminent harvest + mature cane (>= 52 weeks)
            if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_w) && age_w >= 52) {
              return("Ready for harvest-check")
            }
            # Priority 2: strong decline (drop >= 2 points but CI still > 1.5)
            if (!is.na(weekly_ci_chg) && weekly_ci_chg <= -2.0 && !is.na(mean_ci_val) && mean_ci_val > 1.5) {
              return("Strong decline in crop health")
            }
            # Priority 3: harvested/bare (Mean CI < 1.5)
            if (!is.na(mean_ci_val) && mean_ci_val < 1.5) {
              return("Harvested/bare")
            }
            # Fallback: no alert
            NA_character_
          })
        },
        # Columns 15-19 already in current_stats (CI_range, CI_Percentiles,
        # CV, CV_Trend_Short_Term, CV_Trend_Long_Term raw slope)
        # Column 19b: categorical interpretation of the 8-week CV slope.
        # 3 classes: More uniform (< -0.01), Stable uniformity (-0.01..0.01),
        # Less uniform (> 0.01)
        CV_Trend_Long_Term_Category = case_when(
          is.na(CV_Trend_Long_Term) ~ NA_character_,
          CV_Trend_Long_Term < -0.01 ~ "More uniform",
          CV_Trend_Long_Term > 0.01 ~ "Less uniform",
          TRUE ~ "Stable uniformity"
        ),
        # Columns 20-21: Cloud_pct_clear (binned here) and Cloud_category
        Cloud_pct_clear = bin_percentage(Cloud_pct_clear)
      ) %>%
      select(
        all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert",
                 "Last_harvest_or_planting_date", "Age_week", "Phase",
                 "Germination_progress",
                 "Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles",
                 "CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
                 "Imminent_prob", "Cloud_pct_clear", "Cloud_category"))
      )
    message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 21 columns"))
    export_paths <- export_field_analysis_excel(
      field_analysis_df,
      NULL,
      project_dir,
      current_week,
      year,
      reports_dir
    )
    cat("\n--- Per-field Results (first 10) ---\n")
    available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category")
    available_cols <- available_cols[available_cols %in% names(field_analysis_df)]
    if (length(available_cols) > 0) {
      print(head(field_analysis_df[, available_cols], 10))
    }
    # ========== FARM-LEVEL KPI AGGREGATION ==========
    # Aggregate the per-field analysis into farm-level summary statistics
    cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n")
    # Keep only fields with actual data (non-NA CI and positive acreage)
    field_data <- field_analysis_df %>%
      filter(!is.na(Mean_CI) & !is.na(Acreage)) %>%
      filter(Acreage > 0)
    # FIX: removed a duplicated nested `if (nrow(field_data) > 0)` check that
    # carried a dead duplicate else-branch.
    if (nrow(field_data) > 0) {
      farm_summary <- list()
      # 1. PHASE DISTRIBUTION
      phase_dist <- field_data %>%
        group_by(Phase) %>%
        summarise(
          num_fields = n(),
          acreage = sum(Acreage, na.rm = TRUE),
          .groups = 'drop'
        ) %>%
        rename(Category = Phase)
      farm_summary$phase_distribution <- phase_dist
      # 2. STATUS ALERT DISTRIBUTION
      status_dist <- field_data %>%
        group_by(Status_Alert) %>%
        summarise(
          num_fields = n(),
          acreage = sum(Acreage, na.rm = TRUE),
          .groups = 'drop'
        ) %>%
        rename(Category = Status_Alert)
      farm_summary$status_distribution <- status_dist
      # 3. CLOUD COVERAGE DISTRIBUTION
      cloud_dist <- field_data %>%
        group_by(Cloud_category) %>%
        summarise(
          num_fields = n(),
          acreage = sum(Acreage, na.rm = TRUE),
          .groups = 'drop'
        ) %>%
        rename(Category = Cloud_category)
      farm_summary$cloud_distribution <- cloud_dist
      # 4. OVERALL STATISTICS
      # FIX: the CV column is named CV in field_analysis_df (was CI_CV, a
      # non-existent column that silently produced NA via mean(NULL)).
      farm_summary$overall_stats <- data.frame(
        total_fields = nrow(field_data),
        total_acreage = sum(field_data$Acreage, na.rm = TRUE),
        mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2),
        median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2),
        mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4),
        week = current_week,
        year = year,
        date = as.character(end_date)
      )
      # Print summaries
      cat("\n--- PHASE DISTRIBUTION ---\n")
      print(phase_dist)
      cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n")
      print(status_dist)
      cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n")
      print(cloud_dist)
      cat("\n--- OVERALL FARM STATISTICS ---\n")
      print(farm_summary$overall_stats)
      farm_kpi_results <- farm_summary
    } else {
      farm_kpi_results <- NULL
    }
    # ========== FINAL SUMMARY ==========
    cat("\n", strrep("=", 70), "\n")
    cat("80_CALCULATE_KPIs.R - COMPLETION SUMMARY\n")
    cat(strrep("=", 70), "\n")
    cat("Per-field analysis fields analyzed:", nrow(field_analysis_df), "\n")
    cat("Excel export:", export_paths$excel, "\n")
    cat("RDS export:", export_paths$rds, "\n")
    cat("CSV export:", export_paths$csv, "\n")
    if (!is.null(farm_kpi_results)) {
      cat("\nFarm-level KPIs: CALCULATED\n")
    } else {
      cat("\nFarm-level KPIs: SKIPPED (no valid tile data extracted)\n")
    }
    cat("\n✓ Consolidated KPI calculation complete!\n")
    cat(" - Per-field data exported\n")
    cat(" - Farm-level KPIs calculated\n")
    cat(" - All outputs in:", reports_dir, "\n\n")
  } else {
    # Unknown client type - log warning and exit
    warning(sprintf("Unknown client type: %s - no workflow matched", client_type))
    cat("\n⚠ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "")
    cat("Expected: 'agronomic_support' (aura) or 'cane_supply' (angata, etc.)\n")
    cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n")
  }
}
# Invoke main() only when this file is executed directly (e.g. via Rscript):
# sys.nframe() is 0L at top level, non-zero when source()-d from a function,
# in which case the caller is expected to run main() itself.
if (identical(sys.nframe(), 0L)) {
  main()
}