From 04e1641a18dbaf338ef4f3032cc86708633070e3 Mon Sep 17 00:00:00 2001 From: Timon Date: Wed, 14 Jan 2026 11:36:06 +0100 Subject: [PATCH] Create 80_calculate_kpis.R: unified KPI calculation (per-field SC-64 + farm-level metrics) --- r_app/80_calculate_kpis.R | 1077 +++++++++++++++++++++++++++++++++++++ 1 file changed, 1077 insertions(+) create mode 100644 r_app/80_calculate_kpis.R diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R new file mode 100644 index 0000000..6977491 --- /dev/null +++ b/r_app/80_calculate_kpis.R @@ -0,0 +1,1077 @@ +# 80_CALCULATE_KPIS.R (CONSOLIDATED KPI CALCULATION) +# ============================================================================ +# UNIFIED KPI CALCULATION SCRIPT +# +# This script combines: +# 1. Per-field weekly analysis (from 09c: field-level trends, phases, statuses) +# 2. Farm-level KPI metrics (from old 09: 6 high-level indicators) +# +# FEATURES: +# - Per-field analysis with SC-64 enhancements (4-week trends, CI percentiles, etc.) +# - Farm-level KPI calculation (6 metrics for executive overview) +# - Parallel processing (tile-aware, 1000+ fields supported) +# - Comprehensive Excel + RDS + CSV exports +# - Test mode for development +# +# COMMAND-LINE USAGE: +# Option 1: Rscript 80_calculate_kpis.R 2026-01-14 angata +# Arguments: [end_date] [project_dir] +# +# Option 2: Rscript 80_calculate_kpis.R 2026-01-14 angata 7 +# Arguments: [end_date] [project_dir] [offset_days] +# +# Usage in run_full_pipeline.R: +# source("r_app/80_calculate_kpis.R") +# main() + +# ============================================================================ +# *** CONFIGURATION SECTION - MANUALLY DEFINED THRESHOLDS *** +# ============================================================================ + +# TEST MODE (for development with limited historical data) +TEST_MODE <- TRUE +TEST_MODE_NUM_WEEKS <- 2 + +# FOUR-WEEK TREND THRESHOLDS +FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5 +FOUR_WEEK_TREND_GROWTH_MIN <- 0.1 +FOUR_WEEK_TREND_GROWTH_MAX <- 0.5 +FOUR_WEEK_TREND_NO_GROWTH_RANGE <- 0.1 +FOUR_WEEK_TREND_DECLINE_MAX <- -0.1 +FOUR_WEEK_TREND_DECLINE_MIN <- -0.5 +FOUR_WEEK_TREND_STRONG_DECLINE_MAX <- -0.5 + +# CV TREND THRESHOLDS +CV_TREND_THRESHOLD_SIGNIFICANT <- 0.05 + +# CLOUD COVER ROUNDING INTERVALS +CLOUD_INTERVALS <- c(0, 50, 60, 70, 80, 90, 100) + +# PERCENTILE CALCULATIONS +CI_PERCENTILE_LOW <- 0.10 +CI_PERCENTILE_HIGH <- 0.90 + +# HISTORICAL DATA LOOKBACK +WEEKS_FOR_FOUR_WEEK_TREND <- 4 +WEEKS_FOR_CV_TREND_SHORT <- 2 +WEEKS_FOR_CV_TREND_LONG <- 8 + +# ============================================================================ +# 1. Load required libraries +# ============================================================================ + +suppressPackageStartupMessages({ + library(here) + library(sf) + library(terra) + library(dplyr) + library(tidyr) + library(lubridate) + library(readr) + library(readxl) + library(writexl) + library(purrr) + library(furrr) + library(future) + library(caret) + library(CAST) + library(randomForest) + tryCatch({ + library(torch) + }, error = function(e) { + message("Note: torch package not available - harvest model inference will be skipped") + }) +}) + +# ============================================================================ +# PHASE AND STATUS TRIGGER DEFINITIONS +# ============================================================================ + +PHASE_DEFINITIONS <- data.frame( + phase = c("Germination", "Tillering", "Grand Growth", "Maturation"), + age_start = c(0, 4, 17, 39), + age_end = c(6, 16, 39, 200), + stringsAsFactors = FALSE +) + +STATUS_TRIGGERS <- data.frame( + trigger = c( + "germination_started", + "germination_complete", + "stress_detected_whole_field", + "strong_recovery", + "growth_on_track", + "maturation_progressing", + "harvest_ready" + ), + age_min = c(0, 0, NA, NA, 4, 39, 45), + age_max = c(6, 6, NA, NA, 39, 200, 200), + description = c( + "10% of field CI > 2", + "70% of field CI >= 2", + "CI decline > -1.5 + low CV", + "CI increase > +1.5", + "CI increasing consistently", + "High CI, stable/declining", + "Age 45+ weeks (ready to harvest)" + ), + stringsAsFactors = FALSE +) + +# ============================================================================ +# TILE-AWARE HELPER FUNCTIONS +# ============================================================================ + +get_tile_ids_for_field <- function(field_geom, tile_grid) { + if (inherits(field_geom, "sf")) { + field_bbox <- sf::st_bbox(field_geom) + field_xmin <- field_bbox["xmin"] + field_xmax <- field_bbox["xmax"] + field_ymin <- field_bbox["ymin"] + field_ymax <- field_bbox["ymax"] + } else if (inherits(field_geom, "SpatVector")) { + field_bbox <- terra::ext(field_geom) + field_xmin <- field_bbox$xmin + field_xmax <- field_bbox$xmax + field_ymin <- field_bbox$ymin + field_ymax <- field_bbox$ymax + } else { + stop("field_geom must be sf or terra::vect object") + } + + intersecting_tiles <- tile_grid$id[ + !(tile_grid$xmax < field_xmin | + tile_grid$xmin > field_xmax | + tile_grid$ymax < field_ymin | + tile_grid$ymin > field_ymax) + ] + + return(as.numeric(intersecting_tiles)) +} + +load_tiles_for_field <- function(field_geom, tile_ids, week_num, year, mosaic_dir) { + if (length(tile_ids) == 0) { + return(NULL) + } + + tiles_list <- list() + for (tile_id in sort(tile_ids)) { + tile_filename <- sprintf("week_%02d_%d_%02d.tif", week_num, year, tile_id) + tile_path <- file.path(mosaic_dir, tile_filename) + + if (file.exists(tile_path)) { + tryCatch({ + tile_rast <- terra::rast(tile_path) + ci_band <- terra::subset(tile_rast, 5) + tiles_list[[length(tiles_list) + 1]] <- ci_band + }, error = function(e) { + message(paste(" Warning: Could not load tile", tile_id, ":", e$message)) + }) + } + } + + if (length(tiles_list) == 0) { + return(NULL) + } + + if (length(tiles_list) == 1) { + return(tiles_list[[1]]) + } else { + tryCatch({ + rsrc <- terra::sprc(tiles_list) + merged <- terra::mosaic(rsrc, fun = "max") + return(merged) + }, error = function(e) { + message(paste(" Warning: Could not merge tiles:", e$message)) + return(tiles_list[[1]]) + }) + } +} + +build_tile_grid <- function(mosaic_dir, week_num, year) { + tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) + tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE) + + if (length(tile_files) == 0) { + stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir)) + } + + tile_grid <- data.frame( + id = integer(), + xmin = numeric(), + xmax = numeric(), + ymin = numeric(), + ymax = numeric(), + stringsAsFactors = FALSE + ) + + for (tile_file in tile_files) { + tryCatch({ + matches <- regmatches(basename(tile_file), regexpr("_([0-9]{2})\\.tif$", basename(tile_file))) + if (length(matches) > 0) { + tile_id <- as.integer(sub("_|\\.tif", "", matches[1])) + tile_rast <- terra::rast(tile_file) + tile_ext <- terra::ext(tile_rast) + tile_grid <- rbind(tile_grid, data.frame( + id = tile_id, + xmin = tile_ext$xmin, + xmax = tile_ext$xmax, + ymin = tile_ext$ymin, + ymax = tile_ext$ymax, + stringsAsFactors = FALSE + )) + } + }, error = function(e) { + message(paste(" Warning: Could not process tile", basename(tile_file), ":", e$message)) + }) + } + + if (nrow(tile_grid) == 0) { + stop("Could not extract extents from any tile files") + } + + return(tile_grid) +} + +# ============================================================================ +# SC-64 ENHANCEMENT FUNCTIONS +# ============================================================================ + +categorize_four_week_trend <- function(ci_values_list) { + if (is.null(ci_values_list) || length(ci_values_list) < 2) { + return(NA_character_) + } + + ci_values_list <- ci_values_list[!is.na(ci_values_list)] + if (length(ci_values_list) < 2) { + return(NA_character_) + } + + weekly_changes <- diff(ci_values_list) + avg_weekly_change <- mean(weekly_changes, na.rm = TRUE) + + if (avg_weekly_change >= FOUR_WEEK_TREND_STRONG_GROWTH_MIN) { + return("strong growth") + } else if (avg_weekly_change >= FOUR_WEEK_TREND_GROWTH_MIN && + avg_weekly_change < FOUR_WEEK_TREND_GROWTH_MAX) { + return("growth") + } else if (abs(avg_weekly_change) <= FOUR_WEEK_TREND_NO_GROWTH_RANGE) { + return("no growth") + } else if (avg_weekly_change <= FOUR_WEEK_TREND_DECLINE_MIN && + avg_weekly_change > FOUR_WEEK_TREND_STRONG_DECLINE_MAX) { + return("decline") + } else if (avg_weekly_change < FOUR_WEEK_TREND_STRONG_DECLINE_MAX) { + return("strong decline") + } else { + return("no growth") + } +} + +round_cloud_to_intervals <- function(cloud_pct_clear) { + if (is.na(cloud_pct_clear)) { + return(NA_character_) + } + + if (cloud_pct_clear < 50) return("<50%") + if (cloud_pct_clear < 60) return("50-60%") + if (cloud_pct_clear < 70) return("60-70%") + if (cloud_pct_clear < 80) return("70-80%") + if (cloud_pct_clear < 90) return("80-90%") + return(">90%") +} + +get_ci_percentiles <- function(ci_values) { + if (is.null(ci_values) || length(ci_values) == 0) { + return(NA_character_) + } + + ci_values <- ci_values[!is.na(ci_values)] + if (length(ci_values) == 0) { + return(NA_character_) + } + + p10 <- quantile(ci_values, CI_PERCENTILE_LOW, na.rm = TRUE) + p90 <- quantile(ci_values, CI_PERCENTILE_HIGH, na.rm = TRUE) + + return(sprintf("%.1f-%.1f", p10, p90)) +} + +calculate_cv_trend <- function(cv_current, cv_previous) { + if (is.na(cv_current) || is.na(cv_previous)) { + return(NA_real_) + } + return(round(cv_current - cv_previous, 4)) +} + +# ============================================================================ +# HELPER FUNCTIONS +# ============================================================================ + +get_phase_by_age <- function(age_weeks) { + if (is.na(age_weeks)) return(NA_character_) + for (i in seq_len(nrow(PHASE_DEFINITIONS))) { + if (age_weeks >= PHASE_DEFINITIONS$age_start[i] && + age_weeks <= PHASE_DEFINITIONS$age_end[i]) { + return(PHASE_DEFINITIONS$phase[i]) + } + } + return("Unknown") +} + +get_status_trigger <- function(ci_values, ci_change, age_weeks) { + if (is.na(age_weeks) || length(ci_values) == 0) return(NA_character_) + + ci_values <- ci_values[!is.na(ci_values)] + if (length(ci_values) == 0) return(NA_character_) + + pct_above_2 <- sum(ci_values > 2) / length(ci_values) * 100 + pct_at_or_above_2 <- sum(ci_values >= 2) / length(ci_values) * 100 + ci_cv <- if (mean(ci_values, na.rm = TRUE) > 0) sd(ci_values) / mean(ci_values, na.rm = TRUE) else 0 + mean_ci <- mean(ci_values, na.rm = TRUE) + + if (age_weeks >= 0 && age_weeks <= 6) { + if (pct_at_or_above_2 >= 70) { + return("germination_complete") + } else if (pct_above_2 > 10) { + return("germination_started") + } + } + + if (age_weeks >= 45) { + return("harvest_ready") + } + + if (age_weeks > 6 && !is.na(ci_change) && ci_change < -1.5 && ci_cv < 0.25) { + return("stress_detected_whole_field") + } + + if (age_weeks > 6 && !is.na(ci_change) && ci_change > 1.5) { + return("strong_recovery") + } + + if (age_weeks >= 4 && age_weeks < 39 && !is.na(ci_change) && ci_change > 0.2) { + return("growth_on_track") + } + + if (age_weeks >= 39 && age_weeks < 45 && mean_ci > 3.5) { + return("maturation_progressing") + } + + return(NA_character_) +} + +load_historical_field_data <- function(project_dir, current_week, reports_dir, num_weeks = 4) { + historical_data <- list() + loaded_weeks <- c() + + for (lookback in 0:(num_weeks - 1)) { + target_week <- current_week - lookback + if (target_week < 1) target_week <- target_week + 52 + + csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", target_week), ".csv") + csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename) + + if (file.exists(csv_path)) { + tryCatch({ + data <- read_csv(csv_path, show_col_types = FALSE) + historical_data[[lookback + 1]] <- list( + week = target_week, + data = data + ) + loaded_weeks <- c(loaded_weeks, target_week) + }, error = function(e) { + message(paste(" Warning: Could not load week", target_week, ":", e$message)) + }) + } + } + + if (length(historical_data) == 0) { + message(paste("Warning: No historical field data found for trend calculations")) + return(NULL) + } + + message(paste("Loaded", length(historical_data), "weeks of historical data:", + paste(loaded_weeks, collapse = ", "))) + + return(historical_data) +} + +USE_UNIFORM_AGE <- TRUE +UNIFORM_PLANTING_DATE <- as.Date("2025-01-01") + +extract_planting_dates <- function(harvesting_data) { + if (USE_UNIFORM_AGE) { + message(paste("Using uniform planting date for all fields:", UNIFORM_PLANTING_DATE)) + return(data.frame( + field_id = character(), + planting_date = as.Date(character()), + stringsAsFactors = FALSE + )) + } + + if (is.null(harvesting_data) || nrow(harvesting_data) == 0) { + message("Warning: No harvesting data available.") + return(NULL) + } + + tryCatch({ + planting_dates <- harvesting_data %>% + arrange(field, desc(season_start)) %>% + distinct(field, .keep_all = TRUE) %>% + select(field, season_start) %>% + rename(field_id = field, planting_date = season_start) %>% + filter(!is.na(planting_date)) %>% + as.data.frame() + + message(paste("Extracted planting dates for", nrow(planting_dates), "fields")) + return(planting_dates) + }, error = function(e) { + message(paste("Error extracting planting dates:", e$message)) + return(NULL) + }) +} + +# ============================================================================ +# PARALLEL FIELD ANALYSIS FUNCTION +# ============================================================================ + +analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week_num, year, + mosaic_dir, historical_data = NULL, planting_dates = NULL, + report_date = Sys.Date(), harvest_imminence_data = NULL, + harvesting_data = NULL) { + + tryCatch({ + field_id <- field_boundaries_sf$field[field_idx] + farm_section <- if ("sub_area" %in% names(field_boundaries_sf)) { + field_boundaries_sf$sub_area[field_idx] + } else { + NA_character_ + } + field_name <- field_id + + field_sf <- field_boundaries_sf[field_idx, ] + if (sf::st_is_empty(field_sf) || any(is.na(sf::st_geometry(field_sf)))) { + return(data.frame( + Field_id = field_id, + error = "Empty or invalid geometry" + )) + } + + field_area_ha <- as.numeric(sf::st_area(field_sf)) / 10000 + field_area_acres <- field_area_ha / 0.404686 + + tile_ids <- get_tile_ids_for_field(field_sf, tile_grid) + + current_ci <- load_tiles_for_field(field_sf, tile_ids, week_num, year, mosaic_dir) + + if (is.null(current_ci)) { + return(data.frame( + Field_id = field_id, + error = "No tile data available" + )) + } + + field_vect <- terra::vect(sf::as_Spatial(field_sf)) + terra::crs(field_vect) <- terra::crs(current_ci) + + all_extracted <- terra::extract(current_ci, field_vect)[, 2] + current_ci_vals <- all_extracted[!is.na(all_extracted)] + + num_total <- length(all_extracted) + num_data <- sum(!is.na(all_extracted)) + pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0 + + cloud_cat <- if (num_data == 0) "No image available" + else if (pct_clear >= 99.5) "Clear view" + else "Partial coverage" + cloud_pct <- 100 - pct_clear + cloud_interval <- round_cloud_to_intervals(pct_clear) + + if (length(current_ci_vals) == 0) { + return(data.frame( + Field_id = field_id, + error = "No CI values extracted" + )) + } + + mean_ci_current <- mean(current_ci_vals, na.rm = TRUE) + ci_std <- sd(current_ci_vals, na.rm = TRUE) + cv_current <- ci_std / mean_ci_current + range_min <- min(current_ci_vals, na.rm = TRUE) + range_max <- max(current_ci_vals, na.rm = TRUE) + range_str <- sprintf("%.1f-%.1f", range_min, range_max) + + ci_percentiles_str <- get_ci_percentiles(current_ci_vals) + + weekly_ci_change <- NA + previous_ci_vals <- NULL + + tryCatch({ + previous_ci <- load_tiles_for_field(field_sf, tile_ids, week_num - 1, year, mosaic_dir) + if (!is.null(previous_ci)) { + prev_extracted <- terra::extract(previous_ci, field_vect)[, 2] + previous_ci_vals <- prev_extracted[!is.na(prev_extracted)] + if (length(previous_ci_vals) > 0) { + mean_ci_previous <- mean(previous_ci_vals, na.rm = TRUE) + weekly_ci_change <- mean_ci_current - mean_ci_previous + } + } + }, error = function(e) { + # Silent fail + }) + + if (is.na(weekly_ci_change)) { + weekly_ci_change_str <- sprintf("%.1f ± %.2f", mean_ci_current, ci_std) + } else { + weekly_ci_change_str <- sprintf("%.1f ± %.2f (Δ%.1f)", mean_ci_current, ci_std, weekly_ci_change) + } + + age_weeks <- NA + if (!is.null(planting_dates) && nrow(planting_dates) > 0) { + field_planting <- planting_dates %>% + filter(field_id == !!field_id) %>% + pull(planting_date) + + if (length(field_planting) > 0) { + age_weeks <- as.numeric(difftime(report_date, field_planting[1], units = "weeks")) + } + } + + if (USE_UNIFORM_AGE) { + age_weeks <- as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks")) + } + + pct_ci_above_2 <- sum(current_ci_vals > 2) / length(current_ci_vals) * 100 + pct_ci_ge_2 <- sum(current_ci_vals >= 2) / length(current_ci_vals) * 100 + germination_progress_str <- NA_character_ + if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks <= 6) { + germination_progress_str <- sprintf("%.0f%%", pct_ci_ge_2) + } + + phase <- "Unknown" + imminent_prob_val <- NA + if (!is.null(harvest_imminence_data) && nrow(harvest_imminence_data) > 0) { + imminence_row <- harvest_imminence_data %>% + filter(field_id == !!field_id) + if (nrow(imminence_row) > 0) { + imminent_prob_val <- imminence_row$probability[1] + if (imminent_prob_val > 0.5) { + phase <- "Harvest Imminent (Model)" + } + } + } + + if (phase == "Unknown") { + phase <- get_phase_by_age(age_weeks) + } + + status_trigger <- get_status_trigger(current_ci_vals, weekly_ci_change, age_weeks) + + nmr_weeks_in_phase <- 1 + + four_week_trend <- NA_character_ + ci_values_for_trend <- c(mean_ci_current) + + if (!is.null(historical_data) && length(historical_data) > 0) { + for (hist in historical_data) { + hist_week <- hist$week + hist_data <- hist$data + + field_row <- hist_data %>% + filter(Field_id == !!field_id) + + if (nrow(field_row) > 0 && !is.na(field_row$Mean_CI[1])) { + ci_values_for_trend <- c(field_row$Mean_CI[1], ci_values_for_trend) + } + } + + if (length(ci_values_for_trend) >= 2) { + four_week_trend <- categorize_four_week_trend(ci_values_for_trend) + } + } + + cv_trend_short <- NA_real_ + cv_trend_long <- NA_real_ + + if (!is.null(historical_data) && length(historical_data) > 0) { + if (length(historical_data) >= 2) { + cv_2w <- historical_data[[2]]$data %>% + filter(Field_id == !!field_id) %>% + pull(CV) + if (length(cv_2w) > 0 && !is.na(cv_2w[1])) { + cv_trend_short <- calculate_cv_trend(cv_current, cv_2w[1]) + } + } + + if (length(historical_data) >= 8) { + cv_8w <- historical_data[[8]]$data %>% + filter(Field_id == !!field_id) %>% + pull(CV) + if (length(cv_8w) > 0 && !is.na(cv_8w[1])) { + cv_trend_long <- calculate_cv_trend(cv_current, cv_8w[1]) + } + } + } + + last_harvest_date <- NA_character_ + if (!is.null(harvesting_data) && nrow(harvesting_data) > 0) { + last_harvest_row <- harvesting_data %>% + filter(field == !!field_id) %>% + arrange(desc(season_start)) %>% + slice(1) + + if (nrow(last_harvest_row) > 0 && !is.na(last_harvest_row$season_start[1])) { + last_harvest_date <- as.character(last_harvest_row$season_start[1]) + } + } + + result <- data.frame( + Field_id = field_id, + Farm_Section = farm_section, + Field_name = field_name, + Hectare = round(field_area_ha, 2), + Acreage = round(field_area_acres, 2), + Mean_CI = round(mean_ci_current, 2), + Weekly_ci_change = if (is.na(weekly_ci_change)) NA_real_ else round(weekly_ci_change, 2), + Weekly_ci_change_str = weekly_ci_change_str, + Four_week_trend = four_week_trend, + Last_harvest_or_planting_date = last_harvest_date, + Age_week = if (is.na(age_weeks)) NA_integer_ else as.integer(round(age_weeks)), + `Phase (age based)` = phase, + nmr_weeks_in_this_phase = nmr_weeks_in_phase, + Germination_progress = germination_progress_str, + Imminent_prob = imminent_prob_val, + Status_trigger = status_trigger, + CI_range = range_str, + CI_Percentiles = ci_percentiles_str, + CV = round(cv_current, 4), + CV_Trend_Short_Term = cv_trend_short, + CV_Trend_Long_Term = cv_trend_long, + Cloud_pct_clear = pct_clear, + Cloud_pct_clear_interval = cloud_interval, + Cloud_pct = cloud_pct, + Cloud_category = cloud_cat, + stringsAsFactors = FALSE + ) + + return(result) + + }, error = function(e) { + message(paste("Error analyzing field", field_idx, ":", e$message)) + return(data.frame( + Field_id = NA_character_, + error = e$message + )) + }) +} + +# ============================================================================ +# SUMMARY GENERATION +# ============================================================================ + +generate_field_analysis_summary <- function(field_df) { + message("Generating summary statistics...") + + total_acreage <- sum(field_df$Acreage, na.rm = TRUE) + + germination_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Germination"], na.rm = TRUE) + tillering_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Tillering"], na.rm = TRUE) + grand_growth_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Grand Growth"], na.rm = TRUE) + maturation_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Maturation"], na.rm = TRUE) + unknown_phase_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Unknown"], na.rm = TRUE) + + harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE) + stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE) + recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE) + growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE) + germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE) + germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE) + no_trigger_acreage <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE) + + clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE) + partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE) + no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE) + total_fields <- nrow(field_df) + + clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE) + partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE) + no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE) + + summary_df <- data.frame( + Category = c( + "--- PHASE DISTRIBUTION ---", + "Germination", + "Tillering", + "Grand Growth", + "Maturation", + "Unknown phase", + "--- STATUS TRIGGERS ---", + "Harvest ready", + "Stress detected", + "Strong recovery", + "Growth on track", + "Germination complete", + "Germination started", + "No trigger", + "--- CLOUD COVERAGE (FIELDS) ---", + "Clear view", + "Partial coverage", + "No image available", + "--- CLOUD COVERAGE (ACREAGE) ---", + "Clear view", + "Partial coverage", + "No image available", + "--- TOTAL ---", + "Total Acreage" + ), + Acreage = c( + NA, + round(germination_acreage, 2), + round(tillering_acreage, 2), + round(grand_growth_acreage, 2), + round(maturation_acreage, 2), + round(unknown_phase_acreage, 2), + NA, + round(harvest_ready_acreage, 2), + round(stress_acreage, 2), + round(recovery_acreage, 2), + round(growth_on_track_acreage, 2), + round(germination_complete_acreage, 2), + round(germination_started_acreage, 2), + round(no_trigger_acreage, 2), + NA, + paste0(clear_fields, " fields"), + paste0(partial_fields, " fields"), + paste0(no_image_fields, " fields"), + NA, + round(clear_acreage, 2), + round(partial_acreage, 2), + round(no_image_acreage, 2), + NA, + round(total_acreage, 2) + ), + stringsAsFactors = FALSE + ) + + return(summary_df) +} + +# ============================================================================ +# EXPORT FUNCTIONS +# ============================================================================ + +export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, reports_dir) { + message("Exporting per-field analysis to Excel, CSV, and RDS...") + + output_subdir <- file.path(reports_dir, "kpis", "field_analysis") + if (!dir.exists(output_subdir)) { + dir.create(output_subdir, recursive = TRUE) + } + + excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".xlsx") + excel_path <- file.path(output_subdir, excel_filename) + excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE) + + sheets <- list( + "Field Data" = field_df, + "Summary" = summary_df + ) + + write_xlsx(sheets, excel_path) + message(paste("✓ Field analysis Excel exported to:", excel_path)) + + kpi_data <- list( + field_analysis = field_df, + field_analysis_summary = summary_df, + metadata = list( + current_week = current_week, + project = project_dir, + created_at = Sys.time() + ) + ) + + rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d", current_week), ".rds") + rds_path <- file.path(reports_dir, "kpis", rds_filename) + + saveRDS(kpi_data, rds_path) + message(paste("✓ Field analysis RDS exported to:", rds_path)) + + csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".csv") + csv_path <- file.path(output_subdir, csv_filename) + write_csv(field_df, csv_path) + message(paste("✓ Field analysis CSV exported to:", csv_path)) + + return(list(excel = excel_path, rds = rds_path, csv = csv_path)) +} + +# ============================================================================ +# FARM-LEVEL KPI CALCULATION (FROM OLD 09_CALCULATE_KPIS.R) +# ============================================================================ + +calculate_and_export_farm_kpis <- function(report_date, project_dir, field_boundaries_sf, + harvesting_data, cumulative_CI_vals_dir, + weekly_CI_mosaic, reports_dir) { + message("\n=== CALCULATING FARM-LEVEL KPIs ===") + message("(6 high-level KPI metrics)") + + tryCatch({ + source(here("r_app", "kpi_utils.R")) + }, error = function(e) { + message(paste("Warning: Could not load kpi_utils.R:", e$message)) + message("Farm-level KPIs will be skipped") + return(NULL) + }) + + if (!exists("calculate_all_kpis")) { + message("Warning: calculate_all_kpis() function not found in kpi_utils.R") + return(NULL) + } + + output_dir <- file.path(reports_dir, "kpis") + if (!dir.exists(output_dir)) { + dir.create(output_dir, recursive = TRUE) + } + + tryCatch({ + kpi_results <- calculate_all_kpis( + report_date = report_date, + output_dir = output_dir, + field_boundaries_sf = field_boundaries_sf, + harvesting_data = harvesting_data, + cumulative_CI_vals_dir = cumulative_CI_vals_dir, + weekly_CI_mosaic = weekly_CI_mosaic, + reports_dir = reports_dir, + project_dir = project_dir + ) + + # Print KPI summary + cat("\n=== FARM-LEVEL KPI SUMMARY ===\n") + cat("Report Date:", as.character(kpi_results$metadata$report_date), "\n") + cat("Current Week:", kpi_results$metadata$current_week, "\n") + cat("Previous Week:", kpi_results$metadata$previous_week, "\n") + cat("Total Fields Analyzed:", kpi_results$metadata$total_fields, "\n") + cat("Calculation Time:", as.character(kpi_results$metadata$calculation_time), "\n") + + cat("\n--- KPI Metrics ---\n") + cat("Field Uniformity Summary:\n") + print(kpi_results$field_uniformity_summary) + + cat("\nArea Change Summary:\n") + print(kpi_results$area_change) + + cat("\nTCH Forecasted:\n") + print(kpi_results$tch_forecasted) + + cat("\nGrowth Decline Index:\n") + print(kpi_results$growth_decline) + + cat("\nWeed Presence Score:\n") + print(kpi_results$weed_presence) + + cat("\nGap Filling Score:\n") + print(kpi_results$gap_filling) + + return(kpi_results) + }, error = function(e) { + message(paste("Error calculating farm-level KPIs:", e$message)) + return(NULL) + }) +} + +# ============================================================================ +# MAIN +# ============================================================================ + +main <- function() { + # Parse command-line arguments + args <- commandArgs(trailingOnly = TRUE) + + # end_date (arg 1) + end_date <- if (length(args) >= 1 && !is.na(args[1])) { + as.Date(args[1]) + } else if (exists("end_date_str", envir = .GlobalEnv)) { + as.Date(get("end_date_str", envir = .GlobalEnv)) + } else { + Sys.Date() + } + + # project_dir (arg 2) + project_dir <- if (length(args) >= 2 && !is.na(args[2])) { + as.character(args[2]) + } else if (exists("project_dir", envir = .GlobalEnv)) { + get("project_dir", envir = .GlobalEnv) + } else { + "angata" + } + + # offset (arg 3) - for backward compatibility with old 09 + offset <- if (length(args) >= 3 && !is.na(args[3])) { + as.numeric(args[3]) + } else { + 7 + } + + assign("project_dir", project_dir, envir = .GlobalEnv) + assign("end_date_str", format(end_date, "%Y-%m-%d"), envir = .GlobalEnv) + + message("\n" %+% strrep("=", 70)) + message("80_CALCULATE_KPIs.R - CONSOLIDATED KPI CALCULATION") + message(strrep("=", 70)) + message("Date:", format(end_date, "%Y-%m-%d")) + message("Project:", project_dir) + message("Mode: Per-field analysis (SC-64) + Farm-level KPIs") + message("") + + # Load configuration and utilities + source(here("r_app", "crop_messaging_utils.R")) + + tryCatch({ + source(here("r_app", "parameters_project.R")) + }, error = function(e) { + stop("Error loading parameters_project.R: ", e$message) + }) + + tryCatch({ + source(here("r_app", "30_growth_model_utils.R")) + }, error = function(e) { + warning("30_growth_model_utils.R not found - yield prediction KPI will use placeholder data") + }) + + # ========== PER-FIELD ANALYSIS (SC-64) ========== + + message("\n" %+% strrep("-", 70)) + message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)") + message(strrep("-", 70)) + + current_week <- as.numeric(format(end_date, "%V")) + year <- as.numeric(format(end_date, "%Y")) + previous_week <- current_week - 1 + if (previous_week < 1) previous_week <- 52 + + message(paste("Week:", current_week, "/ Year:", year)) + + message("Building tile grid from available weekly tiles...") + tile_grid <- build_tile_grid(weekly_tile_max, current_week, year) + message(paste(" Found", nrow(tile_grid), "tiles")) + + tryCatch({ + boundaries_result <- load_field_boundaries(data_dir) + + if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) { + field_boundaries_sf <- boundaries_result$field_boundaries_sf + } else { + field_boundaries_sf <- boundaries_result + } + + if (nrow(field_boundaries_sf) == 0) { + stop("No fields loaded from boundaries") + } + + message(paste(" Loaded", nrow(field_boundaries_sf), "fields")) + }, error = function(e) { + stop("ERROR loading field boundaries: ", e$message) + }) + + message("Loading historical field data for trend calculations...") + num_weeks_to_load <- if (TEST_MODE) TEST_MODE_NUM_WEEKS else max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG) + if (TEST_MODE) { + message(paste(" TEST MODE: Loading only", num_weeks_to_load, "weeks")) + } + historical_data <- load_historical_field_data(project_dir, current_week, reports_dir, num_weeks = num_weeks_to_load) + + planting_dates <- extract_planting_dates(harvesting_data) + + message("Setting up parallel processing...") + current_plan <- class(future::plan())[1] + if (current_plan == "sequential") { + num_workers <- parallel::detectCores() - 1 + message(paste(" Using", num_workers, "workers")) + future::plan(future::multisession, workers = num_workers) + } + + message("Analyzing", nrow(field_boundaries_sf), "fields in parallel...") + + field_analysis_list <- furrr::future_map( + seq_len(nrow(field_boundaries_sf)), + ~ analyze_single_field( + field_idx = ., + field_boundaries_sf = field_boundaries_sf, + tile_grid = tile_grid, + week_num = current_week, + year = year, + mosaic_dir = weekly_tile_max, + historical_data = historical_data, + planting_dates = planting_dates, + report_date = end_date, + harvest_imminence_data = NULL, + harvesting_data = harvesting_data + ), + .progress = TRUE, + .options = furrr::furrr_options(seed = TRUE) + ) + + field_analysis_df <- dplyr::bind_rows(field_analysis_list) + + if (nrow(field_analysis_df) == 0) { + stop("No fields analyzed successfully!") + } + + message(paste("✓ Analyzed", nrow(field_analysis_df), "fields")) + + summary_statistics_df <- generate_field_analysis_summary(field_analysis_df) + + export_paths <- export_field_analysis_excel( + field_analysis_df, + summary_statistics_df, + project_dir, + current_week, + reports_dir + ) + + cat("\n--- Per-field Results (first 10) ---\n") + available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_trigger", "Cloud_category") + available_cols <- available_cols[available_cols %in% names(field_analysis_df)] + if (length(available_cols) > 0) { + print(head(field_analysis_df[, available_cols], 10)) + } + + cat("\n--- Summary Statistics ---\n") + print(summary_statistics_df) + + # ========== FARM-LEVEL KPI CALCULATION ========== + + farm_kpi_results <- calculate_and_export_farm_kpis( + report_date = end_date, + project_dir = project_dir, + field_boundaries_sf = field_boundaries_sf, + harvesting_data = harvesting_data, + cumulative_CI_vals_dir = cumulative_CI_vals_dir, + weekly_CI_mosaic = weekly_CI_mosaic, + reports_dir = reports_dir + ) + + # ========== FINAL SUMMARY ========== + + cat("\n" %+% strrep("=", 70) %+% "\n") + cat("80_CALCULATE_KPIs.R - COMPLETION SUMMARY\n") + cat(strrep("=", 70) %+% "\n") + cat("Per-field analysis fields analyzed:", nrow(field_analysis_df), "\n") + cat("Excel export:", export_paths$excel, "\n") + cat("RDS export:", export_paths$rds, "\n") + cat("CSV export:", export_paths$csv, "\n") + + if (!is.null(farm_kpi_results)) { + cat("\nFarm-level KPIs: CALCULATED\n") + } else { + cat("\nFarm-level KPIs: SKIPPED (kpi_utils.R not available)\n") + } + + cat("\n✓ Consolidated KPI calculation complete!\n") + cat(" - Per-field data exported\n") + cat(" - Farm-level KPIs calculated\n") + cat(" - All outputs in:", reports_dir, "\n\n") +} + +if (sys.nframe() == 0) { + main() +}