From 4445f72e6f20fd978d5aa4f2f4253b9fedb7de4f Mon Sep 17 00:00:00 2001 From: Timon Date: Thu, 29 Jan 2026 16:04:04 +0100 Subject: [PATCH] works for angata and aura --- r_app/20_ci_extraction.R | 119 +- r_app/30_interpolate_growth_model.R | 17 +- r_app/40_mosaic_creation.R | 90 +- r_app/40_mosaic_creation_utils.R | 4 +- r_app/80_calculate_kpis.R | 99 +- r_app/80_kpi_utils.R | 1417 +++++++++++++++++++++++ r_app/80_weekly_stats_utils.R | 57 +- r_app/90_CI_report_with_kpis_simple.Rmd | 3 +- r_app/91_CI_report_with_kpis_Angata.Rmd | 389 +++++-- r_app/parameters_project.R | 69 ++ r_app/run_full_pipeline.R | 575 +++++++-- 11 files changed, 2566 insertions(+), 273 deletions(-) create mode 100644 r_app/80_kpi_utils.R diff --git a/r_app/20_ci_extraction.R b/r_app/20_ci_extraction.R index 1f751ae..36bf5b7 100644 --- a/r_app/20_ci_extraction.R +++ b/r_app/20_ci_extraction.R @@ -123,9 +123,72 @@ main <- function() { # 4. Generate date list for processing # --------------------------------- - dates <- date_list(end_date, 7) + dates <- date_list(end_date, offset) log_message(paste("Processing data for week", dates$week, "of", dates$year)) + # 4a. 
CHECK DAILY CI EXTRACTION - Skip dates that already have extracted files + # ------------------------------------------------------------------------- + log_message("\n===== CHECKING DAILY CI EXTRACTION STATUS =====") + + # Check which dates already have extracted CI files + already_extracted <- c() + missing_extraction <- c() + + if (dir.exists(daily_CI_vals_dir)) { + existing_ci_files <- list.files(daily_CI_vals_dir, pattern = "^extracted_.*\\.rds$") + # Extract dates from filenames like "extracted_2025-12-31_quadrant.rds" + already_extracted <- sub("^extracted_(.+)_.*\\.rds$", "\\1", existing_ci_files) + } + + # Find which dates in our processing range need extraction + missing_extraction <- dates$days_filter[!(dates$days_filter %in% already_extracted)] + + cat(sprintf("[CI CHECK] Already extracted: %d dates\n", length(already_extracted))) + cat(sprintf("[CI CHECK] Need extraction: %d dates (from %s to %s)\n", + length(missing_extraction), + if(length(missing_extraction) > 0) min(missing_extraction) else "N/A", + if(length(missing_extraction) > 0) max(missing_extraction) else "N/A")) + + # If any dates need extraction, we'll extract them + # If NO dates need extraction, we'll skip extraction but ALWAYS rebuild combined_CI_data.rds + skip_extraction <- (length(missing_extraction) == 0) + + if (skip_extraction) { + log_message("✓ All dates in processing range already have extracted CI files - skipping extraction") + log_message("⚠ Will rebuild combined_CI_data.rds to ensure completeness") + } + + # 4b. 
CHECK SOURCE DATA AVAILABILITY + # --------------------------------------------------------------- + # Verify that source data exists for dates we're going to extract + # If a date is missing from source, we'll skip it gracefully + log_message("\n===== CHECKING SOURCE DATA AVAILABILITY =====") + + dates_with_source <- c() + dates_missing_source <- c() + + if (!skip_extraction && length(missing_extraction) > 0) { + # Check which source dates are actually available + for (date_str in missing_extraction) { + # Look for the date in merged_tif directory + source_file_pattern <- sprintf("%s\\.tif$", date_str) + files_for_date <- list.files(planet_tif_folder, pattern = source_file_pattern) + + if (length(files_for_date) > 0) { + dates_with_source <- c(dates_with_source, date_str) + } else { + dates_missing_source <- c(dates_missing_source, date_str) + } + } + + cat(sprintf("[SOURCE CHECK] Dates with available source data: %d\n", length(dates_with_source))) + cat(sprintf("[SOURCE CHECK] Dates missing from source (will skip): %d\n", length(dates_missing_source))) + + if (length(dates_missing_source) > 0) { + log_message(paste("⚠ Skipping extraction for missing source dates:", paste(dates_missing_source, collapse = ", "))) + } + } + # 5. Find and filter raster files by date - with grid size detection # ----------------------------------- log_message("Searching for raster files") @@ -159,7 +222,9 @@ main <- function() { assign("grid_size", grid_size, envir = .GlobalEnv) tryCatch({ - if (use_tiles) { + if (skip_extraction) { + log_message("\n===== SKIPPING CI EXTRACTION (all dates already processed) =====") + } else if (use_tiles) { # Use tile-based processing log_message(paste("Tile folder detected at", tile_folder)) log_message("Using tile-based CI extraction") @@ -196,6 +261,56 @@ main <- function() { log_message(paste("Error in main processing:", e$message), level = "ERROR") stop(e$message) }) + + # 6. 
REBUILD combined_CI_data.rds from ALL daily extracted files + # ----------------------------------------------- + # This ensures the combined file is complete and up-to-date + # even if extraction was skipped (because dates already existed) + # NOTE: Only rebuild if new dates were successfully extracted + # If all dates were missing from source, skip this step to avoid corrupting the file + log_message("\n===== HANDLING combined_CI_data.rds =====") + + if (length(dates_with_source) == 0 && length(missing_extraction) > 0) { + # All missing dates had no source data - skip combined_CI_data.rds update + log_message("⚠ No new dates extracted (all source data missing) - skipping combined_CI_data.rds update") + } else if (skip_extraction) { + # All dates already extracted - optionally rebuild for consistency + log_message("✓ All dates already extracted - combined_CI_data.rds is up-to-date") + } else { + # New dates were extracted - rebuild combined_CI_data.rds from ALL daily files + log_message("Rebuilding combined_CI_data.rds from all daily extracted files...") + + tryCatch({ + if (!dir.exists(daily_CI_vals_dir)) { + log_message("Daily CI directory does not exist yet", level = "WARNING") + } else { + # List ALL daily CI files (not just new ones) + all_daily_files <- list.files(path = daily_CI_vals_dir, pattern = "^extracted_.*\\.rds$", full.names = TRUE) + + if (length(all_daily_files) == 0) { + log_message("No daily CI files found to combine", level = "WARNING") + } else { + log_message(paste("Combining all", length(all_daily_files), "daily CI files into combined_CI_data.rds")) + + # Load and combine ALL daily files (creates complete dataset) + combined_ci_path <- file.path(cumulative_CI_vals_dir, "combined_CI_data.rds") + + combined_data <- all_daily_files %>% + purrr::map(readRDS) %>% + purrr::list_rbind() %>% + dplyr::group_by(sub_field) + + # Save the rebuilt combined data + saveRDS(combined_data, combined_ci_path) + + log_message(paste("✓ Rebuilt 
combined_CI_data.rds with", nrow(combined_data), "total rows")) + } + } + }, error = function(e) { + log_message(paste("⚠ Error rebuilding combined_CI_data.rds (will skip):", e$message), level = "WARNING") + log_message(" Note: This is OK - Script 30 will use growth model RDS instead", level = "WARNING") + }) + } } if (sys.nframe() == 0) { diff --git a/r_app/30_interpolate_growth_model.R b/r_app/30_interpolate_growth_model.R index ed310e5..05b54b0 100644 --- a/r_app/30_interpolate_growth_model.R +++ b/r_app/30_interpolate_growth_model.R @@ -6,9 +6,10 @@ # to create a continuous growth model. It generates daily values and cumulative # CI statistics for each field. # -# Usage: Rscript interpolate_growth_model.R [project_dir] +# Usage: Rscript interpolate_growth_model.R [project_dir] [data_source] # - project_dir: Project directory name (e.g., "chemba") -# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/30_interpolate_growth_model.R angata +# - data_source: (Optional) Data source directory - "merged_tif" (default), "merged_tif_8b" +# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/30_interpolate_growth_model.R angata merged_tif # 1. Load required packages # ----------------------- @@ -34,8 +35,18 @@ main <- function() { message("No project_dir provided. 
Using default:", project_dir) } - # Make project_dir available globally so parameters_project.R can use it + # Get data_source from arguments (for consistency with Script 20) + if (length(args) >= 2 && !is.na(args[2])) { + data_source <- as.character(args[2]) + } else if (exists("data_source", envir = .GlobalEnv)) { + data_source <- get("data_source", envir = .GlobalEnv) + } else { + data_source <- "merged_tif" # Default to 4-band (most common for existing projects) + } + + # Make project_dir and data_source available globally so parameters_project.R can use it assign("project_dir", project_dir, envir = .GlobalEnv) + assign("data_source", data_source, envir = .GlobalEnv) # Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction) ci_extraction_script <- TRUE diff --git a/r_app/40_mosaic_creation.R b/r_app/40_mosaic_creation.R index bf9ced1..cc0945c 100644 --- a/r_app/40_mosaic_creation.R +++ b/r_app/40_mosaic_creation.R @@ -5,17 +5,17 @@ # This script creates weekly mosaics from daily satellite imagery. # It handles command-line arguments and initiates the mosaic creation process. 
# -# Usage: Rscript mosaic_creation.R [end_date] [offset] [project_dir] [file_name] [use_tiles] [tile_size] +# Usage: Rscript mosaic_creation.R [end_date] [offset] [project_dir] [file_name] [data_source] # - end_date: End date for processing (YYYY-MM-DD format) -# - offset: Number of days to look back from end_date -# - project_dir: Project directory name (e.g., "chemba") -# - file_name: Optional custom output file name -# - use_tiles: Use tile-based processing for memory efficiency (TRUE/FALSE, default: FALSE) -# - tile_size: Tile size in km (default: 5, only used if use_tiles=TRUE) +# - offset: Number of days to look back from end_date (typically 7 for one week) +# - project_dir: Project directory name (e.g., "aura", "angata", "chemba", "esa") +# - file_name: Optional custom output file name (leave empty "" to use default: week_WW_YYYY.tif) +# - data_source: Optional data source folder (e.g., "merged_tif" or "merged_tif_8b") +# If not provided, auto-detects which folder contains actual data # # Examples: - -# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation.R 2026-01-12 7 angata +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation.R 2026-01-12 7 aura +# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation.R 2025-12-24 7 aura "" "merged_tif" # # 1. Load required packages @@ -77,20 +77,57 @@ main <- function() { message("No offset provided. Using default:", offset, "days") } + # Process data_source argument (optional, passed from pipeline) + # If provided, use it; otherwise auto-detect + data_source_from_args <- NULL + if (length(args) >= 5 && !is.na(args[5]) && nchar(args[5]) > 0) { + data_source_from_args <- as.character(args[5]) + message("Data source explicitly provided via arguments: ", data_source_from_args) + } + # 3. 
Initialize project configuration # -------------------------------- # Detect which data source directory exists (merged_tif or merged_tif_8b) + # IMPORTANT: Only consider a folder as valid if it contains actual files laravel_storage <- here::here("laravel_app/storage/app", project_dir) - data_source <- if (dir.exists(file.path(laravel_storage, "merged_tif_8b"))) { - message("Detected data source: merged_tif_8b (8-band optimized)") - "merged_tif_8b" - } else if (dir.exists(file.path(laravel_storage, "merged_tif"))) { - message("Detected data source: merged_tif (legacy 4-band)") - "merged_tif" - } else { - message("Warning: No data source found. Using default: merged_tif_8b") - "merged_tif_8b" + + # If data_source was explicitly provided from pipeline, validate it; otherwise auto-detect + if (!is.null(data_source_from_args)) { + # Use the provided data_source, but verify it has data + proposed_path <- file.path(laravel_storage, data_source_from_args) + has_data <- dir.exists(proposed_path) && length(list.files(proposed_path, pattern = "\\.tif$")) > 0 + + if (has_data) { + data_source <- data_source_from_args + message("✓ Using provided data source '", data_source, "' - contains files") + } else { + message("WARNING: Provided data source '", data_source_from_args, "' is empty or doesn't exist. 
Auto-detecting...") + data_source_from_args <- NULL # Fall through to auto-detection + } + } + + # Auto-detect if no valid data_source was provided + if (is.null(data_source_from_args)) { + # Check merged_tif_8b - only if it exists AND contains files + merged_tif_8b_path <- file.path(laravel_storage, "merged_tif_8b") + has_8b_data <- dir.exists(merged_tif_8b_path) && length(list.files(merged_tif_8b_path, pattern = "\\.tif$")) > 0 + + # Check merged_tif - only if it exists AND contains files + merged_tif_path <- file.path(laravel_storage, "merged_tif") + has_legacy_data <- dir.exists(merged_tif_path) && length(list.files(merged_tif_path, pattern = "\\.tif$")) > 0 + + # Select data source based on what has actual data + data_source <- if (has_8b_data) { + message("Auto-detected data source: merged_tif_8b (8-band optimized) - contains files") + "merged_tif_8b" + } else if (has_legacy_data) { + message("Auto-detected data source: merged_tif (legacy 4-band) - contains files") + "merged_tif" + } else { + message("Warning: No valid data source found (both folders empty or missing). Using default: merged_tif") + "merged_tif" + } } # Set global data_source for parameters_project.R @@ -112,13 +149,30 @@ main <- function() { }) }) + # Extract path variables from global environment (set by parameters_project.R) + merged_final <- if (exists("merged_final", envir = .GlobalEnv)) { + get("merged_final", envir = .GlobalEnv) + } else { + file.path(laravel_storage, "merged_final_tif") + } + + daily_vrt <- if (exists("daily_vrt", envir = .GlobalEnv)) { + get("daily_vrt", envir = .GlobalEnv) + } else { + file.path(laravel_storage, "Data", "vrt") + } + + safe_log(paste("Using merged_final_tif directory:", merged_final)) + safe_log(paste("Using daily VRT directory:", daily_vrt)) + # 4. 
Generate date range for processing # --------------------------------- dates <- date_list(end_date, offset) safe_log(paste("Processing data for week", dates$week, "of", dates$year)) # Create output filename - file_name_tif <- if (length(args) >= 4 && !is.na(args[4])) { + # Only use custom filename if explicitly provided (not empty string) + file_name_tif <- if (length(args) >= 4 && !is.na(args[4]) && nchar(args[4]) > 0) { as.character(args[4]) } else { paste0("week_", sprintf("%02d", dates$week), "_", dates$year, ".tif") diff --git a/r_app/40_mosaic_creation_utils.R b/r_app/40_mosaic_creation_utils.R index bb9671a..3aba594 100644 --- a/r_app/40_mosaic_creation_utils.R +++ b/r_app/40_mosaic_creation_utils.R @@ -157,7 +157,9 @@ create_weekly_mosaic <- function(dates, field_boundaries, daily_vrt_dir, #' find_vrt_files <- function(vrt_directory, dates) { # Get all VRT files in directory - vrt_files <- list.files(here::here(vrt_directory), full.names = TRUE) + # Note: vrt_directory is already a full/relative path from parameters_project.R + # Don't wrap it in here::here() again - that would create an incorrect path + vrt_files <- list.files(vrt_directory, full.names = TRUE) if (length(vrt_files) == 0) { warning("No VRT files found in directory: ", vrt_directory) diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index 22a7d38..0138ff6 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -228,7 +228,7 @@ main <- function() { message(strrep("=", 70)) message("Date:", format(end_date, "%Y-%m-%d")) message("Project:", project_dir) - message("Mode: Per-field analysis (SC-64) + Farm-level KPIs") + message("Mode: Conditional KPI calculation based on client type") message("") # Load configuration and utilities @@ -240,6 +240,14 @@ main <- function() { stop("Error loading parameters_project.R: ", e$message) }) + # DETERMINE CLIENT TYPE AND KPI CONFIGURATION + client_type <- get_client_type(project_dir) + client_config <- 
get_client_kpi_config(client_type) + + message("Client Type:", client_type) + message("KPI Calculations:", paste(client_config$kpi_calculations, collapse = ", ")) + message("Output Formats:", paste(client_config$outputs, collapse = ", ")) + # Define paths for mosaic detection (used in PHASE 1) base_project_path <- file.path("laravel_app", "storage", "app", project_dir) weekly_tile_max <- file.path(base_project_path, "weekly_tile_max") @@ -251,18 +259,77 @@ main <- function() { warning("30_growth_model_utils.R not found - yield prediction KPI will use placeholder data") }) - # ========== PER-FIELD ANALYSIS (SC-64) ========== + # CONDITIONAL EXECUTION BASED ON CLIENT TYPE + # ============================================ - message("\n", strrep("-", 70)) - message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)") - message(strrep("-", 70)) + if (client_config$script_90_compatible && "kpi_summary_tables" %in% client_config$outputs) { + # AURA WORKFLOW: Run 6 farm-level KPIs for Script 90 compatibility + message("\n", strrep("=", 70)) + message("AURA WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)") + message(strrep("=", 70)) + + # Load 80_kpi_utils.R with all 6 KPI functions + # (Note: 80_kpi_utils.R includes all necessary helper functions from crop_messaging_utils.R) + tryCatch({ + source(here("r_app", "80_kpi_utils.R")) + }, error = function(e) { + stop("Error loading 80_kpi_utils.R: ", e$message) + }) + + # Prepare inputs for KPI calculation + reports_dir_kpi <- file.path(base_project_path, "reports", "kpis") + if (!dir.exists(reports_dir_kpi)) { + dir.create(reports_dir_kpi, recursive = TRUE) + } + + cumulative_CI_vals_dir <- file.path(base_project_path, "combined_CI") + + # Load field boundaries and harvesting data (already loaded by parameters_project.R) + if (!exists("field_boundaries_sf")) { + stop("field_boundaries_sf not loaded. 
Check parameters_project.R initialization.") + } + if (!exists("harvesting_data")) { + warning("harvesting_data not loaded. TCH KPI will use placeholder values.") + harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric()) + } + + # Calculate all 6 KPIs + kpi_results <- calculate_all_kpis( + report_date = end_date, + output_dir = reports_dir_kpi, + field_boundaries_sf = field_boundaries_sf, + harvesting_data = harvesting_data, + cumulative_CI_vals_dir = cumulative_CI_vals_dir, + weekly_CI_mosaic = weekly_mosaic, + reports_dir = reports_dir_kpi, + project_dir = project_dir + ) + + cat("\n=== AURA KPI CALCULATION COMPLETE ===\n") + cat("Summary tables saved for Script 90 integration\n") + cat("Output directory:", reports_dir_kpi, "\n\n") + + } else if (client_config$script_91_compatible && "field_analysis_excel" %in% client_config$outputs) { + # CANE_SUPPLY WORKFLOW: Run per-field analysis with phase assignment + message("\n", strrep("=", 70)) + message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)") + message(strrep("=", 70)) + + # Continue with existing per-field analysis code below + + message("\n", strrep("-", 70)) + message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)") + message(strrep("-", 70)) + current_week <- as.numeric(format(end_date, "%V")) # ISO week number (1-53) + year <- as.numeric(format(end_date, "%G")) # Use ISO week year (%G) to match Script 40's mosaic naming - current_week <- as.numeric(format(end_date, "%V")) - year <- as.numeric(format(end_date, "%Y")) - previous_week <- current_week - 1 - if (previous_week < 1) previous_week <- 52 + # Calculate previous week using authoritative helper (handles year boundaries correctly) + source("r_app/80_weekly_stats_utils.R") # Load helper function + previous_info <- calculate_target_week_and_year(current_week, year, offset_weeks = 1) + previous_week <- previous_info$week + previous_year <- previous_info$year - message(paste("Week:", 
current_week, "/ Year:", year)) + message(paste("Week:", current_week, "/ Year (ISO):", year)) # Find mosaic files - support both tile-based AND single-file approaches message("Finding mosaic files...") @@ -337,7 +404,7 @@ main <- function() { # Only auto-generate on first call (not in recursive calls from within load_historical_field_data) allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv) - historical_data <- load_historical_field_data(project_dir, current_week, reports_dir, + historical_data <- load_historical_field_data(project_dir, current_week, year, reports_dir, num_weeks = num_weeks_to_load, auto_generate = allow_auto_gen, field_boundaries_sf = field_boundaries_sf) @@ -437,7 +504,7 @@ main <- function() { prev_stats <- load_or_calculate_weekly_stats( week_num = previous_week, - year = year, + year = previous_year, project_dir = project_dir, field_boundaries_sf = field_boundaries_sf, mosaic_dir = tile_grid$mosaic_dir, @@ -780,6 +847,14 @@ main <- function() { cat(" - Per-field data exported\n") cat(" - Farm-level KPIs calculated\n") cat(" - All outputs in:", reports_dir, "\n\n") + + } else { + # Unknown client type - log warning and exit + warning(sprintf("Unknown client type: %s - no workflow matched", client_type)) + cat("\n⚠️ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "") + cat("Expected: 'agronomic_support' (aura) or 'cane_supply' (angata, etc.)\n") + cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n") + } } if (sys.nframe() == 0) { diff --git a/r_app/80_kpi_utils.R b/r_app/80_kpi_utils.R new file mode 100644 index 0000000..7f1a227 --- /dev/null +++ b/r_app/80_kpi_utils.R @@ -0,0 +1,1417 @@ +# 80_KPI_UTILS.R +# =============== +# Consolidated KPI calculation utilities for Script 80. +# Contains all 6 farm-level KPIs for SmartCane analysis. 
+# +# Includes helper functions from crop_messaging_utils.R: +# - safe_log() +# - calculate_cv() +# - calculate_spatial_autocorrelation() +# - calculate_change_percentages() + +# ============================================================================ +# HELPER FUNCTIONS FROM CROP_MESSAGING_UTILS.R +# ============================================================================ + +# Analysis configuration - Thresholds for clustering analysis +MORAN_THRESHOLD_HIGH <- 0.95 # Above this = very strong clustering (problematic patterns) +MORAN_THRESHOLD_MODERATE <- 0.85 # Above this = moderate clustering +MORAN_THRESHOLD_LOW <- 0.7 # Above this = normal field continuity + +#' Logging utility for consistent message handling +#' @param message The message to log +#' @param level The log level (default: "INFO") +#' @return NULL (used for side effects) +safe_log <- function(message, level = "INFO") { + if (exists("log_message")) { + log_message(message, level) + } else { + if (level %in% c("ERROR", "WARNING")) { + warning(message) + } else { + message(message) + } + } +} + +#' Calculate coefficient of variation for uniformity assessment +#' @param values Numeric vector of CI values +#' @return Coefficient of variation (CV) as decimal +calculate_cv <- function(values) { + values <- values[!is.na(values) & is.finite(values)] + if (length(values) < 2) return(NA) + cv <- sd(values) / mean(values) # Keep as decimal + return(cv) +} + +#' Calculate percentage of field with positive vs negative change +#' @param current_values Current week CI values +#' @param previous_values Previous week CI values +#' @return List with percentage of positive and negative change areas +calculate_change_percentages <- function(current_values, previous_values) { + # Ensure same length (should be from same field boundaries) + if (length(current_values) != length(previous_values)) { + return(list(positive_pct = NA, negative_pct = NA, stable_pct = NA)) + } + + # Calculate pixel-wise change + 
change_values <- current_values - previous_values + valid_changes <- change_values[!is.na(change_values) & is.finite(change_values)] + + if (length(valid_changes) < 2) { + return(list(positive_pct = NA, negative_pct = NA, stable_pct = NA)) + } + + # Count positive, negative, and stable areas + positive_pct <- sum(valid_changes > 0) / length(valid_changes) * 100 + negative_pct <- sum(valid_changes < 0) / length(valid_changes) * 100 + stable_pct <- sum(valid_changes == 0) / length(valid_changes) * 100 + + return(list( + positive_pct = positive_pct, + negative_pct = negative_pct, + stable_pct = stable_pct + )) +} + +#' Calculate spatial autocorrelation (Moran's I) for a field +#' @param ci_raster Terra raster of CI values +#' @param field_boundary Terra vector of field boundary +#' @return List with Moran's I statistic and p-value +calculate_spatial_autocorrelation <- function(ci_raster, field_boundary) { + + tryCatch({ + # Crop and mask raster to field boundary + field_raster <- terra::crop(ci_raster, field_boundary) + field_raster <- terra::mask(field_raster, field_boundary) + + # Convert to points for spatial analysis + raster_points <- terra::as.points(field_raster, na.rm = TRUE) + + # Check if we have enough points + if (length(raster_points) < 10) { + return(list(morans_i = NA, p_value = NA, interpretation = "insufficient_data")) + } + + # Convert to sf for spdep + points_sf <- sf::st_as_sf(raster_points) + + # Create spatial weights matrix (k-nearest neighbors) + coords <- sf::st_coordinates(points_sf) + + # Use adaptive number of neighbors based on sample size + k_neighbors <- min(8, max(4, floor(nrow(coords) / 10))) + + knn_nb <- spdep::knearneigh(coords, k = k_neighbors) + knn_listw <- spdep::nb2listw(spdep::knn2nb(knn_nb), style = "W", zero.policy = TRUE) + + # Calculate Moran's I + ci_values <- points_sf[[1]] # First column contains CI values + moran_result <- spdep::moran.test(ci_values, knn_listw, zero.policy = TRUE) + + # Interpret results + morans_i <- 
moran_result$estimate[1] + p_value <- moran_result$p.value + + interpretation <- if (is.na(morans_i)) { + "insufficient_data" + } else if (p_value > 0.05) { + "random" # Not significant spatial pattern + } else if (morans_i > MORAN_THRESHOLD_HIGH) { + "very_strong_clustering" # Very strong clustering - may indicate management issues + } else if (morans_i > MORAN_THRESHOLD_MODERATE) { + "strong_clustering" # Strong clustering - worth monitoring + } else if (morans_i > MORAN_THRESHOLD_LOW) { + "normal_continuity" # Normal field continuity - expected for uniform fields + } else if (morans_i > 0.3) { + "weak_clustering" # Some clustering present + } else if (morans_i < -0.3) { + "dispersed" # Checkerboard pattern + } else { + "low_autocorrelation" # Low spatial autocorrelation + } + + return(list( + morans_i = morans_i, + p_value = p_value, + interpretation = interpretation + )) + + }, error = function(e) { + warning(paste("Error calculating spatial autocorrelation:", e$message)) + return(list(morans_i = NA, p_value = NA, interpretation = "error")) + }) +} + +# ============================================================================ +# KPI-SPECIFIC HELPER FUNCTIONS +# ============================================================================ + +# 1. 
Helper Functions +# ----------------- + +#' Extract CI band only from a multi-band raster +#' @param ci_raster CI raster (can be multi-band with Red, Green, Blue, NIR, CI) +#' @param field_vect Field boundary as SpatVector +#' @return Vector of CI values +extract_ci_values <- function(ci_raster, field_vect) { + extracted <- terra::extract(ci_raster, field_vect, fun = NULL) + + # Check if CI column exists (multi-band mosaic) + if ("CI" %in% names(extracted)) { + return(extracted[, "CI"]) + } else if (ncol(extracted) > 1) { + # Fallback: assume last column is CI (after ID, Red, Green, Blue, NIR) + return(extracted[, ncol(extracted)]) + } else { + # Single band raster - return as is + return(extracted[, 1]) + } +} + +#' Calculate current and previous week numbers using ISO 8601 week numbering +#' @param report_date Date to calculate weeks for (default: today) +#' @return List with current_week, previous_week, year (current), and previous_year (for year boundary handling) +calculate_week_numbers <- function(report_date = Sys.Date()) { + # Use ISO 8601 week numbering (%V) - weeks start on Monday + current_week <- as.numeric(format(report_date, "%V")) + current_year <- as.numeric(format(report_date, "%G")) # Use ISO week year (%G) + + previous_week <- current_week - 1 + previous_year <- current_year + + # Handle year boundary: if previous_week < 1, wrap to last week of previous year + if (previous_week < 1) { + previous_week <- 52 + previous_year <- current_year - 1 # Go back to previous year + } + + return(list( + current_week = current_week, + previous_week = previous_week, + year = current_year, + previous_year = previous_year + )) +} + +#' Load weekly mosaic CI data +#' @param week_num Week number +#' @param year Year +#' @param mosaic_dir Directory containing weekly mosaics +#' @return Terra raster with CI band, or NULL if file not found +load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) { + week_file <- sprintf("week_%02d_%d.tif", week_num, year) + 
week_path <- file.path(mosaic_dir, week_file) + + if (!file.exists(week_path)) { + safe_log(paste("Weekly mosaic not found:", week_path), "WARNING") + return(NULL) + } + + tryCatch({ + mosaic_raster <- terra::rast(week_path) + ci_raster <- mosaic_raster[[5]] # CI is the 5th band + names(ci_raster) <- "CI" + safe_log(paste("Loaded weekly mosaic:", week_file)) + return(ci_raster) + }, error = function(e) { + safe_log(paste("Error loading mosaic:", e$message), "ERROR") + return(NULL) + }) +} + +# Function to prepare predictions with consistent naming and formatting +prepare_predictions <- function(predictions, newdata) { + return(predictions %>% + as.data.frame() %>% + dplyr::rename(predicted_Tcha = ".") %>% + dplyr::mutate( + sub_field = newdata$sub_field, + field = newdata$field, + Age_days = newdata$DOY, + total_CI = round(newdata$cumulative_CI, 0), + predicted_Tcha = round(predicted_Tcha, 0), + season = newdata$season + ) %>% + dplyr::select(field, sub_field, Age_days, predicted_Tcha, season) %>% + dplyr::left_join(., newdata, by = c("field", "sub_field", "season")) + ) +} + +# 2. 
KPI Calculation Functions +# --------------------------- + +#' Calculate Field Uniformity Summary KPI +#' @param ci_raster Current week CI raster +#' @param field_boundaries Field boundaries +#' @return List with summary data frame and field-level results data frame +calculate_field_uniformity_kpi <- function(ci_raster, field_boundaries) { + safe_log("Calculating Field Uniformity Summary KPI") + + # Handle both sf and SpatVector inputs + if (!inherits(field_boundaries, "SpatVector")) { + field_boundaries_vect <- terra::vect(field_boundaries) + } else { + field_boundaries_vect <- field_boundaries + } + + field_results <- data.frame() + + for (i in seq_len(nrow(field_boundaries))) { + field_name <- field_boundaries$field[i] + sub_field_name <- field_boundaries$sub_field[i] + field_id <- paste0(field_name, "_", sub_field_name) + + # Extract field boundary + field_vect <- field_boundaries_vect[i] + + # crop ci_raster with field_vect and use that for ci_values + cropped_raster <- terra::crop(ci_raster, field_vect, mask = TRUE) + + # Extract CI values for this field using helper function + field_values <- extract_ci_values(cropped_raster, field_vect) + valid_values <- field_values[!is.na(field_values) & is.finite(field_values)] + + # If all valid values are 0 (cloud), fill with NA row + if (length(valid_values) == 0 || all(valid_values == 0)) { + field_results <- rbind(field_results, data.frame( + field = field_name, + sub_field = sub_field_name, + field_id = field_id, + cv_value = NA_real_, + uniformity_level = NA_character_, + mean_ci = NA_real_, + std_ci = NA_real_ + )) + } else if (length(valid_values) > 1) { + # Calculate CV using existing function + cv_value <- calculate_cv(valid_values) + + # Classify uniformity level + uniformity_level <- dplyr::case_when( + cv_value < 0.15 ~ "Excellent", + cv_value < 0.25 ~ "Good", + cv_value < 0.35 ~ "Moderate", + TRUE ~ "Poor" + ) + + field_results <- rbind(field_results, data.frame( + field = field_name, + sub_field = 
sub_field_name, + field_id = field_id, + cv_value = cv_value, + uniformity_level = uniformity_level, + mean_ci = mean(valid_values), + std_ci = sd(valid_values) + )) + } else { + # If only one valid value, fill with NA (not enough data for CV) + field_results <- rbind(field_results, data.frame( + field = field_name, + sub_field = sub_field_name, + field_id = field_id, + cv_value = NA_real_, + uniformity_level = NA_character_, + mean_ci = mean(valid_values), + std_ci = NA_real_ + )) + } + } + + # Create summary + uniformity_summary <- field_results %>% + dplyr::group_by(uniformity_level) %>% + dplyr::summarise(count = n(), .groups = 'drop') %>% + dplyr::mutate(percent = round((count / sum(count)) * 100, 1)) + + # Ensure all uniformity levels are represented + all_levels <- data.frame(uniformity_level = c("Excellent", "Good", "Moderate", "Poor")) + uniformity_summary <- merge(all_levels, uniformity_summary, all.x = TRUE) + uniformity_summary$count[is.na(uniformity_summary$count)] <- 0 + uniformity_summary$percent[is.na(uniformity_summary$percent)] <- 0 + + return(list(summary = uniformity_summary, field_results = field_results)) +} + +#' Calculate Farm-wide Area Change Summary KPI +#' @param current_ci Current week CI raster +#' @param previous_ci Previous week CI raster +#' @param field_boundaries Field boundaries +#' @return List with summary data frame and field-level results data frame +calculate_area_change_kpi <- function(current_ci, previous_ci, field_boundaries) { + safe_log("Calculating Farm-wide Area Change Summary KPI") + + if (is.null(previous_ci)) { + safe_log("Previous week data not available, using placeholder values", "WARNING") + summary_result <- data.frame( + change_type = c("Improving areas", "Stable areas", "Declining areas", "Total area"), + hectares = c(0, 0, 0, 0), + percent = c(0, 0, 0, 0) + ) + field_results <- data.frame( + field = character(0), + sub_field = character(0), + improving_ha = numeric(0), + stable_ha = numeric(0), + 
#' Calculate Farm-wide Area Change Summary KPI
#'
#' Compares current vs previous week CI per field, converts the share of
#' improving / stable / declining pixels into hectares (via each field's
#' area) and aggregates them farm-wide.
#'
#' @param current_ci Current week CI raster.
#' @param previous_ci Previous week CI raster (NULL when unavailable).
#' @param field_boundaries Field boundaries (sf or SpatVector).
#' @return List with `summary` (hectares/percent per change type) and
#'   `field_results` (per-field hectare breakdown).
calculate_area_change_kpi <- function(current_ci, previous_ci, field_boundaries) {
  safe_log("Calculating Farm-wide Area Change Summary KPI")

  if (is.null(previous_ci)) {
    # Without a previous week there is nothing to compare against
    safe_log("Previous week data not available, using placeholder values", "WARNING")
    summary_result <- data.frame(
      change_type = c("Improving areas", "Stable areas", "Declining areas", "Total area"),
      hectares = c(0, 0, 0, 0),
      percent = c(0, 0, 0, 0)
    )
    field_results <- data.frame(
      field = character(0),
      sub_field = character(0),
      improving_ha = numeric(0),
      stable_ha = numeric(0),
      declining_ha = numeric(0),
      total_area_ha = numeric(0)
    )
    return(list(summary = summary_result, field_results = field_results))
  }

  # Handle both sf and SpatVector inputs
  if (!inherits(field_boundaries, "SpatVector")) {
    field_boundaries_vect <- terra::vect(field_boundaries)
  } else {
    field_boundaries_vect <- field_boundaries
  }

  total_improving_ha <- 0
  total_stable_ha <- 0
  total_declining_ha <- 0
  total_area_ha <- 0

  n_fields <- nrow(field_boundaries)
  # Pre-allocated row list instead of O(n^2) rbind-in-loop growth
  result_rows <- vector("list", n_fields)

  # Process each field individually (like crop messaging does)
  for (i in seq_len(n_fields)) {
    field_name <- field_boundaries$field[i]
    sub_field_name <- field_boundaries$sub_field[i]

    # Get field area: prefer an attribute column if present, otherwise
    # compute it from the geometry in an equal-area projection
    field_area_ha <- NA
    if ("area_ha" %in% colnames(field_boundaries)) {
      field_area_ha <- field_boundaries$area_ha[i]
    } else if ("AREA_HA" %in% colnames(field_boundaries)) {
      field_area_ha <- field_boundaries$AREA_HA[i]
    } else if ("area" %in% colnames(field_boundaries)) {
      field_area_ha <- field_boundaries$area[i]
    } else {
      # Always transform to an equal-area projection for accurate areas
      field_geom <- terra::project(field_boundaries_vect[i, ], "EPSG:6933")
      field_area_ha <- terra::expanse(field_geom) / 10000 # m^2 -> hectares
    }

    # Skip fields without a usable area
    if (is.na(field_area_ha) || field_area_ha <= 0) {
      result_rows[[i]] <- data.frame(
        field = field_name,
        sub_field = sub_field_name,
        improving_ha = NA_real_,
        stable_ha = NA_real_,
        declining_ha = NA_real_,
        total_area_ha = NA_real_
      )
      next
    }

    # Extract field boundary
    field_vect <- field_boundaries_vect[i]

    # Extract CI values for both weeks (CI band only, via helper)
    current_values <- extract_ci_values(current_ci, field_vect)
    previous_values <- extract_ci_values(previous_ci, field_vect)

    # Keep only pixels valid in BOTH weeks so change is well defined
    valid_idx <- !is.na(current_values) & !is.na(previous_values) &
      is.finite(current_values) & is.finite(previous_values)
    current_clean <- current_values[valid_idx]
    previous_clean <- previous_values[valid_idx]

    if (length(current_clean) > 10) {
      # Percent of pixels improving/stable/declining (same as crop messaging)
      change_percentages <- calculate_change_percentages(current_clean, previous_clean)

      # Convert percentages to hectares (same as crop messaging)
      improving_ha <- (change_percentages$positive_pct / 100) * field_area_ha
      stable_ha <- (change_percentages$stable_pct / 100) * field_area_ha
      declining_ha <- (change_percentages$negative_pct / 100) * field_area_ha

      # Farm-wide totals only include fields with enough valid pixels
      total_improving_ha <- total_improving_ha + improving_ha
      total_stable_ha <- total_stable_ha + stable_ha
      total_declining_ha <- total_declining_ha + declining_ha
      total_area_ha <- total_area_ha + field_area_ha

      result_rows[[i]] <- data.frame(
        field = field_name,
        sub_field = sub_field_name,
        improving_ha = improving_ha,
        stable_ha = stable_ha,
        declining_ha = declining_ha,
        total_area_ha = field_area_ha
      )
    } else {
      # Not enough overlapping valid pixels: NA metrics but keep the area
      result_rows[[i]] <- data.frame(
        field = field_name,
        sub_field = sub_field_name,
        improving_ha = NA_real_,
        stable_ha = NA_real_,
        declining_ha = NA_real_,
        total_area_ha = field_area_ha
      )
    }
  }

  field_results <- do.call(rbind, result_rows)
  if (is.null(field_results)) {
    # Zero fields: keep the documented return shape instead of NULL
    field_results <- data.frame(
      field = character(0), sub_field = character(0),
      improving_ha = numeric(0), stable_ha = numeric(0),
      declining_ha = numeric(0), total_area_ha = numeric(0)
    )
  }

  # Farm-wide percentages relative to the total area actually analysed
  if (total_area_ha > 0) {
    improving_pct <- (total_improving_ha / total_area_ha) * 100
    stable_pct <- (total_stable_ha / total_area_ha) * 100
    declining_pct <- (total_declining_ha / total_area_ha) * 100
  } else {
    improving_pct <- stable_pct <- declining_pct <- 0
  }

  summary_result <- data.frame(
    change_type = c("Improving areas", "Stable areas", "Declining areas", "Total area"),
    hectares = round(c(total_improving_ha, total_stable_ha, total_declining_ha, total_area_ha), 1),
    percent = round(c(improving_pct, stable_pct, declining_pct, 100.0), 1)
  )

  return(list(summary = summary_result, field_results = field_results))
}
#' Calculate TCH Forecasted KPI (using actual yield prediction models)
#'
#' Trains a random-forest yield model (CAST::ffs forward feature selection
#' with 5-fold CV) on historic cumulative-CI vs harvested tonnage, predicts
#' current-season yield for mature fields (>= 240 days) and groups the
#' predictions into quartile bands.
#'
#' @param field_boundaries Field boundaries (sf or SpatVector).
#' @param harvesting_data Harvesting data with `tonnage_ha` and `year`.
#' @param cumulative_CI_vals_dir Directory with cumulative CI data.
#' @return List with `summary` (quartile groups + total area) and
#'   `field_results` (per-field forecast). Falls back to placeholder
#'   values when training data or predictions are unavailable.
calculate_tch_forecasted_kpi <- function(field_boundaries, harvesting_data, cumulative_CI_vals_dir) {
  safe_log("Calculating TCH Forecasted KPI using yield prediction models")

  # Placeholder result used whenever a real forecast cannot be produced;
  # always returns the same list(summary=, field_results=) structure as
  # the success path.
  create_fallback_result <- function(field_boundaries) {
    # Convert to SpatVector if needed (for terra::project)
    if (!inherits(field_boundaries, "SpatVector")) {
      field_boundaries <- terra::vect(field_boundaries)
    }
    field_boundaries_projected <- terra::project(field_boundaries, "EPSG:6933") # equal-area CRS
    field_areas <- terra::expanse(field_boundaries_projected) / 10000 # m^2 -> hectares
    total_area <- sum(field_areas)

    summary_result <- data.frame(
      field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"),
      count = c(0, 0, 0, nrow(field_boundaries)),
      value = c(0, 0, 0, round(total_area, 1))
    )

    field_results <- data.frame(
      field = character(0),
      sub_field = character(0),
      Age_days = numeric(0),
      yield_forecast_t_ha = numeric(0),
      season = numeric(0)
    )

    return(list(summary = summary_result, field_results = field_results))
  }

  tryCatch({
    # Without any historic tonnage there is nothing to train on
    if (all(is.na(harvesting_data$tonnage_ha))) {
      safe_log("Lacking historic harvest data, using placeholder yield prediction", "WARNING")
      return(create_fallback_result(field_boundaries))
    }

    # Load CI quadrant data and fill missing field identifiers per model
    CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>%
      dplyr::group_by(model) %>%
      tidyr::fill(field, sub_field, .direction = "downup") %>%
      dplyr::ungroup()

    # Rename year column to season for consistency
    harvesting_data_renamed <- harvesting_data %>% dplyr::rename(season = year)

    # Join CI and yield data; keep the latest DOY per sub_field/season
    CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed, by = c("field", "sub_field", "season")) %>%
      dplyr::group_by(sub_field, season) %>%
      dplyr::slice(which.max(DOY)) %>%
      dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>%
      dplyr::mutate(CI_per_day = cumulative_CI / DOY)

    # Define predictors and response variables
    predictors <- c("cumulative_CI", "DOY", "CI_per_day")
    response <- "tonnage_ha"

    # Training set: seasons with observed tonnage
    CI_and_yield_test <- CI_and_yield %>%
      as.data.frame() %>%
      dplyr::filter(!is.na(tonnage_ha))

    CI_and_yield_validation <- CI_and_yield_test

    # Prediction set: fields without harvest data, mature fields only
    prediction_yields <- CI_and_yield %>%
      as.data.frame() %>%
      dplyr::filter(is.na(tonnage_ha) & DOY >= 240) # Filter for mature fields BEFORE prediction

    # Check if we have training data
    if (nrow(CI_and_yield_test) == 0) {
      safe_log("No training data available for yield prediction", "WARNING")
      return(create_fallback_result(field_boundaries))
    }

    # Configure model training parameters (5-fold cross-validation)
    ctrl <- caret::trainControl(
      method = "cv",
      savePredictions = TRUE,
      allowParallel = TRUE,
      number = 5,
      verboseIter = TRUE
    )

    # Train the model with feature selection (seeded for reproducibility)
    set.seed(202)
    model_ffs_rf <- CAST::ffs(
      CI_and_yield_test[, predictors],
      CI_and_yield_test[, response],
      method = "rf",
      trControl = ctrl,
      importance = TRUE,
      withinSE = TRUE,
      tuneLength = 5,
      na.rm = TRUE
    )

    # Predict yields for the validation dataset and report RMSE
    pred_ffs_rf <- prepare_predictions(stats::predict(model_ffs_rf, newdata = CI_and_yield_validation), CI_and_yield_validation)
    rmse_value <- sqrt(mean((pred_ffs_rf$predicted_Tcha - CI_and_yield_validation$tonnage_ha)^2, na.rm = TRUE))
    safe_log(paste("Yield prediction RMSE:", round(rmse_value, 2), "t/ha"))

    # Predict current-season yields for mature fields (>= 240 days / 8 months)
    pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>%
      dplyr::filter(Age_days >= 240) %>%
      dplyr::select(c("field", "Age_days", "predicted_Tcha", "season"))

    # Calculate summary statistics for KPI
    if (nrow(pred_rf_current_season) > 0) {
      # Debug logging of the raw predictions
      safe_log(paste("Predicted yields summary:", paste(summary(pred_rf_current_season$predicted_Tcha), collapse = ", ")))
      safe_log(paste("Number of predictions:", nrow(pred_rf_current_season)))
      # BUG FIX: the sample text was previously passed as safe_log()'s
      # second (level) argument; build the full message first instead.
      safe_log(paste("Sample predictions:", paste(head(pred_rf_current_season$predicted_Tcha, 5), collapse = ", ")))

      # Quartile thresholds used to band fields
      yield_quartiles <- quantile(pred_rf_current_season$predicted_Tcha, probs = c(0.25, 0.5, 0.75), na.rm = TRUE)
      safe_log(paste("Yield quartiles (25%, 50%, 75%):", paste(round(yield_quartiles, 1), collapse = ", ")))

      # Count fields in each quartile group
      top_25_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[3], na.rm = TRUE)
      average_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[1] & pred_rf_current_season$predicted_Tcha < yield_quartiles[3], na.rm = TRUE)
      lowest_25_count <- sum(pred_rf_current_season$predicted_Tcha < yield_quartiles[1], na.rm = TRUE)

      # Total farm area (hectares) in an equal-area projection; sf inputs
      # use sf::st_transform/st_area, everything else goes through terra
      if (inherits(field_boundaries, "sf")) {
        field_boundaries_projected <- sf::st_transform(field_boundaries, "EPSG:6933")
        field_areas <- sf::st_area(field_boundaries_projected) / 10000
      } else {
        if (!inherits(field_boundaries, "SpatVector")) {
          field_boundaries_vect <- terra::vect(field_boundaries)
        } else {
          field_boundaries_vect <- field_boundaries
        }
        field_boundaries_projected <- terra::project(field_boundaries_vect, "EPSG:6933")
        field_areas <- terra::expanse(field_boundaries_projected) / 10000
      }
      total_area <- sum(as.numeric(field_areas))

      safe_log(paste("Total area calculated:", round(total_area, 1), "hectares"))

      result <- data.frame(
        field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"),
        count = c(top_25_count, average_count, lowest_25_count, nrow(field_boundaries)),
        value = c(round(yield_quartiles[3], 1), round(yield_quartiles[2], 1), round(yield_quartiles[1], 1), round(total_area, 1))
      )

      safe_log("Returning actual yield predictions")
      safe_log("Final result:")
      print(result)

      # Prepare field-level results
      field_level_results <- pred_rf_current_season %>%
        dplyr::select(field, Age_days, predicted_Tcha, season) %>%
        dplyr::rename(yield_forecast_t_ha = predicted_Tcha)

      return(list(summary = result, field_results = field_level_results))
    } else {
      safe_log("No yield predictions generated", "WARNING")
      # BUG FIX: previously this wrapped create_fallback_result() inside
      # another list(), nesting the summary as result$summary$summary;
      # return the fallback's already-correct structure directly.
      return(create_fallback_result(field_boundaries))
    }

  }, error = function(e) {
    safe_log(paste("Error in TCH yield prediction:", e$message), "ERROR")
    return(create_fallback_result(field_boundaries))
  })
}
#' Calculate Growth Decline Index KPI
#'
#' Scores each field's decline risk from the week-on-week mean CI drop,
#' weighted by how spatially scattered the current CI pattern is (via
#' Moran's I from calculate_spatial_autocorrelation) and its variability
#' (CV).
#'
#' @param current_ci Current week CI raster.
#' @param previous_ci Previous week CI raster (NULL when unavailable).
#' @param field_boundaries Field boundaries (sf or SpatVector).
#' @return List with `summary` (count/percent per risk level) and
#'   `field_results` (per-field risk score and its components).
calculate_growth_decline_kpi <- function(current_ci, previous_ci, field_boundaries) {
  safe_log("Calculating Growth Decline Index KPI")

  if (is.null(previous_ci)) {
    safe_log("Previous week data not available for growth decline analysis", "WARNING")
    # Return structure indicating no data available
    summary_result <- data.frame(
      risk_level = c("No data", "Data unavailable", "Check next week", "Previous week missing"),
      count = c(0, 0, 0, 0),
      percent = c(0, 0, 0, 100)
    )
    field_results <- data.frame(
      field = character(0),
      sub_field = character(0),
      risk_level = character(0),
      risk_score = numeric(0),
      decline_severity = numeric(0),
      spatial_weight = numeric(0)
    )
    return(list(summary = summary_result, field_results = field_results))
  }

  # Handle both sf and SpatVector inputs
  if (!inherits(field_boundaries, "SpatVector")) {
    field_boundaries_vect <- terra::vect(field_boundaries)
  } else {
    field_boundaries_vect <- field_boundaries
  }

  n_fields <- nrow(field_boundaries)
  # Pre-allocated row list instead of O(n^2) rbind-in-loop growth
  result_rows <- vector("list", n_fields)

  for (i in seq_len(n_fields)) {
    field_name <- field_boundaries$field[i]
    sub_field_name <- field_boundaries$sub_field[i]
    field_vect <- field_boundaries_vect[i]

    # Extract CI values for both weeks (CI band only, via helper)
    current_values <- extract_ci_values(current_ci, field_vect)
    previous_values <- extract_ci_values(previous_ci, field_vect)

    # Keep only pixels valid in BOTH weeks
    valid_idx <- !is.na(current_values) & !is.na(previous_values) &
      is.finite(current_values) & is.finite(previous_values)
    current_clean <- current_values[valid_idx]
    previous_clean <- previous_values[valid_idx]

    if (length(current_clean) > 10) {
      # Calculate CI change
      ci_change <- current_clean - previous_clean
      mean_change <- mean(ci_change)

      # Spatial clustering (Moran's I) and variability of the current week
      spatial_result <- calculate_spatial_autocorrelation(current_ci, field_vect)
      cv_value <- calculate_cv(current_clean)

      # Declines milder than 1 CI unit are treated as noise (severity 0)
      decline_severity <- ifelse(mean_change < -1.0, abs(mean_change), 0)
      # Scattered (low |Moran's I|) and variable patterns weigh more
      spatial_weight <- ifelse(!is.na(spatial_result$morans_i),
                               (1 - abs(spatial_result$morans_i)) * cv_value,
                               cv_value)

      risk_score <- decline_severity * (1 + spatial_weight)

      risk_level <- dplyr::case_when(
        risk_score < 0.5 ~ "Low",
        risk_score < 1.5 ~ "Moderate",
        risk_score < 3.0 ~ "High",
        TRUE ~ "Very-high"
      )

      result_rows[[i]] <- data.frame(
        field = field_name,
        sub_field = sub_field_name,
        risk_level = risk_level,
        risk_score = risk_score,
        decline_severity = decline_severity,
        spatial_weight = spatial_weight,
        morans_i = spatial_result$morans_i # Add Moran's I to results
      )
    } else {
      # Not enough valid data, fill with NA row
      result_rows[[i]] <- data.frame(
        field = field_name,
        sub_field = sub_field_name,
        risk_level = NA_character_,
        risk_score = NA_real_,
        decline_severity = NA_real_,
        spatial_weight = NA_real_,
        morans_i = NA_real_
      )
    }
  }

  field_results <- do.call(rbind, result_rows)
  if (is.null(field_results)) {
    # Zero fields: keep the documented return shape instead of NULL
    field_results <- data.frame(
      field = character(0), sub_field = character(0),
      risk_level = character(0), risk_score = numeric(0),
      decline_severity = numeric(0), spatial_weight = numeric(0),
      morans_i = numeric(0)
    )
  }

  # Summarize results (NA-risk fields are dropped by the merge below)
  risk_summary <- field_results %>%
    dplyr::group_by(risk_level) %>%
    dplyr::summarise(count = dplyr::n(), .groups = "drop") %>%
    dplyr::mutate(percent = round((count / sum(count)) * 100, 1))

  # Ensure all risk levels are represented, zero-filling absent ones
  all_levels <- data.frame(risk_level = c("Low", "Moderate", "High", "Very-high"))
  risk_summary <- merge(all_levels, risk_summary, all.x = TRUE)
  risk_summary$count[is.na(risk_summary$count)] <- 0
  risk_summary$percent[is.na(risk_summary$percent)] <- 0

  return(list(summary = risk_summary, field_results = field_results))
}
#' Calculate Weed Presence Score KPI
#'
#' Flags fields where many pixels show unusually rapid week-on-week CI
#' increase (> 2.0 CI units), used as a weed-presence proxy. Fields whose
#' age is >= 240 days are assumed to have a closed canopy and are skipped.
#'
#' @param current_ci Current week CI raster
#' @param previous_ci Previous week CI raster
#' @param field_boundaries Field boundaries
#' @param harvesting_data Harvesting data with field ages (DOY).
#'   NOTE(review): this parameter is never referenced in the body; ages
#'   come exclusively from the cumulative CI data below.
#' @param cumulative_CI_vals_dir Directory with cumulative CI data to get current field ages
#' @return List with summary data frame and field-level results data frame
calculate_weed_presence_kpi <- function(current_ci, previous_ci, field_boundaries, harvesting_data = NULL, cumulative_CI_vals_dir = NULL) {
  safe_log("Calculating Weed Presence Score KPI")

  # Load field age data from CI_quadrant if available; failures are logged
  # and leave field_ages NULL (age checks are then skipped entirely).
  field_ages <- NULL
  if (!is.null(cumulative_CI_vals_dir)) {
    tryCatch({
      CI_quadrant <- readRDS(file.path(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
      # Get most recent DOY (age) for each field FROM THE CURRENT SEASON ONLY
      # First identify the current season (most recent season with data)
      current_seasons <- CI_quadrant %>%
        dplyr::group_by(field, sub_field) %>%
        dplyr::filter(season == max(season, na.rm = TRUE)) %>%
        dplyr::ungroup()

      # Get the maximum DOY from current season for each field
      field_ages <- current_seasons %>%
        dplyr::group_by(field, sub_field) %>%
        dplyr::slice(which.max(DOY)) %>%
        dplyr::select(field, sub_field, DOY) %>%
        dplyr::ungroup()
      safe_log(paste("Loaded field ages for", nrow(field_ages), "fields"))
    }, error = function(e) {
      safe_log(paste("Could not load field ages:", e$message), "WARNING")
    })
  }

  if (is.null(previous_ci)) {
    safe_log("Previous week data not available for weed analysis", "WARNING")
    # NOTE(review): these are hard-coded fabricated counts (35/8/3 fields,
    # 76.1/17.4/6.5 %), not derived from any input — a report built from
    # this branch shows invented data. Consider zeroed placeholders like
    # the other KPIs use; confirm with report consumers before changing.
    summary_result <- data.frame(
      weed_risk_level = c("Low", "Moderate", "High"),
      field_count = c(35, 8, 3),
      percent = c(76.1, 17.4, 6.5)
    )
    field_results <- data.frame(
      field = character(0),
      sub_field = character(0),
      weed_risk_level = character(0),
      rapid_growth_pct = numeric(0),
      rapid_growth_pixels = numeric(0)
    )
    return(list(summary = summary_result, field_results = field_results))
  }

  # Handle both sf and SpatVector inputs
  if (!inherits(field_boundaries, "SpatVector")) {
    field_boundaries_vect <- terra::vect(field_boundaries)
  } else {
    field_boundaries_vect <- field_boundaries
  }

  field_results <- data.frame()

  for (i in seq_len(nrow(field_boundaries))) {
    field_name <- field_boundaries$field[i]
    sub_field_name <- field_boundaries$sub_field[i]
    field_vect <- field_boundaries_vect[i]

    # Check field age (8 months = 240 days); stays NA when unknown
    field_age <- NA
    if (!is.null(field_ages)) {
      age_row <- field_ages %>%
        dplyr::filter(field == field_name, sub_field == sub_field_name)
      if (nrow(age_row) > 0) {
        field_age <- age_row$DOY[1]
      }
    }

    # If field is >= 240 days old (8 months), canopy should be closed:
    # record a fixed low-risk row and skip the pixel analysis
    if (!is.na(field_age) && field_age >= 240) {
      field_results <- rbind(field_results, data.frame(
        field = field_name,
        sub_field = sub_field_name,
        weed_risk_level = "Canopy closed - Low weed risk",
        rapid_growth_pct = 0,
        rapid_growth_pixels = 0,
        field_age_days = field_age
      ))
      next # Skip to next field
    }

    # Extract CI values for both weeks (using helper to get CI band only)
    current_values <- extract_ci_values(current_ci, field_vect)
    previous_values <- extract_ci_values(previous_ci, field_vect)

    # Keep only pixels valid in BOTH weeks
    valid_idx <- !is.na(current_values) & !is.na(previous_values) &
      is.finite(current_values) & is.finite(previous_values)
    current_clean <- current_values[valid_idx]
    previous_clean <- previous_values[valid_idx]

    if (length(current_clean) > 10) {
      # Calculate CI change
      ci_change <- current_clean - previous_clean

      # Detect rapid growth (potential weeds): threshold 2.0 CI units
      rapid_growth_pixels <- sum(ci_change > 2.0)
      total_pixels <- length(ci_change)
      rapid_growth_pct <- (rapid_growth_pixels / total_pixels) * 100

      # Classify weed risk: Low <10%, Moderate 10-25%, High >25%
      weed_risk <- dplyr::case_when(
        rapid_growth_pct < 10 ~ "Low",
        rapid_growth_pct < 25 ~ "Moderate",
        TRUE ~ "High"
      )

      field_results <- rbind(field_results, data.frame(
        field = field_name,
        sub_field = sub_field_name,
        weed_risk_level = weed_risk,
        rapid_growth_pct = rapid_growth_pct,
        rapid_growth_pixels = rapid_growth_pixels,
        # NOTE(review): ifelse(is.na(x), NA, x) is equivalent to x
        field_age_days = ifelse(is.na(field_age), NA, field_age)
      ))
    } else {
      # Not enough valid data, fill with NA row
      field_results <- rbind(field_results, data.frame(
        field = field_name,
        sub_field = sub_field_name,
        weed_risk_level = NA_character_,
        rapid_growth_pct = NA_real_,
        rapid_growth_pixels = NA_real_,
        field_age_days = ifelse(is.na(field_age), NA, field_age)
      ))
    }
  }

  # Summarize results (fields with NA risk are dropped by the merge below)
  weed_summary <- field_results %>%
    dplyr::group_by(weed_risk_level) %>%
    dplyr::summarise(field_count = n(), .groups = 'drop') %>%
    dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1))

  # Ensure all risk levels are represented (including canopy closed)
  all_levels <- data.frame(weed_risk_level = c("Low", "Moderate", "High", "Canopy closed - Low weed risk"))
  weed_summary <- merge(all_levels, weed_summary, all.x = TRUE)
  weed_summary$field_count[is.na(weed_summary$field_count)] <- 0
  weed_summary$percent[is.na(weed_summary$percent)] <- 0

  return(list(summary = weed_summary, field_results = field_results))
}
#' Calculate Gap Filling Score KPI (placeholder)
#'
#' Placeholder metric: the share of a field's pixels falling below that
#' field's own 25th CI percentile is used as a stand-in "gap" score and
#' classified as Minimal / Moderate / Significant.
#'
#' @param ci_raster Current week CI raster
#' @param field_boundaries Field boundaries
#' @return List with summary data frame and field-level results data frame
calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
  safe_log("Calculating Gap Filling Score KPI (placeholder)")

  # Accept either sf or SpatVector boundaries
  boundaries_vect <- if (inherits(field_boundaries, "SpatVector")) {
    field_boundaries
  } else {
    terra::vect(field_boundaries)
  }

  field_results <- data.frame()

  for (idx in seq_len(nrow(field_boundaries))) {
    current_field <- field_boundaries$field[idx]
    current_sub_field <- field_boundaries$sub_field[idx]
    boundary <- boundaries_vect[idx]

    # Pull CI values inside this boundary and drop NA / non-finite pixels
    raw_values <- extract_ci_values(ci_raster, boundary)
    usable <- raw_values[!is.na(raw_values) & is.finite(raw_values)]

    if (length(usable) > 1) {
      # Share of pixels under the field's own 25th percentile acts as the
      # placeholder gap indicator
      q25_threshold <- quantile(usable, 0.25)
      gap_score <- (sum(usable < q25_threshold) / length(usable)) * 100

      # Classify gap severity
      if (gap_score < 10) {
        gap_level <- "Minimal"
      } else if (gap_score < 25) {
        gap_level <- "Moderate"
      } else {
        gap_level <- "Significant"
      }

      row <- data.frame(
        field = current_field,
        sub_field = current_sub_field,
        gap_level = gap_level,
        gap_score = gap_score,
        mean_ci = mean(usable),
        q25_ci = q25_threshold
      )
    } else {
      # Too few pixels for a meaningful score: record an NA row
      row <- data.frame(
        field = current_field,
        sub_field = current_sub_field,
        gap_level = NA_character_,
        gap_score = NA_real_,
        mean_ci = NA_real_,
        q25_ci = NA_real_
      )
    }

    field_results <- rbind(field_results, row)
  }

  # Field counts and percentages per gap class
  gap_summary <- field_results %>%
    dplyr::group_by(gap_level) %>%
    dplyr::summarise(field_count = n(), .groups = 'drop') %>%
    dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1))

  return(list(summary = gap_summary, field_results = field_results))
}
# 3. KPI Export and Formatting Functions
# -------------------------------------

#' Create summary tables for report front page
#'
#' Renames the columns of each KPI summary to presentation-ready headers
#' and collects them in a single named list.
#'
#' @param kpi_results List containing all KPI results
#' @return List of formatted summary tables
create_summary_tables <- function(kpi_results) {
  tables <- list()

  # 1. Field Uniformity Summary Table
  tables$field_uniformity_summary <- dplyr::rename(
    kpi_results$field_uniformity_summary,
    `Uniformity Level` = uniformity_level, Count = count, Percent = percent
  )

  # 2. Farm-wide Area Change Summary
  tables$area_change_summary <- dplyr::rename(
    kpi_results$area_change,
    `Change Type` = change_type, Hectares = hectares, Percent = percent
  )

  # 3. TCH Forecasted Summary
  tables$tch_forecasted_summary <- dplyr::rename(
    kpi_results$tch_forecasted,
    `Field Groups` = field_groups, Count = count, Value = value
  )

  # 4. Growth Decline Index Summary
  tables$growth_decline_summary <- dplyr::rename(
    kpi_results$growth_decline,
    `Risk Level` = risk_level, Count = count, Percent = percent
  )

  # 5. Weed Presence Score Summary
  tables$weed_presence_summary <- dplyr::rename(
    kpi_results$weed_presence,
    `Weed Risk Level` = weed_risk_level, `Field Count` = field_count, Percent = percent
  )

  # 6. Gap Filling Score Summary
  tables$gap_filling_summary <- dplyr::rename(
    kpi_results$gap_filling,
    `Gap Level` = gap_level, `Field Count` = field_count, Percent = percent
  )

  return(tables)
}
#' Create detailed field-by-field table for report end section
#'
#' Builds one row per field starting from the uniformity results and
#' left-joining size, yield forecast, gap score, decline risk, Moran's I
#' and weed risk from the other KPI field-level tables.
#'
#' NOTE(review): several fallback branches below fill missing values with
#' runif()/sample() — i.e. random fabricated data. Only the Moran's I
#' placeholder is seeded (set.seed(123)); the Decline/Weed placeholders
#' are nondeterministic between runs. Confirm whether these placeholders
#' should be replaced by NA before the table is shown to customers.
#'
#' @param kpi_results List containing all KPI results
#' @param field_boundaries_sf Field boundaries (sf or SpatVector)
#' @return Data frame with field-by-field KPI details
create_field_detail_table <- function(kpi_results, field_boundaries_sf = NULL) {

  # Define risk levels for consistent use
  risk_levels <- c("Low", "Moderate", "High", "Very-high")
  weed_levels <- c("Low", "Moderate", "High")

  # Start with field uniformity as base (has all fields)
  field_details <- kpi_results$field_uniformity %>%
    dplyr::select(field, sub_field, field_id, uniformity_level, mean_ci, cv_value) %>%
    dplyr::rename(
      Field = field,
      `Sub Field` = sub_field,
      `Field ID` = field_id,
      `Growth Uniformity` = uniformity_level,
      `Mean CI` = mean_ci,
      `CV Value` = cv_value
    )

  # Since subfield = field in this system, aggregate by field to avoid duplicates
  # Take the first subfield for each field (they should be equivalent)
  field_details <- field_details %>%
    dplyr::group_by(Field) %>%
    dplyr::slice(1) %>% # Take first row for each field
    dplyr::ungroup() %>%
    dplyr::select(-`Sub Field`, -`Field ID`) # Remove subfield columns since they're redundant

  # Add field size - calculate from actual geometry
  if (!is.null(field_boundaries_sf)) {
    # Convert to sf if it's SpatVector
    if (inherits(field_boundaries_sf, "SpatVector")) {
      field_boundaries_sf <- sf::st_as_sf(field_boundaries_sf)
    }

    # Calculate actual areas in hectares
    # NOTE(review): assumes the geometry column is literally named
    # "geometry" — verify against the boundary files in use.
    field_areas <- field_boundaries_sf %>%
      dplyr::mutate(area_ha = as.numeric(sf::st_area(geometry)) / 10000) %>%
      sf::st_drop_geometry() %>%
      dplyr::group_by(field) %>%
      dplyr::summarise(area_ha = sum(area_ha), .groups = "drop") %>%
      dplyr::rename(Field = field, `Field Size (ha)` = area_ha) %>%
      dplyr::mutate(`Field Size (ha)` = round(`Field Size (ha)`, 1))

    # Join with field_details
    field_details <- field_details %>%
      dplyr::left_join(field_areas, by = "Field")
  } else {
    # Fallback to placeholder if boundaries not provided
    field_details$`Field Size (ha)` <- NA_real_
  }

  # Add yield prediction from TCH forecasted field results
  # Only include predictions for fields that are mature (>= 240 days)
  if (!is.null(kpi_results$tch_forecasted_field_results) && nrow(kpi_results$tch_forecasted_field_results) > 0) {
    yield_data <- kpi_results$tch_forecasted_field_results %>%
      dplyr::select(field, yield_forecast_t_ha) %>%
      dplyr::rename(`Yield Forecast (t/ha)` = yield_forecast_t_ha)
    field_details <- dplyr::left_join(field_details, yield_data, by = c("Field" = "field"))
    # Keep NAs as NA for fields that are too young to predict
  } else {
    # No predictions available, set all to NA
    field_details$`Yield Forecast (t/ha)` <- NA_real_
  }

  # Add gap presence score from gap filling field results (aggregate by field)
  if (!is.null(kpi_results$gap_filling_field_results) && nrow(kpi_results$gap_filling_field_results) > 0) {
    gap_data <- kpi_results$gap_filling_field_results %>%
      dplyr::group_by(field) %>%
      dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE)) %>% # Average across subfields
      dplyr::rename(`Gap Score` = gap_score)
    field_details <- dplyr::left_join(field_details, gap_data, by = c("Field" = "field"))
  } else {
    # Placeholder gap scores
    # NOTE(review): random fabricated values, not seeded
    field_details$`Gap Score` <- round(runif(nrow(field_details), 5, 25), 1)
  }

  # Add growth decline risk from growth decline field results (aggregate by field)
  if (!is.null(kpi_results$growth_decline_field_results) && nrow(kpi_results$growth_decline_field_results) > 0) {
    decline_data <- kpi_results$growth_decline_field_results %>%
      dplyr::group_by(field) %>%
      dplyr::summarise(risk_level = dplyr::first(risk_level)) %>% # Take first risk level (should be consistent)
      dplyr::rename(`Decline Risk` = risk_level)
    field_details <- dplyr::left_join(field_details, decline_data, by = c("Field" = "field"))
  } else {
    # Placeholder risk levels
    # NOTE(review): random fabricated values, not seeded
    field_details$`Decline Risk` <- sample(risk_levels, nrow(field_details),
                                           prob = c(0.6, 0.25, 0.1, 0.05), replace = TRUE)
  }

  # Add Moran's I spatial autocorrelation from growth decline field results (aggregate by field)
  if (!is.null(kpi_results$growth_decline_field_results) && nrow(kpi_results$growth_decline_field_results) > 0) {
    moran_data <- kpi_results$growth_decline_field_results %>%
      dplyr::group_by(field) %>%
      dplyr::summarise(morans_i = mean(morans_i, na.rm = TRUE)) %>% # Average Moran's I across subfields
      dplyr::rename(`Moran's I` = morans_i)
    field_details <- dplyr::left_join(field_details, moran_data, by = c("Field" = "field"))
  } else {
    # Placeholder Moran's I values (typically range from -1 to 1)
    set.seed(123)
    field_details$`Moran's I` <- round(runif(nrow(field_details), -0.3, 0.8), 3)
  }

  # Add weed risk from weed presence field results (aggregate by field)
  if (!is.null(kpi_results$weed_presence_field_results) && nrow(kpi_results$weed_presence_field_results) > 0) {
    weed_data <- kpi_results$weed_presence_field_results %>%
      dplyr::group_by(field) %>%
      dplyr::summarise(weed_risk_level = dplyr::first(weed_risk_level)) %>% # Take first weed risk (should be consistent)
      dplyr::rename(`Weed Risk` = weed_risk_level)
    field_details <- dplyr::left_join(field_details, weed_data, by = c("Field" = "field"))
  } else {
    # Placeholder weed levels
    # NOTE(review): random fabricated values, not seeded
    field_details$`Weed Risk` <- sample(weed_levels, nrow(field_details),
                                        prob = c(0.7, 0.2, 0.1), replace = TRUE)
  }

  # Fill any remaining NAs with defaults (but keep yield forecast as NA)
  # NOTE(review): these fills also draw random values — see header note
  field_details$`Gap Score`[is.na(field_details$`Gap Score`)] <- 0.0
  field_details$`Decline Risk`[is.na(field_details$`Decline Risk`)] <- sample(risk_levels, sum(is.na(field_details$`Decline Risk`)), replace = TRUE,
                                                                              prob = c(0.6, 0.25, 0.1, 0.05))
  field_details$`Weed Risk`[is.na(field_details$`Weed Risk`)] <- sample(weed_levels, sum(is.na(field_details$`Weed Risk`)), replace = TRUE,
                                                                        prob = c(0.7, 0.2, 0.1))

  # Reorder columns for better presentation
  field_details <- field_details %>%
    dplyr::select(`Field`, `Field Size (ha)`, `Growth Uniformity`,
                  `Yield Forecast (t/ha)`, `Gap Score`, `Decline Risk`, `Weed Risk`,
                  `Moran's I`, `Mean CI`, `CV Value`)

  return(field_details)
}
#' Create field-specific KPI text for individual field pages
#'
#' Looks up one field in the uniformity results and formats a one-line
#' summary of its uniformity class, CV, mean CI and a coarse growth
#' status.
#'
#' @param field_id Field identifier (e.g., "A_1")
#' @param kpi_results List containing all KPI results
#' @return Character string with field-specific KPI summary
create_field_kpi_text <- function(field_id, kpi_results) {

  uniformity_tbl <- kpi_results$field_uniformity

  # Locate the requested field; NA field_ids never match
  match_rows <- which(uniformity_tbl$field_id == field_id)
  if (length(match_rows) == 0) {
    return(paste("Field", field_id, ": Data not available"))
  }

  # Use the first matching row's metrics
  first_match <- match_rows[1]
  uniformity <- uniformity_tbl$uniformity_level[first_match]
  mean_ci <- round(uniformity_tbl$mean_ci[first_match], 2)
  cv <- round(uniformity_tbl$cv_value[first_match], 3)

  # Coarse status from mean CI (ifelse keeps NA-safe behaviour)
  status <- ifelse(mean_ci > 3, "Good Growth",
                   ifelse(mean_ci > 1.5, "Moderate Growth", "Monitoring Required"))

  paste0(
    "Field ", field_id, " KPIs: ",
    "Uniformity: ", uniformity, " (CV=", cv, "), ",
    "Mean CI: ", mean_ci, ", ",
    "Status: ", status
  )
}
#' Export all KPI data in multiple formats for R Markdown integration
#'
#' Writes the KPI summary tables, the field-detail table, the raw KPI
#' results, per-KPI field-level tables (RDS + CSV) and a human-readable
#' metadata file into `output_dir`.
#'
#' @param kpi_results List containing all KPI results (including
#'   `metadata` and `field_boundaries_sf`)
#' @param output_dir Directory to save exported files (created if absent)
#' @param project_name Project name for file naming
#' @return List of file paths for exported data
export_kpi_data <- function(kpi_results, output_dir, project_name = "smartcane") {

  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }

  exported_files <- list()
  # Week-based suffix keeps one export set per report week
  # (a date-based suffix was previously computed here but never used;
  # removed as dead code)
  week_suffix <- paste0("week", sprintf("%02d_%d", kpi_results$metadata$current_week, kpi_results$metadata$year))

  # 1. Export summary tables for front page
  summary_tables <- create_summary_tables(kpi_results)
  summary_file <- file.path(output_dir, paste0(project_name, "_kpi_summary_tables_", week_suffix, ".rds"))
  saveRDS(summary_tables, summary_file)
  exported_files$summary_tables <- summary_file

  # 2. Export detailed field table for end section
  # Note: field_boundaries_sf should be passed from calculate_all_kpis()
  field_details <- create_field_detail_table(kpi_results, kpi_results$field_boundaries_sf)
  detail_file <- file.path(output_dir, paste0(project_name, "_field_details_", week_suffix, ".rds"))
  saveRDS(field_details, detail_file)
  exported_files$field_details <- detail_file

  # 3. Export raw KPI results
  raw_file <- file.path(output_dir, paste0(project_name, "_kpi_raw_", week_suffix, ".rds"))
  saveRDS(kpi_results, raw_file)
  exported_files$raw_kpi_data <- raw_file

  # 4. Export field-level KPI tables
  field_tables_dir <- file.path(output_dir, "field_level")
  if (!dir.exists(field_tables_dir)) {
    dir.create(field_tables_dir, recursive = TRUE)
  }

  # Map export name -> element of kpi_results holding the field-level table
  field_kpi_names <- c(
    "field_uniformity" = "field_uniformity",
    "area_change" = "area_change_field_results",
    "tch_forecasted" = "tch_forecasted_field_results",
    "growth_decline" = "growth_decline_field_results",
    "weed_presence" = "weed_presence_field_results",
    "gap_filling" = "gap_filling_field_results"
  )

  for (kpi_name in names(field_kpi_names)) {
    field_data <- kpi_results[[field_kpi_names[kpi_name]]]
    if (!is.null(field_data) && nrow(field_data) > 0) {
      # RDS file
      rds_file <- file.path(field_tables_dir, paste0(kpi_name, "_field_results_", week_suffix, ".rds"))
      saveRDS(field_data, rds_file)
      exported_files[[paste0(kpi_name, "_field_rds")]] <- rds_file

      # CSV file
      csv_file <- file.path(field_tables_dir, paste0(kpi_name, "_field_results_", week_suffix, ".csv"))
      readr::write_csv(field_data, csv_file)
      exported_files[[paste0(kpi_name, "_field_csv")]] <- csv_file
    }
  }

  # 5. Export CSV versions for manual inspection
  # (this and the following section were both mislabelled "# 4." before)
  csv_dir <- file.path(output_dir, "csv")
  if (!dir.exists(csv_dir)) {
    dir.create(csv_dir, recursive = TRUE)
  }

  # Export each summary table as CSV
  for (table_name in names(summary_tables)) {
    csv_file <- file.path(csv_dir, paste0(table_name, "_", week_suffix, ".csv"))
    readr::write_csv(summary_tables[[table_name]], csv_file)
    exported_files[[paste0(table_name, "_csv")]] <- csv_file
  }

  # Export field details as CSV
  field_csv <- file.path(csv_dir, paste0("field_details_", week_suffix, ".csv"))
  readr::write_csv(field_details, field_csv)
  exported_files$field_details_csv <- field_csv

  # 6. Create metadata file
  metadata_file <- file.path(output_dir, paste0(project_name, "_kpi_metadata_", week_suffix, ".txt"))

  metadata_text <- paste0(
    "SmartCane KPI Export Metadata\n",
    "=============================\n",
    "Project: ", project_name, "\n",
    "Report Date: ", kpi_results$metadata$report_date, "\n",
    "Current Week: ", kpi_results$metadata$current_week, "\n",
    "Previous Week: ", kpi_results$metadata$previous_week, "\n",
    "Year: ", kpi_results$metadata$year, "\n",
    "Total Fields: ", kpi_results$metadata$total_fields, "\n",
    "Calculation Time: ", kpi_results$metadata$calculation_time, "\n\n",

    "Exported Files:\n",
    "- Summary Tables: ", basename(summary_file), "\n",
    "- Field Details: ", basename(detail_file), "\n",
    "- Raw KPI Data: ", basename(raw_file), "\n",
    "- Field-Level Tables: field_level/ directory\n",
    "- CSV Directory: csv/\n\n",

    "KPI Summary:\n",
    "- Field Uniformity: ", nrow(summary_tables$field_uniformity_summary), " categories\n",
    "- Area Change: ", nrow(summary_tables$area_change_summary), " change types\n",
    "- TCH Forecasted: ", nrow(summary_tables$tch_forecasted_summary), " field groups\n",
    "- Growth Decline: ", nrow(summary_tables$growth_decline_summary), " risk levels\n",
    "- Weed Presence: ", nrow(summary_tables$weed_presence_summary), " risk levels\n",
    "- Gap Filling: ", nrow(summary_tables$gap_filling_summary), " gap levels\n"
  )

  writeLines(metadata_text, metadata_file)
  exported_files$metadata <- metadata_file

  safe_log(paste("KPI data exported to", output_dir))
  safe_log(paste("Total files exported:", length(exported_files)))

  return(exported_files)
}
Main KPI Calculation Function +# ------------------------------- + +#' Calculate all KPIs for a given date +#' @param report_date Date to calculate KPIs for (default: today) +#' @param output_dir Directory to save KPI results +#' @param field_boundaries_sf Field boundaries (sf or SpatVector) +#' @param harvesting_data Harvesting data with tonnage_ha +#' @param cumulative_CI_vals_dir Directory with cumulative CI data +#' @param weekly_CI_mosaic Directory with weekly CI mosaics +#' @param reports_dir Directory for output reports +#' @param project_dir Project directory name +#' @return List containing all KPI results +calculate_all_kpis <- function(report_date = Sys.Date(), + output_dir = NULL, + field_boundaries_sf, + harvesting_data, + cumulative_CI_vals_dir, + weekly_CI_mosaic, + reports_dir, + project_dir) { + safe_log("=== STARTING KPI CALCULATION ===") + safe_log(paste("Report date:", report_date)) + + # Calculate week numbers + weeks <- calculate_week_numbers(report_date) + safe_log(paste("Current week:", weeks$current_week, "Previous week:", weeks$previous_week)) + + # Load weekly mosaics + current_ci <- load_weekly_ci_mosaic(weeks$current_week, weeks$year, weekly_CI_mosaic) + previous_ci <- load_weekly_ci_mosaic(weeks$previous_week, weeks$previous_year, weekly_CI_mosaic) + + if (is.null(current_ci)) { + stop("Current week CI mosaic is required but not found") + } + + # Check if field boundaries are loaded + if (is.null(field_boundaries_sf)) { + stop("Field boundaries not loaded. Check parameters_project.R initialization.") + } + + # Calculate all KPIs + kpi_results <- list() + + # 1. Field Uniformity Summary + uniformity_result <- calculate_field_uniformity_kpi(current_ci, field_boundaries_sf) + kpi_results$field_uniformity <- uniformity_result$field_results + kpi_results$field_uniformity_summary <- uniformity_result$summary + + # 2. 
Farm-wide Area Change Summary + area_change_result <- calculate_area_change_kpi(current_ci, previous_ci, field_boundaries_sf) + kpi_results$area_change <- area_change_result$summary + kpi_results$area_change_field_results <- area_change_result$field_results + + # 3. TCH Forecasted + tch_result <- calculate_tch_forecasted_kpi(field_boundaries_sf, harvesting_data, cumulative_CI_vals_dir) + kpi_results$tch_forecasted <- tch_result$summary + kpi_results$tch_forecasted_field_results <- tch_result$field_results + + # 4. Growth Decline Index + growth_decline_result <- calculate_growth_decline_kpi(current_ci, previous_ci, field_boundaries_sf) + kpi_results$growth_decline <- growth_decline_result$summary + kpi_results$growth_decline_field_results <- growth_decline_result$field_results + + # 5. Weed Presence Score (with field age filtering) + weed_presence_result <- calculate_weed_presence_kpi(current_ci, previous_ci, field_boundaries_sf, + harvesting_data = harvesting_data, + cumulative_CI_vals_dir = cumulative_CI_vals_dir) + kpi_results$weed_presence <- weed_presence_result$summary + kpi_results$weed_presence_field_results <- weed_presence_result$field_results + + # 6. 
Gap Filling Score + gap_filling_result <- calculate_gap_filling_kpi(current_ci, field_boundaries_sf) + kpi_results$gap_filling <- gap_filling_result$summary + kpi_results$gap_filling_field_results <- gap_filling_result$field_results + + # Add metadata and field boundaries for later use + kpi_results$metadata <- list( + report_date = report_date, + current_week = weeks$current_week, + previous_week = weeks$previous_week, + year = weeks$year, + calculation_time = Sys.time(), + total_fields = nrow(field_boundaries_sf) + ) + + # Store field_boundaries_sf for use in export_kpi_data + kpi_results$field_boundaries_sf <- field_boundaries_sf + + # Save results if output directory specified + if (!is.null(output_dir)) { + if (!dir.exists(output_dir)) { + dir.create(output_dir, recursive = TRUE) + } + + # Export KPI data in multiple formats for R Markdown integration + exported_files <- export_kpi_data(kpi_results, output_dir, project_dir) + kpi_results$exported_files <- exported_files + + # Also save raw results + week_suffix <- paste0("week", sprintf("%02d_%d", weeks$current_week, weeks$year)) + output_file <- file.path(output_dir, paste0("kpi_results_", week_suffix, ".rds")) + saveRDS(kpi_results, output_file) + safe_log(paste("KPI results saved to:", output_file)) + } + + safe_log("=== KPI CALCULATION COMPLETED ===") + return(kpi_results) +} diff --git a/r_app/80_weekly_stats_utils.R b/r_app/80_weekly_stats_utils.R index b989292..baaf1b1 100644 --- a/r_app/80_weekly_stats_utils.R +++ b/r_app/80_weekly_stats_utils.R @@ -13,6 +13,41 @@ # Used by: 80_calculate_kpis.R, run_full_pipeline.R, other reporting scripts # ============================================================================ +# ============================================================================ +# WEEK/YEAR CALCULATION HELPERS (Consistent across all scripts) +# ============================================================================ + +#' Calculate week and year for a given lookback offset +#' 
This function handles ISO 8601 week numbering with proper year wrapping +#' when crossing year boundaries (e.g., week 01/2026 -> week 52/2025) +#' +#' @param current_week ISO week number (1-53) +#' @param current_year ISO week year (from format(..., "%G")) +#' @param offset_weeks Number of weeks to go back (0 = current week, 1 = previous week, etc.) +#' +#' @return List with: week (ISO week number), year (ISO week year) +#' +#' @details +#' This is the authoritative week/year calculation function. +#' Used by: +#' - load_historical_field_data() - to find RDS/CSV files for 4-week lookback +#' - Script 80 main - to calculate previous week with year wrapping +#' - Any other script needing to walk backwards through weeks +#' +#' Example: Week 01/2026, offset=1 -> returns list(week=52, year=2025) +calculate_target_week_and_year <- function(current_week, current_year, offset_weeks = 0) { + target_week <- current_week - offset_weeks + target_year <- current_year + + # Handle wrapping: when going back from week 1, wrap to week 52 of previous year + while (target_week < 1) { + target_week <- target_week + 52 + target_year <- target_year - 1 + } + + return(list(week = target_week, year = target_year)) +} + # ============================================================================ # TILE-AWARE HELPER FUNCTIONS # ============================================================================ @@ -720,16 +755,19 @@ load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_bo return(stats_df) } -load_historical_field_data <- function(project_dir, current_week, reports_dir, num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL) { +load_historical_field_data <- function(project_dir, current_week, current_year, reports_dir, num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL) { historical_data <- list() loaded_weeks <- c() missing_weeks <- c() for (lookback in 0:(num_weeks - 1)) { - target_week <- current_week - lookback - if (target_week < 
1) target_week <- target_week + 52 + # Calculate target week and year using authoritative helper (handles year boundaries) + target <- calculate_target_week_and_year(current_week, current_year, offset_weeks = lookback) + target_week <- target$week + target_year <- target$year - csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", target_week), ".csv") + # Construct filename with BOTH week and year (proper ISO format) + csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", target_week, target_year), ".csv") csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename) if (file.exists(csv_path)) { @@ -737,15 +775,16 @@ load_historical_field_data <- function(project_dir, current_week, reports_dir, n data <- read_csv(csv_path, show_col_types = FALSE) historical_data[[lookback + 1]] <- list( week = target_week, + year = target_year, data = data ) - loaded_weeks <- c(loaded_weeks, target_week) + loaded_weeks <- c(loaded_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) }, error = function(e) { - message(paste(" Warning: Could not load week", target_week, ":", e$message)) - missing_weeks <<- c(missing_weeks, target_week) + message(paste(" Warning: Could not load week", target_week, "/", target_year, ":", e$message)) + missing_weeks <<- c(missing_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) }) } else { - missing_weeks <- c(missing_weeks, target_week) + missing_weeks <- c(missing_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) } } @@ -788,7 +827,7 @@ load_historical_field_data <- function(project_dir, current_week, reports_dir, n expected_weeks <- data.frame( date = target_dates, week = as.numeric(format(target_dates, "%V")), - year = as.numeric(format(target_dates, "%Y")), + year = as.numeric(format(target_dates, "%G")), stringsAsFactors = FALSE ) expected_weeks <- unique(expected_weeks) diff --git a/r_app/90_CI_report_with_kpis_simple.Rmd 
b/r_app/90_CI_report_with_kpis_simple.Rmd index 710d794..2353bab 100644 --- a/r_app/90_CI_report_with_kpis_simple.Rmd +++ b/r_app/90_CI_report_with_kpis_simple.Rmd @@ -120,7 +120,8 @@ date_suffix <- format(as.Date(report_date), "%Y%m%d") # Calculate current week from report_date using ISO 8601 week numbering current_week <- as.numeric(format(as.Date(report_date), "%V")) -week_suffix <- paste0("week", current_week) +current_year <- as.numeric(format(as.Date(report_date), "%G")) +week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year) # Candidate filenames we expect (exact and common variants) expected_summary_names <- c( diff --git a/r_app/91_CI_report_with_kpis_Angata.Rmd b/r_app/91_CI_report_with_kpis_Angata.Rmd index f50bb23..fd0875b 100644 --- a/r_app/91_CI_report_with_kpis_Angata.Rmd +++ b/r_app/91_CI_report_with_kpis_Angata.Rmd @@ -112,15 +112,22 @@ safe_log(paste("report_date params:", params$report_date)) safe_log(paste("mail_day variable:", mail_day)) ``` -```{r load_kpi_data, message=FALSE, warning=FALSE, include=FALSE} +```{r load_kpi_data, message=FALSE, warning=FALSE} ## SIMPLE KPI LOADING - robust lookup with fallbacks + +# First, show working directory for debugging +cat("\n=== DEBUG: R Markdown Working Directory ===\n") +cat(paste("getwd():", getwd(), "\n")) +cat(paste("Expected knit_dir from R Markdown:", knitr::opts_knit$get("root.dir"), "\n\n")) + # Primary expected directory inside the laravel storage kpi_data_dir <- file.path("..", "laravel_app", "storage", "app", project_dir, "reports", "kpis") date_suffix <- format(as.Date(report_date), "%Y%m%d") # Calculate current week from report_date using ISO 8601 week numbering current_week <- as.numeric(format(as.Date(report_date), "%V")) -week_suffix <- paste0("week", current_week) +current_year <- as.numeric(format(as.Date(report_date), "%G")) +week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year) # Candidate filenames we expect (exact and common 
variants) expected_summary_names <- c( @@ -171,30 +178,69 @@ if (is.null(summary_file) || is.null(field_details_file)) { # Final checks and load with safe error messages kpi_files_exist <- FALSE + +# Debug: log what we're looking for +cat("\n=== KPI LOADING DEBUG ===\n") +cat(paste("Working directory:", getwd(), "\n")) +cat(paste("project_dir:", project_dir, "\n")) +cat(paste("report_date:", report_date, "\n")) +cat(paste("Calculated week:", current_week, "year:", current_year, "\n")) +cat(paste("Looking for KPI files in:", kpi_data_dir, "\n")) +cat(paste("Directory exists:", dir.exists(kpi_data_dir), "\n")) +cat(paste("Expected filenames to match:\n")) +for (name in expected_summary_names) cat(paste(" -", name, "\n")) + +# List what's actually in the directory +if (dir.exists(kpi_data_dir)) { + actual_files <- list.files(kpi_data_dir, pattern = ".*\\.rds$", full.names = FALSE) + cat(paste("Files in KPI directory (", length(actual_files), " total):\n")) + for (f in actual_files) cat(paste(" -", f, "\n")) +} else { + cat("KPI directory does NOT exist!\n") +} + if (!is.null(summary_file) && file.exists(summary_file)) { - safe_log(paste("Loading KPI summary from:", summary_file)) - summary_data <- tryCatch(readRDS(summary_file), error = function(e) { safe_log(paste("Failed to read summary RDS:", e$message), "ERROR"); NULL }) + cat(paste("✓ FOUND summary file:", summary_file, "\n")) + cat(paste(" File size:", file.size(summary_file), "bytes\n")) + summary_data <- tryCatch(readRDS(summary_file), error = function(e) { cat(paste("ERROR reading RDS:", e$message, "\n")); NULL }) - # Convert new RDS structure (field_analysis, field_analysis_summary) to legacy summary_tables format if (!is.null(summary_data)) { + cat(paste(" ✓ Loaded successfully. 
Class:", class(summary_data), "\n")) + if (is.list(summary_data)) { + cat(paste(" List names:", paste(names(summary_data), collapse = ", "), "\n")) + } + + # Convert new RDS structure (field_analysis, field_analysis_summary) to legacy summary_tables format if (is.list(summary_data) && !is.data.frame(summary_data)) { # New format from 09_field_analysis_weekly.R - just pass it through if ("field_analysis_summary" %in% names(summary_data)) { + cat(" ✓ Found field_analysis_summary in list - will use this structure\n") # Keep the new structure intact - combined_kpi_table will use it directly kpi_files_exist <- TRUE } else { + cat(" ! Old format detected\n") # Old format - keep as is summary_tables <- summary_data if (!is.null(summary_tables)) kpi_files_exist <- TRUE } } else { + cat(" ! Data frame format\n") # Data frame format or direct tables summary_tables <- summary_data if (!is.null(summary_tables)) kpi_files_exist <- TRUE } + } else { + cat(" ✗ Failed to load RDS - summary_data is NULL\n") } } else { safe_log(paste("KPI summary file not found. 
Searched:", paste(expected_summary_names, collapse=", ")), "WARNING") + safe_log(paste("Attempted directory:", kpi_data_dir), "WARNING") + # Try searching the entire workspace as fallback + files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "kpi.*\\.rds$", recursive = TRUE, full.names = TRUE) + safe_log(paste("Found", length(files), "KPI RDS files in workspace"), "INFO") + if (length(files) > 0) { + safe_log(paste("Available files:", paste(basename(files), collapse = ", ")), "INFO") + } } if (!is.null(field_details_file) && file.exists(field_details_file)) { @@ -508,125 +554,185 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na ## Executive Summary - Key Performance Indicators -```{r combined_kpi_table, echo=FALSE} +```{r combined_kpi_table, echo=TRUE} +# Debug: check what variables exist +cat("\n=== DEBUG: combined_kpi_table chunk ===\n") +cat(paste("exists('summary_data'):", exists("summary_data"), "\n")) +cat(paste("exists('kpi_files_exist'):", exists("kpi_files_exist"), "\n")) +if (exists("kpi_files_exist")) { + cat(paste("kpi_files_exist value:", kpi_files_exist, "\n")) +} +if (exists("summary_data")) { + cat(paste("summary_data class:", class(summary_data), "\n")) + if (is.list(summary_data)) { + cat(paste("summary_data names:", paste(names(summary_data), collapse = ", "), "\n")) + cat(paste("has field_analysis_summary:", "field_analysis_summary" %in% names(summary_data), "\n")) + } +} else { + cat("summary_data DOES NOT EXIST in this chunk's environment!\n") +} +cat("\n") + # Create summary KPI table from field_analysis_summary data # This shows: Phases, Triggers, Area Change, and Total Farm acreage -if (exists("summary_data") && !is.null(summary_data) && "field_analysis_summary" %in% names(summary_data)) { - field_analysis_summary <- summary_data$field_analysis_summary +if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) { + # Load field analysis 
data field_analysis_df <- summary_data$field_analysis + # If field_analysis_summary is NULL or doesn't exist, create it from field_analysis_df + if (is.null(summary_data$field_analysis_summary) || !("field_analysis_summary" %in% names(summary_data)) || + !is.data.frame(summary_data$field_analysis_summary)) { + cat("\nNote: field_analysis_summary not in RDS, creating from field_analysis...\n") + + # Create summary by aggregating by Status_Alert and Phase categories + # This groups fields by their phase and status to show distribution + phase_summary <- field_analysis_df %>% + filter(!is.na(Phase)) %>% + group_by(Phase) %>% + summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>% + mutate(Category = Phase) %>% + select(Category, Acreage) + + # Try to create Status trigger summary - use Status_Alert if available, otherwise use empty + trigger_summary <- tryCatch({ + field_analysis_df %>% + filter(!is.na(Status_Alert), Status_Alert != "") %>% + group_by(Status_Alert) %>% + summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>% + mutate(Category = Status_Alert) %>% + select(Category, Acreage) + }, error = function(e) { + cat("Could not create trigger summary:", e$message, "\n") + data.frame(Category = character(), Acreage = numeric()) + }) + + # Combine into summary + field_analysis_summary <- bind_rows(phase_summary, trigger_summary) + + cat(paste("Created summary with", nrow(field_analysis_summary), "category rows\n")) + + } else { + # Use existing summary from RDS + field_analysis_summary <- summary_data$field_analysis_summary + } + # Phase names and trigger names to extract from summary phase_names <- c("Germination", "Tillering", "Grand Growth", "Maturation", "Unknown Phase") trigger_names <- c("Harvest Ready", "Strong Recovery", "Growth On Track", "Stress Detected", - "Germination Complete", "Germination Started", "No Active Trigger") + "Germination Complete", "Germination Started", "No Active Trigger", + "Ready for harvest-check", 
"Strong decline in crop health", "Harvested/bare") # Extract phase distribution - match on category names directly - phase_rows <- field_analysis_summary %>% - filter(Category %in% phase_names) %>% - select(Category, Acreage) %>% - mutate(KPI_Group = "PHASE DISTRIBUTION", .before = 1) + if (!is.null(field_analysis_summary) && nrow(field_analysis_summary) > 0) { + phase_rows <- field_analysis_summary %>% + filter(Category %in% phase_names) %>% + select(Category, Acreage) %>% + mutate(KPI_Group = "PHASE DISTRIBUTION", .before = 1) - # Extract status triggers - match on category names directly - trigger_rows <- field_analysis_summary %>% - filter(Category %in% trigger_names) %>% - select(Category, Acreage) %>% - mutate(KPI_Group = "STATUS TRIGGERS", .before = 1) + # Extract status triggers - match on category names directly + trigger_rows <- field_analysis_summary %>% + filter(Category %in% trigger_names) %>% + select(Category, Acreage) %>% + mutate(KPI_Group = "STATUS TRIGGERS", .before = 1) - # Calculate area change from field_analysis data - total_acreage <- sum(field_analysis_df$Acreage, na.rm = TRUE) - - # Parse Weekly_ci_change to determine improvement/decline - parse_ci_change <- function(change_str) { - if (is.na(change_str)) return(NA) - match <- regexpr("^[+-]?[0-9]+\\.?[0-9]*", change_str) - if (match > 0) { - return(as.numeric(substr(change_str, match, attr(match, "match.length")))) + # Calculate area change from field_analysis data + total_acreage <- sum(field_analysis_df$Acreage, na.rm = TRUE) + + # Parse Weekly_ci_change to determine improvement/decline + parse_ci_change <- function(change_str) { + if (is.na(change_str)) return(NA) + match <- regexpr("^[+-]?[0-9]+\\.?[0-9]*", change_str) + if (match > 0) { + return(as.numeric(substr(change_str, match, attr(match, "match.length")))) + } + return(NA) } - return(NA) + + field_analysis_df$ci_change_numeric <- sapply(field_analysis_df$Weekly_ci_change, parse_ci_change) + + improving_acreage <- 
sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric > 0.2], na.rm = TRUE) + declining_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric < -0.2], na.rm = TRUE) + stable_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric >= -0.2 & + field_analysis_df$ci_change_numeric <= 0.2], na.rm = TRUE) + + improving_pct <- ifelse(total_acreage > 0, round(improving_acreage / total_acreage * 100, 1), 0) + declining_pct <- ifelse(total_acreage > 0, round(declining_acreage / total_acreage * 100, 1), 0) + stable_pct <- ifelse(total_acreage > 0, round(stable_acreage / total_acreage * 100, 1), 0) + + # Calculate percentages for phases and triggers + phase_pcts <- phase_rows %>% + mutate(Percent = paste0(round(Acreage / total_acreage * 100, 1), "%")) + + trigger_pcts <- trigger_rows %>% + mutate(Percent = paste0(round(Acreage / total_acreage * 100, 1), "%")) + + area_change_rows <- data.frame( + KPI_Group = "AREA CHANGE", + Category = c("Improving", "Stable", "Declining"), + Acreage = c(round(improving_acreage, 2), round(stable_acreage, 2), round(declining_acreage, 2)), + Percent = c(paste0(improving_pct, "%"), paste0(stable_pct, "%"), paste0(declining_pct, "%")), + stringsAsFactors = FALSE + ) + + # Total farm row + total_row <- data.frame( + KPI_Group = "TOTAL FARM", + Category = "Total Acreage", + Acreage = round(total_acreage, 2), + Percent = "100%", + stringsAsFactors = FALSE + ) + + # Combine all rows with percentages for all + combined_df <- bind_rows( + phase_pcts, + trigger_pcts, + area_change_rows, + total_row + ) + + # Create grouped display where KPI_Group name appears only once per group + combined_df <- combined_df %>% + group_by(KPI_Group) %>% + mutate( + KPI_display = if_else(row_number() == 1, KPI_Group, "") + ) %>% + ungroup() %>% + select(KPI_display, Category, Acreage, Percent) + + # Render as flextable with merged cells + ft <- flextable(combined_df) %>% + set_header_labels( + KPI_display = "KPI 
Category", + Category = "Item", + Acreage = "Acreage", + Percent = "Percent" + ) %>% + merge_v(j = "KPI_display") %>% + autofit() + + # Add horizontal lines after each KPI group (at cumulative row positions) + # Calculate row positions: row 1 is header, then data rows follow + phase_count <- nrow(phase_rows) + trigger_count <- nrow(trigger_rows) + area_count <- nrow(area_change_rows) + + # Add lines after phases, triggers, and area change groups (before totals) + if (phase_count > 0) { + ft <- ft %>% hline(i = phase_count, border = officer::fp_border(width = 1)) + } + if (trigger_count > 0) { + ft <- ft %>% hline(i = phase_count + trigger_count, border = officer::fp_border(width = 1)) + } + if (area_count > 0) { + ft <- ft %>% hline(i = phase_count + trigger_count + area_count, border = officer::fp_border(width = 1)) + } + + ft + } else { + cat("KPI summary data available but is empty/invalid.\n") } - - field_analysis_df$ci_change_numeric <- sapply(field_analysis_df$Weekly_ci_change, parse_ci_change) - - improving_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric > 0.2], na.rm = TRUE) - declining_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric < -0.2], na.rm = TRUE) - stable_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric >= -0.2 & - field_analysis_df$ci_change_numeric <= 0.2], na.rm = TRUE) - - improving_pct <- ifelse(total_acreage > 0, round(improving_acreage / total_acreage * 100, 1), 0) - declining_pct <- ifelse(total_acreage > 0, round(declining_acreage / total_acreage * 100, 1), 0) - stable_pct <- ifelse(total_acreage > 0, round(stable_acreage / total_acreage * 100, 1), 0) - - # Calculate percentages for phases and triggers - phase_pcts <- phase_rows %>% - mutate(Percent = paste0(round(Acreage / total_acreage * 100, 1), "%")) - - trigger_pcts <- trigger_rows %>% - mutate(Percent = paste0(round(Acreage / total_acreage * 100, 1), "%")) - - area_change_rows <- data.frame( - 
KPI_Group = "AREA CHANGE", - Category = c("Improving", "Stable", "Declining"), - Acreage = c(round(improving_acreage, 2), round(stable_acreage, 2), round(declining_acreage, 2)), - Percent = c(paste0(improving_pct, "%"), paste0(stable_pct, "%"), paste0(declining_pct, "%")), - stringsAsFactors = FALSE - ) - - # Total farm row - total_row <- data.frame( - KPI_Group = "TOTAL FARM", - Category = "Total Acreage", - Acreage = round(total_acreage, 2), - Percent = "100%", - stringsAsFactors = FALSE - ) - - # Combine all rows with percentages for all - combined_df <- bind_rows( - phase_pcts, - trigger_pcts, - area_change_rows, - total_row - ) - - # Create grouped display where KPI_Group name appears only once per group - combined_df <- combined_df %>% - group_by(KPI_Group) %>% - mutate( - KPI_display = if_else(row_number() == 1, KPI_Group, "") - ) %>% - ungroup() %>% - select(KPI_display, Category, Acreage, Percent) - - # Render as flextable with merged cells - ft <- flextable(combined_df) %>% - set_header_labels( - KPI_display = "KPI Category", - Category = "Item", - Acreage = "Acreage", - Percent = "Percent" - ) %>% - merge_v(j = "KPI_display") %>% - autofit() - - # Add horizontal lines after each KPI group (at cumulative row positions) - # Calculate row positions: row 1 is header, then data rows follow - phase_count <- nrow(phase_rows) - trigger_count <- nrow(trigger_rows) - area_count <- nrow(area_change_rows) - - # Add lines after phases, triggers, and area change groups (before totals) - if (phase_count > 0) { - ft <- ft %>% hline(i = phase_count, border = officer::fp_border(width = 1)) - } - if (trigger_count > 0) { - ft <- ft %>% hline(i = phase_count + trigger_count, border = officer::fp_border(width = 1)) - } - if (area_count > 0) { - ft <- ft %>% hline(i = phase_count + trigger_count + area_count, border = officer::fp_border(width = 1)) - } - - ft } else { cat("KPI summary data not available.\n") } @@ -679,11 +785,11 @@ if (cloud_coverage_available && 
!is.null(per_field_cloud_coverage)) { if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) { field_analysis_table <- summary_data$field_analysis - # Extract fields with status triggers (non-null) + # Extract fields with status alerts (non-null) - use Status_Alert column (not Status_trigger) alerts_data <- field_analysis_table %>% - filter(!is.na(Status_trigger), Status_trigger != "") %>% - select(Field_id, Status_trigger) %>% - rename(Field = Field_id, Alert = Status_trigger) + filter(!is.na(Status_Alert), Status_Alert != "") %>% + select(Field_id, Status_Alert) %>% + rename(Field = Field_id, Alert = Status_Alert) if (nrow(alerts_data) > 0) { # Format alert messages for display @@ -717,21 +823,50 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na # The report renders KPI tables and field summaries from that data ``` -```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE} -# Load field boundaries from parameters +```{r load_field_boundaries, message=FALSE, warning=FALSE, include=FALSE} +# Load field boundaries from parameters (with fallback if geometry is invalid) +field_boundaries_loaded <- FALSE + tryCatch({ - AllPivots0 <- field_boundaries_sf %>% - dplyr::filter(!is.na(field), !is.na(sub_field)) # Filter out NA field names - safe_log("Successfully loaded field boundaries") - - # Prepare merged field list for use in summaries - AllPivots_merged <- AllPivots0 %>% - dplyr::filter(!is.na(field), !is.na(sub_field)) %>% # Filter out NA field names - dplyr::group_by(field) %>% - dplyr::summarise(.groups = 'drop') - + # Try to load and validate the field boundaries + if (exists("field_boundaries_sf") && !is.null(field_boundaries_sf)) { + # Try to filter - this will trigger geometry validation + AllPivots0 <- field_boundaries_sf %>% + dplyr::filter(!is.na(field), !is.na(sub_field)) + + # If successful, also create merged field list + AllPivots_merged <- AllPivots0 %>% + 
dplyr::filter(!is.na(field), !is.na(sub_field)) %>% + dplyr::group_by(field) %>% + dplyr::summarise(.groups = 'drop') + + field_boundaries_loaded <- TRUE + safe_log("✓ Successfully loaded field boundaries") + } else { + safe_log("⚠ field_boundaries_sf not found in environment") + } }, error = function(e) { - stop("Error loading field boundaries: ", e$message) + # If geometry is invalid, try to fix or skip + safe_log(paste("⚠ Error loading field boundaries:", e$message), "WARNING") + safe_log("Attempting to fix invalid geometries using st_make_valid()...", "WARNING") + + tryCatch({ + # Try to repair invalid geometries + field_boundaries_sf_fixed <<- sf::st_make_valid(field_boundaries_sf) + AllPivots0 <<- field_boundaries_sf_fixed %>% + dplyr::filter(!is.na(field), !is.na(sub_field)) + + AllPivots_merged <<- AllPivots0 %>% + dplyr::filter(!is.na(field), !is.na(sub_field)) %>% + dplyr::group_by(field) %>% + dplyr::summarise(.groups = 'drop') + + field_boundaries_loaded <<- TRUE + safe_log("✓ Fixed invalid geometries and loaded field boundaries") + }, error = function(e2) { + safe_log(paste("⚠ Could not repair geometries:", e2$message), "WARNING") + safe_log("Continuing without field boundary data", "WARNING") + }) }) ``` \newpage diff --git a/r_app/parameters_project.R b/r_app/parameters_project.R index d366f41..07a5565 100644 --- a/r_app/parameters_project.R +++ b/r_app/parameters_project.R @@ -43,6 +43,75 @@ get_client_type <- function(project_name) { return(client_type) } +# 2b. 
Client-specific KPI configurations +# ---------------------------------------- +# Defines which KPIs and outputs are required for each client type +# This enables Script 80 to conditionally calculate only relevant metrics +# +# Structure: +# - kpi_calculations: Vector of KPI types to calculate for this client +# - outputs: Vector of output formats to generate (determines RDS/Excel naming) +# - requires_harvest_data: Boolean - whether Script 31 harvest predictions are needed +# - script_90_compatible: Boolean - whether output should match Script 90 expectations +# - script_91_compatible: Boolean - whether output should match Script 91 expectations +# +CLIENT_TYPE_CONFIGS <- list( + + # Aura (agronomic_support): Farm-level KPI summaries for weekly reports to agronomists + "agronomic_support" = list( + client_type = "agronomic_support", + description = "Farm-level KPI summaries for agronomic decision support", + kpi_calculations = c( + "field_uniformity", + "area_change", + "tch_forecasted", + "growth_decline", + "weed_presence", + "gap_filling" + ), + outputs = c( + "kpi_summary_tables", # Summary statistics for Script 90 report front page + "field_details" # Detailed field table for Script 90 report end section + ), + requires_harvest_data = FALSE, # Script 31 predictions not used + script_90_compatible = TRUE, # Output format matches Script 90 expectations + script_91_compatible = FALSE + ), + + # Cane Supply (cane_supply): Per-field analysis with harvest timing prediction + "cane_supply" = list( + client_type = "cane_supply", + description = "Per-field analysis with harvest prediction and phase assignment", + kpi_calculations = c( + "per_field_analysis", # Use 80_weekly_stats_utils.R for field-level statistics + "phase_assignment", # Assign growth phases (Germination, Tillering, Grand Growth, Maturation) + "harvest_prediction", # Include Script 31 harvest age predictions if available + "status_triggers" # Calculate field status (Normal, Monitor, Alert, Urgent) + 
), + outputs = c( + "field_analysis_excel", # Excel file with per-field metrics + "field_analysis_summary" # Summary RDS for Script 91 report + ), + requires_harvest_data = TRUE, # harvest.xlsx is required for phase assignment + script_90_compatible = FALSE, + script_91_compatible = TRUE + ) +) + +#' Get KPI configuration for a specific client type +#' @param client_type Character string of client type (e.g., "agronomic_support", "cane_supply") +#' @return List containing configuration for that client type +get_client_kpi_config <- function(client_type) { + config <- CLIENT_TYPE_CONFIGS[[client_type]] + + if (is.null(config)) { + warning(sprintf("Client type '%s' not in CLIENT_TYPE_CONFIGS - defaulting to 'cane_supply'", client_type)) + return(CLIENT_TYPE_CONFIGS[["cane_supply"]]) + } + + return(config) +} + # 3. Smart detection for tile-based vs single-file mosaic approach # ---------------------------------------------------------------- detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) { diff --git a/r_app/run_full_pipeline.R b/r_app/run_full_pipeline.R index 50bc56c..41de090 100644 --- a/r_app/run_full_pipeline.R +++ b/r_app/run_full_pipeline.R @@ -30,9 +30,8 @@ # ============================================================================== # *** EDIT THESE VARIABLES *** -end_date <- as.Date("2025-12-31") # or specify: as.Date("2026-01-27") , Sys.Date() -offset <- 7 # days to look back -project_dir <- "aura" # project name: "esa", "aura", "angata", "chemba" +end_date <- as.Date("2026-01-07") # or specify: as.Date("2026-01-27") , Sys.Date() +project_dir <- "angata" # project name: "esa", "aura", "angata", "chemba" data_source <- if (project_dir == "angata") "merged_tif_8b" else "merged_tif" force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist # *************************** @@ -42,12 +41,233 @@ source("r_app/parameters_project.R") client_type <- get_client_type(project_dir) cat(sprintf("\nProject: 
%s → Client Type: %s\n", project_dir, client_type)) +# ============================================================================== +# DETECT WHICH DATA SOURCE IS AVAILABLE (merged_tif vs merged_tif_8b) +# ============================================================================== +# Check which merged_tif folder actually has files for this project +laravel_storage_dir <- file.path("laravel_app", "storage", "app", project_dir) +merged_tif_path <- file.path(laravel_storage_dir, "merged_tif") +merged_tif_8b_path <- file.path(laravel_storage_dir, "merged_tif_8b") + +data_source_used <- "merged_tif_8b" # Default +if (dir.exists(merged_tif_path)) { + tif_files <- list.files(merged_tif_path, pattern = "\\.tif$") + if (length(tif_files) > 0) { + data_source_used <- "merged_tif" + cat(sprintf("[INFO] Detected data source: %s (%d TIF files)\n", data_source_used, length(tif_files))) + } else if (dir.exists(merged_tif_8b_path)) { + tif_files_8b <- list.files(merged_tif_8b_path, pattern = "\\.tif$") + if (length(tif_files_8b) > 0) { + data_source_used <- "merged_tif_8b" + cat(sprintf("[INFO] Detected data source: %s (%d TIF files)\n", data_source_used, length(tif_files_8b))) + } + } +} else if (dir.exists(merged_tif_8b_path)) { + tif_files_8b <- list.files(merged_tif_8b_path, pattern = "\\.tif$") + if (length(tif_files_8b) > 0) { + data_source_used <- "merged_tif_8b" + cat(sprintf("[INFO] Detected data source: %s (%d TIF files)\n", data_source_used, length(tif_files_8b))) + } +} + +# ============================================================================== +# DETERMINE REPORTING WINDOW (auto-calculated based on KPI requirements) +# ============================================================================== +# Script 80 (KPIs) needs N weeks of historical data for trend analysis and reporting +# We calculate this automatically based on client type +reporting_weeks_needed <- 4 # Default: KPIs need current week + 3 weeks history for trends +offset <- 
(reporting_weeks_needed - 1) * 7 # Convert weeks to days
+
+cat(sprintf("\n[INFO] Reporting window: %d weeks (%d days of data)\n", reporting_weeks_needed, offset))
+cat(sprintf(" Running week: %02d / %d\n", as.numeric(format(end_date, "%V")), as.numeric(format(end_date, "%G"))))
+cat(sprintf(" Date range: %s to %s\n", format(end_date - offset, "%Y-%m-%d"), format(end_date, "%Y-%m-%d")))
+
 # Format dates
 end_date_str <- format(as.Date(end_date), "%Y-%m-%d")
 
 # Track success of pipeline
 pipeline_success <- TRUE
 
+# ==============================================================================
+# EARLY PREREQ CHECK: Verify mosaic requirements BEFORE any downloads
+# ==============================================================================
+# This determines if we need more weeks of data than the initial reporting window
+# Run this BEFORE downloads so we can download ONLY missing dates upfront
+cat("\n========== EARLY CHECK: MOSAIC REQUIREMENTS FOR REPORTING WINDOW ==========\n")
+
+# Detect mosaic mode early (before full checking section)
+detect_mosaic_mode_early <- function(project_dir) {
+  weekly_tile_max <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max")
+  if (dir.exists(weekly_tile_max)) {
+    subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE)
+    grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
+    if (length(grid_patterns) > 0) {
+      return("tiled")
+    }
+  }
+
+  weekly_mosaic <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic")
+  if (dir.exists(weekly_mosaic)) {
+    files <- list.files(weekly_mosaic, pattern = "^week_.*\\.tif$")
+    if (length(files) > 0) {
+      return("single-file")
+    }
+  }
+
+  return("unknown")
+}
+
+mosaic_mode <- detect_mosaic_mode_early(project_dir)
+
+# Check what mosaics we NEED
+weeks_needed <- data.frame()
+for (weeks_back in 0:(reporting_weeks_needed - 1)) {
+  check_date <- end_date - (weeks_back * 7)
+  week_num <- as.numeric(format(check_date, "%V"))
+  
year_num <- as.numeric(format(check_date, "%G")) # %G = ISO week year (not calendar year %Y) + weeks_needed <- rbind(weeks_needed, data.frame(week = week_num, year = year_num, date = check_date)) +} + +missing_weeks_dates <- c() # Will store the earliest date of missing weeks +earliest_missing_date <- end_date # Start with end_date, go back if needed +missing_weeks <- data.frame() # Track ALL missing weeks for later processing by Script 40 + +for (i in 1:nrow(weeks_needed)) { + week_num <- weeks_needed[i, "week"] + year_num <- weeks_needed[i, "year"] + check_date <- weeks_needed[i, "date"] + + # Pattern must be flexible to match both: + # - Single-file: week_51_2025.tif + # - Tiled: week_51_2025_01.tif, week_51_2025_02.tif, etc. + week_pattern_check <- sprintf("week_%02d_%d", week_num, year_num) + files_this_week <- c() + + if (mosaic_mode == "tiled") { + mosaic_dir_check <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5") + if (dir.exists(mosaic_dir_check)) { + files_this_week <- list.files(mosaic_dir_check, pattern = week_pattern_check) + } + } else if (mosaic_mode == "single-file") { + mosaic_dir_check <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic") + if (dir.exists(mosaic_dir_check)) { + files_this_week <- list.files(mosaic_dir_check, pattern = week_pattern_check) + } + } + + cat(sprintf(" Week %02d/%d (%s): %s\n", week_num, year_num, format(check_date, "%Y-%m-%d"), + if(length(files_this_week) > 0) "✓ EXISTS" else "✗ MISSING")) + + # If week is missing, track its date range for downloading/processing + if (length(files_this_week) == 0) { + week_start <- check_date - 6 # Monday of that week + if (week_start < earliest_missing_date) { + earliest_missing_date <- week_start + } + # Add to missing_weeks dataframe - Script 40 will process these + missing_weeks <- rbind(missing_weeks, data.frame(week = week_num, year = year_num, week_end_date = check_date)) + } +} + +# Calculate dynamic offset for 
preprocessing: only process from earliest missing week to end_date +if (earliest_missing_date < end_date) { + cat(sprintf("\n[INFO] Missing week(s) detected - need to fill from %s onwards\n", format(earliest_missing_date, "%Y-%m-%d"))) + + # Adjust offset to cover only the gap (from earliest missing week to end_date) + dynamic_offset <- as.numeric(end_date - earliest_missing_date) + cat(sprintf("[INFO] Will download/process ONLY missing dates: %d days (from %s to %s)\n", + dynamic_offset, format(earliest_missing_date, "%Y-%m-%d"), format(end_date, "%Y-%m-%d"))) + + # Use dynamic offset for data generation scripts (10, 20, 30, 40) + # But Script 80 still uses full reporting_weeks_needed offset for KPI calculations + data_generation_offset <- dynamic_offset + force_data_generation <- TRUE +} else { + cat("\n[INFO] ✓ All required mosaics exist - using normal reporting window\n") + data_generation_offset <- offset # Use default reporting window offset + force_data_generation <- FALSE +} + +# ============================================================================== +# CHECK KPI REQUIREMENTS FOR REPORTING WINDOW +# ============================================================================== +# Scripts 90 (Word report) and 91 (Excel report) require KPIs for full reporting window +# Script 80 ALWAYS runs and will CALCULATE missing KPIs, so this is just for visibility +cat("\n========== KPI REQUIREMENT CHECK ==========\n") +cat(sprintf("KPIs needed for reporting: %d weeks (current week + %d weeks history)\n", + reporting_weeks_needed, reporting_weeks_needed - 1)) + +# Determine KPI directory based on client type +# - agronomic_support: field_level/ (6 farm-level KPIs) +# - cane_supply: field_analysis/ (per-field analysis) +kpi_subdir <- if (client_type == "agronomic_support") "field_level" else "field_analysis" +kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", kpi_subdir) + +# Create KPI directory if it doesn't exist +if 
(!dir.exists(kpi_dir)) { + dir.create(kpi_dir, recursive = TRUE, showWarnings = FALSE) + cat(sprintf("[KPI_DIR_CREATED] Created directory: %s\n", kpi_dir)) +} + +kpis_needed <- data.frame() +kpis_missing_count <- 0 + +# Debug: Check if KPI directory exists +if (dir.exists(kpi_dir)) { + cat(sprintf("[KPI_DIR_EXISTS] %s\n", kpi_dir)) + all_kpi_files <- list.files(kpi_dir) + cat(sprintf("[KPI_DEBUG] Total files in directory: %d\n", length(all_kpi_files))) + if (length(all_kpi_files) > 0) { + cat(sprintf("[KPI_DEBUG] Sample files: %s\n", paste(head(all_kpi_files, 3), collapse = ", "))) + } +} else { + cat(sprintf("[KPI_DIR_MISSING] Directory does not exist: %s\n", kpi_dir)) +} + +for (weeks_back in 0:(reporting_weeks_needed - 1)) { + check_date <- end_date - (weeks_back * 7) + week_num <- as.numeric(format(check_date, "%V")) + year_num <- as.numeric(format(check_date, "%G")) + + # Check for any KPI file from that week - use more flexible pattern matching + week_pattern <- sprintf("week%02d_%d", week_num, year_num) + kpi_files_this_week <- c() + if (dir.exists(kpi_dir)) { + # List all files and manually check for pattern match + all_files <- list.files(kpi_dir, pattern = "\\.csv$|\\.json$") + kpi_files_this_week <- all_files[grepl(week_pattern, all_files, fixed = TRUE)] + + # Debug output for first week + if (weeks_back == 0) { + cat(sprintf("[KPI_DEBUG_W%02d_%d] Pattern: '%s' | Found: %d files\n", + week_num, year_num, week_pattern, length(kpi_files_this_week))) + if (length(kpi_files_this_week) > 0) { + cat(sprintf("[KPI_DEBUG_W%02d_%d] Files: %s\n", + week_num, year_num, paste(kpi_files_this_week, collapse = ", "))) + } + } + } + + has_kpis <- length(kpi_files_this_week) > 0 + kpis_needed <- rbind(kpis_needed, data.frame( + week = week_num, + year = year_num, + date = check_date, + has_kpis = has_kpis + )) + + if (!has_kpis) { + kpis_missing_count <- kpis_missing_count + 1 + } + + cat(sprintf(" Week %02d/%d (%s): %s\n", + week_num, year_num, format(check_date, 
"%Y-%m-%d"), + if(has_kpis) "✓ EXISTS" else "✗ WILL BE CALCULATED")) +} + +cat(sprintf("\nKPI Summary: %d/%d weeks exist, %d week(s) will be calculated by Script 80\n", + nrow(kpis_needed) - kpis_missing_count, nrow(kpis_needed), kpis_missing_count)) + # Define conditional script execution based on client type # Client types: # - "cane_supply": Runs Scripts 20,21,22,23,30,31,80,91 (full pipeline with Excel output) @@ -137,31 +357,14 @@ cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files))) # For now, just note that CSV is time-dependent, not a good skip indicator cat("Script 21: CSV file exists but gets overwritten - will run if Script 20 runs\n") -# Check Script 40 outputs (mosaics) - check for THIS WEEK's mosaic specifically -# (important for Script 80, which needs the current week's mosaic) -current_week <- as.numeric(format(end_date, "%V")) -current_year <- as.numeric(format(end_date, "%Y")) -week_mosaic_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year) +# Check Script 40 outputs (mosaics) - check which weeks are missing (not just current week) +# The early check section already identified missing_weeks, so we use that +skip_40 <- (nrow(missing_weeks) == 0 && !force_rerun) # Only skip if NO missing weeks AND not forcing rerun +cat(sprintf("Script 40: %d missing week(s) to create\n", nrow(missing_weeks))) -mosaic_files <- c() -if (mosaic_mode == "tiled") { - # For tile-based: look in weekly_tile_max/{grid_size}/ for this week's file - weekly_tile_max <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max") - subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE) - grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) - if (length(grid_patterns) > 0) { - mosaic_dir <- file.path(weekly_tile_max, grid_patterns[1]) - mosaic_files <- list.files(mosaic_dir, pattern = week_mosaic_pattern) - } -} else if (mosaic_mode == "single-file") { - # For single-file: look in 
weekly_mosaic/ for this week's file - mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic") - mosaic_files <- list.files(mosaic_dir, pattern = week_mosaic_pattern) -} -cat(sprintf("Script 40: %d mosaic files exist for week %02d\n", length(mosaic_files), current_week)) - -# Check Script 80 outputs (KPIs in reports/kpis/field_stats) -kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats") +# Check Script 80 outputs (KPIs in reports/kpis/{field_level|field_analysis}) +# Use the same kpi_subdir logic to find the right directory +kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", kpi_subdir) kpi_files <- if (dir.exists(kpi_dir)) { list.files(kpi_dir, pattern = "\\.csv$|\\.json$") } else { @@ -170,15 +373,15 @@ kpi_files <- if (dir.exists(kpi_dir)) { cat(sprintf("Script 80: %d KPI files exist\n", length(kpi_files))) # Determine if scripts should run based on outputs AND client type -skip_10 <- (length(tiles_dates) > 0 && !force_rerun) # Always check tiles +skip_10 <- (length(tiles_dates) > 0 && !force_rerun && !force_data_generation) # Force Script 10 if missing weeks detected skip_20 <- FALSE # Script 20 ALWAYS runs for all client types - processes new downloaded data skip_21 <- skip_cane_supply_only # Script 21 runs ONLY for cane_supply clients (CI→CSV conversion) skip_22 <- skip_cane_supply_only # Script 22 runs ONLY for cane_supply clients skip_23 <- skip_cane_supply_only # Script 23 runs ONLY for cane_supply clients skip_30 <- FALSE # Script 30 ALWAYS runs for all client types skip_31 <- skip_cane_supply_only # Script 31 runs ONLY for cane_supply clients -skip_40 <- (length(mosaic_files) > 0 && !force_rerun) # Always check mosaics -skip_80 <- FALSE # Script 80 ALWAYS runs for all client types - calculates KPIs for current week +skip_40 <- (nrow(missing_weeks) == 0 && !force_rerun) # Skip Script 40 only if NO missing weeks +skip_80 <- (kpis_missing_count 
== 0 && !force_rerun) # Skip Script 80 only if ALL KPIs exist AND not forcing rerun cat("\nSkipping decisions (based on outputs AND client type):\n") cat(sprintf(" Script 10: %s\n", if(skip_10) "SKIP" else "RUN")) @@ -188,7 +391,7 @@ cat(sprintf(" Script 22: %s %s\n", if(skip_22) "SKIP" else "RUN", if(skip_cane_ cat(sprintf(" Script 23: %s %s\n", if(skip_23) "SKIP" else "RUN", if(skip_cane_supply_only) "(non-cane_supply client)" else "")) cat(sprintf(" Script 30: %s (always runs)\n", if(skip_30) "SKIP" else "RUN")) cat(sprintf(" Script 31: %s %s\n", if(skip_31) "SKIP" else "RUN", if(skip_cane_supply_only) "(non-cane_supply client)" else "")) -cat(sprintf(" Script 40: %s %s\n", if(skip_40) "SKIP" else "RUN", if(!skip_40) "" else "(mosaics exist)")) +cat(sprintf(" Script 40: %s (looping through %d missing weeks)\n", if(skip_40) "SKIP" else "RUN", nrow(missing_weeks))) cat(sprintf(" Script 80: %s (always runs)\n", if(skip_80) "SKIP" else "RUN")) cat(sprintf(" Script 90: %s %s\n", if(!run_legacy_report) "SKIP" else "RUN", if(run_legacy_report) "(agronomic_support legacy report)" else "")) cat(sprintf(" Script 91: %s %s\n", if(!run_modern_report) "SKIP" else "RUN", if(run_modern_report) "(cane_supply modern report)" else "")) @@ -216,7 +419,7 @@ tryCatch({ } # Find missing dates in the window - start_date <- end_date - offset + start_date <- end_date - data_generation_offset date_seq <- seq(start_date, end_date, by = "day") target_dates <- format(date_seq, "%Y-%m-%d") @@ -278,12 +481,14 @@ if (pipeline_success && !skip_10) { tryCatch({ # CRITICAL: Save global variables before sourcing Script 10 (it overwrites end_date, offset, etc.) 
saved_end_date <- end_date - saved_offset <- offset + saved_offset <- offset # Use FULL offset for tiling (not dynamic_offset) saved_project_dir <- project_dir saved_data_source <- data_source # Set environment variables for the script (Script 10 uses these for filtering) assign("PROJECT", project_dir, envir = .GlobalEnv) + assign("end_date", end_date, envir = .GlobalEnv) + assign("offset", offset, envir = .GlobalEnv) # Full reporting window # Suppress verbose per-date output, show only summary sink(nullfile()) @@ -321,6 +526,7 @@ if (pipeline_success && !skip_20) { tryCatch({ # Run Script 20 via system() to pass command-line args just like from terminal # Arguments: end_date offset project_dir data_source + # Use FULL offset so CI extraction covers entire reporting window (not just new data) cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/20_ci_extraction.R "%s" %d "%s" "%s"', format(end_date, "%Y-%m-%d"), offset, project_dir, data_source) result <- system(cmd) @@ -382,9 +588,10 @@ if (pipeline_success && !skip_30) { cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n") tryCatch({ # Run Script 30 via system() to pass command-line args just like from terminal - # Script 30 expects: project_dir as first argument only - cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/30_interpolate_growth_model.R "%s"', - project_dir) + # Script 30 expects: project_dir data_source as arguments + # Pass the same data_source that Script 20 is using + cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/30_interpolate_growth_model.R "%s" "%s"', + project_dir, data_source_used) result <- system(cmd) if (result != 0) { @@ -442,85 +649,253 @@ if (pipeline_success && !skip_31) { } # ============================================================================== -# SCRIPT 40: MOSAIC CREATION +# SCRIPT 40: MOSAIC CREATION (LOOP THROUGH MISSING WEEKS) # 
==============================================================================
 if (pipeline_success && !skip_40) {
   cat("\n========== RUNNING SCRIPT 40: MOSAIC CREATION ==========\n")
-  tryCatch({
-    # Run Script 40 via system() to pass command-line args just like from terminal
-    # Use full path and --vanilla to avoid renv/environment issues
-    # Arguments: end_date offset project_dir (file_name_tif is auto-generated from dates)
-    cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/40_mosaic_creation.R "%s" %d "%s"',
-                   format(end_date, "%Y-%m-%d"), offset, project_dir)
-    result <- system(cmd)
+  
+  # If there are missing weeks, process them one at a time
+  if (nrow(missing_weeks) > 0) {
+    cat(sprintf("Found %d missing week(s) - running Script 40 once per week\n\n", nrow(missing_weeks)))
     
-    if (result != 0) {
-      stop("Script 40 exited with error code:", result)
+    # Loop through missing weeks in chronological order (oldest week first)
+    for (week_idx in nrow(missing_weeks):1) {
+      missing_week <- missing_weeks[week_idx, ]
+      week_num <- missing_week$week
+      year_num <- missing_week$year
+      week_end_date <- as.Date(missing_week$week_end_date)
+      
+      cat(sprintf("--- Creating mosaic for week %02d/%d (ending %s) ---\n", 
+                  week_num, year_num, format(week_end_date, "%Y-%m-%d")))
+      
+      tryCatch({
+        # Run Script 40 with offset=7 (one week only) for this specific week
+        # The end_date is the last day of the week, and offset=7 covers the full 7-day week
+        # IMPORTANT: Pass data_source so Script 40 uses the correct folder (not auto-detect which can be wrong)
+        cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/40_mosaic_creation.R "%s" 7 "%s" "" "%s"',
+                       format(week_end_date, "%Y-%m-%d"), project_dir, data_source)
+        result <- system(cmd)
+        
+        if (result != 0) {
+          stop("Script 40 exited with error code:", result)
+        }
+        
+        # Verify mosaic was created for this specific week
+        mosaic_created <- FALSE
+        if (mosaic_mode == 
"tiled") { + mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5") + if (dir.exists(mosaic_dir)) { + week_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year_num) + mosaic_files <- list.files(mosaic_dir, pattern = week_pattern) + mosaic_created <- length(mosaic_files) > 0 + } + } else { + mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic") + if (dir.exists(mosaic_dir)) { + week_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year_num) + mosaic_files <- list.files(mosaic_dir, pattern = week_pattern) + mosaic_created <- length(mosaic_files) > 0 + } + } + + if (mosaic_created) { + cat(sprintf("✓ Week %02d/%d mosaic created successfully\n\n", week_num, year_num)) + } else { + cat(sprintf("✓ Week %02d/%d processing completed (verify output)\n\n", week_num, year_num)) + } + }, error = function(e) { + cat(sprintf("✗ Error creating mosaic for week %02d/%d: %s\n", week_num, year_num, e$message), "\n") + pipeline_success <<- FALSE + }) } - # Verify mosaic output - check based on mosaic mode (tiled vs single-file) - mosaic_files_check <- c() - if (mosaic_mode == "tiled") { - mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5") - if (dir.exists(mosaic_dir)) { - # Check for current week's file only - current_week_check <- as.numeric(format(end_date, "%V")) - current_year_check <- as.numeric(format(end_date, "%Y")) - week_pattern_check <- sprintf("week_%02d_%d\\.tif", current_week_check, current_year_check) - mosaic_files_check <- list.files(mosaic_dir, pattern = week_pattern_check) - } - } else { - mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic") - if (dir.exists(mosaic_dir)) { - # Check for current week's file only - current_week_check <- as.numeric(format(end_date, "%V")) - current_year_check <- as.numeric(format(end_date, "%Y")) - week_pattern_check <- sprintf("week_%02d_%d\\.tif", current_week_check, 
current_year_check) - mosaic_files_check <- list.files(mosaic_dir, pattern = week_pattern_check) - } + if (pipeline_success) { + cat(sprintf("✓ Script 40 completed - created all %d missing week mosaics\n", nrow(missing_weeks))) } - - if (length(mosaic_files_check) > 0) { - cat(sprintf("✓ Script 40 completed - created mosaic for week %02d\n", current_week)) - } else { - cat("✓ Script 40 completed\n") - } - }, error = function(e) { - cat("✗ Error in Script 40:", e$message, "\n") - pipeline_success <<- FALSE - }) + } else { + cat("No missing weeks detected - skipping Script 40\n") + skip_40 <- TRUE + } } else if (skip_40) { cat("\n========== SKIPPING SCRIPT 40 (mosaics already created) ==========\n") } # ============================================================================== -# SCRIPT 80: CALCULATE KPIs +# SCRIPT 80: CALCULATE KPIs (LOOP THROUGH REPORTING WINDOW) # ============================================================================== -if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the current week - cat("\n========== RUNNING SCRIPT 80: CALCULATE KPIs ==========\n") - tryCatch({ - # Run Script 80 via system() to pass command-line args just like from terminal - # Use full path and --vanilla to avoid renv/environment issues - cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/80_calculate_kpis.R "%s" %d "%s" "%s"', - format(end_date, "%Y-%m-%d"), offset, project_dir, data_source) - result <- system(cmd) +if (pipeline_success && !skip_80) { + cat("\n========== RUNNING SCRIPT 80: CALCULATE KPIs FOR REPORTING WINDOW ==========\n") + + # Build list of weeks that NEED calculation (missing KPIs) + weeks_to_calculate <- kpis_needed[!kpis_needed$has_kpis, ] # Only weeks WITHOUT KPIs + + if (nrow(weeks_to_calculate) > 0) { + # Sort by date (oldest to newest) for sequential processing + weeks_to_calculate <- weeks_to_calculate[order(weeks_to_calculate$date), ] - if (result != 0) { - stop("Script 80 exited 
with error code:", result) - } + cat(sprintf("Looping through %d missing week(s) in reporting window (from %s back to %s):\n\n", + nrow(weeks_to_calculate), + format(max(weeks_to_calculate$date), "%Y-%m-%d"), + format(min(weeks_to_calculate$date), "%Y-%m-%d"))) - # Verify KPI output - kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats") - if (dir.exists(kpi_dir)) { - files <- list.files(kpi_dir, pattern = "\\.csv$|\\.json$") - cat(sprintf("✓ Script 80 completed - generated %d KPI files\n", length(files))) - } else { - cat("✓ Script 80 completed\n") + tryCatch({ + for (week_idx in 1:nrow(weeks_to_calculate)) { + week_row <- weeks_to_calculate[week_idx, ] + calc_date <- week_row$date + + # Run Script 80 for this specific week with offset=7 (one week only) + # This ensures Script 80 calculates KPIs for THIS week with proper trend data + cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/80_calculate_kpis.R "%s" "%s" %d', + format(calc_date, "%Y-%m-%d"), project_dir, 7) # offset=7 for single week + + cat(sprintf(" [Week %02d/%d] Running Script 80 with end_date=%s...\n", + week_row$week, week_row$year, format(calc_date, "%Y-%m-%d"))) + + result <- system(cmd, ignore.stdout = TRUE, ignore.stderr = TRUE) + + if (result == 0) { + cat(sprintf(" ✓ KPIs calculated for week %02d/%d\n", week_row$week, week_row$year)) + } else { + cat(sprintf(" ✗ Error calculating KPIs for week %02d/%d (exit code: %d)\n", + week_row$week, week_row$year, result)) + } + } + + # Verify total KPI output + kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", kpi_subdir) + if (dir.exists(kpi_dir)) { + files <- list.files(kpi_dir, pattern = "\\.csv$|\\.json$") + cat(sprintf("\n✓ Script 80 loop completed - total %d KPI files in %s/\n", length(files), kpi_subdir)) + } else { + cat("\n✓ Script 80 loop completed\n") + } + }, error = function(e) { + cat("✗ Error in Script 80 loop:", 
e$message, "\n") + pipeline_success <<- FALSE + }) + } else { + cat(sprintf("✓ All %d weeks already have KPIs - skipping calculation\n", nrow(kpis_needed))) + } +} else if (skip_80) { + cat("\n========== SKIPPING SCRIPT 80 (all KPIs already exist) ==========\n") +} + +# ============================================================================== +# VERIFY KPI COMPLETION AFTER SCRIPT 80 +# ============================================================================== +# Recheck if all KPIs are now available (Script 80 should have calculated any missing ones) +cat("\n========== VERIFYING KPI COMPLETION ==========\n") + +kpis_complete <- TRUE +if (dir.exists(kpi_dir)) { + for (weeks_back in 0:(reporting_weeks_needed - 1)) { + check_date <- end_date - (weeks_back * 7) + week_num <- as.numeric(format(check_date, "%V")) + year_num <- as.numeric(format(check_date, "%G")) + + # Check for any KPI file from that week + week_pattern <- sprintf("week%02d_%d", week_num, year_num) + kpi_files_this_week <- list.files(kpi_dir, pattern = week_pattern) + + if (length(kpi_files_this_week) == 0) { + kpis_complete <- FALSE + cat(sprintf(" Week %02d/%d: ✗ KPIs not found\n", week_num, year_num)) } - }, error = function(e) { - cat("✗ Error in Script 80:", e$message, "\n") - pipeline_success <<- FALSE - }) + } +} + +if (kpis_complete) { + cat("✓ All KPIs available - reports can be generated\n") +} else { + cat("⚠ Some KPIs still missing - reports will be skipped\n") +} + +# ============================================================================== +# SCRIPT 90: LEGACY WORD REPORT (agronomic_support clients) +# ============================================================================== +if (pipeline_success && run_legacy_report) { + cat("\n========== RUNNING SCRIPT 90: LEGACY WORD REPORT ==========\n") + + if (!kpis_complete) { + cat("⚠ Skipping Script 90 - KPIs not available for full reporting window\n") + } else { + tryCatch({ + # Script 90 is an RMarkdown file - compile it with 
rmarkdown::render() + output_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports") + + # Ensure output directory exists + if (!dir.exists(output_dir)) { + dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) + } + + output_filename <- sprintf("CI_report_week%02d_%d.docx", + as.numeric(format(end_date, "%V")), + as.numeric(format(end_date, "%G"))) + + # Render the RMarkdown document + rmarkdown::render( + input = "r_app/90_CI_report_with_kpis_simple.Rmd", + output_dir = output_dir, + output_file = output_filename, + params = list( + report_date = format(end_date, "%Y-%m-%d"), + data_dir = project_dir + ), + quiet = TRUE + ) + + cat(sprintf("✓ Script 90 completed - generated Word report: %s\n", output_filename)) + }, error = function(e) { + cat("✗ Error in Script 90:", e$message, "\n") + pipeline_success <<- FALSE + }) + } +} else if (run_legacy_report) { + cat("\n========== SKIPPING SCRIPT 90 (pipeline error or KPIs incomplete) ==========\n") +} + +# ============================================================================== +# SCRIPT 91: MODERN WORD REPORT (cane_supply clients) +# ============================================================================== +if (pipeline_success && run_modern_report) { + cat("\n========== RUNNING SCRIPT 91: MODERN WORD REPORT ==========\n") + + if (!kpis_complete) { + cat("⚠ Skipping Script 91 - KPIs not available for full reporting window\n") + } else { + tryCatch({ + # Script 91 is an RMarkdown file - compile it with rmarkdown::render() + output_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports") + + # Ensure output directory exists + if (!dir.exists(output_dir)) { + dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) + } + + output_filename <- sprintf("CI_report_week%02d_%d.docx", + as.numeric(format(end_date, "%V")), + as.numeric(format(end_date, "%G"))) + + # Render the RMarkdown document + rmarkdown::render( + input = "r_app/91_CI_report_with_kpis_Angata.Rmd", 
+ output_dir = output_dir, + output_file = output_filename, + params = list( + report_date = format(end_date, "%Y-%m-%d"), + data_dir = project_dir + ), + quiet = TRUE + ) + + cat(sprintf("✓ Script 91 completed - generated Word report: %s\n", output_filename)) + }, error = function(e) { + cat("✗ Error in Script 91:", e$message, "\n") + pipeline_success <<- FALSE + }) + } +} else if (run_modern_report) { + cat("\n========== SKIPPING SCRIPT 91 (pipeline error or KPIs incomplete) ==========\n") } # ============================================================================== @@ -535,4 +910,4 @@ if (pipeline_success) { } else { cat("Status: ✗ Pipeline failed - check errors above\n") } -cat("Pipeline sequence: Python Download → R 10 → R 20 → R 21 → R 30 → Python 31 → R 40 → R 80\n") +cat("Pipeline sequence: Python Download → R 10 → R 20 → R 21 → R 30 → Python 31 → R 40 → R 80 → R 90/91\n")