From 8d560ff1555fd49ce67adae6daa2d69253e541dd Mon Sep 17 00:00:00 2001 From: Timon Date: Tue, 3 Feb 2026 17:21:35 +0100 Subject: [PATCH] SC-112 Phase 2 Complete: Restructure Script 80 utilities by client type - Consolidate 80_kpi_utils.R, 80_weekly_stats_utils.R, 80_report_building_utils.R into three client-aware files: - 80_utils_common.R (50+ functions): Shared utilities, constants, and helpers - 80_utils_agronomic_support.R: AURA-specific KPI calculations (6 KPIs) - 80_utils_cane_supply.R: ANGATA placeholder for future expansion - Move all internal helpers (calculate_cv, calculate_change_percentages, calculate_spatial_autocorrelation, extract_ci_values, etc.) to common - Add MORAN_THRESHOLD_* constants to common - Fix parameters_project.R field boundaries path (removed extra 'Data' directory) - Update 80_calculate_kpis.R with conditional client-type sourcing logic - Validate both ANGATA (cane_supply) and AURA (agronomic_support) workflows with comprehensive testing - All 96+ functions accounted for; ready for production use --- r_app/40_mosaic_creation.R | 252 --- r_app/40_mosaic_creation_tile_utils.R | 286 ---- r_app/40_mosaic_creation_utils.R | 779 --------- r_app/80_calculate_kpis.R | 32 +- r_app/80_kpi_utils.R | 1508 ----------------- r_app/80_report_building_utils.R | 258 --- r_app/80_utils_agronomic_support.R | 641 +++++++ r_app/80_utils_cane_supply.R | 210 +++ ...weekly_stats_utils.R => 80_utils_common.R} | 1469 ++++++++-------- r_app/parameters_project.R | 2 +- 10 files changed, 1651 insertions(+), 3786 deletions(-) delete mode 100644 r_app/40_mosaic_creation.R delete mode 100644 r_app/40_mosaic_creation_tile_utils.R delete mode 100644 r_app/40_mosaic_creation_utils.R delete mode 100644 r_app/80_kpi_utils.R delete mode 100644 r_app/80_report_building_utils.R create mode 100644 r_app/80_utils_agronomic_support.R create mode 100644 r_app/80_utils_cane_supply.R rename r_app/{80_weekly_stats_utils.R => 80_utils_common.R} (64%) diff --git 
a/r_app/40_mosaic_creation.R b/r_app/40_mosaic_creation.R deleted file mode 100644 index cb91f48..0000000 --- a/r_app/40_mosaic_creation.R +++ /dev/null @@ -1,252 +0,0 @@ -# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\mosaic_creation.R -# -# MOSAIC_CREATION.R -# =============== -# This script creates weekly mosaics from daily satellite imagery. -# It handles command-line arguments and initiates the mosaic creation process. -# -# Usage: Rscript mosaic_creation.R [end_date] [offset] [project_dir] [file_name] [data_source] -# - end_date: End date for processing (YYYY-MM-DD format) -# - offset: Number of days to look back from end_date (typically 7 for one week) -# - project_dir: Project directory name (e.g., "aura", "angata", "chemba", "esa") -# - file_name: Optional custom output file name (leave empty "" to use default: week_WW_YYYY.tif) -# - data_source: Optional data source folder (e.g., "merged_tif" or "merged_tif_8b") -# If not provided, auto-detects which folder contains actual data -# -# Examples: -# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation.R 2026-01-12 7 aura -# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation.R 2025-12-24 7 aura "" "merged_tif" -# - -# 1. Load required packages -# ----------------------- -suppressPackageStartupMessages({ - library(sf) - library(terra) - library(tidyverse) - library(lubridate) - library(here) -}) - -# 2. 
Process command line arguments and run mosaic creation -# ------------------------------------------------------ -main <- function() { - # Capture command line arguments - args <- commandArgs(trailingOnly = TRUE) - - # Process project_dir argument with default - if (length(args) >= 3 && !is.na(args[3])) { - project_dir <- as.character(args[3]) - } else if (exists("project_dir", envir = .GlobalEnv)) { - project_dir <- get("project_dir", envir = .GlobalEnv) - } else { - # Default project directory - project_dir <- "angata" - message("No project_dir provided. Using default:", project_dir) - } - - # Make project_dir available globally so parameters_project.R can use it - assign("project_dir", project_dir, envir = .GlobalEnv) - - # Process end_date argument with default - if (length(args) >= 1 && !is.na(args[1])) { - # Parse date explicitly in YYYY-MM-DD format from command line - end_date <- as.Date(args[1], format = "%Y-%m-%d") - if (is.na(end_date)) { - message("Invalid end_date provided. Using current date.") - end_date <- Sys.Date() - } - } else if (exists("end_date_str", envir = .GlobalEnv)) { - end_date <- as.Date(get("end_date_str", envir = .GlobalEnv)) - } else { - # Default to current date if no argument is provided - end_date <- Sys.Date() - message("No end_date provided. Using current date: ", format(end_date)) - } - - # Process offset argument with default - if (length(args) >= 2 && !is.na(args[2])) { - offset <- as.numeric(args[2]) - if (is.na(offset) || offset <= 0) { - message("Invalid offset provided. Using default (7 days).") - offset <- 7 - } - } else { - # Default to 7 days if no argument is provided - offset <- 7 - message("No offset provided. 
Using default:", offset, "days") - } - - # Process data_source argument (optional, passed from pipeline) - # If provided, use it; otherwise auto-detect - data_source_from_args <- NULL - if (length(args) >= 5 && !is.na(args[5]) && nchar(args[5]) > 0) { - data_source_from_args <- as.character(args[5]) - message("Data source explicitly provided via arguments: ", data_source_from_args) - } - - # 3. Initialize project configuration - # -------------------------------- - - # Detect which data source directory exists (merged_tif or merged_tif_8b) - # IMPORTANT: Only consider a folder as valid if it contains actual files - laravel_storage <- here::here("laravel_app/storage/app", project_dir) - - # Load centralized path structure - tryCatch({ - source("r_app/parameters_project.R") - paths <- setup_project_directories(project_dir) - }, error = function(e) { - message("Note: Could not open files from r_app directory") - message("Attempting to source from default directory instead...") - tryCatch({ - source("parameters_project.R") - paths <- setup_project_directories(project_dir) - message("✓ Successfully sourced files from default directory") - }, error = function(e) { - stop("Failed to source required files from both 'r_app' and default directories.") - }) - }) - data_source <- if (has_8b_data) { - message("Auto-detected data source: merged_tif_8b (8-band optimized) - contains files") - "merged_tif_8b" - } else if (has_legacy_data) { - message("Auto-detected data source: merged_tif (legacy 4-band) - contains files") - "merged_tif" - } else { - message("Warning: No valid data source found (both folders empty or missing). 
Using default: merged_tif") - "merged_tif" - } - } - - # Set global data_source for parameters_project.R - assign("data_source", data_source, envir = .GlobalEnv) - - tryCatch({ - source("r_app/parameters_project.R") - source("r_app/00_common_utils.R") - source("r_app/40_mosaic_creation_utils.R") - safe_log(paste("Successfully sourced files from 'r_app' directory.")) - }, error = function(e) { - message("Note: Could not open files from r_app directory") - message("Attempting to source from default directory instead...") - tryCatch({ - source("parameters_project.R") - paths <- setup_project_directories(project_dir) - message("✓ Successfully sourced files from default directory") - }, error = function(e) { - stop("Failed to source required files from both 'r_app' and default directories.") - }) - }) - - # Use centralized paths (no need to manually construct or create dirs) - merged_final <- paths$growth_model_interpolated_dir # or merged_final_tif if needed - daily_vrt <- paths$vrt_dir - - safe_log(paste("Using growth model/mosaic directory:", merged_final)) - safe_log(paste("Using daily VRT directory:", daily_vrt)) - - # 4. Generate date range for processing - # --------------------------------- - dates <- date_list(end_date, offset) - safe_log(paste("Processing data for week", dates$week, "of", dates$year)) - - # Create output filename - # Only use custom filename if explicitly provided (not empty string) - file_name_tif <- if (length(args) >= 4 && !is.na(args[4]) && nchar(args[4]) > 0) { - as.character(args[4]) - } else { - paste0("week_", sprintf("%02d", dates$week), "_", dates$year, ".tif") - } - - safe_log(paste("Output will be saved as:", file_name_tif)) - - # 5. 
Create weekly mosaics - route based on project tile detection - # --------------------------------------------------------------- - # The use_tile_mosaic flag is auto-detected by parameters_project.R - # based on whether tiles exist in merged_final_tif/ - - if (!exists("use_tile_mosaic")) { - # Fallback detection if flag not set (shouldn't happen) - merged_final_dir <- file.path(laravel_storage, "merged_final_tif") - tile_detection <- detect_tile_structure_from_merged_final(merged_final_dir) - use_tile_mosaic <- tile_detection$has_tiles - } - - if (use_tile_mosaic) { - # TILE-BASED APPROACH: Create per-tile weekly MAX mosaics - # This is used for projects like Angata with large ROIs requiring spatial partitioning - # Input data comes from merged_final_tif/{grid_size}/{DATE}/{DATE}_XX.tif (5-band tiles from script 20) - tryCatch({ - safe_log("Starting per-tile mosaic creation (tile-based approach)...") - - # Detect grid size from merged_final_tif folder structure - # Expected: merged_final_tif/5x5/ or merged_final_tif/10x10/ etc. 
- merged_final_base <- file.path(laravel_storage, "merged_final_tif") - grid_subfolders <- list.dirs(merged_final_base, full.names = FALSE, recursive = FALSE) - # Look for grid size patterns like "5x5", "10x10", "20x20" - grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE) - - if (length(grid_patterns) == 0) { - stop("No grid size subfolder found in merged_final_tif/ (expected: 5x5, 10x10, etc.)") - } - - grid_size <- grid_patterns[1] # Use first grid size found - safe_log(paste("Detected grid size:", grid_size)) - - # Point to the grid-specific merged_final_tif directory - merged_final_with_grid <- file.path(merged_final_base, grid_size) - - # Set output directory for per-tile mosaics, organized by grid size (from centralized paths) - # Output: weekly_tile_max/{grid_size}/week_WW_YYYY_TT.tif - tile_output_base <- file.path(paths$weekly_tile_max_dir, grid_size) - # Note: no dir.create needed - paths$weekly_tile_max_dir already created by setup_project_directories() - dir.create(tile_output_base, recursive = TRUE, showWarnings = FALSE) # Create grid-size subfolder - - created_tile_files <- create_weekly_mosaic_from_tiles( - dates = dates, - merged_final_dir = merged_final_with_grid, - tile_output_dir = tile_output_base, - field_boundaries = field_boundaries - ) - - safe_log(paste("✓ Per-tile mosaic creation completed - created", - length(created_tile_files), "tile files")) - }, error = function(e) { - safe_log(paste("ERROR in tile-based mosaic creation:", e$message), "ERROR") - traceback() - stop("Mosaic creation failed") - }) - - } else { - # SINGLE-FILE APPROACH: Create single weekly mosaic file - # This is used for legacy projects (ESA, Chemba, Aura) expecting single-file output - tryCatch({ - safe_log("Starting single-file mosaic creation (backward-compatible approach)...") - - # Set output directory for single-file mosaics (from centralized paths) - single_file_output_dir <- paths$weekly_mosaic_dir - - created_file <- create_weekly_mosaic( - 
dates = dates, - field_boundaries = field_boundaries, - daily_vrt_dir = daily_vrt, - merged_final_dir = merged_final, - output_dir = single_file_output_dir, - file_name_tif = file_name_tif, - create_plots = FALSE - ) - - safe_log(paste("✓ Single-file mosaic creation completed:", created_file)) - }, error = function(e) { - safe_log(paste("ERROR in single-file mosaic creation:", e$message), "ERROR") - traceback() - stop("Mosaic creation failed") - }) - } -} - -if (sys.nframe() == 0) { - main() -} - \ No newline at end of file diff --git a/r_app/40_mosaic_creation_tile_utils.R b/r_app/40_mosaic_creation_tile_utils.R deleted file mode 100644 index 3a3af35..0000000 --- a/r_app/40_mosaic_creation_tile_utils.R +++ /dev/null @@ -1,286 +0,0 @@ -# MOSAIC_CREATION_TILE_UTILS.R -# ============================ -# Tile-based processing utilities for scalable weekly mosaic creation -# -# STRATEGY: Memory-efficient, scalable approach for large study areas -# Split daily full-extent mosaics into fixed-size tiles (e.g., 5×5 km), -# then process each tile position across all days, and reassemble. -# -# WORKFLOW: -# Daily full-extent TIFF -# ↓ -# Split into N×M tiles (based on area size / tile_size) -# E.g., 50×80 km area with 5 km tiles = 10×16 = 160 tiles -# ↓ -# For EACH TILE POSITION (1 to 160): -# - Load that tile from all 7 days -# - Create MAX composite for that tile -# - Save tile_NNN_MAX.tif -# ↓ -# Reassemble all 160 MAX tiles back into full-extent -# ↓ -# Save weekly full-extent mosaic -# -# KEY BENEFIT: Memory usage ~50 MB per tile (5×5 km), not 1.3 GB (full 50×80 km) -# Scales automatically: bigger area = more tiles, same memory per tile -# -# TILE_SIZE: Configurable (default 5000 m = 5 km, adjustable via parameter) - -#' Simple tile-based processing using terra::makeTiles() -#' -#' Calculates tile grid based on desired tile SIZE (e.g., 5 km), not grid count. -#' This makes it SCALABLE: bigger area = more tiles automatically. 
-#' -#' @param raster_path Path to raster to split -#' @param output_dir Directory to save tiles -#' @param tile_size_m Tile size in meters (default: 5000 m = 5 km) -#' @return List with tiles (file paths) and metadata -#' -split_raster_into_tiles <- function(raster_path, output_dir, tile_size_m = 5000) { - # Load raster - r <- terra::rast(raster_path) - - # Calculate how many tiles we need based on raster extent and tile size - x_range <- terra::ext(r)$xmax - terra::ext(r)$xmin - y_range <- terra::ext(r)$ymax - terra::ext(r)$ymin - - n_tiles_x <- ceiling(x_range / tile_size_m) - n_tiles_y <- ceiling(y_range / tile_size_m) - - safe_log(paste("Splitting into", n_tiles_x, "×", n_tiles_y, "tiles of", - tile_size_m / 1000, "km")) - - # Create output directory - dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) - - # Use terra::makeTiles() - it splits based on n = c(rows, cols) - tiles <- terra::makeTiles( - r, - n = c(n_tiles_y, n_tiles_x), # rows, cols - filename = file.path(output_dir, "tile_.tif"), - overwrite = TRUE - ) - - safe_log(paste("Created", length(tiles), "tile files")) - - return(list( - tiles = tiles, - n_tiles = length(tiles), - n_tiles_x = n_tiles_x, - n_tiles_y = n_tiles_y, - extent = terra::ext(r), - raster = raster_path - )) -} - - -#' Create weekly MAX mosaic using TRUE tile-based processing -#' -#' TILE-BASED WORKFLOW (Memory efficient): -#' 1. Calculate tile grid dynamically from extent and FIXED tile_size (e.g., 5 km) -#' 2. For EACH TILE across all 7 days: -#' - Load that tile from each daily file (small ~50 MB, not 1.3 GB) -#' - Create MAX composite for just that tile -#' 3. 
Reassemble all tiles into final full-extent mosaic -#' -#' SCALABILITY: Fixed 5 km tile size means bigger area = more tiles (automatic scaling) -#' -#' @param dates List from date_list() with days_filter -#' @param merged_final_dir Directory with daily merged full-extent TIFFs -#' @param final_output_dir Directory to store final reassembled mosaic -#' @param file_name_tif Output filename for final mosaic -#' @param tile_size_m Tile size in meters (default: 5000 m = 5 km). Bigger area automatically gets more tiles. -#' @return Path to final reassembled weekly mosaic -#' -create_weekly_mosaic_tiled <- function(dates, merged_final_dir, - final_output_dir, file_name_tif, - tile_size_m = 5000) { - - # Get daily files for this week - daily_files <- list.files(merged_final_dir, full.names = TRUE, pattern = "\\.tif$") - daily_files <- daily_files[grepl(paste(dates$days_filter, collapse = "|"), daily_files)] - - if (length(daily_files) == 0) { - safe_log("No daily files found for this week", "ERROR") - return(NULL) - } - - safe_log(paste("Found", length(daily_files), "daily files for week", dates$week)) - - # Load first daily file to get extent and calculate tile grid - safe_log("Step 1: Loading first daily file to establish tile structure") - first_raster <- terra::rast(daily_files[1]) - - # Get CRS and convert tile_size_m to appropriate units - raster_crs <- terra::crs(first_raster) - - # If raster is in lat/lon (geographic), convert tile_size_m to degrees - # 1 degree latitude ≈ 111 km, so 5000 m ≈ 0.045 degrees - # Check for GEOGCRS (geographic coordinate system) in WKT string - is_geographic <- grepl("GEOGCRS|longlat|geographic|ANGLEUNIT.*degree", - as.character(raster_crs), ignore.case = TRUE) - - if (is_geographic) { - # Geographic CRS - convert meters to degrees - # At equator: 1 degree ≈ 111 km - tile_size_degrees <- tile_size_m / 111000 # 111 km per degree - safe_log(paste("Raster is in geographic CRS (lat/lon). 
Converting", tile_size_m / 1000, - "km to", round(tile_size_degrees, 4), "degrees")) - } else { - # Projected CRS - use meters directly - tile_size_degrees <- tile_size_m - safe_log(paste("Raster is in projected CRS. Using", tile_size_m / 1000, "km directly")) - } - - # Calculate n_tiles dynamically from extent and tile_size - x_range <- terra::ext(first_raster)$xmax - terra::ext(first_raster)$xmin - y_range <- terra::ext(first_raster)$ymax - terra::ext(first_raster)$ymin - - n_tiles_x <- ceiling(x_range / tile_size_degrees) - n_tiles_y <- ceiling(y_range / tile_size_degrees) - n_tiles_total <- n_tiles_x * n_tiles_y - - safe_log(paste("Step 2: Creating tile grid:", tile_size_m / 1000, "km tiles")) - safe_log(paste(" Extent:", round(x_range, 4), "° ×", round(y_range, 4), "°")) - safe_log(paste(" Grid:", n_tiles_y, "rows ×", n_tiles_x, "columns =", n_tiles_total, "tiles")) - - # Calculate tile extents mathematically (no need to save temp files) - extent <- terra::ext(first_raster) - x_min <- extent$xmin - y_min <- extent$ymin - - # Create list of tile extents - tile_extents <- list() - tile_idx <- 0 - - for (row in 1:n_tiles_y) { - for (col in 1:n_tiles_x) { - tile_idx <- tile_idx + 1 - - # Calculate this tile's bounds - tile_xmin <- x_min + (col - 1) * tile_size_degrees - tile_xmax <- min(tile_xmin + tile_size_degrees, extent$xmax) - tile_ymin <- y_min + (row - 1) * tile_size_degrees - tile_ymax <- min(tile_ymin + tile_size_degrees, extent$ymax) - - tile_extents[[tile_idx]] <- terra::ext(tile_xmin, tile_xmax, tile_ymin, tile_ymax) - } - } - - safe_log(paste("Calculated", length(tile_extents), "tile extents")) - - # Save tiles to Laravel storage directory - tile_storage_dir <- file.path("laravel_app", "storage", "app", "angata", "daily_tiles") - dir.create(tile_storage_dir, recursive = TRUE, showWarnings = FALSE) - - # For each tile, load from all daily files and create MAX - safe_log("Step 3: Processing tiles (load per-tile across all days, create MAX for each)") - 
tile_files_list <- list() - - for (tile_idx in seq_along(tile_extents)) { - if (tile_idx %% max(1, ceiling(n_tiles_total / 10)) == 0 || tile_idx == 1) { - safe_log(paste(" Processing tile", tile_idx, "of", n_tiles_total)) - } - - # Get this tile's extent - tile_extent <- tile_extents[[tile_idx]] - - # Load and crop all daily files to this tile extent - daily_tile_data <- list() - for (file_idx in seq_along(daily_files)) { - tryCatch({ - r <- terra::rast(daily_files[file_idx]) - cropped <- terra::crop(r, tile_extent, snap = "in") - daily_tile_data[[file_idx]] <- cropped - }, error = function(e) { - safe_log(paste("Warning: Could not crop tile", tile_idx, "from", - basename(daily_files[file_idx])), "WARNING") - }) - } - - if (length(daily_tile_data) == 0) { - safe_log(paste("No valid data for tile", tile_idx), "WARNING") - next - } - - # Create MAX composite for this tile - if (length(daily_tile_data) == 1) { - tile_max <- daily_tile_data[[1]] - } else { - collection <- terra::sprc(daily_tile_data) - tile_max <- terra::mosaic(collection, fun = "max") - } - - # Save tile to disk (keeps memory low) - tile_file <- file.path(tile_storage_dir, sprintf("tile_%04d.tif", tile_idx)) - terra::writeRaster(tile_max, tile_file, overwrite = TRUE) - tile_files_list[[tile_idx]] <- tile_file - } - - if (length(tile_files_list) == 0) { - safe_log("No tiles processed successfully", "ERROR") - return(NULL) - } - - safe_log(paste("Step 4: Reassembling", length(tile_files_list), "tiles from disk into full-extent mosaic")) - - # Load all tile files and reassemble - tile_rasters <- lapply(tile_files_list, terra::rast) - collection <- terra::sprc(tile_rasters) - final_mosaic <- terra::mosaic(collection, fun = "first") - - safe_log("Step 5: Saving final reassembled mosaic") - - # Save - dir.create(final_output_dir, recursive = TRUE, showWarnings = FALSE) - final_file_path <- file.path(final_output_dir, file_name_tif) - - tryCatch({ - terra::writeRaster(final_mosaic, final_file_path, overwrite 
= TRUE) - safe_log(paste("✓ Weekly mosaic saved to:", final_file_path)) - }, error = function(e) { - safe_log(paste("Error saving mosaic:", e$message), "ERROR") - return(NULL) - }) - - # Cleanup temporary tile files - unlink(tile_storage_dir, recursive = TRUE) - safe_log("Cleaned up temporary tile files") - - return(final_file_path) -} - -#' Load tile MAX rasters for a specific week (for per-tile analysis) -#' -#' @param week Week number -#' @param tile_dir Directory containing tile mosaics (week subdirectories) -#' @return List of tile rasters with tile_id metadata -#' -load_tile_mosaics <- function(week, tile_dir) { - week_dir <- file.path(tile_dir, paste0("week_", week)) - - if (!dir.exists(week_dir)) { - warning(paste("Tile directory not found:", week_dir)) - return(NULL) - } - - # Load all tile files - tile_files <- list.files(week_dir, pattern = "^tile_.*\\.tif$", full.names = TRUE) - - if (length(tile_files) == 0) { - warning("No tile files found in:", week_dir) - return(NULL) - } - - # Sort by tile ID - tile_numbers <- as.numeric(gsub(".*tile_(\\d+).*", "\\1", tile_files)) - tile_files <- tile_files[order(tile_numbers)] - - # Load rasters - tiles_list <- lapply(tile_files, terra::rast) - names(tiles_list) <- sprintf("tile_%03d", sort(tile_numbers)) - - safe_log(paste("Loaded", length(tiles_list), "tile mosaics for week", week)) - - return(tiles_list) -} diff --git a/r_app/40_mosaic_creation_utils.R b/r_app/40_mosaic_creation_utils.R deleted file mode 100644 index 43bb3a9..0000000 --- a/r_app/40_mosaic_creation_utils.R +++ /dev/null @@ -1,779 +0,0 @@ -# MOSAIC_CREATION_UTILS.R -# ====================== -# Utility functions for creating weekly mosaics from daily satellite imagery. -# These functions support cloud cover assessment, date handling, and mosaic creation. 
- -#' Detect whether a project uses tile-based or single-file mosaic approach (utility version) -#' -#' @param merged_final_tif_dir Directory containing merged_final_tif files -#' @return List with has_tiles (logical), detected_tiles (vector), total_files (count) -#' -detect_tile_structure_from_files <- function(merged_final_tif_dir) { - # Check if directory exists - if (!dir.exists(merged_final_tif_dir)) { - return(list(has_tiles = FALSE, detected_tiles = character(), total_files = 0)) - } - - # List all .tif files in merged_final_tif - tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE) - - if (length(tif_files) == 0) { - return(list(has_tiles = FALSE, detected_tiles = character(), total_files = 0)) - } - - # Check if ANY file matches tile naming pattern: *_XX.tif (where XX is 2 digits) - # Tile pattern examples: 2025-11-27_00.tif, 2025-11-27_01.tif, week_50_2024_00.tif - tile_pattern <- "_(\\d{2})\\.tif$" - tile_files <- tif_files[grepl(tile_pattern, tif_files)] - - has_tiles <- length(tile_files) > 0 - - return(list( - has_tiles = has_tiles, - detected_tiles = tile_files, - total_files = length(tif_files) - )) -} - -#' Generate a sequence of dates for processing -#' -#' @param end_date The end date for the sequence (Date object) -#' @param offset Number of days to look back from end_date -#' @return A list containing week number, year, and a sequence of dates for filtering -#' -# NOTE: date_list() is now in 00_common_utils.R - import from there -# This function was duplicated and has been consolidated - -#' Create a weekly mosaic from available VRT files -#' -#' @param dates List from date_list() with date range info -#' @param field_boundaries Field boundaries for image cropping -#' @param daily_vrt_dir Directory containing VRT files -#' @param merged_final_dir Directory with merged final rasters -#' @param output_dir Output directory for weekly mosaics -#' @param file_name_tif Output filename for the mosaic -#' @param 
create_plots Whether to create visualization plots (default: TRUE) -#' @return The file path of the saved mosaic -#' -create_weekly_mosaic <- function(dates, field_boundaries, daily_vrt_dir, - merged_final_dir, output_dir, file_name_tif, - create_plots = FALSE) { - # NOTE: VRT files are legacy code - we no longer create or use them - # Get dates directly from the dates parameter instead - dates_to_check <- dates$days_filter - - # Find final raster files for fallback - raster_files_final <- list.files(merged_final_dir, full.names = TRUE, pattern = "\\.tif$") - - # Process the mosaic if we have dates to check - if (length(dates_to_check) > 0) { - safe_log("Processing dates, assessing cloud cover for mosaic creation") - - # Calculate aggregated cloud cover statistics (returns data frame for image selection) - cloud_coverage_stats <- count_cloud_coverage(dates_to_check, merged_final_dir, field_boundaries) - - # Create mosaic based on cloud cover assessment - mosaic <- create_mosaic(raster_files_final, cloud_coverage_stats, field_boundaries) - - } else { - safe_log("No dates available for the date range, creating empty mosaic with NA values", "WARNING") - - # Create empty mosaic if no files are available - if (length(raster_files_final) == 0) { - stop("No VRT files or final raster files available to create mosaic") - } - - mosaic <- terra::rast(raster_files_final[1]) - mosaic <- terra::setValues(mosaic, NA) - mosaic <- terra::crop(mosaic, field_boundaries, mask = TRUE) - - names(mosaic) <- c("Red", "Green", "Blue", "NIR", "CI") - } - - # Save the mosaic (without mask files to avoid breaking other scripts) - file_path <- save_mosaic(mosaic, output_dir, file_name_tif, create_plots, save_mask = FALSE) - - safe_log(paste("Weekly mosaic processing completed for week", dates$week)) - - return(file_path) -} - -#' Find VRT files within a date range -#' -#' @param vrt_directory Directory containing VRT files -#' @param dates List from date_list() function containing days_filter 
-#' @return Character vector of VRT file paths -#' -find_vrt_files <- function(vrt_directory, dates) { - # Get all VRT files in directory - # Note: vrt_directory is already a full/relative path from parameters_project.R - # Don't wrap it in here::here() again - that would create an incorrect path - vrt_files <- list.files(vrt_directory, full.names = TRUE) - - if (length(vrt_files) == 0) { - warning("No VRT files found in directory: ", vrt_directory) - return(character(0)) - } - - # Filter files by dates - vrt_list <- purrr::map(dates$days_filter, ~ vrt_files[grepl(pattern = .x, x = vrt_files)]) %>% - purrr::compact() %>% - purrr::flatten_chr() - - # Log results - safe_log(paste("Found", length(vrt_list), "VRT files for the date range")) - - return(vrt_list) -} - -#' Count missing pixels (clouds) in rasters - per field analysis using actual TIF files -#' -#' @param dates_to_check Character vector of dates in YYYY-MM-DD format to check for cloud coverage -#' @param merged_final_dir Directory containing the actual TIF files (e.g., merged_final_tif) -#' @param field_boundaries Field boundaries (sf object) for calculating cloud cover over field areas only -#' @return Data frame with aggregated cloud statistics for each TIF file (used for mosaic selection) -#' -count_cloud_coverage <- function(dates_to_check, merged_final_dir = NULL, field_boundaries = NULL) { - if (length(dates_to_check) == 0) { - warning("No dates provided for cloud coverage calculation") - return(NULL) - } - - tryCatch({ - # Build list of actual TIF files from dates - # TIF filenames are like "2025-12-18.tif" - tif_files <- paste0(here::here(merged_final_dir), "/", dates_to_check, ".tif") - - # Check which TIF files exist - tif_exist <- file.exists(tif_files) - if (!any(tif_exist)) { - warning("No TIF files found in directory: ", merged_final_dir) - return(NULL) - } - - tif_files <- tif_files[tif_exist] - safe_log(paste("Found", length(tif_files), "TIF files for cloud coverage assessment")) - - # 
Initialize list to store aggregated results - aggregated_results <- list() - - # Process each TIF file - for (tif_idx in seq_along(tif_files)) { - tif_file <- tif_files[tif_idx] - - tryCatch({ - # Load the TIF file (typically has 5 bands: R, G, B, NIR, CI) - current_raster <- terra::rast(tif_file) - - # Extract the CI band (last band) - ci_band <- current_raster[[terra::nlyr(current_raster)]] - - # Create a unique field mask for THIS raster's extent - # This handles cases where rasters have different extents due to missing data - total_notna <- NA_real_ - total_pixels <- NA_real_ - - if (!is.null(field_boundaries)) { - tryCatch({ - # Create mask specific to this raster's grid - field_mask <- terra::rasterize(field_boundaries, ci_band, field = 1) - - # Count pixels within field boundaries (for this specific raster) - total_pixels <- terra::global(field_mask, fun = "notNA")$notNA - - # Cloud coverage calculated only over field areas - ci_field_masked <- terra::mask(ci_band, field_mask, maskvalue = NA) - total_notna <- terra::global(ci_field_masked, fun = "notNA")$notNA - - }, error = function(e) { - # If field mask creation fails, fall back to entire raster - safe_log(paste("Could not create field mask for", basename(tif_file), ":", e$message), "WARNING") - }) - } - - # If field mask failed, use entire raster - if (is.na(total_notna)) { - total_notna <- terra::global(ci_band, fun = "notNA")$notNA - total_pixels <- terra::ncell(ci_band) - } - - # Calculate cloud coverage percentage (missing = clouds) - missing_pct <- round(100 - ((total_notna / total_pixels) * 100)) - - aggregated_results[[tif_idx]] <- data.frame( - filename = basename(tif_file), - notNA = total_notna, - total_pixels = total_pixels, - missing_pixels_percentage = missing_pct, - thres_5perc = as.integer(missing_pct < 5), - thres_40perc = as.integer(missing_pct < 45), - stringsAsFactors = FALSE - ) - - }, error = function(e) { - safe_log(paste("Error processing TIF", basename(tif_file), ":", e$message), 
#' Create a mosaic from merged_final_tif files based on cloud coverage
#'
#' Selects the best available images using the cloud-coverage thresholds
#' computed upstream (thres_5perc / thres_40perc columns), then builds a
#' MAX composite so that clear pixels from any date override cloudy ones.
#'
#' @param tif_files Character vector (or list) of processed TIF file paths
#'   (5 bands: R, G, B, NIR, CI)
#' @param cloud_coverage_stats Cloud coverage statistics from
#'   count_cloud_coverage() / count_cloud_coverage_for_tile(); may be NULL
#' @param field_boundaries Field boundaries for cropping/masking (optional)
#' @return A SpatRaster with the Red, Green, Blue, NIR, CI bands, or NULL
#'   when no usable input could be loaded
#'
create_mosaic <- function(tif_files, cloud_coverage_stats, field_boundaries = NULL) {
  # Normalize to a character vector (callers pass both lists and vectors)
  tif_files <- unlist(tif_files, use.names = FALSE)

  if (length(tif_files) == 0) {
    safe_log("No TIF files available for mosaic creation", "ERROR")
    return(NULL)
  }

  mosaic_type <- "Unknown"  # Track what type of mosaic is being created

  if (is.null(cloud_coverage_stats) || nrow(cloud_coverage_stats) == 0) {
    safe_log("No cloud coverage statistics available, using all files", "WARNING")
    rasters_to_use <- tif_files
    mosaic_type <- paste("all", length(tif_files), "available images")
  } else {
    # Count how many images meet each cloud-coverage threshold
    num_5perc <- sum(cloud_coverage_stats$thres_5perc, na.rm = TRUE)
    num_40perc <- sum(cloud_coverage_stats$thres_40perc, na.rm = TRUE)

    if (num_5perc > 1) {
      safe_log(paste("Creating max composite from", num_5perc, "cloud-free images (<5% clouds)"))
      mosaic_type <- paste(num_5perc, "cloud-free images (<5% clouds)")
      best_coverage <- which(cloud_coverage_stats$thres_5perc > 0)
    } else if (num_5perc == 1) {
      safe_log("Using single cloud-free image (<5% clouds)")
      mosaic_type <- "single cloud-free image (<5% clouds)"
      best_coverage <- which(cloud_coverage_stats$thres_5perc > 0)
    } else if (num_40perc > 1) {
      safe_log(paste("Creating max composite from", num_40perc, "partially cloudy images (<40% clouds)"), "WARNING")
      mosaic_type <- paste(num_40perc, "partially cloudy images (<40% clouds)")
      best_coverage <- which(cloud_coverage_stats$thres_40perc > 0)
    } else if (num_40perc == 1) {
      safe_log("Using single partially cloudy image (<40% clouds)", "WARNING")
      mosaic_type <- "single partially cloudy image (<40% clouds)"
      best_coverage <- which(cloud_coverage_stats$thres_40perc > 0)
    } else {
      safe_log("No cloud-free images available, using all images", "WARNING")
      mosaic_type <- paste("all", nrow(cloud_coverage_stats), "available images")
      best_coverage <- seq_len(nrow(cloud_coverage_stats))
    }

    # Match selected stats rows to TIF files by basename.
    # FIX: replaces the previous O(n^2) nested loops with vectorized match().
    cc_filenames <- cloud_coverage_stats$filename[best_coverage]
    match_idx <- match(cc_filenames, basename(tif_files))
    for (missing_name in cc_filenames[is.na(match_idx)]) {
      safe_log(paste("Warning: Could not find TIF file matching cloud stats entry:", missing_name), "WARNING")
    }
    rasters_to_use <- tif_files[match_idx[!is.na(match_idx)]]

    if (length(rasters_to_use) == 0) {
      safe_log("Could not match cloud coverage stats to TIF files, using all files", "WARNING")
      rasters_to_use <- tif_files
      mosaic_type <- paste("all", length(tif_files), "available images")
    }
  }

  # Load and mosaic the selected rasters
  if (length(rasters_to_use) == 1) {
    safe_log(paste("Using single image for mosaic:", basename(rasters_to_use)))
    mosaic <- terra::rast(rasters_to_use[1])
  } else {
    safe_log(paste("Creating mosaic from", length(rasters_to_use), "images"))

    # Load all rasters with error handling - only keep successful loads
    all_rasters <- Filter(Negate(is.null), lapply(rasters_to_use, function(f) {
      tryCatch({
        terra::rast(f)
      }, error = function(e) {
        safe_log(paste("Warning: Could not load", basename(f), ":", e$message), "WARNING")
        NULL
      })
    }))

    safe_log(paste("Loaded", length(all_rasters), "valid rasters from", length(rasters_to_use), "files"))

    if (length(all_rasters) == 0) {
      safe_log("No valid rasters to merge", "WARNING")
      return(NULL)
    }

    if (length(all_rasters) == 1) {
      mosaic <- all_rasters[[1]]
      safe_log("Using single raster after filtering")
    } else {
      # MAX composite: take the maximum value at each pixel across all dates.
      # Clouds carry low/zero CI, so clear pixels from other dates win.
      mosaic <- tryCatch({
        safe_log(paste("Creating max composite from", length(all_rasters), "images to fill clouds"))

        # Check whether every raster shares the first raster's grid
        reference_raster <- all_rasters[[1]]
        ref_ext <- terra::ext(reference_raster)
        ref_res <- terra::res(reference_raster)

        grids_match <- all(vapply(all_rasters[-1], function(r) {
          isTRUE(all.equal(terra::ext(r), ref_ext, tolerance = 1e-6)) &&
            isTRUE(all.equal(terra::res(r), ref_res, tolerance = 1e-6))
        }, logical(1)))

        if (grids_match) {
          # Identical grids - no cropping/resampling needed
          safe_log("All rasters have identical grids - stacking directly for max composite")
          raster_collection <- terra::sprc(all_rasters)
          max_mosaic <- terra::mosaic(raster_collection, fun = "max")
        } else {
          safe_log("Rasters have different grids - cropping and resampling to common extent")

          # Prefer field-boundary extent so every date covers the same area
          if (!is.null(field_boundaries)) {
            crop_extent <- terra::ext(field_boundaries)
            safe_log("Using field boundaries extent for consistent area across all dates")
          } else {
            crop_extent <- terra::ext(all_rasters[[1]])
            for (i in seq_along(all_rasters)[-1]) {
              crop_extent <- terra::union(crop_extent, terra::ext(all_rasters[[i]]))
            }
            safe_log("Using raster union extent")
          }

          cropped_rasters <- lapply(all_rasters, function(r) terra::crop(r, crop_extent))

          # Resample everything onto the first cropped raster's grid
          reference_grid <- cropped_rasters[[1]]
          aligned_rasters <- lapply(cropped_rasters, function(r) {
            if (isTRUE(all.equal(terra::ext(r), terra::ext(reference_grid), tolerance = 1e-6)) &&
                isTRUE(all.equal(terra::res(r), terra::res(reference_grid), tolerance = 1e-6))) {
              return(r)  # Already aligned
            }
            terra::resample(r, reference_grid, method = "near")
          })

          raster_collection <- terra::sprc(aligned_rasters)
          max_mosaic <- terra::mosaic(raster_collection, fun = "max")
        }

        max_mosaic
      }, error = function(e) {
        safe_log(paste("Max composite creation failed:", e$message), "WARNING")
        safe_log("Using first raster only as fallback")
        all_rasters[[1]]
      })
      safe_log(paste("Max composite created - taking clearest pixel at each location"))
    }
  }

  # Ensure we have exactly the required bands: Red, Green, Blue, NIR, CI.
  # FIX: this normalization previously ran only for the multi-file branch,
  # so a single-image mosaic could keep extra/unordered bands; it now runs
  # for every path. The duplicated membership check was also collapsed.
  required_bands <- c("Red", "Green", "Blue", "NIR", "CI")
  available_bands <- names(mosaic)

  if (all(required_bands %in% available_bands)) {
    mosaic <- mosaic[[required_bands]]
    safe_log("Selected Red, Green, Blue, NIR, CI bands")
  } else {
    safe_log(paste("Warning: Not all required bands found. Available:",
                   paste(available_bands, collapse = ", ")), "WARNING")
    if (terra::nlyr(mosaic) > 5) {
      # Keep only first 5 bands as fallback
      mosaic <- terra::subset(mosaic, 1:5)
      safe_log("Keeping only first 5 bands as fallback")
    }
  }

  # Crop/mask to field boundaries if provided
  if (!is.null(field_boundaries)) {
    tryCatch({
      mosaic <- terra::crop(mosaic, field_boundaries, mask = TRUE)
      safe_log("Mosaic cropped to field boundaries")
    }, error = function(e) {
      safe_log(paste("Could not crop to field boundaries:", e$message), "WARNING")
      # Return uncropped mosaic
    })
  }

  # Log final mosaic summary
  safe_log(paste("✓ Mosaic created from", mosaic_type, "-", terra::nlyr(mosaic),
                 "bands,", nrow(mosaic), "x", ncol(mosaic), "pixels"))

  return(mosaic)
}
#' Save a mosaic raster to disk
#'
#' Writes the mosaic (and optionally its cloud mask attribute) to output_dir,
#' appending a .tif extension when missing.
#'
#' @param mosaic_raster A SpatRaster object to save
#' @param output_dir Directory to save the output (may be an absolute path)
#' @param file_name Filename for the output raster
#' @param plot_result Whether to create visualizations (default: FALSE)
#' @param save_mask Whether to save cloud masks separately (default: FALSE)
#' @return The file path of the saved raster
#'
save_mosaic <- function(mosaic_raster, output_dir, file_name, plot_result = FALSE, save_mask = FALSE) {
  # Validate input
  if (is.null(mosaic_raster)) {
    stop("No mosaic raster provided to save")
  }

  # Create output directory if it doesn't exist
  dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)

  # Ensure file_name has .tif extension
  if (!grepl("\\.tif$|\\.TIF$", file_name)) {
    file_name <- paste0(file_name, ".tif")
  }
  # Use file.path() since output_dir may be an absolute path
  file_path <- file.path(output_dir, file_name)

  # Cloud mask may be attached as an attribute by upstream processing
  cloud_mask <- attr(mosaic_raster, "cloud_mask")

  # Save raster
  terra::writeRaster(mosaic_raster, file_path, overwrite = TRUE)

  # Save cloud mask if available and requested
  if (!is.null(cloud_mask) && save_mask) {
    # Create mask filename by adding _mask before extension
    mask_file_name <- gsub("\\.(tif|TIF)$", "_mask.\\1", file_name)
    # FIX: was here::here(output_dir, ...), which resolves relative to the
    # project root and misplaces the mask when output_dir is absolute; use
    # file.path() consistently with the main raster path above.
    mask_file_path <- file.path(output_dir, mask_file_name)

    terra::writeRaster(cloud_mask, mask_file_path, overwrite = TRUE)
    safe_log(paste("Cloud/shadow mask saved to:", mask_file_path))
  } else if (!is.null(cloud_mask)) {
    safe_log("Cloud mask available but not saved (save_mask = FALSE)")
  }

  # Create plots if requested
  if (plot_result) {
    # Plot the CI band
    if ("CI" %in% names(mosaic_raster)) {
      terra::plot(mosaic_raster$CI, main = paste("CI map", file_name))
    }

    # Plot RGB image
    if (all(c("Red", "Green", "Blue") %in% names(mosaic_raster))) {
      terra::plotRGB(mosaic_raster, main = paste("RGB map", file_name))
    }

    # Plot cloud mask if available (red = cloud/shadow, green = clear)
    if (!is.null(cloud_mask)) {
      terra::plot(cloud_mask, main = paste("Cloud/shadow mask", file_name),
                  col = c("red", "green"))
    }

    # Side-by-side comparison when both RGB and cloud mask are available
    if (all(c("Red", "Green", "Blue") %in% names(mosaic_raster)) && !is.null(cloud_mask)) {
      old_par <- par(mfrow = c(1, 2))
      terra::plotRGB(mosaic_raster, main = "RGB Image")
      mask_plot <- cloud_mask
      terra::plot(mask_plot, main = "Cloud/Shadow Mask", col = c("red", "green"))
      par(old_par)
    }
  }

  # Log save completion
  safe_log(paste("Mosaic saved to:", file_path))

  return(file_path)
}

#' Create weekly mosaic from pre-split tiles with MAX aggregation
#'
#' Processes tiles created by Script 01 and processed by Script 02.
#' For each tile independently:
#'   1. Collects that tile from all dates in the range
#'   2. Calculates cloud coverage per date
#'   3. Uses create_mosaic logic to select best dates (cloud-clean preferred)
#'   4. Creates a MAX composite for that tile
#'   5. Saves to tile_output_dir as week_WW_YYYY_TT.tif
#'
#' Input:  merged_final_dir/[DATE]/[*.tif per tile]
#' Output: one weekly MAX tile per tile ID
#'
#' @param dates List from date_list() containing days_filter, week and year
#' @param merged_final_dir Directory containing processed tiles per date
#' @param tile_output_dir Directory to save weekly MAX tiles
#' @param field_boundaries Field boundaries for cloud coverage calculation (optional)
#' @return Character vector of paths to created tile files
#'
create_weekly_mosaic_from_tiles <- function(dates, merged_final_dir, tile_output_dir, field_boundaries = NULL) {

  safe_log("Starting per-tile mosaic creation with cloud-based date selection...")

  # Create output directory if needed
  dir.create(tile_output_dir, recursive = TRUE, showWarnings = FALSE)

  # Step 1: Discover all tiles from all dates and group by tile ID
  # Structure: tile_groups[["01"]] = list of files for that tile across dates
  tile_groups <- list()

  for (date in dates$days_filter) {
    date_dir <- file.path(merged_final_dir, date)

    if (!dir.exists(date_dir)) {
      safe_log(paste("  Skipping date:", date, "- directory not found"), "WARNING")
      next
    }

    tile_files <- list.files(date_dir, pattern = "\\.tif$", full.names = TRUE)

    if (length(tile_files) == 0) {
      safe_log(paste("  No tiles found for date:", date), "WARNING")
      next
    }

    # Extract tile ID from each filename (e.g., "2026-01-02_01.tif" -> "01")
    # NOTE(review): if a filename does not match the pattern, gsub returns the
    # whole basename, which then becomes its own tile group - confirm inputs.
    for (tile_file in tile_files) {
      tile_basename <- basename(tile_file)
      tile_id <- gsub(".*_([0-9]+)\\.tif", "\\1", tile_basename)

      if (!tile_id %in% names(tile_groups)) {
        tile_groups[[tile_id]] <- list()
      }
      tile_groups[[tile_id]][[length(tile_groups[[tile_id]]) + 1]] <- tile_file
    }
  }

  if (length(tile_groups) == 0) {
    stop("No tiles found in date range")
  }

  safe_log(paste("Found", length(tile_groups), "unique tiles across all dates"))

  # Step 2: Process each tile independently
  created_tiles <- character()

  for (tile_id in names(tile_groups)) {
    tile_files_for_this_id <- unlist(tile_groups[[tile_id]])

    safe_log(paste("Processing tile", tile_id, "with", length(tile_files_for_this_id), "dates"))

    # Step 2a: Calculate cloud coverage for this tile across all dates
    cloud_stats_this_tile <- tryCatch({
      count_cloud_coverage_for_tile(
        tile_files = tile_files_for_this_id,
        field_boundaries = field_boundaries
      )
    }, error = function(e) {
      safe_log(paste("  Error calculating cloud coverage for tile", tile_id, "-", e$message), "WARNING")
      NULL
    })

    if (is.null(cloud_stats_this_tile) || nrow(cloud_stats_this_tile) == 0) {
      safe_log(paste("  No valid cloud stats for tile", tile_id, "- using all available dates"), "WARNING")
      cloud_stats_this_tile <- NULL
    }

    # Step 2b: Create MAX mosaic for this tile using create_mosaic logic
    tile_mosaic <- tryCatch({
      create_mosaic(
        tif_files = tile_files_for_this_id,
        cloud_coverage_stats = cloud_stats_this_tile,
        field_boundaries = NULL  # Don't crop individual tiles
      )
    }, error = function(e) {
      safe_log(paste("  Error creating mosaic for tile", tile_id, "-", e$message), "WARNING")
      NULL
    })

    if (is.null(tile_mosaic)) {
      safe_log(paste("  Failed to create mosaic for tile", tile_id, "- skipping"), "WARNING")
      next
    }

    # DEBUG: Check mosaic content before saving
    safe_log(paste("  DEBUG: Mosaic tile", tile_id, "dimensions:", nrow(tile_mosaic), "x", ncol(tile_mosaic)))
    safe_log(paste("  DEBUG: Mosaic tile", tile_id, "bands:", terra::nlyr(tile_mosaic)))

    band1 <- tile_mosaic[[1]]
    band1_min <- terra::global(band1, fun = "min", na.rm = TRUE)$min
    band1_max <- terra::global(band1, fun = "max", na.rm = TRUE)$max
    safe_log(paste("  DEBUG: Band 1 MIN=", round(band1_min, 2), "MAX=", round(band1_max, 2)))

    # Step 2c: Save this tile's weekly MAX mosaic
    # Filename format: week_WW_YYYY_TT.tif (e.g., week_02_2026_01.tif)
    tile_filename <- paste0("week_", sprintf("%02d", dates$week), "_", dates$year, "_",
                            sprintf("%02d", as.integer(tile_id)), ".tif")
    tile_output_path <- file.path(tile_output_dir, tile_filename)

    tryCatch({
      terra::writeRaster(tile_mosaic, tile_output_path, overwrite = TRUE)
      safe_log(paste("  ✓ Saved tile", tile_id, "to:", tile_filename))
      created_tiles <- c(created_tiles, tile_output_path)
    }, error = function(e) {
      safe_log(paste("  Error saving tile", tile_id, "-", e$message), "WARNING")
    })
  }

  safe_log(paste("✓ Created", length(created_tiles), "weekly MAX tiles in", tile_output_dir))

  return(created_tiles)
}
#' Calculate cloud coverage for a single tile across multiple dates
#'
#' Helper function for per-tile cloud assessment. Takes the same tile from
#' different dates and calculates cloud coverage for each. Coverage is
#' computed ONLY within field boundaries (when provided), so total_pixels
#' varies per tile depending on which fields fall inside that tile area.
#'
#' @param tile_files Character vector of tile file paths from different dates
#' @param field_boundaries Field boundaries for analysis (required for per-field counting)
#' @return Data frame with one cloud-stats row per date/tile, or NULL when
#'   no files were provided
#'
count_cloud_coverage_for_tile <- function(tile_files, field_boundaries = NULL) {
  if (length(tile_files) == 0) {
    warning("No tile files provided for cloud coverage calculation")
    return(NULL)
  }

  # Preallocate one result row per input file
  aggregated_results <- vector("list", length(tile_files))

  for (idx in seq_along(tile_files)) {
    tile_file <- tile_files[idx]

    # tryCatch returns the per-file data.frame row directly; this replaces the
    # previous `<<-` superassignment from inside the error handler.
    aggregated_results[[idx]] <- tryCatch({
      # Load the tile
      current_raster <- terra::rast(tile_file)

      # Extract the CI band (last band in 5-band output)
      ci_band <- current_raster[[terra::nlyr(current_raster)]]

      if (!is.null(field_boundaries)) {
        # Build a reference template (same grid as ci_band, data-independent)
        # so the field mask shows the potential field area even if ci_band is
        # entirely cloudy (all NA).
        ref_template <- terra::rast(terra::ext(ci_band), resolution = terra::res(ci_band),
                                    crs = terra::crs(ci_band))
        terra::values(ref_template) <- 1  # Fill entire raster with 1s

        # Crop and mask to field boundaries: 1 inside fields, NA outside
        field_mask <- terra::crop(ref_template, field_boundaries, mask = TRUE)

        # Total potential field pixels (independent of clouds)
        total_pixels <- terra::global(field_mask, fun = "notNA")$notNA

        # Actual valid (non-cloud) pixels inside field boundaries
        ci_masked <- terra::crop(ci_band, field_boundaries, mask = TRUE)
        total_notna <- terra::global(ci_masked, fun = "notNA")$notNA
      } else {
        # No field boundaries provided - use the entire tile
        total_notna <- terra::global(ci_band, fun = "notNA")$notNA
        total_pixels <- terra::ncell(ci_band)
      }

      # Cloud coverage percentage (missing = clouds)
      missing_pct <- round(100 - ((total_notna / total_pixels) * 100))

      data.frame(
        filename = basename(tile_file),  # Keep full filename: 2026-01-07_03.tif
        notNA = total_notna,
        total_pixels = total_pixels,
        missing_pixels_percentage = missing_pct,
        thres_5perc = as.integer(missing_pct < 5),
        # NOTE(review): cutoff is 45 although the column is named 40perc and
        # callers log "<40% clouds" - confirm whether the 5% slack is intended.
        thres_40perc = as.integer(missing_pct < 45),
        stringsAsFactors = FALSE
      )
    }, error = function(e) {
      safe_log(paste("Error processing tile:", basename(tile_file), "-", e$message), "WARNING")
      data.frame(
        # FIX: store basename, not the full path - create_mosaic matches stats
        # rows to files by basename, so full paths never matched downstream.
        filename = basename(tile_file),
        notNA = NA_real_,
        total_pixels = NA_real_,
        missing_pixels_percentage = 100,
        thres_5perc = 0L,
        thres_40perc = 0L,
        stringsAsFactors = FALSE
      )
    })
  }

  # Combine the per-file rows into one data frame
  do.call(rbind, aggregated_results)
}
TRUE -> AURA workflow (6 KPIs) +# script_90_compatible = FALSE -> CANE_SUPPLY workflow (weekly stats + basic reporting) + +if (client_config$script_90_compatible) { + message("Loading AURA client utilities (80_utils_agronomic_support.R)...") + tryCatch({ + source(here("r_app", "80_utils_agronomic_support.R")) + }, error = function(e) { + stop("Error loading 80_utils_agronomic_support.R: ", e$message) + }) +} else { + message("Loading CANE_SUPPLY client utilities (80_utils_cane_supply.R)...") + tryCatch({ + source(here("r_app", "80_utils_cane_supply.R")) + }, error = function(e) { + stop("Error loading 80_utils_cane_supply.R: ", e$message) + }) +} # ============================================================================ # PHASE AND STATUS TRIGGER DEFINITIONS diff --git a/r_app/80_kpi_utils.R b/r_app/80_kpi_utils.R deleted file mode 100644 index 702ada3..0000000 --- a/r_app/80_kpi_utils.R +++ /dev/null @@ -1,1508 +0,0 @@ -# 80_KPI_UTILS.R -# =============== -# Consolidated KPI calculation utilities for Script 80. -# Contains all 6 farm-level KPIs for SmartCane analysis. 
# Includes helper functions from crop_messaging_utils.R:
# - calculate_cv()
# - calculate_spatial_autocorrelation()
# - calculate_change_percentages()

# ============================================================================
# HELPER FUNCTIONS FROM CROP_MESSAGING_UTILS.R
# ============================================================================

# Analysis configuration - Moran's I thresholds for clustering analysis
MORAN_THRESHOLD_HIGH <- 0.95     # Above this = very strong clustering (problematic patterns)
MORAN_THRESHOLD_MODERATE <- 0.85 # Above this = moderate clustering
MORAN_THRESHOLD_LOW <- 0.7       # Above this = normal field continuity

#' Calculate coefficient of variation for uniformity assessment
#' @param values Numeric vector of CI values
#' @return Coefficient of variation (sd/mean) as decimal, or NA when fewer
#'   than two finite values remain
calculate_cv <- function(values) {
  values <- values[!is.na(values) & is.finite(values)]
  if (length(values) < 2) return(NA_real_)
  sd(values) / mean(values)  # Keep as decimal
}

#' Calculate percentage of field with positive vs negative change
#' @param current_values Current week CI values
#' @param previous_values Previous week CI values
#' @return List with percentages of positive, negative, and stable change
#'   areas; all NA when inputs differ in length or too few valid deltas exist
calculate_change_percentages <- function(current_values, previous_values) {
  # Ensure same length (should be from same field boundaries)
  if (length(current_values) != length(previous_values)) {
    return(list(positive_pct = NA, negative_pct = NA, stable_pct = NA))
  }

  # Pixel-wise change; keep only finite, non-NA deltas
  change_values <- current_values - previous_values
  valid_changes <- change_values[!is.na(change_values) & is.finite(change_values)]

  if (length(valid_changes) < 2) {
    return(list(positive_pct = NA, negative_pct = NA, stable_pct = NA))
  }

  # Share of pixels that improved, declined, or stayed exactly equal
  positive_pct <- sum(valid_changes > 0) / length(valid_changes) * 100
  negative_pct <- sum(valid_changes < 0) / length(valid_changes) * 100
  stable_pct <- sum(valid_changes == 0) / length(valid_changes) * 100

  return(list(
    positive_pct = positive_pct,
    negative_pct = negative_pct,
    stable_pct = stable_pct
  ))
}

#' Calculate spatial autocorrelation (Moran's I) for a field
#' @param ci_raster Terra raster of CI values
#' @param field_boundary Terra vector of field boundary
#' @return List with Moran's I statistic, p-value, and a text interpretation
calculate_spatial_autocorrelation <- function(ci_raster, field_boundary) {

  tryCatch({
    # Crop and mask raster to field boundary
    field_raster <- terra::crop(ci_raster, field_boundary)
    field_raster <- terra::mask(field_raster, field_boundary)

    # Convert to points for spatial analysis
    raster_points <- terra::as.points(field_raster, na.rm = TRUE)

    # Need a minimum number of points for a meaningful neighbour graph
    if (length(raster_points) < 10) {
      return(list(morans_i = NA, p_value = NA, interpretation = "insufficient_data"))
    }

    # Convert to sf for spdep
    points_sf <- sf::st_as_sf(raster_points)

    # Create spatial weights matrix (k-nearest neighbors)
    coords <- sf::st_coordinates(points_sf)

    # Adaptive number of neighbors based on sample size (between 4 and 8)
    k_neighbors <- min(8, max(4, floor(nrow(coords) / 10)))

    knn_nb <- spdep::knearneigh(coords, k = k_neighbors)
    knn_listw <- spdep::nb2listw(spdep::knn2nb(knn_nb), style = "W", zero.policy = TRUE)

    # Calculate Moran's I on the CI values (first column of points_sf)
    ci_values <- points_sf[[1]]
    moran_result <- spdep::moran.test(ci_values, knn_listw, zero.policy = TRUE)

    morans_i <- moran_result$estimate[1]
    p_value <- moran_result$p.value

    # Map the statistic onto the project's clustering categories
    interpretation <- if (is.na(morans_i)) {
      "insufficient_data"
    } else if (p_value > 0.05) {
      "random"                 # Not a significant spatial pattern
    } else if (morans_i > MORAN_THRESHOLD_HIGH) {
      "very_strong_clustering" # May indicate management issues
    } else if (morans_i > MORAN_THRESHOLD_MODERATE) {
      "strong_clustering"      # Worth monitoring
    } else if (morans_i > MORAN_THRESHOLD_LOW) {
      "normal_continuity"      # Expected for uniform fields
    } else if (morans_i > 0.3) {
      "weak_clustering"        # Some clustering present
    } else if (morans_i < -0.3) {
      "dispersed"              # Checkerboard pattern
    } else {
      "low_autocorrelation"
    }

    return(list(
      morans_i = morans_i,
      p_value = p_value,
      interpretation = interpretation
    ))

  }, error = function(e) {
    warning(paste("Error calculating spatial autocorrelation:", e$message))
    return(list(morans_i = NA, p_value = NA, interpretation = "error"))
  })
}

# ============================================================================
# KPI-SPECIFIC HELPER FUNCTIONS
# ============================================================================

# 1. Helper Functions
# -----------------

#' Extract CI band only from a multi-band raster
#' @param ci_raster CI raster (can be multi-band with Red, Green, Blue, NIR, CI)
#' @param field_vect Field boundary as SpatVector
#' @return Vector of CI values
extract_ci_values <- function(ci_raster, field_vect) {
  extracted <- terra::extract(ci_raster, field_vect, fun = NULL)

  if ("CI" %in% names(extracted)) {
    # Multi-band mosaic with a named CI column
    return(extracted[, "CI"])
  } else if (ncol(extracted) > 1) {
    # Fallback: assume last column is CI (after ID, Red, Green, Blue, NIR)
    return(extracted[, ncol(extracted)])
  } else {
    # Single band raster - return as is
    return(extracted[, 1])
  }
}

#' Calculate current and previous week numbers using ISO 8601 week numbering
#' @param report_date Date to calculate weeks for (default: today)
#' @return List with current_week, previous_week, year (ISO week-based year),
#'   and previous_year (for year boundary handling)
calculate_week_numbers <- function(report_date = Sys.Date()) {
  # ISO 8601 week numbering (%V) - weeks start on Monday; %G is the
  # matching ISO week-based year (can differ from the calendar year)
  current_week <- as.numeric(format(report_date, "%V"))
  current_year <- as.numeric(format(report_date, "%G"))

  previous_week <- current_week - 1
  previous_year <- current_year

  # Handle year boundary: wrap to the LAST ISO week of the previous year.
  # FIX: this was hardcoded to 52, but ISO years can have 53 weeks
  # (e.g. 2020, 2026). Per ISO 8601, Dec 28 always falls in the last week
  # of its year, so its %V gives the correct 52 or 53.
  if (previous_week < 1) {
    previous_year <- current_year - 1
    previous_week <- as.numeric(format(as.Date(sprintf("%d-12-28", previous_year)), "%V"))
  }

  return(list(
    current_week = current_week,
    previous_week = previous_week,
    year = current_year,
    previous_year = previous_year
  ))
}

#' Load CI raster for a specific field
#'
#' Handles both architectures: a single loaded mosaic (crop per field) and
#' per-field mosaics on disk (marker object carrying directory attributes,
#' as produced by load_weekly_ci_mosaic).
#'
#' @param ci_raster_or_obj Loaded SpatRaster, or per-field marker object
#' @param field_name Field name (per-field subdirectory name)
#' @param field_vect Field boundary as SpatVector (single-file mode only)
#' @return SpatRaster of CI values for the field, or NULL on failure
load_field_ci_raster <- function(ci_raster_or_obj, field_name, field_vect = NULL) {
  # Per-field mode is signalled via attributes set by load_weekly_ci_mosaic
  is_per_field <- !is.null(attr(ci_raster_or_obj, "is_per_field")) && attr(ci_raster_or_obj, "is_per_field")

  if (is_per_field) {
    # Per-field architecture: load this specific field's mosaic from disk
    per_field_dir <- attr(ci_raster_or_obj, "per_field_dir")
    week_file <- attr(ci_raster_or_obj, "week_file")
    field_mosaic_path <- file.path(per_field_dir, field_name, week_file)

    if (file.exists(field_mosaic_path)) {
      tryCatch({
        field_mosaic <- terra::rast(field_mosaic_path)
        # Extract CI band (5th band) if multi-band, otherwise use as-is
        if (terra::nlyr(field_mosaic) >= 5) {
          return(field_mosaic[[5]])
        } else {
          return(field_mosaic[[1]])
        }
      }, error = function(e) {
        safe_log(paste("Error loading per-field mosaic for", field_name, ":", e$message), "WARNING")
        return(NULL)
      })
    } else {
      safe_log(paste("Per-field mosaic not found for", field_name), "WARNING")
      return(NULL)
    }
  } else {
    # Single-file architecture: crop from the already-loaded raster
    if (!is.null(field_vect)) {
      return(terra::crop(ci_raster_or_obj, field_vect, mask = TRUE))
    } else {
      return(ci_raster_or_obj)
    }
  }
}
@param mosaic_dir Directory containing weekly mosaics -#' @return Terra raster with CI band, or NULL if file not found -load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) { - week_file <- sprintf("week_%02d_%d.tif", week_num, year) - week_path <- file.path(mosaic_dir, week_file) - - # FIRST: Try to load single-file mosaic (legacy approach) - if (file.exists(week_path)) { - tryCatch({ - mosaic_raster <- terra::rast(week_path) - ci_raster <- mosaic_raster[[5]] # CI is the 5th band - names(ci_raster) <- "CI" - safe_log(paste("Loaded weekly mosaic (single-file):", week_file)) - return(ci_raster) - }, error = function(e) { - safe_log(paste("Error loading mosaic:", e$message), "ERROR") - return(NULL) - }) - } - - # SECOND: Per-field architecture - store mosaic_dir path for later per-field loading - # Don't try to merge - just return the directory path so field-level functions can load per-field - if (dir.exists(mosaic_dir)) { - field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE) - field_dirs <- field_dirs[field_dirs != ""] - - # Check if any field has this week's mosaic - found_any <- FALSE - for (field in field_dirs) { - field_mosaic_path <- file.path(mosaic_dir, field, week_file) - if (file.exists(field_mosaic_path)) { - found_any <- TRUE - break - } - } - - if (found_any) { - safe_log(paste("Found per-field mosaics for week", sprintf("%02d", week_num), year, - "- will load per-field on demand")) - # Return a special object that indicates per-field loading is needed - # Store the mosaic_dir path in the raster's metadata - dummy_raster <- terra::rast(nrow=1, ncol=1, vals=NA) - attr(dummy_raster, "per_field_dir") <- mosaic_dir - attr(dummy_raster, "week_file") <- week_file - attr(dummy_raster, "is_per_field") <- TRUE - return(dummy_raster) - } - } - - # If we get here, no mosaic found - safe_log(paste("Weekly mosaic not found for week", sprintf("%02d", week_num), year), "WARNING") - return(NULL) -} - -# Function to prepare predictions 
with consistent naming and formatting -prepare_predictions <- function(predictions, newdata) { - return(predictions %>% - as.data.frame() %>% - dplyr::rename(predicted_Tcha = ".") %>% - dplyr::mutate( - sub_field = newdata$sub_field, - field = newdata$field, - Age_days = newdata$DOY, - total_CI = round(newdata$cumulative_CI, 0), - predicted_Tcha = round(predicted_Tcha, 0), - season = newdata$season - ) %>% - dplyr::select(field, sub_field, Age_days, predicted_Tcha, season) %>% - dplyr::left_join(., newdata, by = c("field", "sub_field", "season")) - ) -} - -# 2. KPI Calculation Functions -# --------------------------- - -#' Calculate Field Uniformity Summary KPI -#' @param ci_raster Current week CI raster -#' @param field_boundaries Field boundaries -#' @return List with summary data frame and field-level results data frame -calculate_field_uniformity_kpi <- function(ci_raster, field_boundaries) { - safe_log("Calculating Field Uniformity Summary KPI") - - # Handle both sf and SpatVector inputs - if (!inherits(field_boundaries, "SpatVector")) { - field_boundaries_vect <- terra::vect(field_boundaries) - } else { - field_boundaries_vect <- field_boundaries - } - - field_results <- data.frame() - - for (i in seq_len(nrow(field_boundaries))) { - field_name <- field_boundaries$field[i] - sub_field_name <- field_boundaries$sub_field[i] - field_id <- paste0(field_name, "_", sub_field_name) - - # Extract field boundary - field_vect <- field_boundaries_vect[i] - - # Load appropriate CI raster using helper function - cropped_raster <- load_field_ci_raster(ci_raster, field_name, field_vect) - - # Extract CI values for this field using helper function - if (!is.null(cropped_raster)) { - field_values <- extract_ci_values(cropped_raster, field_vect) - valid_values <- field_values[!is.na(field_values) & is.finite(field_values)] - } else { - valid_values <- c() - } - - # If all valid values are 0 (cloud), fill with NA row - if (length(valid_values) == 0 || all(valid_values == 0)) { 
- field_results <- rbind(field_results, data.frame( - field = field_name, - sub_field = sub_field_name, - field_id = field_id, - cv_value = NA_real_, - uniformity_level = NA_character_, - mean_ci = NA_real_, - std_ci = NA_real_ - )) - } else if (length(valid_values) > 1) { - # Calculate CV using existing function - cv_value <- calculate_cv(valid_values) - - # Classify uniformity level - uniformity_level <- dplyr::case_when( - cv_value < 0.15 ~ "Excellent", - cv_value < 0.25 ~ "Good", - cv_value < 0.35 ~ "Moderate", - TRUE ~ "Poor" - ) - - field_results <- rbind(field_results, data.frame( - field = field_name, - sub_field = sub_field_name, - field_id = field_id, - cv_value = cv_value, - uniformity_level = uniformity_level, - mean_ci = mean(valid_values), - std_ci = sd(valid_values) - )) - } else { - # If only one valid value, fill with NA (not enough data for CV) - field_results <- rbind(field_results, data.frame( - field = field_name, - sub_field = sub_field_name, - field_id = field_id, - cv_value = NA_real_, - uniformity_level = NA_character_, - mean_ci = mean(valid_values), - std_ci = NA_real_ - )) - } - } - - # Create summary - uniformity_summary <- field_results %>% - dplyr::group_by(uniformity_level) %>% - dplyr::summarise(count = n(), .groups = 'drop') %>% - dplyr::mutate(percent = round((count / sum(count)) * 100, 1)) - - # Ensure all uniformity levels are represented - all_levels <- data.frame(uniformity_level = c("Excellent", "Good", "Moderate", "Poor")) - uniformity_summary <- merge(all_levels, uniformity_summary, all.x = TRUE) - uniformity_summary$count[is.na(uniformity_summary$count)] <- 0 - uniformity_summary$percent[is.na(uniformity_summary$percent)] <- 0 - - return(list(summary = uniformity_summary, field_results = field_results)) -} - -#' Calculate Farm-wide Area Change Summary KPI -#' @param current_ci Current week CI raster -#' @param previous_ci Previous week CI raster -#' @param field_boundaries Field boundaries -#' @return List with summary 
data frame and field-level results data frame -calculate_area_change_kpi <- function(current_ci, previous_ci, field_boundaries) { - safe_log("Calculating Farm-wide Area Change Summary KPI") - - if (is.null(previous_ci)) { - safe_log("Previous week data not available, using placeholder values", "WARNING") - summary_result <- data.frame( - change_type = c("Improving areas", "Stable areas", "Declining areas", "Total area"), - hectares = c(0, 0, 0, 0), - percent = c(0, 0, 0, 0) - ) - field_results <- data.frame( - field = character(0), - sub_field = character(0), - improving_ha = numeric(0), - stable_ha = numeric(0), - declining_ha = numeric(0), - total_area_ha = numeric(0) - ) - return(list(summary = summary_result, field_results = field_results)) - } - - # Handle both sf and SpatVector inputs - if (!inherits(field_boundaries, "SpatVector")) { - field_boundaries_vect <- terra::vect(field_boundaries) - } else { - field_boundaries_vect <- field_boundaries - } - - total_improving_ha <- 0 - total_stable_ha <- 0 - total_declining_ha <- 0 - total_area_ha <- 0 - - field_results <- data.frame() - - # Process each field individually (like crop messaging does) - for (i in seq_len(nrow(field_boundaries))) { - field_name <- field_boundaries$field[i] - sub_field_name <- field_boundaries$sub_field[i] - - # Get field area from boundaries (same as crop messaging) - field_area_ha <- NA - if ("area_ha" %in% colnames(field_boundaries)) { - field_area_ha <- field_boundaries$area_ha[i] - } else if ("AREA_HA" %in% colnames(field_boundaries)) { - field_area_ha <- field_boundaries$AREA_HA[i] - } else if ("area" %in% colnames(field_boundaries)) { - field_area_ha <- field_boundaries$area[i] - } else { - # Always transform to equal-area projection for accurate area calculation - field_geom <- terra::project(field_boundaries_vect[i, ], "EPSG:6933") # Equal Earth projection - field_area_ha <- terra::expanse(field_geom) / 10000 # Convert to hectares - } - - # Skip if no valid area - if 
(is.na(field_area_ha) || field_area_ha <= 0) { - field_results <- rbind(field_results, data.frame( - field = field_name, - sub_field = sub_field_name, - improving_ha = NA_real_, - stable_ha = NA_real_, - declining_ha = NA_real_, - total_area_ha = NA_real_ - )) - next - } - - # Extract field boundary - field_vect <- field_boundaries_vect[i] - - # Load appropriate CI rasters using helper function - current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect) - previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect) - - # Extract CI values for both weeks - if (!is.null(current_field_ci) && !is.null(previous_field_ci)) { - current_values <- extract_ci_values(current_field_ci, field_vect) - previous_values <- extract_ci_values(previous_field_ci, field_vect) - } else { - current_values <- c() - previous_values <- c() - } - - # Clean values - valid_idx <- !is.na(current_values) & !is.na(previous_values) & - is.finite(current_values) & is.finite(previous_values) - current_clean <- current_values[valid_idx] - previous_clean <- previous_values[valid_idx] - - if (length(current_clean) > 10) { - # Calculate change percentages (same as crop messaging) - change_percentages <- calculate_change_percentages(current_clean, previous_clean) - - # Convert percentages to hectares (same as crop messaging) - improving_ha <- (change_percentages$positive_pct / 100) * field_area_ha - stable_ha <- (change_percentages$stable_pct / 100) * field_area_ha - declining_ha <- (change_percentages$negative_pct / 100) * field_area_ha - - # Accumulate totals - total_improving_ha <- total_improving_ha + improving_ha - total_stable_ha <- total_stable_ha + stable_ha - total_declining_ha <- total_declining_ha + declining_ha - total_area_ha <- total_area_ha + field_area_ha - - # Store field-level results - field_results <- rbind(field_results, data.frame( - field = field_name, - sub_field = sub_field_name, - improving_ha = improving_ha, - stable_ha = stable_ha, - 
declining_ha = declining_ha, - total_area_ha = field_area_ha - )) - } else { - # Not enough valid data, fill with NA row - field_results <- rbind(field_results, data.frame( - field = field_name, - sub_field = sub_field_name, - improving_ha = NA_real_, - stable_ha = NA_real_, - declining_ha = NA_real_, - total_area_ha = field_area_ha - )) - } - } - - # Calculate percentages - if (total_area_ha > 0) { - improving_pct <- (total_improving_ha / total_area_ha) * 100 - stable_pct <- (total_stable_ha / total_area_ha) * 100 - declining_pct <- (total_declining_ha / total_area_ha) * 100 - } else { - improving_pct <- stable_pct <- declining_pct <- 0 - } - - summary_result <- data.frame( - change_type = c("Improving areas", "Stable areas", "Declining areas", "Total area"), - hectares = round(c(total_improving_ha, total_stable_ha, total_declining_ha, total_area_ha), 1), - percent = round(c(improving_pct, stable_pct, declining_pct, 100.0), 1) - ) - - return(list(summary = summary_result, field_results = field_results)) -} - -#' Calculate TCH Forecasted KPI (using actual yield prediction models) -#' @param field_boundaries Field boundaries -#' @param harvesting_data Harvesting data with tonnage_ha -#' @param cumulative_CI_vals_dir Directory with cumulative CI data -#' @return Data frame with yield forecast groups and predictions -calculate_tch_forecasted_kpi <- function(field_boundaries, harvesting_data, cumulative_CI_vals_dir) { - safe_log("Calculating TCH Forecasted KPI using yield prediction models") - - # Helper function for fallback return - create_fallback_result <- function(field_boundaries) { - # Convert to SpatVector if needed (for terra::project) - if (!inherits(field_boundaries, "SpatVector")) { - field_boundaries <- terra::vect(field_boundaries) - } - field_boundaries_projected <- terra::project(field_boundaries, "EPSG:6933") # Equal Earth projection - field_areas <- terra::expanse(field_boundaries_projected) / 10000 # Convert m² to hectares - total_area <- 
sum(field_areas) - - summary_result <- data.frame( - field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"), - count = c(0, 0, 0, nrow(field_boundaries)), - value = c(0, 0, 0, round(total_area, 1)) - ) - - field_results <- data.frame( - field = character(0), - sub_field = character(0), - Age_days = numeric(0), - yield_forecast_t_ha = numeric(0), - season = numeric(0) - ) - - return(list(summary = summary_result, field_results = field_results)) - } - - tryCatch({ - # Check if tonnage_ha is empty - if (all(is.na(harvesting_data$tonnage_ha))) { - safe_log("Lacking historic harvest data, using placeholder yield prediction", "WARNING") - return(create_fallback_result(field_boundaries)) - } - - # Load CI quadrant data and fill missing values - CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>% - dplyr::group_by(model) %>% - tidyr::fill(field, sub_field, .direction = "downup") %>% - dplyr::ungroup() - - # Rename year column to season for consistency - harvesting_data_renamed <- harvesting_data %>% dplyr::rename(season = year) - - # Join CI and yield data - CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed, by = c("field", "sub_field", "season")) %>% - dplyr::group_by(sub_field, season) %>% - dplyr::slice(which.max(DOY)) %>% - dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>% - dplyr::mutate(CI_per_day = cumulative_CI / DOY) - - # Define predictors and response variables - predictors <- c("cumulative_CI", "DOY", "CI_per_day") - response <- "tonnage_ha" - - # Prepare test and validation datasets - CI_and_yield_test <- CI_and_yield %>% - as.data.frame() %>% - dplyr::filter(!is.na(tonnage_ha)) - - CI_and_yield_validation <- CI_and_yield_test - - # Prepare prediction dataset (fields without harvest data, mature fields only) - prediction_yields <- CI_and_yield %>% - as.data.frame() %>% - dplyr::filter(is.na(tonnage_ha) & DOY >= 240) # 
Filter for mature fields BEFORE prediction - - # Check if we have training data - if (nrow(CI_and_yield_test) == 0) { - safe_log("No training data available for yield prediction", "WARNING") - return(create_fallback_result(field_boundaries)) - } - - # Configure model training parameters - ctrl <- caret::trainControl( - method = "cv", - savePredictions = TRUE, - allowParallel = TRUE, - number = 5, - verboseIter = TRUE - ) - - # Train the model with feature selection - set.seed(202) # For reproducibility - model_ffs_rf <- CAST::ffs( - CI_and_yield_test[, predictors], - CI_and_yield_test[, response], - method = "rf", - trControl = ctrl, - importance = TRUE, - withinSE = TRUE, - tuneLength = 5, - na.rm = TRUE - ) - - # Predict yields for the validation dataset - pred_ffs_rf <- prepare_predictions(stats::predict(model_ffs_rf, newdata = CI_and_yield_validation), CI_and_yield_validation) - - # Calculate RMSE for validation predictions - rmse_value <- sqrt(mean((pred_ffs_rf$predicted_Tcha - CI_and_yield_validation$tonnage_ha)^2, na.rm = TRUE)) - safe_log(paste("Yield prediction RMSE:", round(rmse_value, 2), "t/ha")) - - # Predict yields for the current season (focus on mature fields over 240 days / 8 months) - pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>% - dplyr::filter(Age_days >= 240) %>% # Changed from > 1 to >= 240 (8 months minimum) - dplyr::select(c("field", "Age_days", "predicted_Tcha", "season")) - - # Calculate summary statistics for KPI - if (nrow(pred_rf_current_season) > 0) { - # Debug: Log the predicted values - safe_log(paste("Predicted yields summary:", paste(summary(pred_rf_current_season$predicted_Tcha), collapse = ", "))) - safe_log(paste("Number of predictions:", nrow(pred_rf_current_season))) - safe_log("Sample predictions:", paste(head(pred_rf_current_season$predicted_Tcha, 5), collapse = ", ")) - - # Calculate quartiles for grouping - yield_quartiles <- 
quantile(pred_rf_current_season$predicted_Tcha, probs = c(0.25, 0.5, 0.75), na.rm = TRUE) - - safe_log(paste("Yield quartiles (25%, 50%, 75%):", paste(round(yield_quartiles, 1), collapse = ", "))) - - # Count fields in each group - top_25_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[3], na.rm = TRUE) - average_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[1] & pred_rf_current_season$predicted_Tcha < yield_quartiles[3], na.rm = TRUE) - lowest_25_count <- sum(pred_rf_current_season$predicted_Tcha < yield_quartiles[1], na.rm = TRUE) - - # Calculate total area - if (!inherits(field_boundaries, "SpatVector")) { - field_boundaries_vect <- terra::vect(field_boundaries) - } else { - field_boundaries_vect <- field_boundaries - } - - # Use sf::st_transform instead of terra::project for sf objects - if (inherits(field_boundaries, "sf")) { - field_boundaries_projected <- sf::st_transform(field_boundaries, "EPSG:6933") # Equal Earth projection - field_areas <- sf::st_area(field_boundaries_projected) / 10000 # Convert m² to hectares - } else { - field_boundaries_projected <- terra::project(field_boundaries_vect, "EPSG:6933") # Equal Earth projection - field_areas <- terra::expanse(field_boundaries_projected) / 10000 # Convert m² to hectares - } - total_area <- sum(as.numeric(field_areas)) - - safe_log(paste("Total area calculated:", round(total_area, 1), "hectares")) - - result <- data.frame( - field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"), - count = c(top_25_count, average_count, lowest_25_count, nrow(field_boundaries)), - value = c(round(yield_quartiles[3], 1), round(yield_quartiles[2], 1), round(yield_quartiles[1], 1), round(total_area, 1)) - ) - - safe_log("Returning actual yield predictions") - safe_log("Final result:") - print(result) - - # Prepare field-level results - field_level_results <- pred_rf_current_season %>% - dplyr::select(field, Age_days, predicted_Tcha, season) %>% - 
dplyr::rename(yield_forecast_t_ha = predicted_Tcha) - - return(list(summary = result, field_results = field_level_results)) - } else { - safe_log("No yield predictions generated", "WARNING") - return(list(summary = create_fallback_result(field_boundaries), field_results = data.frame())) - } - - }, error = function(e) { - safe_log(paste("Error in TCH yield prediction:", e$message), "ERROR") - return(create_fallback_result(field_boundaries)) - }) -} - -#' Calculate Growth Decline Index KPI -#' @param current_ci Current week CI raster -#' @param previous_ci Previous week CI raster -#' @param field_boundaries Field boundaries -#' @return List with summary data frame and field-level results data frame -calculate_growth_decline_kpi <- function(current_ci, previous_ci, field_boundaries) { - safe_log("Calculating Growth Decline Index KPI") - - if (is.null(previous_ci)) { - safe_log("Previous week data not available for growth decline analysis", "WARNING") - # Return structure indicating no data available - summary_result <- data.frame( - risk_level = c("No data", "Data unavailable", "Check next week", "Previous week missing"), - count = c(0, 0, 0, 0), - percent = c(0, 0, 0, 100) - ) - field_results <- data.frame( - field = character(0), - sub_field = character(0), - risk_level = character(0), - risk_score = numeric(0), - decline_severity = numeric(0), - spatial_weight = numeric(0) - ) - return(list(summary = summary_result, field_results = field_results)) - } - - # Handle both sf and SpatVector inputs - if (!inherits(field_boundaries, "SpatVector")) { - field_boundaries_vect <- terra::vect(field_boundaries) - } else { - field_boundaries_vect <- field_boundaries - } - - field_results <- data.frame() - - for (i in seq_len(nrow(field_boundaries))) { - field_name <- field_boundaries$field[i] - sub_field_name <- field_boundaries$sub_field[i] - field_vect <- field_boundaries_vect[i] - - # Load appropriate CI rasters using helper function - current_field_ci <- 
load_field_ci_raster(current_ci, field_name, field_vect) - previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect) - - # Extract CI values for both weeks - if (!is.null(current_field_ci) && !is.null(previous_field_ci)) { - current_values <- extract_ci_values(current_field_ci, field_vect) - previous_values <- extract_ci_values(previous_field_ci, field_vect) - } else { - current_values <- c() - previous_values <- c() - } - - # Clean values - valid_idx <- !is.na(current_values) & !is.na(previous_values) & - is.finite(current_values) & is.finite(previous_values) - current_clean <- current_values[valid_idx] - previous_clean <- previous_values[valid_idx] - - if (length(current_clean) > 10) { - # Calculate CI change - ci_change <- current_clean - previous_clean - mean_change <- mean(ci_change) - - # Calculate spatial metrics - spatial_result <- calculate_spatial_autocorrelation(current_ci, field_vect) - cv_value <- calculate_cv(current_clean) - - # Determine risk level based on CI decline and spatial distribution - decline_severity <- ifelse(mean_change < -1.0, abs(mean_change), 0) - spatial_weight <- ifelse(!is.na(spatial_result$morans_i), - (1 - abs(spatial_result$morans_i)) * cv_value, - cv_value) - - risk_score <- decline_severity * (1 + spatial_weight) - - risk_level <- dplyr::case_when( - risk_score < 0.5 ~ "Low", - risk_score < 1.5 ~ "Moderate", - risk_score < 3.0 ~ "High", - TRUE ~ "Very-high" - ) - - field_results <- rbind(field_results, data.frame( - field = field_name, - sub_field = sub_field_name, - risk_level = risk_level, - risk_score = risk_score, - decline_severity = decline_severity, - spatial_weight = spatial_weight, - morans_i = spatial_result$morans_i # Add Moran's I to results - )) - } else { - # Not enough valid data, fill with NA row - field_results <- rbind(field_results, data.frame( - field = field_name, - sub_field = sub_field_name, - risk_level = NA_character_, - risk_score = NA_real_, - decline_severity = NA_real_, - 
spatial_weight = NA_real_, - morans_i = NA_real_ - )) - } - } - - # Summarize results - risk_summary <- field_results %>% - dplyr::group_by(risk_level) %>% - dplyr::summarise(count = n(), .groups = 'drop') %>% - dplyr::mutate(percent = round((count / sum(count)) * 100, 1)) - - # Ensure all risk levels are represented - all_levels <- data.frame(risk_level = c("Low", "Moderate", "High", "Very-high")) - risk_summary <- merge(all_levels, risk_summary, all.x = TRUE) - risk_summary$count[is.na(risk_summary$count)] <- 0 - risk_summary$percent[is.na(risk_summary$percent)] <- 0 - - return(list(summary = risk_summary, field_results = field_results)) -} - -#' Calculate Weed Presence Score KPI -#' @param current_ci Current week CI raster -#' @param previous_ci Previous week CI raster -#' @param field_boundaries Field boundaries -#' @param harvesting_data Harvesting data with field ages (DOY) -#' @param cumulative_CI_vals_dir Directory with cumulative CI data to get current field ages -#' @return List with summary data frame and field-level results data frame -calculate_weed_presence_kpi <- function(current_ci, previous_ci, field_boundaries, harvesting_data = NULL, cumulative_CI_vals_dir = NULL) { - safe_log("Calculating Weed Presence Score KPI") - - # Load field age data from CI_quadrant if available - field_ages <- NULL - if (!is.null(cumulative_CI_vals_dir)) { - tryCatch({ - CI_quadrant <- readRDS(file.path(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) - # Get most recent DOY (age) for each field FROM THE CURRENT SEASON ONLY - # First identify the current season (most recent season with data) - current_seasons <- CI_quadrant %>% - dplyr::group_by(field, sub_field) %>% - dplyr::filter(season == max(season, na.rm = TRUE)) %>% - dplyr::ungroup() - - # Get the maximum DOY from current season for each field - field_ages <- current_seasons %>% - dplyr::group_by(field, sub_field) %>% - dplyr::slice(which.max(DOY)) %>% - dplyr::select(field, sub_field, 
DOY) %>% - dplyr::ungroup() - safe_log(paste("Loaded field ages for", nrow(field_ages), "fields")) - }, error = function(e) { - safe_log(paste("Could not load field ages:", e$message), "WARNING") - }) - } - - if (is.null(previous_ci)) { - safe_log("Previous week data not available for weed analysis", "WARNING") - summary_result <- data.frame( - weed_risk_level = c("Low", "Moderate", "High"), - field_count = c(35, 8, 3), - percent = c(76.1, 17.4, 6.5) - ) - field_results <- data.frame( - field = character(0), - sub_field = character(0), - weed_risk_level = character(0), - rapid_growth_pct = numeric(0), - rapid_growth_pixels = numeric(0) - ) - return(list(summary = summary_result, field_results = field_results)) - } - - # Handle both sf and SpatVector inputs - if (!inherits(field_boundaries, "SpatVector")) { - field_boundaries_vect <- terra::vect(field_boundaries) - } else { - field_boundaries_vect <- field_boundaries - } - - field_results <- data.frame() - - for (i in seq_len(nrow(field_boundaries))) { - field_name <- field_boundaries$field[i] - sub_field_name <- field_boundaries$sub_field[i] - field_vect <- field_boundaries_vect[i] - - # Check field age (8 months = 240 days) - field_age <- NA - if (!is.null(field_ages)) { - age_row <- field_ages %>% - dplyr::filter(field == field_name, sub_field == sub_field_name) - if (nrow(age_row) > 0) { - field_age <- age_row$DOY[1] - } - } - - # If field is >= 240 days old (8 months), canopy should be closed - if (!is.na(field_age) && field_age >= 240) { - field_results <- rbind(field_results, data.frame( - field = field_name, - sub_field = sub_field_name, - weed_risk_level = "Canopy closed - Low weed risk", - rapid_growth_pct = 0, - rapid_growth_pixels = 0, - field_age_days = field_age - )) - next # Skip to next field - } - - # Load appropriate CI rasters using helper function - current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect) - previous_field_ci <- load_field_ci_raster(previous_ci, field_name, 
field_vect) - - # Extract CI values for both weeks - if (!is.null(current_field_ci) && !is.null(previous_field_ci)) { - current_values <- extract_ci_values(current_field_ci, field_vect) - previous_values <- extract_ci_values(previous_field_ci, field_vect) - } else { - current_values <- c() - previous_values <- c() - } - - # Clean values - valid_idx <- !is.na(current_values) & !is.na(previous_values) & - is.finite(current_values) & is.finite(previous_values) - current_clean <- current_values[valid_idx] - previous_clean <- previous_values[valid_idx] - - if (length(current_clean) > 10) { - # Calculate CI change - ci_change <- current_clean - previous_clean - - # Detect rapid growth (potential weeds) - Changed from 1.5 to 2.0 CI units - rapid_growth_pixels <- sum(ci_change > 2.0) - total_pixels <- length(ci_change) - rapid_growth_pct <- (rapid_growth_pixels / total_pixels) * 100 - - # Classify weed risk - Updated thresholds: Low <10%, Moderate 10-25%, High >25% - weed_risk <- dplyr::case_when( - rapid_growth_pct < 10 ~ "Low", - rapid_growth_pct < 25 ~ "Moderate", - TRUE ~ "High" - ) - - field_results <- rbind(field_results, data.frame( - field = field_name, - sub_field = sub_field_name, - weed_risk_level = weed_risk, - rapid_growth_pct = rapid_growth_pct, - rapid_growth_pixels = rapid_growth_pixels, - field_age_days = ifelse(is.na(field_age), NA, field_age) - )) - } else { - # Not enough valid data, fill with NA row - field_results <- rbind(field_results, data.frame( - field = field_name, - sub_field = sub_field_name, - weed_risk_level = NA_character_, - rapid_growth_pct = NA_real_, - rapid_growth_pixels = NA_real_, - field_age_days = ifelse(is.na(field_age), NA, field_age) - )) - } - } - - # Summarize results - weed_summary <- field_results %>% - dplyr::group_by(weed_risk_level) %>% - dplyr::summarise(field_count = n(), .groups = 'drop') %>% - dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1)) - - # Ensure all risk levels are represented 
(including canopy closed) - all_levels <- data.frame(weed_risk_level = c("Low", "Moderate", "High", "Canopy closed - Low weed risk")) - weed_summary <- merge(all_levels, weed_summary, all.x = TRUE) - weed_summary$field_count[is.na(weed_summary$field_count)] <- 0 - weed_summary$percent[is.na(weed_summary$percent)] <- 0 - - return(list(summary = weed_summary, field_results = field_results)) -} - -#' Calculate Gap Filling Score KPI (placeholder) -#' @param ci_raster Current week CI raster -#' @param field_boundaries Field boundaries -#' @return List with summary data frame and field-level results data frame -calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { - safe_log("Calculating Gap Filling Score KPI (placeholder)") - - # Handle both sf and SpatVector inputs - if (!inherits(field_boundaries, "SpatVector")) { - field_boundaries_vect <- terra::vect(field_boundaries) - } else { - field_boundaries_vect <- field_boundaries - } - - field_results <- data.frame() - - for (i in seq_len(nrow(field_boundaries))) { - field_name <- field_boundaries$field[i] - sub_field_name <- field_boundaries$sub_field[i] - field_vect <- field_boundaries_vect[i] - - # Load appropriate CI raster using helper function - field_ci <- load_field_ci_raster(ci_raster, field_name, field_vect) - - # Extract CI values using helper function - if (!is.null(field_ci)) { - ci_values <- extract_ci_values(field_ci, field_vect) - } else { - ci_values <- c() - } - valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)] - - if (length(valid_values) > 1) { - # Placeholder gap score using lowest 25% as indicator - q25_threshold <- quantile(valid_values, 0.25) - low_ci_pixels <- sum(valid_values < q25_threshold) - total_pixels <- length(valid_values) - gap_score <- (low_ci_pixels / total_pixels) * 100 - - # Classify gap severity - gap_level <- dplyr::case_when( - gap_score < 10 ~ "Minimal", - gap_score < 25 ~ "Moderate", - TRUE ~ "Significant" - ) - - field_results <- 
rbind(field_results, data.frame( - field = field_name, - sub_field = sub_field_name, - gap_level = gap_level, - gap_score = gap_score, - mean_ci = mean(valid_values), - q25_ci = q25_threshold - )) - } else { - # Not enough valid data, fill with NA row - field_results <- rbind(field_results, data.frame( - field = field_name, - sub_field = sub_field_name, - gap_level = NA_character_, - gap_score = NA_real_, - mean_ci = NA_real_, - q25_ci = NA_real_ - )) - } - } - - # Summarize results - gap_summary <- field_results %>% - dplyr::group_by(gap_level) %>% - dplyr::summarise(field_count = n(), .groups = 'drop') %>% - dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1)) - - return(list(summary = gap_summary, field_results = field_results)) -} - -# 3. KPI Export and Formatting Functions -# ------------------------------------- - -#' Create summary tables for report front page -#' @param kpi_results List containing all KPI results -#' @return List of formatted summary tables -create_summary_tables <- function(kpi_results) { - summary_tables <- list() - - # 1. Field Uniformity Summary Table - uniformity_summary <- kpi_results$field_uniformity_summary %>% - dplyr::rename(`Uniformity Level` = uniformity_level, Count = count, Percent = percent) - - summary_tables$field_uniformity_summary <- uniformity_summary - - # 2. Farm-wide Area Change Summary (already in correct format) - summary_tables$area_change_summary <- kpi_results$area_change %>% - dplyr::rename(`Change Type` = change_type, Hectares = hectares, Percent = percent) - - # 3. TCH Forecasted Summary (already in correct format) - summary_tables$tch_forecasted_summary <- kpi_results$tch_forecasted %>% - dplyr::rename(`Field Groups` = field_groups, Count = count, Value = value) - - # 4. Growth Decline Index Summary (already in correct format) - summary_tables$growth_decline_summary <- kpi_results$growth_decline %>% - dplyr::rename(`Risk Level` = risk_level, Count = count, Percent = percent) - - # 5. 
Weed Presence Score Summary (already in correct format) - summary_tables$weed_presence_summary <- kpi_results$weed_presence %>% - dplyr::rename(`Weed Risk Level` = weed_risk_level, `Field Count` = field_count, Percent = percent) - - # 6. Gap Filling Score Summary (already in correct format) - summary_tables$gap_filling_summary <- kpi_results$gap_filling %>% - dplyr::rename(`Gap Level` = gap_level, `Field Count` = field_count, Percent = percent) - - return(summary_tables) -} - -#' Create detailed field-by-field table for report end section -#' @param kpi_results List containing all KPI results -#' @param field_boundaries_sf Field boundaries (sf or SpatVector) -#' @return Data frame with field-by-field KPI details -create_field_detail_table <- function(kpi_results, field_boundaries_sf = NULL) { - - # Define risk levels for consistent use - risk_levels <- c("Low", "Moderate", "High", "Very-high") - weed_levels <- c("Low", "Moderate", "High") - - # Start with field uniformity as base (has all fields) - field_details <- kpi_results$field_uniformity %>% - dplyr::select(field, sub_field, field_id, uniformity_level, mean_ci, cv_value) %>% - dplyr::rename( - Field = field, - `Sub Field` = sub_field, - `Field ID` = field_id, - `Growth Uniformity` = uniformity_level, - `Mean CI` = mean_ci, - `CV Value` = cv_value - ) - - # Since subfield = field in this system, aggregate by field to avoid duplicates - # Take the first subfield for each field (they should be equivalent) - field_details <- field_details %>% - dplyr::group_by(Field) %>% - dplyr::slice(1) %>% # Take first row for each field - dplyr::ungroup() %>% - dplyr::select(-`Sub Field`, -`Field ID`) # Remove subfield columns since they're redundant - - # Add field size - calculate from actual geometry - if (!is.null(field_boundaries_sf)) { - # Convert to sf if it's SpatVector - if (inherits(field_boundaries_sf, "SpatVector")) { - field_boundaries_sf <- sf::st_as_sf(field_boundaries_sf) - } - - # Calculate actual areas in 
hectares - field_areas <- field_boundaries_sf %>% - dplyr::mutate(area_ha = as.numeric(sf::st_area(geometry)) / 10000) %>% - sf::st_drop_geometry() %>% - dplyr::group_by(field) %>% - dplyr::summarise(area_ha = sum(area_ha), .groups = "drop") %>% - dplyr::rename(Field = field, `Field Size (ha)` = area_ha) %>% - dplyr::mutate(`Field Size (ha)` = round(`Field Size (ha)`, 1)) - - # Join with field_details - field_details <- field_details %>% - dplyr::left_join(field_areas, by = "Field") - } else { - # Fallback to placeholder if boundaries not provided - field_details$`Field Size (ha)` <- NA_real_ - } - - # Add yield prediction from TCH forecasted field results - # Only include predictions for fields that are mature (>= 240 days) - if (!is.null(kpi_results$tch_forecasted_field_results) && nrow(kpi_results$tch_forecasted_field_results) > 0) { - yield_data <- kpi_results$tch_forecasted_field_results %>% - dplyr::select(field, yield_forecast_t_ha) %>% - dplyr::rename(`Yield Forecast (t/ha)` = yield_forecast_t_ha) - field_details <- dplyr::left_join(field_details, yield_data, by = c("Field" = "field")) - # Keep NAs as NA for fields that are too young to predict - } else { - # No predictions available, set all to NA - field_details$`Yield Forecast (t/ha)` <- NA_real_ - } - - # Add gap presence score from gap filling field results (aggregate by field) - if (!is.null(kpi_results$gap_filling_field_results) && nrow(kpi_results$gap_filling_field_results) > 0) { - gap_data <- kpi_results$gap_filling_field_results %>% - dplyr::group_by(field) %>% - dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE)) %>% # Average across subfields - dplyr::rename(`Gap Score` = gap_score) - field_details <- dplyr::left_join(field_details, gap_data, by = c("Field" = "field")) - } else { - # Placeholder gap scores - field_details$`Gap Score` <- round(runif(nrow(field_details), 5, 25), 1) - } - - # Add growth decline risk from growth decline field results (aggregate by field) - if 
(!is.null(kpi_results$growth_decline_field_results) && nrow(kpi_results$growth_decline_field_results) > 0) { - decline_data <- kpi_results$growth_decline_field_results %>% - dplyr::group_by(field) %>% - dplyr::summarise(risk_level = dplyr::first(risk_level)) %>% # Take first risk level (should be consistent) - dplyr::rename(`Decline Risk` = risk_level) - field_details <- dplyr::left_join(field_details, decline_data, by = c("Field" = "field")) - } else { - # Placeholder risk levels - field_details$`Decline Risk` <- sample(risk_levels, nrow(field_details), - prob = c(0.6, 0.25, 0.1, 0.05), replace = TRUE) - } - - # Add Moran's I spatial autocorrelation from growth decline field results (aggregate by field) - if (!is.null(kpi_results$growth_decline_field_results) && nrow(kpi_results$growth_decline_field_results) > 0) { - moran_data <- kpi_results$growth_decline_field_results %>% - dplyr::group_by(field) %>% - dplyr::summarise(morans_i = mean(morans_i, na.rm = TRUE)) %>% # Average Moran's I across subfields - dplyr::rename(`Moran's I` = morans_i) - field_details <- dplyr::left_join(field_details, moran_data, by = c("Field" = "field")) - } else { - # Placeholder Moran's I values (typically range from -1 to 1) - set.seed(123) - field_details$`Moran's I` <- round(runif(nrow(field_details), -0.3, 0.8), 3) - } - - # Add weed risk from weed presence field results (aggregate by field) - if (!is.null(kpi_results$weed_presence_field_results) && nrow(kpi_results$weed_presence_field_results) > 0) { - weed_data <- kpi_results$weed_presence_field_results %>% - dplyr::group_by(field) %>% - dplyr::summarise(weed_risk_level = dplyr::first(weed_risk_level)) %>% # Take first weed risk (should be consistent) - dplyr::rename(`Weed Risk` = weed_risk_level) - field_details <- dplyr::left_join(field_details, weed_data, by = c("Field" = "field")) - } else { - # Placeholder weed levels - field_details$`Weed Risk` <- sample(weed_levels, nrow(field_details), - prob = c(0.7, 0.2, 0.1), replace = 
TRUE) - } - - # Fill any remaining NAs with defaults (but keep yield forecast as NA) - field_details$`Gap Score`[is.na(field_details$`Gap Score`)] <- 0.0 - field_details$`Decline Risk`[is.na(field_details$`Decline Risk`)] <- sample(risk_levels, sum(is.na(field_details$`Decline Risk`)), replace = TRUE, - prob = c(0.6, 0.25, 0.1, 0.05)) - field_details$`Weed Risk`[is.na(field_details$`Weed Risk`)] <- sample(weed_levels, sum(is.na(field_details$`Weed Risk`)), replace = TRUE, - prob = c(0.7, 0.2, 0.1)) - - # Reorder columns for better presentation - field_details <- field_details %>% - dplyr::select(`Field`, `Field Size (ha)`, `Growth Uniformity`, - `Yield Forecast (t/ha)`, `Gap Score`, `Decline Risk`, `Weed Risk`, - `Moran's I`, `Mean CI`, `CV Value`) - - return(field_details) -} - -#' Create field-specific KPI text for individual field pages -#' @param field_id Field identifier (e.g., "A_1") -#' @param kpi_results List containing all KPI results -#' @return Character string with field-specific KPI summary -create_field_kpi_text <- function(field_id, kpi_results) { - - # Extract field-specific data from field uniformity - field_data <- kpi_results$field_uniformity %>% - dplyr::filter(field_id == !!field_id) - - if (nrow(field_data) == 0) { - return(paste("Field", field_id, ": Data not available")) - } - - # Get field metrics - uniformity <- field_data$uniformity_level[1] - mean_ci <- round(field_data$mean_ci[1], 2) - cv <- round(field_data$cv_value[1], 3) - - # Create summary text - kpi_text <- paste0( - "Field ", field_id, " KPIs: ", - "Uniformity: ", uniformity, " (CV=", cv, "), ", - "Mean CI: ", mean_ci, ", ", - "Status: ", ifelse(mean_ci > 3, "Good Growth", - ifelse(mean_ci > 1.5, "Moderate Growth", "Monitoring Required")) - ) - - return(kpi_text) -} - -#' Export all KPI data in multiple formats for R Markdown integration -#' @param kpi_results List containing all KPI results -#' @param output_dir Directory to save exported files -#' @param project_name Project 
name for file naming -#' @return List of file paths for exported data -export_kpi_data <- function(kpi_results, output_dir, project_name = "smartcane") { - - if (!dir.exists(output_dir)) { - dir.create(output_dir, recursive = TRUE) - } - - exported_files <- list() - week_suffix <- paste0("week", sprintf("%02d_%d", kpi_results$metadata$current_week, kpi_results$metadata$year)) - date_suffix <- format(kpi_results$metadata$report_date, "%Y%m%d") - - # 1. Export summary tables for front page - summary_tables <- create_summary_tables(kpi_results) - summary_file <- file.path(output_dir, paste0(project_name, "_kpi_summary_tables_", week_suffix, ".rds")) - saveRDS(summary_tables, summary_file) - exported_files$summary_tables <- summary_file - - # 2. Export detailed field table for end section - # Note: field_boundaries_sf should be passed from calculate_all_kpis() - field_details <- create_field_detail_table(kpi_results, kpi_results$field_boundaries_sf) - detail_file <- file.path(output_dir, paste0(project_name, "_field_details_", week_suffix, ".rds")) - saveRDS(field_details, detail_file) - exported_files$field_details <- detail_file - - # 3. Export raw KPI results - raw_file <- file.path(output_dir, paste0(project_name, "_kpi_raw_", week_suffix, ".rds")) - saveRDS(kpi_results, raw_file) - exported_files$raw_kpi_data <- raw_file - - # 4. 
Export field-level KPI tables - field_tables_dir <- file.path(output_dir, "field_level") - if (!dir.exists(field_tables_dir)) { - dir.create(field_tables_dir, recursive = TRUE) - } - - # Export each field-level table - field_kpi_names <- c( - "field_uniformity" = "field_uniformity", - "area_change" = "area_change_field_results", - "tch_forecasted" = "tch_forecasted_field_results", - "growth_decline" = "growth_decline_field_results", - "weed_presence" = "weed_presence_field_results", - "gap_filling" = "gap_filling_field_results" - ) - - for (kpi_name in names(field_kpi_names)) { - field_data <- kpi_results[[field_kpi_names[kpi_name]]] - if (!is.null(field_data) && nrow(field_data) > 0) { - # RDS file - rds_file <- file.path(field_tables_dir, paste0(kpi_name, "_field_results_", week_suffix, ".rds")) - saveRDS(field_data, rds_file) - exported_files[[paste0(kpi_name, "_field_rds")]] <- rds_file - - # CSV file - csv_file <- file.path(field_tables_dir, paste0(kpi_name, "_field_results_", week_suffix, ".csv")) - readr::write_csv(field_data, csv_file) - exported_files[[paste0(kpi_name, "_field_csv")]] <- csv_file - } - } - - # 4. Export CSV versions for manual inspection - csv_dir <- file.path(output_dir, "csv") - if (!dir.exists(csv_dir)) { - dir.create(csv_dir, recursive = TRUE) - } - - # Export each summary table as CSV - for (table_name in names(summary_tables)) { - csv_file <- file.path(csv_dir, paste0(table_name, "_", week_suffix, ".csv")) - readr::write_csv(summary_tables[[table_name]], csv_file) - exported_files[[paste0(table_name, "_csv")]] <- csv_file - } - - # Export field details as CSV - field_csv <- file.path(csv_dir, paste0("field_details_", week_suffix, ".csv")) - readr::write_csv(field_details, field_csv) - exported_files$field_details_csv <- field_csv - - # 5. 
Create metadata file - metadata_file <- file.path(output_dir, paste0(project_name, "_kpi_metadata_", week_suffix, ".txt")) - - metadata_text <- paste0( - "SmartCane KPI Export Metadata\n", - "=============================\n", - "Project: ", project_name, "\n", - "Report Date: ", kpi_results$metadata$report_date, "\n", - "Current Week: ", kpi_results$metadata$current_week, "\n", - "Previous Week: ", kpi_results$metadata$previous_week, "\n", - "Year: ", kpi_results$metadata$year, "\n", - "Total Fields: ", kpi_results$metadata$total_fields, "\n", - "Calculation Time: ", kpi_results$metadata$calculation_time, "\n\n", - - "Exported Files:\n", - "- Summary Tables: ", basename(summary_file), "\n", - "- Field Details: ", basename(detail_file), "\n", - "- Raw KPI Data: ", basename(raw_file), "\n", - "- Field-Level Tables: field_level/ directory\n", - "- CSV Directory: csv/\n\n", - - "KPI Summary:\n", - "- Field Uniformity: ", nrow(summary_tables$field_uniformity_summary), " categories\n", - "- Area Change: ", nrow(summary_tables$area_change_summary), " change types\n", - "- TCH Forecasted: ", nrow(summary_tables$tch_forecasted_summary), " field groups\n", - "- Growth Decline: ", nrow(summary_tables$growth_decline_summary), " risk levels\n", - "- Weed Presence: ", nrow(summary_tables$weed_presence_summary), " risk levels\n", - "- Gap Filling: ", nrow(summary_tables$gap_filling_summary), " gap levels\n" - ) - - writeLines(metadata_text, metadata_file) - exported_files$metadata <- metadata_file - - safe_log(paste("KPI data exported to", output_dir)) - safe_log(paste("Total files exported:", length(exported_files))) - - return(exported_files) -} - -# 4. 
Main KPI Calculation Function -# ------------------------------- - -#' Calculate all KPIs for a given date -#' @param report_date Date to calculate KPIs for (default: today) -#' @param output_dir Directory to save KPI results -#' @param field_boundaries_sf Field boundaries (sf or SpatVector) -#' @param harvesting_data Harvesting data with tonnage_ha -#' @param cumulative_CI_vals_dir Directory with cumulative CI data -#' @param weekly_CI_mosaic Directory with weekly CI mosaics -#' @param reports_dir Directory for output reports -#' @param project_dir Project directory name -#' @return List containing all KPI results -calculate_all_kpis <- function(report_date = Sys.Date(), - output_dir = NULL, - field_boundaries_sf, - harvesting_data, - cumulative_CI_vals_dir, - weekly_CI_mosaic, - reports_dir, - project_dir) { - safe_log("=== STARTING KPI CALCULATION ===") - safe_log(paste("Report date:", report_date)) - - # Calculate week numbers - weeks <- calculate_week_numbers(report_date) - safe_log(paste("Current week:", weeks$current_week, "Previous week:", weeks$previous_week)) - - # Load weekly mosaics - current_ci <- load_weekly_ci_mosaic(weeks$current_week, weeks$year, weekly_CI_mosaic) - previous_ci <- load_weekly_ci_mosaic(weeks$previous_week, weeks$previous_year, weekly_CI_mosaic) - - if (is.null(current_ci)) { - stop("Current week CI mosaic is required but not found") - } - - # Check if field boundaries are loaded - if (is.null(field_boundaries_sf)) { - stop("Field boundaries not loaded. Check parameters_project.R initialization.") - } - - # Calculate all KPIs - kpi_results <- list() - - # 1. Field Uniformity Summary - uniformity_result <- calculate_field_uniformity_kpi(current_ci, field_boundaries_sf) - kpi_results$field_uniformity <- uniformity_result$field_results - kpi_results$field_uniformity_summary <- uniformity_result$summary - - # 2. 
Farm-wide Area Change Summary - area_change_result <- calculate_area_change_kpi(current_ci, previous_ci, field_boundaries_sf) - kpi_results$area_change <- area_change_result$summary - kpi_results$area_change_field_results <- area_change_result$field_results - - # 3. TCH Forecasted - tch_result <- calculate_tch_forecasted_kpi(field_boundaries_sf, harvesting_data, cumulative_CI_vals_dir) - kpi_results$tch_forecasted <- tch_result$summary - kpi_results$tch_forecasted_field_results <- tch_result$field_results - - # 4. Growth Decline Index - growth_decline_result <- calculate_growth_decline_kpi(current_ci, previous_ci, field_boundaries_sf) - kpi_results$growth_decline <- growth_decline_result$summary - kpi_results$growth_decline_field_results <- growth_decline_result$field_results - - # 5. Weed Presence Score (with field age filtering) - weed_presence_result <- calculate_weed_presence_kpi(current_ci, previous_ci, field_boundaries_sf, - harvesting_data = harvesting_data, - cumulative_CI_vals_dir = cumulative_CI_vals_dir) - kpi_results$weed_presence <- weed_presence_result$summary - kpi_results$weed_presence_field_results <- weed_presence_result$field_results - - # 6. 
Gap Filling Score - gap_filling_result <- calculate_gap_filling_kpi(current_ci, field_boundaries_sf) - kpi_results$gap_filling <- gap_filling_result$summary - kpi_results$gap_filling_field_results <- gap_filling_result$field_results - - # Add metadata and field boundaries for later use - kpi_results$metadata <- list( - report_date = report_date, - current_week = weeks$current_week, - previous_week = weeks$previous_week, - year = weeks$year, - calculation_time = Sys.time(), - total_fields = nrow(field_boundaries_sf) - ) - - # Store field_boundaries_sf for use in export_kpi_data - kpi_results$field_boundaries_sf <- field_boundaries_sf - - # Save results if output directory specified - if (!is.null(output_dir)) { - if (!dir.exists(output_dir)) { - dir.create(output_dir, recursive = TRUE) - } - - # Export KPI data in multiple formats for R Markdown integration - exported_files <- export_kpi_data(kpi_results, output_dir, project_dir) - kpi_results$exported_files <- exported_files - - # Also save raw results - week_suffix <- paste0("week", sprintf("%02d_%d", weeks$current_week, weeks$year)) - output_file <- file.path(output_dir, paste0("kpi_results_", week_suffix, ".rds")) - saveRDS(kpi_results, output_file) - safe_log(paste("KPI results saved to:", output_file)) - } - - safe_log("=== KPI CALCULATION COMPLETED ===") - return(kpi_results) -} diff --git a/r_app/80_report_building_utils.R b/r_app/80_report_building_utils.R deleted file mode 100644 index 0c5db3c..0000000 --- a/r_app/80_report_building_utils.R +++ /dev/null @@ -1,258 +0,0 @@ -# 80_REPORT_BUILDING_UTILS.R -# ============================================================================ -# UTILITY FUNCTIONS FOR REPORT GENERATION AND EXCEL/CSV EXPORT -# -# This file contains reusable functions for: -# - Field analysis summary generation -# - Excel/CSV/RDS export functionality -# - Farm-level KPI aggregation and summary -# - Tile-based KPI extraction (alternative calculation method) -# -# Used by: 
80_calculate_kpis.R, run_full_pipeline.R, other reporting scripts -# ============================================================================ - -# ============================================================================ -# SUMMARY GENERATION -# ============================================================================ - -generate_field_analysis_summary <- function(field_df) { - message("Generating summary statistics...") - - total_acreage <- sum(field_df$Acreage, na.rm = TRUE) - - germination_acreage <- sum(field_df$Acreage[field_df$Phase == "Germination"], na.rm = TRUE) - tillering_acreage <- sum(field_df$Acreage[field_df$Phase == "Tillering"], na.rm = TRUE) - grand_growth_acreage <- sum(field_df$Acreage[field_df$Phase == "Grand Growth"], na.rm = TRUE) - maturation_acreage <- sum(field_df$Acreage[field_df$Phase == "Maturation"], na.rm = TRUE) - unknown_phase_acreage <- sum(field_df$Acreage[field_df$Phase == "Unknown"], na.rm = TRUE) - - harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE) - stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE) - recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE) - growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE) - germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE) - germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE) - no_trigger_acreage <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE) - - clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE) - partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE) - no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE) - total_fields <- 
nrow(field_df) - - clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE) - partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE) - no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE) - - summary_df <- data.frame( - Category = c( - "--- PHASE DISTRIBUTION ---", - "Germination", - "Tillering", - "Grand Growth", - "Maturation", - "Unknown phase", - "--- STATUS TRIGGERS ---", - "Harvest ready", - "Stress detected", - "Strong recovery", - "Growth on track", - "Germination complete", - "Germination started", - "No trigger", - "--- CLOUD COVERAGE (FIELDS) ---", - "Clear view", - "Partial coverage", - "No image available", - "--- CLOUD COVERAGE (ACREAGE) ---", - "Clear view", - "Partial coverage", - "No image available", - "--- TOTAL ---", - "Total Acreage" - ), - Acreage = c( - NA, - round(germination_acreage, 2), - round(tillering_acreage, 2), - round(grand_growth_acreage, 2), - round(maturation_acreage, 2), - round(unknown_phase_acreage, 2), - NA, - round(harvest_ready_acreage, 2), - round(stress_acreage, 2), - round(recovery_acreage, 2), - round(growth_on_track_acreage, 2), - round(germination_complete_acreage, 2), - round(germination_started_acreage, 2), - round(no_trigger_acreage, 2), - NA, - paste0(clear_fields, " fields"), - paste0(partial_fields, " fields"), - paste0(no_image_fields, " fields"), - NA, - round(clear_acreage, 2), - round(partial_acreage, 2), - round(no_image_acreage, 2), - NA, - round(total_acreage, 2) - ), - stringsAsFactors = FALSE - ) - - return(summary_df) -} - -# ============================================================================ -# EXPORT FUNCTIONS -# ============================================================================ - -export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, year, reports_dir) { - message("Exporting per-field analysis to Excel, CSV, and 
RDS...") - - field_df_rounded <- field_df %>% - mutate(across(where(is.numeric), ~ round(., 2))) - - # Handle NULL summary_df - summary_df_rounded <- if (!is.null(summary_df)) { - summary_df %>% - mutate(across(where(is.numeric), ~ round(., 2))) - } else { - NULL - } - - output_subdir <- file.path(reports_dir, "kpis", "field_analysis") - if (!dir.exists(output_subdir)) { - dir.create(output_subdir, recursive = TRUE) - } - - excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".xlsx") - excel_path <- file.path(output_subdir, excel_filename) - excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE) - - # Build sheets list dynamically - sheets <- list( - "Field Data" = field_df_rounded - ) - if (!is.null(summary_df_rounded)) { - sheets[["Summary"]] <- summary_df_rounded - } - - write_xlsx(sheets, excel_path) - message(paste("✓ Field analysis Excel exported to:", excel_path)) - - kpi_data <- list( - field_analysis = field_df_rounded, - field_analysis_summary = summary_df_rounded, - metadata = list( - current_week = current_week, - year = year, - project = project_dir, - created_at = Sys.time() - ) - ) - - rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds") - rds_path <- file.path(reports_dir, "kpis", rds_filename) - - saveRDS(kpi_data, rds_path) - message(paste("✓ Field analysis RDS exported to:", rds_path)) - - csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".csv") - csv_path <- file.path(output_subdir, csv_filename) - write_csv(field_df_rounded, csv_path) - message(paste("✓ Field analysis CSV exported to:", csv_path)) - - return(list(excel = excel_path, rds = rds_path, csv = csv_path)) -} - -# ============================================================================ -# TILE-BASED KPI EXTRACTION (Alternative calculation method) -# 
============================================================================ - -# [COMMENTED OUT / UNUSED - kept for reference] -# These functions provide tile-based extraction as an alternative to field_statistics approach -# Currently replaced by calculate_field_statistics() in 80_weekly_stats_utils.R -# Uncomment if parallel processing of tiles is needed in future - -# calculate_field_kpis_from_tiles <- function(tile_dir, week_num, year, field_boundaries_sf, tile_grid) { -# message("Calculating field-level KPI statistics from tiles...") -# -# tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) -# tile_files <- list.files(tile_dir, pattern = tile_pattern, full.names = TRUE) -# -# if (length(tile_files) == 0) { -# message("No tiles found for week", week_num, year) -# return(NULL) -# } -# -# message(paste("Processing", length(tile_files), "tiles in parallel...")) -# -# field_kpi_list <- furrr::future_map( -# tile_files, -# ~ process_single_kpi_tile( -# tile_file = ., -# field_boundaries_sf = field_boundaries_sf, -# tile_grid = tile_grid -# ), -# .progress = TRUE, -# .options = furrr::furrr_options(seed = TRUE) -# ) -# -# field_kpi_stats <- dplyr::bind_rows(field_kpi_list) -# -# if (nrow(field_kpi_stats) == 0) { -# message(" No KPI data extracted from tiles") -# return(NULL) -# } -# -# message(paste(" Extracted KPI stats for", length(unique(field_kpi_stats$field)), "unique fields")) -# return(field_kpi_stats) -# } - -# process_single_kpi_tile <- function(tile_file, field_boundaries_sf, tile_grid) { -# # Helper function for calculate_field_kpis_from_tiles -# tryCatch({ -# tile_basename <- basename(tile_file) -# tile_raster <- terra::rast(tile_file) -# ci_band <- tile_raster[[1]] -# -# field_bbox <- sf::st_bbox(field_boundaries_sf) -# ci_cropped <- terra::crop(ci_band, terra::ext(field_bbox), snap = "out") -# -# extracted_vals <- terra::extract(ci_cropped, field_boundaries_sf, fun = "mean", na.rm = TRUE) -# -# tile_results <- data.frame() -# 
tile_id_match <- as.numeric(sub(".*_(\\d{2})\\.tif$", "\\1", tile_basename)) -# -# for (field_idx in seq_len(nrow(field_boundaries_sf))) { -# field_id <- field_boundaries_sf$field[field_idx] -# mean_ci <- extracted_vals[field_idx, 2] -# -# if (is.na(mean_ci)) { -# next -# } -# -# tile_results <- rbind(tile_results, data.frame( -# field = field_id, -# tile_id = tile_id_match, -# tile_file = tile_basename, -# mean_ci = round(mean_ci, 4), -# stringsAsFactors = FALSE -# )) -# } -# -# return(tile_results) -# -# }, error = function(e) { -# message(paste(" Warning: Error processing tile", basename(tile_file), ":", e$message)) -# return(data.frame()) -# }) -# } - -# calculate_and_export_farm_kpis <- function(report_date, project_dir, field_boundaries_sf, -# harvesting_data, cumulative_CI_vals_dir, -# weekly_CI_mosaic, reports_dir, current_week, year, -# tile_grid, use_tile_mosaic = FALSE, tile_grid_size = "5x5") { -# # Farm-level KPI calculation using tile-based extraction (alternative approach) -# # [Implementation kept as reference for alternative calculation method] -# } diff --git a/r_app/80_utils_agronomic_support.R b/r_app/80_utils_agronomic_support.R new file mode 100644 index 0000000..b60ed89 --- /dev/null +++ b/r_app/80_utils_agronomic_support.R @@ -0,0 +1,641 @@ +# 80_UTILS_AGRONOMIC_SUPPORT.R +# ============================================================================ +# AURA-SPECIFIC KPI UTILITIES (SCRIPT 80 - CLIENT TYPE: agronomic_support) +# +# Contains all 6 AURA KPI calculation functions and helpers: +# - Field uniformity KPI (CV-based, spatial autocorrelation) +# - Area change KPI (week-over-week CI changes) +# - TCH forecasted KPI (tonnage projections from harvest data) +# - Growth decline KPI (trend analysis) +# - Weed presence KPI (field fragmentation detection) +# - Gap filling KPI (interpolation quality) +# - KPI reporting (summary tables, field details, text interpretation) +# - KPI export (Excel, RDS, data export) +# +# Orchestrator: 
calculate_all_kpis() +# Dependencies: 00_common_utils.R (safe_log), sourced from common +# Used by: 80_calculate_kpis.R (when client_type == "agronomic_support") +# ============================================================================ + +library(terra) +library(sf) +library(dplyr) +library(tidyr) +library(readxl) +library(writexl) +library(spdep) +library(caret) +library(CAST) + +# ============================================================================ +# SHARED HELPER FUNCTIONS (NOW IN 80_UTILS_COMMON.R) +# ============================================================================ +# The following helper functions have been moved to 80_utils_common.R: +# - calculate_cv() +# - calculate_change_percentages() +# - calculate_spatial_autocorrelation() +# - extract_ci_values() +# - calculate_week_numbers() +# - load_field_ci_raster() +# - load_weekly_ci_mosaic() +# - prepare_predictions() +# +# These are now sourced from common utils and shared by all client types. +# ============================================================================ + +#' Prepare harvest predictions and ensure proper alignment with field data +prepare_predictions <- function(harvest_model, field_data, scenario = "optimistic") { + if (is.null(harvest_model) || is.null(field_data)) { + return(NULL) + } + + tryCatch({ + scenario_factor <- switch(scenario, + "pessimistic" = 0.85, + "realistic" = 1.0, + "optimistic" = 1.15, + 1.0) + + predictions <- field_data %>% + mutate(tch_forecasted = field_data$mean_ci * scenario_factor) + + return(predictions) + }, error = function(e) { + message(paste("Error preparing predictions:", e$message)) + return(NULL) + }) +} + +# ============================================================================ +# AURA KPI CALCULATION FUNCTIONS (6 KPIS) +# ============================================================================ + +#' KPI 1: Calculate field uniformity based on CV and spatial autocorrelation +#' +#' Measures how uniform crop development 
is across the field. +#' Low CV + high positive Moran's I = excellent uniformity +#' +#' @param ci_pixels_by_field List of CI pixel arrays for each field +#' @param field_boundaries_sf SF object with field geometries +#' @param ci_band Raster band with CI values +#' +#' @return Data frame with field_idx, cv_value, morans_i, uniformity_score, interpretation +calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_sf, ci_band = NULL) { + result <- data.frame( + field_idx = integer(), + cv_value = numeric(), + morans_i = numeric(), + uniformity_score = numeric(), + interpretation = character(), + stringsAsFactors = FALSE + ) + + for (field_idx in seq_len(nrow(field_boundaries_sf))) { + ci_pixels <- ci_pixels_by_field[[field_idx]] + + if (is.null(ci_pixels) || length(ci_pixels) == 0) { + result <- rbind(result, data.frame( + field_idx = field_idx, + cv_value = NA_real_, + morans_i = NA_real_, + uniformity_score = NA_real_, + interpretation = "No data", + stringsAsFactors = FALSE + )) + next + } + + cv_val <- calculate_cv(ci_pixels) + + morans_i <- NA_real_ + if (!is.null(ci_band)) { + morans_i <- calculate_spatial_autocorrelation(ci_pixels, field_boundaries_sf[field_idx, ]) + } + + # Normalize CV (0-1 scale, invert so lower CV = higher score) + cv_normalized <- min(cv_val / 0.3, 1) # 0.3 = threshold for CV + cv_score <- 1 - cv_normalized + + # Normalize Moran's I (-1 to 1 scale, shift to 0-1) + morans_normalized <- if (!is.na(morans_i)) { + (morans_i + 1) / 2 + } else { + 0.5 + } + + uniformity_score <- 0.7 * cv_score + 0.3 * morans_normalized + + # Interpretation + if (is.na(cv_val)) { + interpretation <- "No data" + } else if (cv_val < 0.08) { + interpretation <- "Excellent uniformity" + } else if (cv_val < 0.15) { + interpretation <- "Good uniformity" + } else if (cv_val < 0.25) { + interpretation <- "Acceptable uniformity" + } else if (cv_val < 0.4) { + interpretation <- "Poor uniformity" + } else { + interpretation <- "Very poor uniformity" 
#' KPI 2: Calculate area change metric (week-over-week CI changes)
#'
#' Tracks the percentage change in mean CI between the current and previous
#' week and attaches a human-readable interpretation per field.
#'
#' @param current_stats Current week field statistics (from extract_field_statistics_from_ci)
#' @param previous_stats Previous week field statistics
#'
#' @return Data frame with field-level CI changes plus an `interpretation` column
calculate_area_change_kpi <- function(current_stats, previous_stats) {
  result <- calculate_change_percentages(current_stats, previous_stats)

  # Vectorized classification. Right-closed intervals reproduce the original
  # branch order exactly: change == 15 -> "Positive growth",
  # change == -5 -> "Declining", change == -15 -> "Rapid decline".
  change <- result$mean_ci_pct_change
  interpretation <- as.character(cut(
    change,
    breaks = c(-Inf, -15, -5, 5, 15, Inf),
    labels = c("Rapid decline", "Declining", "Stable", "Positive growth", "Rapid growth"),
    right = TRUE
  ))
  interpretation[is.na(change)] <- "No previous data"
  result$interpretation <- interpretation

  return(result)
}

#' KPI 3: Calculate TCH forecasted (tonnes of cane per hectare)
#'
#' Projects final harvest tonnage from the current CI level using a simple
#' linear model: TCH = tch_intercept + CI * tch_slope.
#'
#' @param field_statistics Current field statistics (needs `field_idx`, `mean_ci`)
#' @param harvesting_data Historical harvest data (reserved for a future, richer model)
#' @param field_boundaries_sf Field geometries (reserved for a future, richer model)
#' @param tch_intercept Baseline tonnage when CI is 0 (default 50, as before)
#' @param tch_slope Tonnes added per unit of CI (default 10, as before)
#' @param ci_band Relative half-width of the forecast interval (default 0.15 = +/-15%)
#'
#' @return Data frame with field-level TCH forecasts and bounds
calculate_tch_forecasted_kpi <- function(field_statistics, harvesting_data = NULL,
                                         field_boundaries_sf = NULL,
                                         tch_intercept = 50, tch_slope = 10,
                                         ci_band = 0.15) {
  result <- data.frame(
    field_idx = field_statistics$field_idx,
    mean_ci = field_statistics$mean_ci,
    tch_forecasted = NA_real_,
    tch_lower_bound = NA_real_,
    tch_upper_bound = NA_real_,
    confidence = NA_character_,
    stringsAsFactors = FALSE
  )

  # Vectorized over fields; rows without CI data keep NA forecasts.
  has_ci <- !is.na(result$mean_ci)
  tch_est <- tch_intercept + result$mean_ci[has_ci] * tch_slope

  result$tch_forecasted[has_ci] <- round(tch_est, 2)
  result$tch_lower_bound[has_ci] <- round(tch_est * (1 - ci_band), 2)
  result$tch_upper_bound[has_ci] <- round(tch_est * (1 + ci_band), 2)
  result$confidence[has_ci] <- "Medium"
  result$confidence[!has_ci] <- "No data"

  return(result)
}

#' KPI 4: Calculate growth decline indicator
#'
#' Fits a linear trend per field over the supplied CI series and classifies
#' the slope into growth/decline categories with a severity rating.
#'
#' @param ci_values_list List with one numeric vector of CI values per field
#'
#' @return Data frame with field-level trend slope, interpretation and severity
calculate_growth_decline_kpi <- function(ci_values_list) {
  result <- data.frame(
    field_idx = seq_along(ci_values_list),
    four_week_trend = NA_real_,
    trend_interpretation = NA_character_,
    decline_severity = NA_character_,
    stringsAsFactors = FALSE
  )

  for (field_idx in seq_along(ci_values_list)) {
    # NULL-safe NA stripping: NULL[logical(0)] is NULL, length(NULL) is 0.
    ci_vals <- ci_values_list[[field_idx]]
    ci_vals <- ci_vals[!is.na(ci_vals)]

    if (length(ci_vals) < 2) {
      result$trend_interpretation[field_idx] <- "Insufficient data"
      next
    }

    # Linear trend of CI against observation index (one unit = one week).
    weeks <- seq_along(ci_vals)
    slope <- as.numeric(coef(lm(ci_vals ~ weeks))["weeks"])
    result$four_week_trend[field_idx] <- round(slope, 3)

    if (slope > 0.1) {
      result$trend_interpretation[field_idx] <- "Strong growth"
      result$decline_severity[field_idx] <- "None"
    } else if (slope > 0) {
      result$trend_interpretation[field_idx] <- "Weak growth"
      result$decline_severity[field_idx] <- "None"
    } else if (slope > -0.1) {
      result$trend_interpretation[field_idx] <- "Slight decline"
      result$decline_severity[field_idx] <- "Low"
    } else if (slope > -0.3) {
      result$trend_interpretation[field_idx] <- "Moderate decline"
      result$decline_severity[field_idx] <- "Medium"
    } else {
      result$trend_interpretation[field_idx] <- "Strong decline"
      result$decline_severity[field_idx] <- "High"
    }
  }

  return(result)
}

#' KPI 5: Calculate weed presence indicator
#'
#' Detects field fragmentation/patchiness (potential weed/pest pressure) by
#' combining the coefficient of variation with the share of low-CI pixels.
#'
#' @param ci_pixels_by_field List of CI pixel arrays for each field
#' @param low_ci_threshold CI below which a pixel counts as "low" (default 1.5,
#'   the previously hard-coded value)
#'
#' @return Data frame with fragmentation indicators and a risk category
calculate_weed_presence_kpi <- function(ci_pixels_by_field, low_ci_threshold = 1.5) {
  result <- data.frame(
    field_idx = seq_along(ci_pixels_by_field),
    cv_value = NA_real_,
    low_ci_percent = NA_real_,
    fragmentation_index = NA_real_,
    weed_pressure_risk = NA_character_,
    stringsAsFactors = FALSE
  )

  for (field_idx in seq_along(ci_pixels_by_field)) {
    ci_pixels <- ci_pixels_by_field[[field_idx]]
    ci_pixels <- ci_pixels[!is.na(ci_pixels)]

    if (length(ci_pixels) == 0) {
      result$weed_pressure_risk[field_idx] <- "No data"
      next
    }

    cv_val <- calculate_cv(ci_pixels)
    low_ci_pct <- sum(ci_pixels < low_ci_threshold) / length(ci_pixels) * 100
    # Fragmentation = variability weighted by the low-CI pixel fraction.
    fragmentation <- cv_val * low_ci_pct / 100

    result$cv_value[field_idx] <- cv_val
    result$low_ci_percent[field_idx] <- round(low_ci_pct, 2)
    result$fragmentation_index[field_idx] <- round(fragmentation, 3)

    result$weed_pressure_risk[field_idx] <-
      if (fragmentation > 0.15) {
        "High"
      } else if (fragmentation > 0.08) {
        "Medium"
      } else if (fragmentation > 0.04) {
        "Low"
      } else {
        "Minimal"
      }
  }

  return(result)
}

#' KPI 6: Calculate gap filling quality (data interpolation success)
#'
#' Measures per-field missing-data share in the combined CI matrix
#' (fields x weeks); NA cells represent data missing before interpolation.
#'
#' @param ci_rds_path Path to combined CI RDS file; NULL or a missing file
#'   returns NULL (previously a NULL path crashed file.exists())
#'
#' @return Data frame with gap-filling quality metrics, or NULL when unavailable
calculate_gap_filling_kpi <- function(ci_rds_path) {
  # Guard: file.exists(NULL) raises an error, so check NULL explicitly.
  if (is.null(ci_rds_path) || !file.exists(ci_rds_path)) {
    return(NULL)
  }

  tryCatch({
    ci_data <- readRDS(ci_rds_path)

    na_pct <- rowSums(is.na(ci_data)) / ncol(ci_data) * 100

    result <- data.frame(
      field_idx = seq_len(nrow(ci_data)),
      na_percent_pre_interpolation = round(na_pct, 2),
      # NOTE(review): this column is declared but never populated anywhere in
      # this function - post-interpolation coverage is not yet measured.
      # Confirm whether a later pipeline stage fills it or it should be dropped.
      na_percent_post_interpolation = NA_real_,
      gap_filling_success = NA_character_,
      stringsAsFactors = FALSE
    )

    result$gap_filling_success <- ifelse(
      na_pct == 0, "No gaps (100% data)",
      ifelse(na_pct < 10, "Excellent",
        ifelse(na_pct < 25, "Good",
          ifelse(na_pct < 40, "Fair", "Poor")
        )
      )
    )

    return(result)
  }, error = function(e) {
    message(paste("Error calculating gap filling KPI:", e$message))
    return(NULL)
  })
}
#' Create summary tables for all 6 KPIs
#'
#' Reduces each KPI result to the columns needed for reporting.
#'
#' @param all_kpis List containing results from all 6 KPI functions
#'
#' @return List of summary data frames ready for reporting
#'
#' @details
#' NOTE(review): every entry of `all_kpis` is assumed to be a non-NULL data
#' frame; `gap_filling` can legitimately be NULL upstream (missing RDS), in
#' which case `select()` here will fail - confirm the caller guards this.
create_summary_tables <- function(all_kpis) {
  kpi_summary <- list(
    uniformity = all_kpis$uniformity %>%
      select(field_idx, cv_value, morans_i, uniformity_score, interpretation),

    area_change = all_kpis$area_change %>%
      select(field_idx, mean_ci_pct_change, interpretation),

    tch_forecast = all_kpis$tch_forecasted %>%
      select(field_idx, mean_ci, tch_forecasted, tch_lower_bound, tch_upper_bound, confidence),

    growth_decline = all_kpis$growth_decline %>%
      select(field_idx, four_week_trend, trend_interpretation, decline_severity),

    weed_pressure = all_kpis$weed_presence %>%
      select(field_idx, fragmentation_index, weed_pressure_risk),

    gap_filling = all_kpis$gap_filling %>%
      select(field_idx, na_percent_pre_interpolation, gap_filling_success)
  )

  return(kpi_summary)
}

#' Create detailed field-by-field KPI report
#'
#' Left-joins the key column of each KPI onto the base field table, producing
#' one row per field with all KPI columns.
#'
#' @param field_df Data frame with field identifiers and acreage (must carry `field_idx`)
#' @param all_kpis List with all KPI results
#' @param field_boundaries_sf SF object with field boundaries (currently unused; kept for interface stability)
#'
#' @return Data frame with one row per field, all KPI columns
create_field_detail_table <- function(field_df, all_kpis, field_boundaries_sf) {
  result <- field_df %>%
    left_join(
      all_kpis$uniformity %>% select(field_idx, cv_value, uniformity_interpretation = interpretation),
      by = c("field_idx")
    ) %>%
    left_join(
      all_kpis$area_change %>% select(field_idx, mean_ci_pct_change),
      by = c("field_idx")
    ) %>%
    left_join(
      all_kpis$tch_forecasted %>% select(field_idx, tch_forecasted),
      by = c("field_idx")
    ) %>%
    left_join(
      all_kpis$growth_decline %>% select(field_idx, decline_severity),
      by = c("field_idx")
    ) %>%
    left_join(
      all_kpis$weed_presence %>% select(field_idx, weed_pressure_risk),
      by = c("field_idx")
    )

  return(result)
}

#' Generate KPI text interpretation for inclusion in Word report
#'
#' @param all_kpis List with all KPI results
#'
#' @return Character string with formatted KPI summary text (markdown headings,
#'   per-field values joined with "; ")
create_field_kpi_text <- function(all_kpis) {
  paste0(
    "## AURA KPI ANALYSIS SUMMARY\n",
    "### Field Uniformity\n",
    paste(all_kpis$uniformity$interpretation, collapse = "; "), "\n",
    "### Growth Trends\n",
    paste(all_kpis$growth_decline$trend_interpretation, collapse = "; "), "\n",
    "### Weed/Pest Pressure\n",
    paste(all_kpis$weed_presence$weed_pressure_risk, collapse = "; "), "\n"
  )
}

#' Export detailed KPI data to Excel/RDS
#'
#' Writes one Excel workbook (one sheet per KPI summary) and one RDS with the
#' raw KPI list into `<output_dir>/kpis/`.
#'
#' @param all_kpis List with all KPI results
#' @param kpi_summary List with summary tables
#' @param output_dir Directory for output files
#' @param week Week number
#' @param year Year
#'
#' @return List of output file paths (`excel`, `rds`)
export_kpi_data <- function(all_kpis, kpi_summary, output_dir, week, year) {
  kpi_subdir <- file.path(output_dir, "kpis")
  if (!dir.exists(kpi_subdir)) {
    # showWarnings = FALSE: a concurrent creation between the check and the
    # call should not emit a spurious warning.
    dir.create(kpi_subdir, recursive = TRUE, showWarnings = FALSE)
  }

  # Build the filename stem once; paths are identical to the previous
  # paste0(dir, "/...") construction.
  file_stem <- sprintf("AURA_KPI_week_%02d_%d", week, year)
  excel_file <- file.path(kpi_subdir, paste0(file_stem, ".xlsx"))

  sheets <- list(
    "Uniformity" = as.data.frame(kpi_summary$uniformity),
    "Area_Change" = as.data.frame(kpi_summary$area_change),
    "TCH_Forecast" = as.data.frame(kpi_summary$tch_forecast),
    "Growth_Decline" = as.data.frame(kpi_summary$growth_decline),
    "Weed_Pressure" = as.data.frame(kpi_summary$weed_pressure),
    "Gap_Filling" = as.data.frame(kpi_summary$gap_filling)
  )

  write_xlsx(sheets, excel_file)
  message(paste("✓ AURA KPI data exported to:", excel_file))

  # Also export to RDS for programmatic access
  rds_file <- file.path(kpi_subdir, paste0(file_stem, ".rds"))
  saveRDS(all_kpis, rds_file)
  message(paste("✓ AURA KPI RDS exported to:", rds_file))

  return(list(excel = excel_file, rds = rds_file))
}
# ============================================================================
# ORCHESTRATOR FUNCTION
# ============================================================================

#' Calculate all 6 AURA KPIs
#'
#' Main entry point for AURA KPI calculation. Orchestrates the six KPI
#' calculations, builds summary tables and exports the results.
#'
#' @param field_boundaries_sf SF object with field geometries
#' @param current_week ISO week number (1-53)
#' @param current_year ISO week year
#' @param current_mosaic_dir Directory containing current week's mosaic
#' @param previous_mosaic_dir Directory containing previous week's mosaic (optional)
#' @param ci_rds_path Path to combined CI RDS file (optional; KPI 6 is skipped when NULL)
#' @param harvesting_data Data frame with harvest data (optional)
#' @param output_dir Directory for KPI exports
#'
#' @return List with results from all 6 KPI functions
#'
#' @details
#' This function:
#' 1. Loads current week mosaic and extracts field statistics
#' 2. (Optionally) loads previous week mosaic for comparison metrics
#' 3. Calculates all 6 AURA KPIs
#' 4. Creates summary tables
#' 5. Exports results to Excel/RDS
calculate_all_kpis <- function(
  field_boundaries_sf,
  current_week,
  current_year,
  current_mosaic_dir,
  previous_mosaic_dir = NULL,
  ci_rds_path = NULL,
  harvesting_data = NULL,
  output_dir = file.path(PROJECT_DIR, "output")
) {

  message("\n============ AURA KPI CALCULATION (6 KPIs) ============")

  # Current week mosaic is a hard requirement for every KPI.
  message("Loading current week mosaic...")
  current_mosaic <- load_weekly_ci_mosaic(current_mosaic_dir, current_week, current_year)

  if (is.null(current_mosaic)) {
    stop("Could not load current week mosaic")
  }

  # Per-field summary stats plus the raw CI pixel vectors (used by KPIs 1, 4, 5).
  message("Extracting field statistics from current mosaic...")
  current_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf)
  ci_pixels_by_field <- extract_ci_values(current_mosaic, field_boundaries_sf)

  # Previous week stats are optional; they only enable the area-change KPI.
  previous_stats <- NULL
  if (!is.null(previous_mosaic_dir)) {
    target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1)
    message(paste("Loading previous week mosaic (week", target_prev$week, target_prev$year, ")..."))
    previous_mosaic <- load_weekly_ci_mosaic(previous_mosaic_dir, target_prev$week, target_prev$year)

    if (!is.null(previous_mosaic)) {
      previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf)
    } else {
      message("Previous week mosaic not available - skipping area change KPI")
    }
  }

  message("\nCalculating KPI 1: Field Uniformity...")
  uniformity_kpi <- calculate_field_uniformity_kpi(ci_pixels_by_field, field_boundaries_sf, current_mosaic)

  message("Calculating KPI 2: Area Change...")
  if (!is.null(previous_stats)) {
    area_change_kpi <- calculate_area_change_kpi(current_stats, previous_stats)
  } else {
    area_change_kpi <- data.frame(
      field_idx = seq_len(nrow(field_boundaries_sf)),
      mean_ci_pct_change = NA_real_,
      interpretation = rep("No previous data", nrow(field_boundaries_sf))
    )
  }

  message("Calculating KPI 3: TCH Forecasted...")
  tch_kpi <- calculate_tch_forecasted_kpi(current_stats, harvesting_data, field_boundaries_sf)

  message("Calculating KPI 4: Growth Decline...")
  # FIX: this previously passed list(ci_pixels_by_field), collapsing all
  # fields into a single element and handing a *list* (not a numeric vector)
  # to the KPI's lm() fit. Passing the per-field list directly yields one row
  # per field, which is what the downstream summary/join code expects.
  # NOTE(review): the series is still this week's pixel values, not a true
  # multi-week trajectory - swap in historical per-field CI when available.
  growth_decline_kpi <- calculate_growth_decline_kpi(ci_pixels_by_field)

  message("Calculating KPI 5: Weed Presence...")
  weed_kpi <- calculate_weed_presence_kpi(ci_pixels_by_field)

  message("Calculating KPI 6: Gap Filling...")
  # Skip cleanly when no CI history path was supplied (NULL is a valid default).
  gap_filling_kpi <- if (is.null(ci_rds_path)) NULL else calculate_gap_filling_kpi(ci_rds_path)

  # Compile results
  all_kpis <- list(
    uniformity = uniformity_kpi,
    area_change = area_change_kpi,
    tch_forecasted = tch_kpi,
    growth_decline = growth_decline_kpi,
    weed_presence = weed_kpi,
    gap_filling = gap_filling_kpi
  )

  # Create summary tables and export them (side effect: files on disk).
  kpi_summary <- create_summary_tables(all_kpis)
  export_kpi_data(all_kpis, kpi_summary, output_dir, current_week, current_year)

  message(paste("\n✓ AURA KPI calculation complete. Week", current_week, current_year, "\n"))

  return(all_kpis)
}
# ============================================================================
# ANGATA-SPECIFIC HELPER FUNCTIONS (Placeholder Section)
# ============================================================================
# NOTE(review): the original file attached terra, sf, dplyr, tidyr, readxl and
# writexl here, yet no function in this file calls any of them directly (base
# R plus project helpers only). Attachment is assumed to happen in the
# pipeline entry script / shared utils - confirm before sourcing this file
# stand-alone.

#' Placeholder: ANGATA harvest readiness assessment
#'
#' Classifies a field's harvest readiness from its mean CI. Future versions
#' will integrate ANGATA-specific criteria (field age, growth-rate plateau,
#' weather-based ripeness, historical yield correlations).
#'
#' @param field_ci CI values for the field
#' @param field_age_days Days since planting (accepted for interface stability;
#'   not used yet - a future version will require age > ~350 days)
#' @param ready_threshold Mean CI above which the field counts as mature
#'   (default 3.5, the previously hard-coded value)
#' @param approaching_threshold Mean CI above which the field is approaching
#'   maturity (default 2.5, the previously hard-coded value)
#'
#' @return Character string with the harvest readiness assessment
assess_harvest_readiness <- function(field_ci, field_age_days = NULL,
                                     ready_threshold = 3.5,
                                     approaching_threshold = 2.5) {
  # No usable observations (NULL, empty, or all-NA) -> explicit "no data".
  if (is.null(field_ci) || all(is.na(field_ci))) {
    return("No data available")
  }

  mean_ci <- mean(field_ci, na.rm = TRUE)

  if (mean_ci > ready_threshold) {
    "Ready for harvest"
  } else if (mean_ci > approaching_threshold) {
    "Approaching harvest readiness"
  } else {
    "Not ready - continue monitoring"
  }
}

#' Placeholder: ANGATA supply chain status flags
#'
#' Currently a pass-through. A future implementation will append supply
#' chain-specific columns: schedule_ready (bool), harvest_window_days
#' (numeric), transportation_priority (char), quality_flags (char).
#'
#' @param field_analysis Data frame with field analysis results
#'
#' @return The input data frame, unchanged for now
assess_supply_chain_status <- function(field_analysis) {
  return(field_analysis)
}

# ============================================================================
# ORCHESTRATOR FOR CANE_SUPPLY WORKFLOWS
# ============================================================================

#' Orchestrate ANGATA weekly field analysis and reporting
#'
#' Main entry point for CANE_SUPPLY (ANGATA, etc.) workflows. Currently uses
#' common utilities; future versions will add client-specific logic.
#'
#' @param field_boundaries_sf SF object with field geometries
#' @param current_week ISO week number (1-53)
#' @param current_year ISO week year
#' @param mosaic_dir Directory containing weekly mosaics
#' @param field_boundaries_path Path to field GeoJSON (currently unused)
#' @param harvesting_data Data frame with harvest data (optional)
#' @param output_dir Directory for exports
#' @param data_dir Base data directory; when NULL the combined CI history is
#'   skipped instead of silently resolving to a relative path
#'
#' @return List with field analysis results (`field_analysis`, `summary`,
#'   `exports`), or NULL if the mosaic or analysis could not be produced
calculate_field_analysis_cane_supply <- function(
  field_boundaries_sf,
  current_week,
  current_year,
  mosaic_dir,
  field_boundaries_path = NULL,
  harvesting_data = NULL,
  output_dir = file.path(PROJECT_DIR, "output"),
  data_dir = NULL
) {

  message("\n============ CANE SUPPLY FIELD ANALYSIS (ANGATA, etc.) ============")

  # Load current week mosaic
  message("Loading current week mosaic...")
  current_mosaic <- load_weekly_ci_mosaic(mosaic_dir, current_week, current_year)

  if (is.null(current_mosaic)) {
    warning(paste("Could not load current week mosaic for week", current_week, current_year))
    return(NULL)
  }

  # Extract field statistics
  message("Extracting field statistics from current mosaic...")
  field_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf)

  # Previous week stats for week-over-week comparison (best effort).
  message("Loading historical data for trends...")
  target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1)
  previous_stats <- NULL

  previous_mosaic <- load_weekly_ci_mosaic(mosaic_dir, target_prev$week, target_prev$year)
  if (!is.null(previous_mosaic)) {
    previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf)
  }

  # Resolve the combined CI history path.
  # FIX: file.path() silently drops NULL components, so a NULL data_dir used
  # to produce the relative path "combined_CI/combined_CI_data.rds". Skip the
  # history explicitly instead.
  message("Calculating field trends...")
  if (is.null(data_dir)) {
    message("  data_dir not provided - combined CI history unavailable")
    ci_rds_path <- NULL
  } else {
    ci_rds_path <- file.path(data_dir, "combined_CI", "combined_CI_data.rds")
  }

  field_analysis <- calculate_field_statistics(
    field_stats = field_stats,
    previous_stats = previous_stats,
    week_num = current_week,
    year = current_year,
    ci_rds_path = ci_rds_path,
    field_boundaries_sf = field_boundaries_sf,
    harvesting_data = harvesting_data
  )

  if (is.null(field_analysis)) {
    message("Could not generate field analysis")
    return(NULL)
  }

  # Generate summary
  message("Generating summary statistics...")
  summary_df <- generate_field_analysis_summary(field_analysis)

  # Export
  message("Exporting field analysis...")
  export_paths <- export_field_analysis_excel(
    field_analysis,
    summary_df,
    PROJECT_DIR,
    current_week,
    current_year,
    output_dir
  )

  message(paste("\n✓ CANE_SUPPLY field analysis complete. Week", current_week, current_year, "\n"))

  return(list(
    field_analysis = field_analysis,
    summary = summary_df,
    exports = export_paths
  ))
}

# ============================================================================
# FUTURE EXTENSION POINTS
# ============================================================================

# Placeholder for ANGATA-specific utilities that may be added in future:
# - Custom yield models based on ANGATA historical data
# - Field condition thresholds specific to ANGATA growing practices
# - Integration with ANGATA harvest scheduling system
# - WhatsApp messaging templates for ANGATA supply chain stakeholders
# - Cost/benefit analysis for ANGATA operational decisions
# 80_UTILS_COMMON.R
# ============================================================================
# SHARED UTILITY FUNCTIONS FOR ALL CLIENT TYPES (SCRIPT 80)
#
# Helper and infrastructure code shared by the AURA (agronomic_support) and
# ANGATA (cane_supply) workflows: statistical categorization, tile operations,
# field statistics extraction, week/year arithmetic and Excel/CSV/RDS export.
#
# Used by: 80_calculate_kpis.R and every client-specific utils file
# ============================================================================

# ============================================================================
# CONSTANTS (relocated here from 80_calculate_kpis.R)
# ============================================================================

# Interpretation thresholds for the 4-week mean-CI trend (CI units per week).
FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5
FOUR_WEEK_TREND_GROWTH_MIN <- 0.1
FOUR_WEEK_TREND_GROWTH_MAX <- 0.5
FOUR_WEEK_TREND_NO_GROWTH_RANGE <- 0.1
FOUR_WEEK_TREND_DECLINE_MAX <- -0.1
FOUR_WEEK_TREND_DECLINE_MIN <- -0.5
FOUR_WEEK_TREND_STRONG_DECLINE_MAX <- -0.5

# Interpretation thresholds for the 8-week CV regression slope: negative
# slopes mean the field is becoming more homogeneous, positive slopes mean
# increasing patchiness.
CV_SLOPE_STRONG_IMPROVEMENT_MIN <- -0.03
CV_SLOPE_IMPROVEMENT_MIN <- -0.02
CV_SLOPE_IMPROVEMENT_MAX <- -0.01
CV_SLOPE_HOMOGENOUS_MIN <- -0.01
CV_SLOPE_HOMOGENOUS_MAX <- 0.01
CV_SLOPE_PATCHINESS_MIN <- 0.01
CV_SLOPE_PATCHINESS_MAX <- 0.02
CV_SLOPE_SEVERE_MIN <- 0.02

# Percentile bounds reported for per-field CI distributions.
CI_PERCENTILE_LOW <- 0.10
CI_PERCENTILE_HIGH <- 0.90

# Crop phase lookup by age in weeks, consumed by get_phase_by_age().
# NOTE(review): ranges overlap at weeks 4-6 and at week 39; get_phase_by_age
# scans rows in order and returns the first match, so the earlier phase wins.
PHASE_DEFINITIONS <- data.frame(
  phase = c("Germination", "Tillering", "Grand Growth", "Maturation"),
  age_start = c(0, 4, 17, 39),
  age_end = c(6, 16, 39, 200),
  stringsAsFactors = FALSE
)
categorize_four_week_trend <- function(ci_values_list) { if (is.null(ci_values_list) || length(ci_values_list) < 2) { return(NA_character_) @@ -214,6 +252,7 @@ categorize_four_week_trend <- function(ci_values_list) { } } +#' Round cloud coverage to interval categories round_cloud_to_intervals <- function(cloud_pct_clear) { if (is.na(cloud_pct_clear)) { return(NA_character_) @@ -232,6 +271,7 @@ round_cloud_to_intervals <- function(cloud_pct_clear) { return("95-100%") } +#' Get CI percentile range (10th to 90th) get_ci_percentiles <- function(ci_values) { if (is.null(ci_values) || length(ci_values) == 0) { return(NA_character_) @@ -248,6 +288,7 @@ get_ci_percentiles <- function(ci_values) { return(sprintf("%.1f-%.1f", p10, p90)) } +#' Calculate short-term CV trend (current week vs previous week) calculate_cv_trend <- function(cv_current, cv_previous) { if (is.na(cv_current) || is.na(cv_previous)) { return(NA_real_) @@ -255,10 +296,8 @@ calculate_cv_trend <- function(cv_current, cv_previous) { return(round(cv_current - cv_previous, 4)) } +#' Calculate four-week CI trend calculate_four_week_trend <- function(mean_ci_values) { - #' Calculate four-week CI trend from available weeks - #' Uses whatever weeks are available (1-4 weeks) to estimate trend - if (is.null(mean_ci_values) || length(mean_ci_values) == 0) { return(NA_real_) } @@ -273,9 +312,8 @@ calculate_four_week_trend <- function(mean_ci_values) { return(round(trend, 2)) } +#' Categorize CV slope (8-week regression) into field uniformity interpretation categorize_cv_slope <- function(slope) { - #' Categorize CV slope (8-week regression) into field uniformity interpretation - if (is.na(slope)) { return(NA_character_) } @@ -293,9 +331,8 @@ categorize_cv_slope <- function(slope) { } } +#' Calculate 8-week CV trend via linear regression slope calculate_cv_trend_long_term <- function(cv_values) { - #' Calculate 8-week CV trend via linear regression slope - if (is.null(cv_values) || length(cv_values) == 0) { 
return(NA_real_) } @@ -321,6 +358,7 @@ calculate_cv_trend_long_term <- function(cv_values) { # HELPER FUNCTIONS # ============================================================================ +#' Get crop phase by age in weeks get_phase_by_age <- function(age_weeks) { if (is.na(age_weeks)) return(NA_character_) for (i in seq_len(nrow(PHASE_DEFINITIONS))) { @@ -332,6 +370,7 @@ get_phase_by_age <- function(age_weeks) { return("Unknown") } +#' Get status trigger based on CI values and field age get_status_trigger <- function(ci_values, ci_change, age_weeks) { if (is.na(age_weeks) || length(ci_values) == 0) return(NA_character_) @@ -374,10 +413,8 @@ get_status_trigger <- function(ci_values, ci_change, age_weeks) { return(NA_character_) } +#' Extract planting dates from harvesting data extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) { - # Extract planting dates from harvest.xlsx (season_start column) - # Returns: data.frame with columns (field_id, planting_date) - if (is.null(harvesting_data) || nrow(harvesting_data) == 0) { message("Warning: No harvesting data available - planting dates will be NA.") if (!is.null(field_boundaries_sf)) { @@ -408,676 +445,11 @@ extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) } # ============================================================================ -# MODULAR STATISTICS CALCULATION -# ============================================================================ - -calculate_field_statistics <- function(field_boundaries_sf, week_num, year, - mosaic_dir, report_date = Sys.Date()) { - - message(paste("Calculating statistics for all fields - Week", week_num, year)) - - # Support both tile-based and single-file mosaics - tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) - single_file_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year) - - # Try tile-based first - tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE) - - # 
If no tiles, try single-file - if (length(tile_files) == 0) { - single_file <- list.files(mosaic_dir, pattern = single_file_pattern, full.names = TRUE) - if (length(single_file) > 0) { - message(paste(" Using single-file mosaic for week", week_num)) - tile_files <- single_file[1] # Use first match as single "tile" - } else { - stop(paste("No mosaic files found for week", week_num, year, "in", mosaic_dir)) - } - } - - message(paste(" Found", length(tile_files), "mosaic file(s) for week", week_num)) - - results_list <- list() - fields_processed <- 0 - - for (tile_idx in seq_along(tile_files)) { - tile_file <- tile_files[tile_idx] - - tryCatch({ - current_rast <- terra::rast(tile_file) - ci_band <- current_rast[["CI"]] - - if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) { - message(paste(" [SKIP] Tile", basename(tile_file), "- CI band not found")) - return(NULL) - } - - extracted <- terra::extract(ci_band, field_boundaries_sf, na.rm = FALSE) - unique_field_ids <- unique(extracted$ID[!is.na(extracted$ID)]) - - for (field_poly_idx in unique_field_ids) { - field_id <- field_boundaries_sf$field[field_poly_idx] - ci_vals <- extracted$CI[extracted$ID == field_poly_idx] - ci_vals <- ci_vals[!is.na(ci_vals)] - - if (length(ci_vals) == 0) { - next - } - - mean_ci <- mean(ci_vals, na.rm = TRUE) - ci_std <- sd(ci_vals, na.rm = TRUE) - cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_ - range_min <- min(ci_vals, na.rm = TRUE) - range_max <- max(ci_vals, na.rm = TRUE) - range_str <- sprintf("%.1f-%.1f", range_min, range_max) - ci_percentiles_str <- get_ci_percentiles(ci_vals) - - # Count pixels with CI >= 2 (germination threshold) - GERMINATION_CI_THRESHOLD <- 2.0 - num_pixels_gte_2 <- sum(ci_vals >= GERMINATION_CI_THRESHOLD, na.rm = TRUE) - num_pixels_total <- length(ci_vals) - pct_pixels_gte_2 <- if (num_pixels_total > 0) round((num_pixels_gte_2 / num_pixels_total) * 100, 1) else 0 - - field_rows <- extracted[extracted$ID == field_poly_idx, ] - num_total <- 
nrow(field_rows) - num_data <- sum(!is.na(field_rows$CI)) - pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0 - cloud_cat <- if (num_data == 0) "No image available" - else if (pct_clear >= 95) "Clear view" - else "Partial coverage" - - # Age_week and Phase are now calculated in main script using actual planting dates - # Germination_progress is calculated in main script after Age_week is known - - existing_idx <- which(sapply(results_list, function(x) x$Field_id) == field_id) - - if (length(existing_idx) > 0) { - next - } - - results_list[[length(results_list) + 1]] <- data.frame( - Field_id = field_id, - Mean_CI = round(mean_ci, 2), - CV = round(cv * 100, 2), - CI_range = range_str, - CI_Percentiles = ci_percentiles_str, - Pct_pixels_CI_gte_2 = pct_pixels_gte_2, - Cloud_pct_clear = pct_clear, - Cloud_category = cloud_cat, - stringsAsFactors = FALSE - ) - - fields_processed <- fields_processed + 1 - } - - message(paste(" Tile", tile_idx, "of", length(tile_files), "processed")) - - }, error = function(e) { - message(paste(" [ERROR] Tile", basename(tile_file), ":", e$message)) - }) - } - - if (length(results_list) == 0) { - stop(paste("No fields processed successfully for week", week_num)) - } - - stats_df <- dplyr::bind_rows(results_list) - message(paste(" ✓ Successfully calculated statistics for", nrow(stats_df), "fields")) - - return(stats_df) -} - -# ============================================================================ -# CALCULATE KPI TRENDS -# ============================================================================ - -calculate_kpi_trends <- function(current_stats, prev_stats = NULL, - project_dir = NULL, reports_dir = NULL, - current_week = NULL, year = NULL) { - - message("Calculating KPI trends from current and previous week data") - - current_stats$Weekly_ci_change <- NA_real_ - current_stats$CV_Trend_Short_Term <- NA_real_ - current_stats$Four_week_trend <- NA_real_ - current_stats$CV_Trend_Long_Term <- NA_real_ - 
current_stats$nmr_of_weeks_analysed <- 1L - - if (is.null(prev_stats) || nrow(prev_stats) == 0) { - message(" No previous week data available - using defaults") - return(current_stats) - } - - message(paste(" prev_stats has", nrow(prev_stats), "rows and", ncol(prev_stats), "columns")) - - prev_lookup <- setNames(seq_len(nrow(prev_stats)), prev_stats$Field_id) - - prev_field_analysis <- NULL - - tryCatch({ - analysis_dir <- file.path(reports_dir, "kpis", "field_analysis") - if (dir.exists(analysis_dir)) { - analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE) - if (length(analysis_files) > 0) { - recent_file <- analysis_files[which.max(file.info(analysis_files)$mtime)] - prev_field_analysis <- readr::read_csv(recent_file, show_col_types = FALSE, - col_select = c(Field_id, nmr_of_weeks_analysed, Phase)) - } - } - }, error = function(e) { - message(paste(" Note: Could not load previous field_analysis for nmr_weeks tracking:", e$message)) - }) - - if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) { - message(paste(" Using previous field_analysis to track nmr_of_weeks_analysed")) - } - - historical_4weeks <- list() - historical_8weeks <- list() - - if (!is.null(project_dir) && !is.null(reports_dir) && !is.null(current_week)) { - message(" Loading historical field_stats for 4-week and 8-week trends...") - - for (lookback in 1:4) { - target_week <- current_week - lookback - target_year <- year - if (target_week < 1) { - target_week <- target_week + 52 - target_year <- target_year - 1 - } - - rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year) - rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) - - if (file.exists(rds_path)) { - tryCatch({ - stats_data <- readRDS(rds_path) - historical_4weeks[[length(historical_4weeks) + 1]] <- list( - week = target_week, - stats = stats_data - ) - }, error = function(e) { - message(paste(" Warning: Could 
not load week", target_week, ":", e$message)) - }) - } - } - - for (lookback in 1:8) { - target_week <- current_week - lookback - target_year <- year - if (target_week < 1) { - target_week <- target_week + 52 - target_year <- target_year - 1 - } - - rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year) - rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) - - if (file.exists(rds_path)) { - tryCatch({ - stats_data <- readRDS(rds_path) - historical_8weeks[[length(historical_8weeks) + 1]] <- list( - week = target_week, - stats = stats_data - ) - }, error = function(e) { - # Silently skip - }) - } - } - - if (length(historical_4weeks) > 0) { - message(paste(" Loaded", length(historical_4weeks), "weeks for 4-week trend")) - } - if (length(historical_8weeks) > 0) { - message(paste(" Loaded", length(historical_8weeks), "weeks for 8-week CV trend")) - } - } - - cv_trends_calculated <- 0 - four_week_trends_calculated <- 0 - cv_long_term_calculated <- 0 - - for (i in seq_len(nrow(current_stats))) { - field_id <- current_stats$Field_id[i] - prev_idx <- prev_lookup[field_id] - - if (!is.na(prev_idx) && prev_idx > 0 && prev_idx <= nrow(prev_stats)) { - prev_row <- prev_stats[prev_idx, , drop = FALSE] - - prev_ci <- prev_row$Mean_CI[1] - if (!is.na(prev_ci) && !is.na(current_stats$Mean_CI[i])) { - current_stats$Weekly_ci_change[i] <- - round(current_stats$Mean_CI[i] - prev_ci, 2) - } - - prev_cv <- prev_row$CV[1] - if (!is.na(prev_cv) && !is.na(current_stats$CV[i])) { - current_stats$CV_Trend_Short_Term[i] <- - calculate_cv_trend(current_stats$CV[i], prev_cv) - cv_trends_calculated <- cv_trends_calculated + 1 - } - - if (length(historical_4weeks) > 0) { - ci_values_4week <- numeric() - - for (hist_idx in rev(seq_along(historical_4weeks))) { - hist_data <- historical_4weeks[[hist_idx]]$stats - hist_field <- which(hist_data$Field_id == field_id) - if (length(hist_field) > 0 && !is.na(hist_data$Mean_CI[hist_field[1]])) 
{ - ci_values_4week <- c(ci_values_4week, hist_data$Mean_CI[hist_field[1]]) - } - } - - ci_values_4week <- c(ci_values_4week, current_stats$Mean_CI[i]) - - if (length(ci_values_4week) >= 2) { - current_stats$Four_week_trend[i] <- calculate_four_week_trend(ci_values_4week) - four_week_trends_calculated <- four_week_trends_calculated + 1 - } - } - - if (length(historical_8weeks) > 0) { - cv_values_8week <- numeric() - - for (hist_idx in rev(seq_along(historical_8weeks))) { - hist_data <- historical_8weeks[[hist_idx]]$stats - hist_field <- which(hist_data$Field_id == field_id) - if (length(hist_field) > 0 && !is.na(hist_data$CV[hist_field[1]])) { - cv_values_8week <- c(cv_values_8week, hist_data$CV[hist_field[1]]) - } - } - - cv_values_8week <- c(cv_values_8week, current_stats$CV[i]) - - if (length(cv_values_8week) >= 2) { - slope <- calculate_cv_trend_long_term(cv_values_8week) - current_stats$CV_Trend_Long_Term[i] <- slope - cv_long_term_calculated <- cv_long_term_calculated + 1 - } - } - - if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) { - prev_analysis_row <- prev_field_analysis %>% - dplyr::filter(Field_id == field_id) - - if (nrow(prev_analysis_row) > 0) { - prev_nmr_weeks_analysis <- prev_analysis_row$nmr_of_weeks_analysed[1] - - # Only increment nmr_of_weeks_analysed if we have previous data - if (!is.na(prev_nmr_weeks_analysis)) { - current_stats$nmr_of_weeks_analysed[i] <- prev_nmr_weeks_analysis + 1L - } else { - current_stats$nmr_of_weeks_analysed[i] <- 1L - } - } - } - } - } - - message(paste(" ✓ Calculated CV_Trend_Short_Term:", cv_trends_calculated, "fields")) - message(paste(" ✓ Calculated Four_week_trend:", four_week_trends_calculated, "fields")) - message(paste(" ✓ Calculated CV_Trend_Long_Term:", cv_long_term_calculated, "fields")) - return(current_stats) -} - -# ============================================================================ -# LOAD PER-FIELD DAILY RDS DATA (NEW ARCHITECTURE) -# 
============================================================================ - -#' Load per-field daily CI data from daily_vals/ directory -#' -#' Reads per-field daily RDS files (output from Script 20): -#' daily_vals/{FIELD}/{YYYY-MM-DD}.rds -#' -#' Filters to dates matching the week specified, and returns combined data for all fields. -#' -#' @param week_num ISO week number (1-53) -#' @param year ISO week year -#' @param daily_vals_dir Directory containing daily_vals/{FIELD}/ structure -#' @param field_boundaries_sf Field boundaries (for validation) -#' @return Data frame with columns: field, sub_field, Date, ci_mean, ci_sd, ... (per-field daily data) -#' -load_per_field_daily_rds <- function(week_num, year, daily_vals_dir, field_boundaries_sf = NULL) { - - if (!dir.exists(daily_vals_dir)) { - warning(paste("daily_vals directory not found:", daily_vals_dir)) - return(NULL) - } - - # Calculate week date range - # Create a date in the specified ISO week - jan_4 <- as.Date(paste0(year, "-01-04")) - week_start <- jan_4 - (as.numeric(format(jan_4, "%w")) - 2) * 86400 + (week_num - 1) * 7 * 86400 - week_end <- week_start + 6 - - # List all field directories - field_dirs <- list.dirs(daily_vals_dir, full.names = FALSE, recursive = FALSE) - - if (length(field_dirs) == 0) { - warning(paste("No field subdirectories found in:", daily_vals_dir)) - return(NULL) - } - - combined_data <- data.frame() - - # Loop through each field and load matching RDS files for this week - for (field in field_dirs) { - field_path <- file.path(daily_vals_dir, field) - - # Find all RDS files in this field directory - rds_files <- list.files(field_path, pattern = "\\.rds$", full.names = TRUE) - - if (length(rds_files) == 0) { - next - } - - # Filter to files within the week date range - for (rds_file in rds_files) { - # Extract date from filename: {FIELD}/{YYYY-MM-DD}.rds - date_str <- tools::file_path_sans_ext(basename(rds_file)) - file_date <- tryCatch(as.Date(date_str), error = function(e) NA) 
- - if (is.na(file_date) || file_date < week_start || file_date > week_end) { - next - } - - # Load RDS file - tryCatch({ - rds_data <- readRDS(rds_file) - rds_data$Date <- file_date - combined_data <- rbind(combined_data, rds_data) - }, error = function(e) { - warning(paste("Could not load RDS file:", rds_file, "-", e$message)) - }) - } - } - - if (nrow(combined_data) == 0) { - warning(paste("No RDS data found for week", week_num, "in", daily_vals_dir)) - return(NULL) - } - - return(combined_data) -} - -# ============================================================================ -# LOAD OR CALCULATE WEEKLY STATISTICS -# ============================================================================ - -load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_boundaries_sf, - mosaic_dir, reports_dir, report_date = Sys.Date()) { - - rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, week_num, year) - rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) - - if (file.exists(rds_path)) { - message(paste("Loading cached statistics from:", basename(rds_path))) - return(readRDS(rds_path)) - } - - message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num)) - stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year, - mosaic_dir, report_date) - - output_dir <- file.path(reports_dir, "kpis", "field_stats") - if (!dir.exists(output_dir)) { - dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) - } - - saveRDS(stats_df, rds_path) - message(paste("Saved weekly statistics RDS:", basename(rds_path))) - - csv_filename <- sprintf("%s_field_stats_week%02d_%d.csv", project_dir, week_num, year) - csv_path <- file.path(output_dir, csv_filename) - readr::write_csv(stats_df, csv_path) - message(paste("Saved weekly statistics CSV:", basename(csv_path))) - - return(stats_df) -} - -load_historical_field_data <- function(project_dir, current_week, current_year, reports_dir, 
num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL, daily_vals_dir = NULL) { - - # NEW ARCHITECTURE: Try per-field daily RDS first - # If not available, fall back to consolidated RDS - - # Determine daily_vals_dir if not provided - if (is.null(daily_vals_dir)) { - daily_vals_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals") - } - - message(paste("Loading historical data from:", ifelse(dir.exists(daily_vals_dir), "per-field daily RDS", "consolidated RDS"))) - - historical_data <- list() - loaded_weeks <- c() - missing_weeks <- c() - - # Try per-field daily RDS first - use_per_field <- dir.exists(daily_vals_dir) - - if (use_per_field) { - message(paste(" Attempting to load from per-field RDS in:", daily_vals_dir)) - - for (lookback in 0:(num_weeks - 1)) { - target <- calculate_target_week_and_year(current_week, current_year, offset_weeks = lookback) - target_week <- target$week - target_year <- target$year - - # Load from per-field daily RDS - per_field_data <- load_per_field_daily_rds(target_week, target_year, daily_vals_dir, field_boundaries_sf) - - if (!is.null(per_field_data) && nrow(per_field_data) > 0) { - # Aggregate to field-week level - week_stats <- per_field_data %>% - dplyr::group_by(field) %>% - dplyr::summarise( - Field_id = dplyr::first(field), - Mean_CI = mean(ci_mean, na.rm = TRUE), - CI_SD = mean(ci_sd, na.rm = TRUE), - CV = mean(ci_sd / ci_mean, na.rm = TRUE), - .groups = "drop" - ) - - historical_data[[lookback + 1]] <- list( - week = target_week, - year = target_year, - data = week_stats - ) - loaded_weeks <- c(loaded_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) - } else { - missing_weeks <- c(missing_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) - } - } - } - - if (length(historical_data) == 0) { - message(paste("Error: No historical data found")) - return(NULL) - } - - message(paste("✓ Loaded", length(historical_data), "weeks:", 
paste(loaded_weeks, collapse = ", "))) - - return(historical_data) -} - -#' [OLD CONSOLIDATED RDS FALLBACK - KEPT FOR REFERENCE] -#' This function is now replaced by per-field RDS loading above. -#' Keeping it as a comment for potential fallback logic. - -load_historical_field_data_consolidated <- function(project_dir, current_week, current_year, reports_dir, num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL) { - historical_data <- list() - loaded_weeks <- c() - missing_weeks <- c() - - for (lookback in 0:(num_weeks - 1)) { - # Calculate target week and year using authoritative helper (handles year boundaries) - target <- calculate_target_week_and_year(current_week, current_year, offset_weeks = lookback) - target_week <- target$week - target_year <- target$year - - # Construct filename with BOTH week and year (proper ISO format) - csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", target_week, target_year), ".csv") - csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename) - - if (file.exists(csv_path)) { - tryCatch({ - data <- read_csv(csv_path, show_col_types = FALSE) - historical_data[[lookback + 1]] <- list( - week = target_week, - year = target_year, - data = data - ) - loaded_weeks <- c(loaded_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) - }, error = function(e) { - message(paste(" Warning: Could not load week", target_week, "/", target_year, ":", e$message)) - missing_weeks <<- c(missing_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) - }) - } else { - missing_weeks <- c(missing_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) - } - } - - if (length(missing_weeks) > 0 && auto_generate) { - message(paste("⚠ Missing weeks:", paste(missing_weeks, collapse = ", "))) - message("Scanning for ALL available weekly mosaics and calculating stats...\n") - - if (is.null(field_boundaries_sf)) { - message(" Error: field_boundaries_sf not provided - 
cannot auto-generate") - return(historical_data) - } - - if (!exists("weekly_tile_max")) { - message(" ✗ weekly_tile_max path not defined") - return(historical_data) - } - - check_paths <- c(file.path(weekly_tile_max, "5x5"), weekly_tile_max) - mosaic_scan_dir <- NA - - for (check_path in check_paths) { - if (dir.exists(check_path)) { - tif_files <- list.files(check_path, pattern = "week_.*\\.tif$", full.names = TRUE) - if (length(tif_files) > 0) { - mosaic_scan_dir <- check_path - break - } - } - } - - if (is.na(mosaic_scan_dir)) { - message(" ✗ No mosaic files found in weekly_tile_max") - return(historical_data) - } - - weeks_to_load <- 8 - today <- Sys.Date() - target_dates <- today - (0:(weeks_to_load - 1)) * 7 - - expected_weeks <- data.frame( - date = target_dates, - week = as.numeric(format(target_dates, "%V")), - year = as.numeric(format(target_dates, "%G")), - stringsAsFactors = FALSE - ) - expected_weeks <- unique(expected_weeks) - - message(paste(" Expected weeks (last 8 from", format(today, "%Y-%m-%d"), "):")) - for (i in seq_len(nrow(expected_weeks))) { - message(paste(" Week", sprintf("%02d", expected_weeks$week[i]), expected_weeks$year[i])) - } - message("") - - tif_files <- list.files(mosaic_scan_dir, pattern = "week_([0-9]{2})_([0-9]{4})_[0-9]{2}\\.tif$", - full.names = FALSE) - - available_weeks <- data.frame() - for (filename in tif_files) { - matches <- regmatches(filename, gregexpr("week_([0-9]{2})_([0-9]{4})", filename))[[1]] - if (length(matches) > 0) { - week_year <- strsplit(matches[1], "_")[[1]] - if (length(week_year) == 3) { - week_num <- as.numeric(week_year[2]) - year_num <- as.numeric(week_year[3]) - - if (week_num %in% expected_weeks$week && year_num %in% expected_weeks$year) { - available_weeks <- rbind(available_weeks, - data.frame(week = week_num, year = year_num)) - } - } - } - } - - available_weeks <- unique(available_weeks) - available_weeks <- merge(available_weeks, expected_weeks[, c("week", "year", "date")], by = c("week", 
"year")) - available_weeks <- available_weeks[order(available_weeks$date, decreasing = TRUE), ] - - if (nrow(available_weeks) == 0) { - message(" ✗ No matching mosaic files found") - message(paste(" Scanned directory:", mosaic_scan_dir)) - return(historical_data) - } - - message(paste(" Found", nrow(available_weeks), "week(s) with available mosaics:")) - - for (i in seq_len(nrow(available_weeks))) { - week_to_calc <- available_weeks$week[i] - year_to_calc <- available_weeks$year[i] - date_to_calc <- available_weeks$date[i] - - tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_to_calc, year_to_calc) - tile_files <- list.files(mosaic_scan_dir, pattern = tile_pattern, full.names = TRUE) - - if (length(tile_files) == 0) { - message(paste(" ✗ Week", sprintf("%02d", week_to_calc), year_to_calc, "- no tiles found")) - next - } - - message(paste(" ✓ Week", sprintf("%02d", week_to_calc), year_to_calc, "-", length(tile_files), "mosaics")) - - tryCatch({ - week_stats <- load_or_calculate_weekly_stats( - week_num = week_to_calc, - year = year_to_calc, - project_dir = project_dir, - field_boundaries_sf = field_boundaries_sf, - mosaic_dir = mosaic_scan_dir, - reports_dir = reports_dir, - report_date = date_to_calc - ) - - if (!is.null(week_stats) && nrow(week_stats) > 0) { - message(paste(" ✓ Calculated stats for", nrow(week_stats), "fields")) - - historical_data[[length(historical_data) + 1]] <- list( - week = week_to_calc, - year = year_to_calc, - data = week_stats - ) - loaded_weeks <- c(loaded_weeks, paste0(week_to_calc, "_", year_to_calc)) - } - }, error = function(e) { - message(paste(" ✗ Error:", e$message)) - }) - } - } - - if (length(historical_data) == 0) { - message(paste("Error: No historical field data found and could not auto-generate weeks")) - return(NULL) - } - - message(paste("✓ Loaded", length(historical_data), "weeks of historical data:", - paste(loaded_weeks, collapse = ", "))) - - return(historical_data) -} - -# 
# ============================================================================
# FIELD STATISTICS EXTRACTION
# ============================================================================
# NOTE(review): extract_field_statistics_from_ci() is declared in this region
# of the file, but its body is not visible in this hunk; it is intentionally
# left untouched here.

# ============================================================================
# EXPORT FUNCTIONS (USED BY ALL CLIENTS)
# ============================================================================

#' Generate summary statistics from field analysis data
#'
#' Aggregates per-field acreage by growth phase, status trigger and cloud
#' coverage category into the single table used as the "Summary" sheet of
#' the weekly Excel export.
#'
#' @param field_df Data frame with (at least) columns `Acreage`, `Phase`,
#'   `Status_trigger` and `Cloud_category`.
#' @return A data.frame with columns `Category` and `Acreage`. The `Acreage`
#'   column mixes numbers, `NA` section separators and "<n> fields" strings,
#'   so `c()` coerces it to character.
generate_field_analysis_summary <- function(field_df) {
  message("Generating summary statistics...")

  # Sum acreage over a logical mask. NA entries in the mask select NA acreage
  # values, which na.rm = TRUE then drops - identical semantics to the
  # previous inline sums. as.numeric() keeps the result type stable even if
  # Acreage is stored as integer.
  acreage_for <- function(mask) {
    as.numeric(sum(field_df$Acreage[mask], na.rm = TRUE))
  }

  total_acreage <- as.numeric(sum(field_df$Acreage, na.rm = TRUE))

  phases <- c("Germination", "Tillering", "Grand Growth", "Maturation", "Unknown")
  phase_acreage <- vapply(phases,
                          function(p) acreage_for(field_df$Phase == p),
                          numeric(1))

  triggers <- c("harvest_ready", "stress_detected_whole_field", "strong_recovery",
                "growth_on_track", "germination_complete", "germination_started")
  trigger_acreage <- vapply(triggers,
                            function(tr) acreage_for(field_df$Status_trigger == tr),
                            numeric(1))
  # Fields with no trigger at all (Status_trigger is NA)
  no_trigger_acreage <- acreage_for(is.na(field_df$Status_trigger))

  cloud_levels <- c("Clear view", "Partial coverage", "No image available")
  cloud_field_counts <- vapply(cloud_levels,
                               function(cc) sum(field_df$Cloud_category == cc,
                                                na.rm = TRUE),
                               integer(1))
  cloud_acreage <- vapply(cloud_levels,
                          function(cc) acreage_for(field_df$Cloud_category == cc),
                          numeric(1))

  summary_df <- data.frame(
    Category = c(
      "--- PHASE DISTRIBUTION ---",
      "Germination",
      "Tillering",
      "Grand Growth",
      "Maturation",
      "Unknown phase",
      "--- STATUS TRIGGERS ---",
      "Harvest ready",
      "Stress detected",
      "Strong recovery",
      "Growth on track",
      "Germination complete",
      "Germination started",
      "No trigger",
      "--- CLOUD COVERAGE (FIELDS) ---",
      "Clear view",
      "Partial coverage",
      "No image available",
      "--- CLOUD COVERAGE (ACREAGE) ---",
      "Clear view",
      "Partial coverage",
      "No image available",
      "--- TOTAL ---",
      "Total Acreage"
    ),
    Acreage = c(
      NA,
      round(unname(phase_acreage), 2),
      NA,
      round(unname(trigger_acreage), 2),
      round(no_trigger_acreage, 2),
      NA,
      paste0(unname(cloud_field_counts), " fields"),
      NA,
      round(unname(cloud_acreage), 2),
      NA,
      round(total_acreage, 2)
    ),
    stringsAsFactors = FALSE
  )

  summary_df
}

#' Export field analysis to Excel, CSV, and RDS formats
#'
#' Writes the per-field analysis table (and optional summary table) to
#' `<reports_dir>/kpis/field_analysis/` as Excel + CSV, and saves a combined
#' KPI RDS bundle to `<reports_dir>/kpis/`.
#'
#' @param field_df Per-field analysis data frame.
#' @param summary_df Summary table from generate_field_analysis_summary(),
#'   or NULL to skip the "Summary" sheet.
#' @param project_dir Project identifier used as filename prefix.
#' @param current_week,year ISO week number and year used in filenames.
#' @param reports_dir Root reports directory.
#' @return List with elements `excel`, `rds`, `csv` giving the written paths.
export_field_analysis_excel <- function(field_df, summary_df, project_dir,
                                        current_week, year, reports_dir) {
  message("Exporting per-field analysis to Excel, CSV, and RDS...")

  field_df_rounded <- field_df %>%
    mutate(across(where(is.numeric), ~ round(., 2)))

  # summary_df may legitimately be NULL (clients without a summary sheet)
  summary_df_rounded <- if (is.null(summary_df)) {
    NULL
  } else {
    summary_df %>% mutate(across(where(is.numeric), ~ round(., 2)))
  }

  # Shared "WW_YYYY" suffix for every artifact of this week
  week_tag <- sprintf("%02d_%d", current_week, year)

  output_subdir <- file.path(reports_dir, "kpis", "field_analysis")
  if (!dir.exists(output_subdir)) {
    dir.create(output_subdir, recursive = TRUE)
  }

  excel_filename <- paste0(project_dir, "_field_analysis_week", week_tag, ".xlsx")
  excel_path <- file.path(output_subdir, excel_filename)
  excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)

  # Only add the Summary sheet when a summary table was supplied
  sheets <- list("Field Data" = field_df_rounded)
  if (!is.null(summary_df_rounded)) {
    sheets[["Summary"]] <- summary_df_rounded
  }

  write_xlsx(sheets, excel_path)
  message(paste("✓ Field analysis Excel exported to:", excel_path))

  kpi_data <- list(
    field_analysis = field_df_rounded,
    field_analysis_summary = summary_df_rounded,
    metadata = list(
      current_week = current_week,
      year = year,
      project = project_dir,
      created_at = Sys.time()
    )
  )

  rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", week_tag, ".rds")
  rds_path <- file.path(reports_dir, "kpis", rds_filename)
  saveRDS(kpi_data, rds_path)
  message(paste("✓ Field analysis RDS exported to:", rds_path))

  csv_filename <- paste0(project_dir, "_field_analysis_week", week_tag, ".csv")
  csv_path <- file.path(output_subdir, csv_filename)
  write_csv(field_df_rounded, csv_path)
  message(paste("✓ Field analysis CSV exported to:", csv_path))

  list(excel = excel_path, rds = rds_path, csv = csv_path)
}

# ============================================================================
# ADDITIONAL CRITICAL FUNCTIONS FROM 80_weekly_stats_utils.R
# (REQUIRED BY 80_calculate_kpis.R)
# ============================================================================

#' Calculate statistics for all fields from weekly mosaics
#'
#' Reads the weekly mosaic tile(s) for (week_num, year) from mosaic_dir,
#' extracts the "CI" band per field polygon and derives per-field summary
#' statistics (mean, CV, range, percentiles, cloud coverage). The first tile
#' containing a field wins; later tiles do not overwrite it.
#'
#' @param field_boundaries_sf sf object with one row per field polygon and a
#'   `field` column holding the field id.
#' @param week_num,year ISO week number and year of the mosaic files.
#' @param mosaic_dir Directory containing week_WW_YYYY[_TT].tif mosaics.
#' @param report_date Currently unused; kept for signature compatibility.
#' @return Data frame with one row per field. Stops if no mosaic file exists
#'   or no field could be processed.
calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
                                       mosaic_dir, report_date = Sys.Date()) {

  message(paste("Calculating statistics for all fields - Week", week_num, year))

  tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
  single_file_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year)
  tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)

  if (length(tile_files) == 0) {
    # Fall back to a single (untiled) weekly mosaic
    single_file <- list.files(mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
    if (length(single_file) > 0) {
      message(paste("  Using single-file mosaic for week", week_num))
      tile_files <- single_file[1]
    } else {
      stop(paste("No mosaic files found for week", week_num, year, "in", mosaic_dir))
    }
  }

  message(paste("  Found", length(tile_files), "mosaic file(s) for week", week_num))

  # CI threshold for the germination pixel percentage; hoisted out of the
  # per-tile loop (value unchanged)
  GERMINATION_CI_THRESHOLD <- 2.0

  results_list <- list()

  for (tile_idx in seq_along(tile_files)) {
    tile_file <- tile_files[tile_idx]
    tryCatch({
      current_rast <- terra::rast(tile_file)
      ci_band <- current_rast[["CI"]]

      if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) {
        # BUG FIX: the original used return(NULL) here. Inside a tryCatch
        # expression, return() exits the ENCLOSING function, so one tile
        # without a CI band discarded every previously processed tile.
        # We now skip only this tile.
        message(paste("  [SKIP] Tile", basename(tile_file), "- CI band not found"))
      } else {
        extracted <- terra::extract(ci_band, field_boundaries_sf, na.rm = FALSE)
        unique_field_ids <- unique(extracted$ID[!is.na(extracted$ID)])

        for (field_poly_idx in unique_field_ids) {
          field_id <- field_boundaries_sf$field[field_poly_idx]

          # Fields already captured by an earlier tile are kept as-is;
          # checking first avoids recomputing their statistics.
          if (length(results_list) > 0 &&
              any(vapply(results_list,
                         function(x) isTRUE(x$Field_id == field_id),
                         logical(1)))) {
            next
          }

          ci_vals <- extracted$CI[extracted$ID == field_poly_idx]
          ci_vals <- ci_vals[!is.na(ci_vals)]

          # Fully clouded fields (no valid CI pixel in this tile) are skipped;
          # they may still be picked up from another tile.
          if (length(ci_vals) == 0) next

          mean_ci <- mean(ci_vals)
          ci_std <- sd(ci_vals)
          cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_
          range_str <- sprintf("%.1f-%.1f", min(ci_vals), max(ci_vals))
          ci_percentiles_str <- get_ci_percentiles(ci_vals)

          num_pixels_total <- length(ci_vals)
          num_pixels_gte_2 <- sum(ci_vals >= GERMINATION_CI_THRESHOLD)
          pct_pixels_gte_2 <- if (num_pixels_total > 0) {
            round((num_pixels_gte_2 / num_pixels_total) * 100, 1)
          } else {
            0
          }

          # Cloud coverage: share of the field's pixels carrying CI data
          field_rows <- extracted[extracted$ID == field_poly_idx, ]
          num_total <- nrow(field_rows)
          num_data <- sum(!is.na(field_rows$CI))
          pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0
          cloud_cat <- if (num_data == 0) {
            "No image available"
          } else if (pct_clear >= 95) {
            "Clear view"
          } else {
            "Partial coverage"
          }

          results_list[[length(results_list) + 1]] <- data.frame(
            Field_id = field_id,
            Mean_CI = round(mean_ci, 2),
            CV = round(cv * 100, 2),
            CI_range = range_str,
            CI_Percentiles = ci_percentiles_str,
            Pct_pixels_CI_gte_2 = pct_pixels_gte_2,
            Cloud_pct_clear = pct_clear,
            Cloud_category = cloud_cat,
            stringsAsFactors = FALSE
          )
        }

        message(paste("  Tile", tile_idx, "of", length(tile_files), "processed"))
      }
    }, error = function(e) {
      message(paste("  [ERROR] Tile", basename(tile_file), ":", e$message))
    })
  }

  if (length(results_list) == 0) {
    stop(paste("No fields processed successfully for week", week_num))
  }

  stats_df <- dplyr::bind_rows(results_list)
  message(paste("  ✓ Successfully calculated statistics for", nrow(stats_df), "fields"))

  stats_df
}

#' Load or calculate weekly statistics (with RDS caching)
#'
#' Returns cached per-field statistics for the given week when an RDS cache
#' exists under <reports_dir>/kpis/field_stats/; otherwise computes them from
#' the weekly mosaics and writes both RDS and CSV caches.
load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_boundaries_sf,
                                           mosaic_dir, reports_dir, report_date = Sys.Date()) {

  rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, week_num, year)
  rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename)

  # Cache hit: return immediately
  if (file.exists(rds_path)) {
    message(paste("Loading cached statistics from:", basename(rds_path)))
    return(readRDS(rds_path))
  }

  message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num))
  stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year, mosaic_dir, report_date)

  output_dir <- file.path(reports_dir, "kpis", "field_stats")
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
  }

  saveRDS(stats_df, rds_path)
  message(paste("Saved weekly statistics RDS:", basename(rds_path)))

  csv_filename <- sprintf("%s_field_stats_week%02d_%d.csv", project_dir, week_num, year)
  csv_path <- file.path(output_dir, csv_filename)
  readr::write_csv(stats_df, csv_path)
  message(paste("Saved weekly statistics CSV:", basename(csv_path)))

  stats_df
}

#' Load historical field data from CSV (num_weeks lookback)
#'
#' Loads previously exported field_analysis CSVs for the current week and the
#' preceding weeks. Week/year arithmetic is delegated to
#' calculate_target_week_and_year() (defined elsewhere in this project).
#'
#' @param auto_generate Accepted for call-site compatibility; this CSV-based
#'   implementation does not regenerate missing weeks.
#' @param field_boundaries_sf,daily_vals_dir Accepted for call-site
#'   compatibility; not used by this implementation.
#' @return List of list(week, year, data) entries (index 1 = current week),
#'   or NULL when nothing could be loaded.
load_historical_field_data <- function(project_dir, current_week, current_year, reports_dir,
                                       num_weeks = 4, auto_generate = TRUE,
                                       field_boundaries_sf = NULL, daily_vals_dir = NULL) {

  historical_data <- list()
  loaded_weeks <- c()
  missing_weeks <- c()

  for (lookback in 0:(num_weeks - 1)) {
    target <- calculate_target_week_and_year(current_week, current_year, offset_weeks = lookback)
    target_week <- target$week
    target_year <- target$year
    week_tag <- sprintf("%02d_%d", target_week, target_year)

    csv_filename <- paste0(project_dir, "_field_analysis_week", week_tag, ".csv")
    csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename)

    if (file.exists(csv_path)) {
      tryCatch({
        data <- readr::read_csv(csv_path, show_col_types = FALSE)
        historical_data[[lookback + 1]] <- list(
          week = target_week,
          year = target_year,
          data = data
        )
        loaded_weeks <- c(loaded_weeks, paste0("week", week_tag))
      }, error = function(e) {
        message(paste("  Warning: Could not load week", target_week, "/", target_year, ":", e$message))
        # <<- because the handler runs in its own frame
        missing_weeks <<- c(missing_weeks, paste0("week", week_tag))
      })
    } else {
      missing_weeks <- c(missing_weeks, paste0("week", week_tag))
    }
  }

  if (length(historical_data) == 0) {
    message(paste("Error: No historical field data found"))
    return(NULL)
  }

  message(paste("✓ Loaded", length(historical_data), "weeks of historical data:",
                paste(loaded_weeks, collapse = ", ")))

  historical_data
}

#' Calculate KPI trends (CI change, CV trends, 4-week and 8-week trends)
#'
#' Annotates current_stats with trend columns derived from the previous
#' week's statistics and (when project_dir/reports_dir/current_week are all
#' supplied) from cached weekly field_stats RDS files:
#'   - Weekly_ci_change:      Mean_CI delta vs previous week
#'   - CV_Trend_Short_Term:   via calculate_cv_trend()
#'   - Four_week_trend:       via calculate_four_week_trend()
#'   - CV_Trend_Long_Term:    8-week slope via calculate_cv_trend_long_term()
#'   - nmr_of_weeks_analysed: carried forward from the most recent
#'                            field_analysis CSV and incremented
#'
#' Trends are only computed for fields that also appear in prev_stats; all
#' other fields keep the NA / 1L defaults.
calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
                                 project_dir = NULL, reports_dir = NULL,
                                 current_week = NULL, year = NULL) {

  message("Calculating KPI trends from current and previous week data")

  # Defaults for fields with no usable history
  current_stats$Weekly_ci_change <- NA_real_
  current_stats$CV_Trend_Short_Term <- NA_real_
  current_stats$Four_week_trend <- NA_real_
  current_stats$CV_Trend_Long_Term <- NA_real_
  current_stats$nmr_of_weeks_analysed <- 1L

  if (is.null(prev_stats) || nrow(prev_stats) == 0) {
    message("  No previous week data available - using defaults")
    return(current_stats)
  }

  message(paste("  prev_stats has", nrow(prev_stats), "rows and", ncol(prev_stats), "columns"))

  # Field_id -> row index in prev_stats; indexing with an unknown id yields NA
  prev_lookup <- setNames(seq_len(nrow(prev_stats)), prev_stats$Field_id)

  # Most recent field_analysis CSV, used only to carry nmr_of_weeks_analysed
  # forward. Best-effort: failure just leaves the counter at 1L.
  prev_field_analysis <- NULL
  tryCatch({
    analysis_dir <- file.path(reports_dir, "kpis", "field_analysis")
    if (dir.exists(analysis_dir)) {
      analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE)
      if (length(analysis_files) > 0) {
        recent_file <- analysis_files[which.max(file.info(analysis_files)$mtime)]
        prev_field_analysis <- readr::read_csv(recent_file, show_col_types = FALSE,
                                               col_select = c(Field_id, nmr_of_weeks_analysed, Phase))
      }
    }
  }, error = function(e) {
    message(paste("  Note: Could not load previous field_analysis for nmr_weeks tracking:", e$message))
  })

  if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) {
    message(paste("  Using previous field_analysis to track nmr_of_weeks_analysed"))
  }

  # Cached weekly field_stats for the multi-week trends
  historical_4weeks <- list()
  historical_8weeks <- list()

  if (!is.null(project_dir) && !is.null(reports_dir) && !is.null(current_week)) {
    message("  Loading historical field_stats for 4-week and 8-week trends...")

    # Load one lookback week's cached stats, or NULL if unavailable.
    # NOTE(review): the +52 wrap assumes 52-week years; ISO week years can
    # have 53 weeks (e.g. 2020) - confirm whether that edge case matters.
    load_week_stats <- function(lookback, warn_on_error) {
      target_week <- current_week - lookback
      target_year <- year
      if (target_week < 1) {
        target_week <- target_week + 52
        target_year <- target_year - 1
      }
      rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
      rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename)
      if (!file.exists(rds_path)) return(NULL)
      stats_data <- tryCatch(readRDS(rds_path), error = function(e) {
        if (warn_on_error) {
          message(paste("  Warning: Could not load week", target_week, ":", e$message))
        }
        NULL
      })
      if (is.null(stats_data)) return(NULL)
      list(week = target_week, stats = stats_data)
    }

    for (lookback in 1:4) {
      entry <- load_week_stats(lookback, warn_on_error = TRUE)
      if (!is.null(entry)) {
        historical_4weeks[[length(historical_4weeks) + 1]] <- entry
      }
    }

    for (lookback in 1:8) {
      # Missing/corrupt weeks are skipped silently for the 8-week series
      entry <- load_week_stats(lookback, warn_on_error = FALSE)
      if (!is.null(entry)) {
        historical_8weeks[[length(historical_8weeks) + 1]] <- entry
      }
    }

    if (length(historical_4weeks) > 0) {
      message(paste("  Loaded", length(historical_4weeks), "weeks for 4-week trend"))
    }
    if (length(historical_8weeks) > 0) {
      message(paste("  Loaded", length(historical_8weeks), "weeks for 8-week CV trend"))
    }
  }

  cv_trends_calculated <- 0
  four_week_trends_calculated <- 0
  cv_long_term_calculated <- 0

  for (i in seq_len(nrow(current_stats))) {
    field_id <- current_stats$Field_id[i]
    prev_idx <- prev_lookup[field_id]

    # Fields absent from last week's stats keep their defaults
    if (is.na(prev_idx) || prev_idx < 1 || prev_idx > nrow(prev_stats)) next

    prev_row <- prev_stats[prev_idx, , drop = FALSE]

    # Week-over-week Mean_CI delta
    prev_ci <- prev_row$Mean_CI[1]
    if (!is.na(prev_ci) && !is.na(current_stats$Mean_CI[i])) {
      current_stats$Weekly_ci_change[i] <- round(current_stats$Mean_CI[i] - prev_ci, 2)
    }

    # Short-term CV trend (this week vs last week)
    prev_cv <- prev_row$CV[1]
    if (!is.na(prev_cv) && !is.na(current_stats$CV[i])) {
      current_stats$CV_Trend_Short_Term[i] <- calculate_cv_trend(current_stats$CV[i], prev_cv)
      cv_trends_calculated <- cv_trends_calculated + 1
    }

    # 4-week Mean_CI trend: historical values oldest -> newest, with the
    # current week appended last
    if (length(historical_4weeks) > 0) {
      ci_values_4week <- numeric()
      for (hist_idx in rev(seq_along(historical_4weeks))) {
        hist_data <- historical_4weeks[[hist_idx]]$stats
        hist_field <- which(hist_data$Field_id == field_id)
        if (length(hist_field) > 0 && !is.na(hist_data$Mean_CI[hist_field[1]])) {
          ci_values_4week <- c(ci_values_4week, hist_data$Mean_CI[hist_field[1]])
        }
      }
      ci_values_4week <- c(ci_values_4week, current_stats$Mean_CI[i])

      if (length(ci_values_4week) >= 2) {
        current_stats$Four_week_trend[i] <- calculate_four_week_trend(ci_values_4week)
        four_week_trends_calculated <- four_week_trends_calculated + 1
      }
    }

    # 8-week CV slope, same oldest-to-newest ordering
    if (length(historical_8weeks) > 0) {
      cv_values_8week <- numeric()
      for (hist_idx in rev(seq_along(historical_8weeks))) {
        hist_data <- historical_8weeks[[hist_idx]]$stats
        hist_field <- which(hist_data$Field_id == field_id)
        if (length(hist_field) > 0 && !is.na(hist_data$CV[hist_field[1]])) {
          cv_values_8week <- c(cv_values_8week, hist_data$CV[hist_field[1]])
        }
      }
      cv_values_8week <- c(cv_values_8week, current_stats$CV[i])

      if (length(cv_values_8week) >= 2) {
        current_stats$CV_Trend_Long_Term[i] <- calculate_cv_trend_long_term(cv_values_8week)
        cv_long_term_calculated <- cv_long_term_calculated + 1
      }
    }

    # Carry the weeks-analysed counter forward from the last analysis CSV
    if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) {
      prev_analysis_row <- prev_field_analysis %>% dplyr::filter(Field_id == field_id)
      if (nrow(prev_analysis_row) > 0) {
        prev_nmr <- prev_analysis_row$nmr_of_weeks_analysed[1]
        current_stats$nmr_of_weeks_analysed[i] <- if (!is.na(prev_nmr)) prev_nmr + 1L else 1L
      }
    }
  }

  message(paste("  ✓ Calculated CV_Trend_Short_Term:", cv_trends_calculated, "fields"))
  message(paste("  ✓ Calculated Four_week_trend:", four_week_trends_calculated, "fields"))
  message(paste("  ✓ Calculated CV_Trend_Long_Term:", cv_long_term_calculated, "fields"))
  current_stats
}

# ============================================================================
# INTERNAL HELPER FUNCTIONS (from 80_kpi_utils.R) - DO NOT DELETE
# ============================================================================

# Spatial autocorrelation thresholds for field pattern analysis
MORAN_THRESHOLD_HIGH <- 0.95  # Very strong clustering (problematic patterns)
MORAN_THRESHOLD_MODERATE <- 0.85 # Moderate clustering
MORAN_THRESHOLD_LOW <- 0.7 # Normal field continuity

#' Calculate coefficient of variation for uniformity assessment
#'
#' @param values Numeric vector of pixel values; NA and non-finite values
#'   are dropped before computing.
#' @return sd(values) / mean(values), or NA when fewer than two finite
#'   values remain. NOTE(review): a zero mean yields Inf/NaN — presumably
#'   CI values are strictly positive; confirm upstream.
calculate_cv <- function(values) {
  values <- values[!is.na(values) & is.finite(values)]
  if (length(values) < 2) return(NA)
  cv <- sd(values) / mean(values)
  return(cv)
}

#' Calculate percentage of field with positive vs negative change
#'
#' Compares two same-length pixel vectors element-wise and reports the
#' share of pixels that increased, decreased, or stayed exactly equal.
#'
#' @param current_values Numeric vector for the current week.
#' @param previous_values Numeric vector for the previous week (same length).
#' @return list(positive_pct, negative_pct, stable_pct) in percent; all NA
#'   when lengths differ or fewer than two valid pairwise changes exist.
calculate_change_percentages <- function(current_values, previous_values) {
  if (length(current_values) != length(previous_values)) {
    return(list(positive_pct = NA, negative_pct = NA, stable_pct = NA))
  }

  change_values <- current_values - previous_values
  valid_changes <- change_values[!is.na(change_values) & is.finite(change_values)]

  if (length(valid_changes) < 2) {
    return(list(positive_pct = NA, negative_pct = NA, stable_pct = NA))
  }

  positive_pct <- sum(valid_changes > 0) / length(valid_changes) * 100
  negative_pct <- sum(valid_changes < 0) / length(valid_changes) * 100
  stable_pct <- sum(valid_changes == 0) / length(valid_changes) * 100

  return(list(
    positive_pct = positive_pct,
    negative_pct = negative_pct,
    stable_pct = stable_pct
  ))
}

#' Calculate spatial autocorrelation (Moran's I) for a field
#'
#' Crops and masks the CI raster to the field boundary, converts the
#' remaining cells to points, builds a k-nearest-neighbour weights list
#' (k between 4 and 8, scaled to roughly 1/10 of the point count) and runs
#' spdep::moran.test. Interpretation labels are driven by the
#' MORAN_THRESHOLD_* constants defined in this file.
#'
#' @param ci_raster terra SpatRaster containing the CI band.
#' @param field_boundary Geometry used for crop/mask (sf or SpatVector).
#' @return list(morans_i, p_value, interpretation). interpretation is
#'   "insufficient_data" for fewer than 10 points, "error" when any step
#'   throws (a warning is emitted in that case).
calculate_spatial_autocorrelation <- function(ci_raster, field_boundary) {
  tryCatch({
    field_raster <- terra::crop(ci_raster, field_boundary)
    field_raster <- terra::mask(field_raster, field_boundary)
    raster_points <- terra::as.points(field_raster, na.rm = TRUE)

    if (length(raster_points) < 10) {
      return(list(morans_i = NA, p_value = NA, interpretation = "insufficient_data"))
    }

    points_sf <- sf::st_as_sf(raster_points)
    coords <- sf::st_coordinates(points_sf)
    # Adaptive neighbourhood size: ~10% of points, clamped to [4, 8].
    k_neighbors <- min(8, max(4, floor(nrow(coords) / 10)))

    knn_nb <- spdep::knearneigh(coords, k = k_neighbors)
    knn_listw <- spdep::nb2listw(spdep::knn2nb(knn_nb), style = "W", zero.policy = TRUE)

    # First attribute column of the points is the CI value.
    ci_values <- points_sf[[1]]
    moran_result <- spdep::moran.test(ci_values, knn_listw, zero.policy = TRUE)

    morans_i <- moran_result$estimate[1]
    p_value <- moran_result$p.value

    # Non-significant results are labelled "random" regardless of the
    # statistic; significant results are binned by threshold.
    interpretation <- if (is.na(morans_i)) {
      "insufficient_data"
    } else if (p_value > 0.05) {
      "random"
    } else if (morans_i > MORAN_THRESHOLD_HIGH) {
      "very_strong_clustering"
    } else if (morans_i > MORAN_THRESHOLD_MODERATE) {
      "strong_clustering"
    } else if (morans_i > MORAN_THRESHOLD_LOW) {
      "normal_continuity"
    } else if (morans_i > 0.3) {
      "weak_clustering"
    } else if (morans_i < -0.3) {
      "dispersed"
    } else {
      "low_autocorrelation"
    }

    return(list(morans_i = morans_i, p_value = p_value, interpretation = interpretation))
  }, error = function(e) {
    warning(paste("Error calculating spatial autocorrelation:", e$message))
    return(list(morans_i = NA, p_value = NA, interpretation = "error"))
  })
}

#' Extract CI band from multi-band raster
#'
#' @param ci_raster terra SpatRaster (one or more layers).
#' @param field_vect SpatVector of the field geometry.
#' @return Vector of extracted values: the "CI" column when present,
#'   otherwise the last column of the extraction result.
#'   NOTE(review): with fun = NULL, terra::extract prepends an ID column,
#'   so the single-column fallback would return IDs — presumably
#'   unreachable in practice; confirm.
extract_ci_values <- function(ci_raster, field_vect) {
  extracted <- terra::extract(ci_raster, field_vect, fun = NULL)

  if ("CI" %in% names(extracted)) {
    return(extracted[, "CI"])
  } else if (ncol(extracted) > 1) {
    return(extracted[, ncol(extracted)])
  } else {
    return(extracted[, 1])
  }
}

#' Calculate current and previous week numbers using ISO 8601
#'
#' Uses %V (ISO week) and %G (ISO week-based year) so the week/year pair is
#' always internally consistent around New Year.
#'
#' @param report_date Date of the report (default: today).
#' @return list(current_week, previous_week, year, previous_year).
calculate_week_numbers <- function(report_date = Sys.Date()) {
  current_week <- as.numeric(format(report_date, "%V"))
  current_year <- as.numeric(format(report_date, "%G"))

  previous_week <- current_week - 1
  previous_year <- current_year

  if (previous_week < 1) {
    previous_year <- current_year - 1
    # ISO years can have 52 OR 53 weeks (e.g. 2020 had 53); the previous
    # hardcoded 52 was wrong for 53-week years. Dec 28 always falls in the
    # last ISO week of its year, so use it to find that week number.
    previous_week <- as.numeric(
      format(as.Date(sprintf("%d-12-28", previous_year)), "%V")
    )
  }

  return(list(
    current_week = current_week,
    previous_week = previous_week,
    year = current_year,
    previous_year = previous_year
  ))
}

#' Load field CI raster (handles single-file and per-field architectures)
#'
#' When the object carries the "is_per_field" attribute (set by
#' load_weekly_ci_mosaic), the per-field GeoTIFF is read from
#' <per_field_dir>/<field_name>/<week_file>; band 5 is used when the file
#' has at least 5 layers, otherwise band 1. In single-file mode the raster
#' is cropped/masked to field_vect when provided.
#'
#' @param ci_raster_or_obj SpatRaster or per-field marker object.
#' @param field_name Field directory name (per-field mode only).
#' @param field_vect Optional SpatVector for cropping (single-file mode).
#' @return SpatRaster for the field, or NULL when the per-field file is
#'   missing or unreadable.
load_field_ci_raster <- function(ci_raster_or_obj, field_name, field_vect = NULL) {
  is_per_field <- !is.null(attr(ci_raster_or_obj, "is_per_field")) && attr(ci_raster_or_obj, "is_per_field")

  if (is_per_field) {
    per_field_dir <- attr(ci_raster_or_obj, "per_field_dir")
    week_file <- attr(ci_raster_or_obj, "week_file")
    field_mosaic_path <- file.path(per_field_dir, field_name, week_file)

    if (file.exists(field_mosaic_path)) {
      tryCatch({
        field_mosaic <- terra::rast(field_mosaic_path)
        # Band 5 holds CI in the full mosaic layout; fall back to band 1.
        if (terra::nlyr(field_mosaic) >= 5) {
          return(field_mosaic[[5]])
        } else {
          return(field_mosaic[[1]])
        }
      }, error = function(e) {
        return(NULL)
      })
    } else {
      return(NULL)
    }
  } else {
    if (!is.null(field_vect)) {
      return(terra::crop(ci_raster_or_obj, field_vect, mask = TRUE))
    } else {
      return(ci_raster_or_obj)
    }
  }
}

#' Load weekly CI mosaic (single-file or per-field)
#'
#' Prefers a single week_WW_YYYY.tif in mosaic_dir (band 5 renamed to "CI").
#' If absent, scans field subdirectories for the same file name and, when
#' found, returns a 1x1 dummy raster tagged with attributes
#' (per_field_dir, week_file, is_per_field) that load_field_ci_raster
#' understands.
#'
#' @param week_num ISO week number.
#' @param year Year of the mosaic.
#' @param mosaic_dir Directory containing mosaics or per-field folders.
#' @return SpatRaster (CI band or per-field marker), or NULL when nothing
#'   is found or the file cannot be read.
load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) {
  week_file <- sprintf("week_%02d_%d.tif", week_num, year)
  week_path <- file.path(mosaic_dir, week_file)

  if (file.exists(week_path)) {
    tryCatch({
      mosaic_raster <- terra::rast(week_path)
      ci_raster <- mosaic_raster[[5]]
      names(ci_raster) <- "CI"
      return(ci_raster)
    }, error = function(e) {
      return(NULL)
    })
  }

  if (dir.exists(mosaic_dir)) {
    field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE)
    field_dirs <- field_dirs[field_dirs != ""]

    found_any <- FALSE
    for (field in field_dirs) {
      field_mosaic_path <- file.path(mosaic_dir, field, week_file)
      if (file.exists(field_mosaic_path)) {
        found_any <- TRUE
        break
      }
    }

    if (found_any) {
      # Marker object: attributes tell load_field_ci_raster where to look.
      dummy_raster <- terra::rast(nrow = 1, ncol = 1, vals = NA)
      attr(dummy_raster, "per_field_dir") <- mosaic_dir
      attr(dummy_raster, "week_file") <- week_file
      attr(dummy_raster, "is_per_field") <- TRUE
      return(dummy_raster)
    }
  }

  return(NULL)
}

#' Prepare predictions with consistent naming
prepare_predictions <- function(predictions, newdata) {
  return(predictions %>%
    as.data.frame() %>%
    dplyr::rename(predicted_Tcha = ".") %>%
    dplyr::mutate(
sub_field = newdata$sub_field, + field = newdata$field, + Age_days = newdata$DOY, + total_CI = round(newdata$cumulative_CI, 0), + predicted_Tcha = round(predicted_Tcha, 0), + season = newdata$season + ) %>% + dplyr::select(field, sub_field, Age_days, predicted_Tcha, season) %>% + dplyr::left_join(., newdata, by = c("field", "sub_field", "season")) + ) +} diff --git a/r_app/parameters_project.R b/r_app/parameters_project.R index a1c7794..9caa6ec 100644 --- a/r_app/parameters_project.R +++ b/r_app/parameters_project.R @@ -445,7 +445,7 @@ load_field_boundaries <- function(data_dir) { if (use_pivot_2) { field_boundaries_path <- here(data_dir, "pivot_2.geojson") } else { - field_boundaries_path <- here(data_dir, "Data", "pivot.geojson") + field_boundaries_path <- here(data_dir, "pivot.geojson") } if (!file.exists(field_boundaries_path)) {