# ============================================================================
# SCRIPT 30: Growth Model Interpolation (CI Time Series)
# ============================================================================
# PURPOSE:
#   Interpolate Canopy Index (CI) values across time to create continuous
#   growth curves. Fills gaps in measurement dates, applies smoothing via
#   LOESS, and generates daily CI estimates and cumulative statistics for
#   each field. Enables downstream yield prediction and trend analysis.
#
# INPUT DATA:
#   - Source: laravel_app/storage/app/{project}/combined_CI/combined_CI_data.rds
#   - Format: RDS (wide format: fields × dates with CI values)
#   - Requirement: Field boundaries (pivot.geojson) and harvest data (harvest.xlsx)
#   - NOTE(review): main() reads CI values from setup$daily_ci_vals_dir (the
#     per-field daily output of Script 20), not from the combined_CI path
#     above - confirm which description is current.
#
# OUTPUT DATA:
#   - Destination: laravel_app/storage/app/{project}/interpolated_ci/
#   - Format: RDS files per field (daily CI estimates)
#   - Also exports: Growth model curves as RDS (cumulative CI, daily values)
#   - NOTE(review): main() writes a single file,
#     "All_pivots_Cumulative_CI_quadrant_year_v2.rds", into
#     setup$cumulative_ci_vals_dir - confirm the destination listed above.
#
# USAGE:
#   Rscript 30_interpolate_growth_model.R [project]
#
#   Example (Windows PowerShell):
#   & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
#
# PARAMETERS:
#   - project: Project name (character) - angata, chemba, xinavane, esa, simba
#              Defaults to "angata" when omitted or empty.
#
# CLIENT TYPES:
#   - cane_supply (ANGATA): Yes - core growth monitoring
#   - agronomic_support (AURA): Yes - field health trend analysis
#
# DEPENDENCIES:
#   - Packages: here, tidyverse, lubridate, readxl, future, furrr, parallel,
#     data.table
#   - Utils files: parameters_project.R, 00_common_utils.R, 30_growth_model_utils.R
#     (this script sources parameters_project.R and 30_growth_model_utils.R
#     directly; 00_common_utils.R is presumably loaded by parameters_project.R
#     - TODO confirm)
#   - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx)
#   - Input data: combined_CI_data.rds from Script 20
#   - Data directories: interpolated_ci/ (created if missing)
#
# NOTES:
#   - Interpolation method: LOESS smoothing with span = 0.3 (sensitive to local trends)
#   - Gap-filling: Assumes continuous growth between measurements; skips clouds
#   - Cumulative CI: Sum of daily interpolated values from planting to current date
#   - Used by: Script 80 (KPI trends) and Script 12 (yield forecasting)
#   - Critical for 8-week CV trend calculation and 4-week growth categorization
#
# RELATED ISSUES:
#   SC-112: Utilities restructuring
#   SC-108: Core pipeline improvements
#
# ============================================================================

# 1. Load required packages
# -----------------------
suppressPackageStartupMessages({
  # File path handling
  library(here)       # For relative path resolution (platform-independent file paths)

  # Data manipulation
  library(tidyverse)  # For dplyr (data wrangling, grouping, mutating)
  library(lubridate)  # For date/time operations (date arithmetic, ISO week extraction)
  library(readxl)     # For reading harvest.xlsx (harvest dates for growth model phases)

  # Parallel processing (Windows PSOCK + Mac/Linux fork-safe)
  library(future)     # For setting up parallel execution plans
  library(furrr)      # For future_map_dfr (parallel file I/O and field processing)
  library(parallel)   # For detectCores (automatic CPU detection)
  library(data.table) # For fast filtering on large datasets
})

# =============================================================================
# MAIN PROCESSING FUNCTION
# =============================================================================

#' Run the growth-model interpolation pipeline for one project
#'
#' Reads the project name from the first command-line argument (falling back
#' to "angata"), sources the project parameter and growth-model utility
#' scripts, loads harvest and CI data, interpolates CI per year, computes
#' growth metrics and saves the result as
#' "All_pivots_Cumulative_CI_quadrant_year_v2.rds" in
#' `setup$cumulative_ci_vals_dir`.
#'
#' Side effects: may change the working directory to the parent of `r_app`,
#' assigns `project_dir` in the global environment, sources two scripts
#' and writes an RDS file through `save_growth_model()`.
#'
#' The helpers `safe_log()`, `setup_project_directories()`,
#' `load_harvesting_data()`, `load_combined_ci_data()`,
#' `generate_interpolated_ci_data()`, `calculate_growth_metrics()` and
#' `save_growth_model()` are not defined in this file; they are expected to
#' come from the sourced utility scripts.
#'
#' @return The value of the final `safe_log()` call. Signals an error when a
#'   utility script cannot be sourced, when no harvest data is available, or
#'   when any processing step fails.
main <- function() {
  # STEP 1: Set working directory to project root (smartcane/)
  # This ensures all relative paths resolve correctly
  if (basename(getwd()) == "r_app") {
    setwd("..")
  }

  # STEP 2: SOURCE ALL UTILITY SCRIPTS (before any operations)
  # Parse command-line arguments FIRST
  args <- commandArgs(trailingOnly = TRUE)
  project_dir <- if (length(args) >= 1 && nzchar(args[[1]])) {
    args[[1]]
  } else {
    "angata"
  }

  # Make project_dir available globally for parameters_project.R
  assign("project_dir", project_dir, envir = .GlobalEnv)

  # Source one utility script; report the failing file on stdout and re-raise
  # the original condition so the script still aborts.
  source_utility <- function(path) {
    tryCatch(
      source(path),
      error = function(e) {
        cat(sprintf(
          "Error loading %s: %s\n", basename(path), conditionMessage(e)
        ))
        stop(e)
      }
    )
  }

  # Load parameters_project.R (provides setup_project_directories, etc.)
  source_utility("r_app/parameters_project.R")

  # Load growth model utilities
  source_utility("r_app/30_growth_model_utils.R")

  # STEP 3: Now all utilities are loaded, proceed with script logic
  safe_log("=== Script 30: Growth Model Interpolation ===")
  safe_log(sprintf("Project: %s", project_dir))

  # Set up directory paths from parameters
  setup <- setup_project_directories(project_dir)

  # For per-field architecture: read from daily_ci_vals_dir
  # (Script 20 per-field output)
  daily_vals_dir <- setup$daily_ci_vals_dir
  safe_log(sprintf("Using per-field daily CI directory: %s", daily_vals_dir))

  safe_log("Starting CI growth model interpolation")

  # Set up data directory paths
  data_dir <- setup$data_dir

  # Load and process the data
  tryCatch({
    # A failure to read the harvest data is downgraded to a warning here and
    # turned into a hard error by the validation immediately below.
    harvesting_data <- tryCatch(
      load_harvesting_data(data_dir),
      error = function(e) {
        safe_log(
          paste(
            "Error loading harvest data for pre-filtering:",
            conditionMessage(e)
          ),
          "WARNING"
        )
        NULL
      }
    )

    # Validate harvesting data BEFORE loading CI data: every later step needs
    # it, so failing here avoids reading CI files that would be thrown away.
    if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
      safe_log("No harvesting data available", "ERROR")
      stop("No harvesting data available")
    }

    # Load the combined CI data (created by Script 20 per-field)
    # Script 20 per-field outputs: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
    # OPTIMIZATION: Pass harvest data to pre-filter by date range
    # (skip unnecessary files)
    CI_data <- load_combined_ci_data(
      daily_vals_dir,
      harvesting_data = harvesting_data
    )

    # Get the years from harvesting data (rows without a season start are
    # ignored)
    years <- harvesting_data %>%
      filter(!is.na(season_start)) %>%
      distinct(year) %>%
      pull(year)

    safe_log(paste("Processing data for years:", paste(years, collapse = ", ")))

    # Generate interpolated CI data for each year and field
    CI_all <- generate_interpolated_ci_data(years, harvesting_data, CI_data)

    # CI_all <- CI_all %>%
    #   group_by(Date, field, season) %>%
    #   filter(!(field == "00F25" & season == 2023 & duplicated(DOY)))

    # Calculate growth metrics and save the results.
    # isTRUE() keeps a NULL result on the warning path instead of failing
    # with "argument is of length zero".
    if (isTRUE(nrow(CI_all) > 0)) {
      # Add daily and cumulative metrics
      CI_all_with_metrics <- calculate_growth_metrics(CI_all)

      # Save the processed data to cumulative_vals directory
      save_growth_model(
        CI_all_with_metrics,
        setup$cumulative_ci_vals_dir,
        "All_pivots_Cumulative_CI_quadrant_year_v2.rds"
      )
    } else {
      safe_log("No CI data was generated after interpolation", "WARNING")
    }

    safe_log("Growth model interpolation completed successfully")
  }, error = function(e) {
    safe_log(
      paste("Error in growth model interpolation:", conditionMessage(e)),
      "ERROR"
    )
    # Re-raise the original condition so its class and call are preserved.
    stop(e)
  })
}

# Run main() only when executed as a script (e.g. via Rscript), not when this
# file is sourced from another frame.
if (sys.nframe() == 0) {
  main()
}