# ============================================================================
# SCRIPT 30: Growth Model Interpolation (CI Time Series)
# ============================================================================
# PURPOSE:
#   Interpolate Canopy Index (CI) values across time to create continuous
#   growth curves. Fills gaps in measurement dates, applies smoothing via
#   LOESS, and generates daily CI estimates and cumulative statistics for
#   each field. Enables downstream yield prediction and trend analysis.
#
# INPUT DATA:
#   - Source: per-field daily CI values written by Script 20, read from
#     setup$daily_ci_vals_dir (layout: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds)
#   - Format: RDS (one file per field per date)
#   - Requirement: Field boundaries (pivot.geojson) and harvest data (harvest.xlsx)
#
# OUTPUT DATA:
#   - Destination: setup$cumulative_ci_vals_dir (resolved by
#     setup_project_directories() from parameters_project.R)
#   - Format: single RDS file, All_pivots_Cumulative_CI_quadrant_year_v2.rds
#   - Contents: interpolated CI with daily and cumulative growth metrics
#
# USAGE:
#   Rscript 30_interpolate_growth_model.R [project]
#
#   Example (Windows PowerShell):
#   & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
#
# PARAMETERS:
#   - project: Project name (character) - angata, chemba, xinavane, esa, simba
#
# CLIENT TYPES:
#   - cane_supply (ANGATA): Yes - core growth monitoring
#   - agronomic_support (AURA): Yes - field health trend analysis
#
# DEPENDENCIES:
#   - Packages: here, tidyverse, lubridate, readxl, future, furrr, parallel,
#     data.table
#   - Utils files: parameters_project.R, 00_common_utils.R, 30_growth_model_utils.R
#   - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx)
#   - Input data: per-field daily CI RDS files from Script 20
#   - Data directories: interpolated_ci/ (created if missing)
#
# NOTES:
#   - Interpolation method: LOESS smoothing with span = 0.3 (sensitive to local trends)
#   - Gap-filling: Assumes continuous growth between measurements; skips clouds
#   - Cumulative CI: Sum of daily interpolated values from planting to current date
#   - Used by: Script 80 (KPI trends) and Script 12 (yield forecasting)
#   - Critical for 8-week CV trend calculation and 4-week growth categorization
#
# RELATED ISSUES:
#   SC-112: Utilities restructuring
#   SC-108: Core pipeline improvements
#
# ============================================================================

# 1. Load required packages
# -----------------------
suppressPackageStartupMessages({
  # File path handling
  library(here)       # For relative path resolution (platform-independent file paths)
  
  # Data manipulation
  library(tidyverse)  # For dplyr (data wrangling, grouping, mutating)
  library(lubridate)  # For date/time operations (date arithmetic, ISO week extraction)
  library(readxl)     # For reading harvest.xlsx (harvest dates for growth model phases)
  
  # Parallel processing (Windows PSOCK + Mac/Linux fork-safe)
  library(future)     # For setting up parallel execution plans
  library(furrr)      # For future_map_dfr (parallel file I/O and field processing)
  library(parallel)   # For detectCores (automatic CPU detection)
  library(data.table) # For fast filtering on large datasets
})

# =============================================================================
# MAIN PROCESSING FUNCTION
# =============================================================================

#' Run the Script 30 growth-model interpolation pipeline for one project.
#'
#' Reads the project name from the first command-line argument (falling back
#' to "angata"), sources the project parameters and growth-model utilities,
#' loads harvest data and the per-field daily CI values, interpolates CI for
#' every harvest year, adds growth metrics and saves the result as
#' "All_pivots_Cumulative_CI_quadrant_year_v2.rds" in
#' `setup$cumulative_ci_vals_dir`.
#'
#' Side effects: changes the working directory to the parent when started
#' from inside `r_app/`, assigns `project_dir` in the global environment,
#' sources two utility scripts, writes log messages via `safe_log()` and
#' writes the output file via `save_growth_model()`.
#'
#' Errors: any failure while sourcing the utilities or while processing is
#' logged and then re-signalled, so the script terminates with an error.
#'
#' @return Called for its side effects; no meaningful return value.
main <- function() {
  # STEP 1: Set working directory to project root (smartcane/).
  # All paths below ("r_app/...") are relative to the project root.
  if (basename(getwd()) == "r_app") {
    setwd("..")
  }

  # STEP 2: Parse command-line arguments, then source the utility scripts.
  args <- commandArgs(trailingOnly = TRUE)
  project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"

  # parameters_project.R reads project_dir from the global environment.
  assign("project_dir", project_dir, envir = .GlobalEnv)

  # Load parameters_project.R (provides setup_project_directories, etc.)
  tryCatch({
    source("r_app/parameters_project.R")
  }, error = function(e) {
    cat(sprintf("Error loading parameters_project.R: %s\n", conditionMessage(e)))
    stop(e)
  })

  # Load growth model utilities
  tryCatch({
    source("r_app/30_growth_model_utils.R")
  }, error = function(e) {
    cat(sprintf("Error loading 30_growth_model_utils.R: %s\n", conditionMessage(e)))
    stop(e)
  })

  # STEP 3: All utilities are loaded; proceed with the script logic.
  safe_log("=== Script 30: Growth Model Interpolation ===")
  safe_log(sprintf("Project: %s", project_dir))

  # Resolve directory paths from the project parameters.
  setup <- setup_project_directories(project_dir)

  # Per-field architecture: Script 20 writes daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
  daily_vals_dir <- setup$daily_ci_vals_dir
  safe_log(sprintf("Using per-field daily CI directory: %s", daily_vals_dir))

  safe_log("Starting CI growth model interpolation")

  data_dir <- setup$data_dir

  tryCatch({
    # Harvest data is loaded first: it is mandatory for interpolation and is
    # also passed to the CI loader so it can pre-filter by date range.
    harvesting_data <- tryCatch({
      load_harvesting_data(data_dir)
    }, error = function(e) {
      safe_log(
        paste("Error loading harvest data for pre-filtering:", conditionMessage(e)),
        "WARNING"
      )
      NULL
    })

    # Validate BEFORE loading CI data: without harvest data the run cannot
    # succeed, so there is no point in reading every per-field CI file first.
    if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
      safe_log("No harvesting data available", "ERROR")
      stop("No harvesting data available")
    }

    # Load CI data with date range pre-filtering
    CI_data <- load_combined_ci_data(daily_vals_dir, harvesting_data = harvesting_data)

    # Years that have a known season start
    years <- harvesting_data %>%
      filter(!is.na(season_start)) %>%
      distinct(year) %>%
      pull(year)

    safe_log(paste("Processing data for years:", paste(years, collapse = ", ")))

    # Generate interpolated CI data for each year and field
    CI_all <- generate_interpolated_ci_data(years, harvesting_data, CI_data)

    # Disabled workaround, kept for reference:
    # CI_all <- CI_all %>%
    #   group_by(Date, field, season) %>%
    #   filter(!(field == "00F25" & season == 2023 & duplicated(DOY)))

    # isTRUE() also covers a NULL result, where nrow() returns NULL and a
    # bare `nrow(CI_all) > 0` would fail with "argument is of length zero".
    if (isTRUE(nrow(CI_all) > 0)) {
      # Add daily and cumulative metrics
      CI_all_with_metrics <- calculate_growth_metrics(CI_all)

      # Save the processed data to the cumulative_vals directory
      save_growth_model(
        CI_all_with_metrics,
        setup$cumulative_ci_vals_dir,
        "All_pivots_Cumulative_CI_quadrant_year_v2.rds"
      )
    } else {
      safe_log("No CI data was generated after interpolation", "WARNING")
    }

    safe_log("Growth model interpolation completed successfully")

  }, error = function(e) {
    safe_log(
      paste("Error in growth model interpolation:", conditionMessage(e)),
      "ERROR"
    )
    # Re-signal the original condition (same as the source() handlers above)
    # so its class and call are preserved for any outer handler.
    stop(e)
  })
}

# Script entry point: sys.nframe() is 0 only when this code is evaluated at
# top level (no enclosing function call), so main() runs under direct
# execution but not when the file is evaluated from inside another call.
if (!(sys.nframe() > 0)) {
  main()
}