SmartCane/r_app/30_interpolate_growth_model.R

177 lines
6.6 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# ============================================================================
# SCRIPT 30: Growth Model Interpolation (CI Time Series)
# ============================================================================
# PURPOSE:
# Interpolate Canopy Index (CI) values across time to create continuous
# growth curves. Fills gaps in measurement dates, applies smoothing via
# LOESS, and generates daily CI estimates and cumulative statistics for
# each field. Enables downstream yield prediction and trend analysis.
#
# INPUT DATA:
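# - Source: per-field daily CI values produced by Script 20, read from the
#   directory given by setup$daily_ci_vals_dir
#   (layout: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds)
# - Path listed previously in this header (confirm whether it is still used):
#   laravel_app/storage/app/{project}/combined_CI/combined_CI_data.rds
# - Format: RDS (wide format: fields × dates with CI values)
# - Requirement: Field boundaries (pivot.geojson) and harvest data (harvest.xlsx)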
#
# OUTPUT DATA:
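# - Destination: directory given by setup$cumulative_ci_vals_dir
#   (this header previously listed
#   laravel_app/storage/app/{project}/interpolated_ci/ - confirm which applies)
# - Format: one RDS file, All_pivots_Cumulative_CI_quadrant_year_v2.rds,
#   written via save_growth_model() (interpolated CI plus growth metrics)
# - Also exports: Growth model curves as RDS (cumulative CI, daily values)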
#
# USAGE:
# Rscript 30_interpolate_growth_model.R [project]
#
# Example (Windows PowerShell):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
#
# PARAMETERS:
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
#
# CLIENT TYPES:
# - cane_supply (ANGATA): Yes - core growth monitoring
# - agronomic_support (AURA): Yes - field health trend analysis
#
# DEPENDENCIES:
# - Packages: here, tidyverse, lubridate, readxl
# - Utils files: parameters_project.R, 00_common_utils.R, 30_growth_model_utils.R
# - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx)
# - Input data: combined_CI_data.rds from Script 20
# - Data directories: interpolated_ci/ (created if missing)
#
# NOTES:
# - Interpolation method: LOESS smoothing with span = 0.3 (sensitive to local trends)
# - Gap-filling: Assumes continuous growth between measurements; skips clouds
# - Cumulative CI: Sum of daily interpolated values from planting to current date
# - Used by: Script 80 (KPI trends) and Script 12 (yield forecasting)
# - Critical for 8-week CV trend calculation and 4-week growth categorization
#
# RELATED ISSUES:
# SC-112: Utilities restructuring
# SC-108: Core pipeline improvements
#
# ============================================================================
# 1. Load required packages
# -----------------------
suppressPackageStartupMessages({
# File path handling
library(here) # For relative path resolution (platform-independent file paths)
# Data manipulation
library(tidyverse) # For dplyr (data wrangling, grouping, mutating)
library(lubridate) # For date/time operations (date arithmetic, ISO week extraction)
library(readxl) # For reading harvest.xlsx (harvest dates for growth model phases)
})
# =============================================================================
# MAIN PROCESSING FUNCTION
# =============================================================================
# Run the Script 30 pipeline: load the CI data and the harvest records,
# interpolate CI per year/field, add growth metrics and save the result.
#
# Arguments:
#   none. The project name is taken from the first trailing command-line
#   argument and falls back to "angata" when it is missing or empty.
#
# Side effects:
#   - Moves the working directory up one level when started from r_app/
#     (the change is NOT restored when the function exits).
#   - Assigns `project_dir` in the global environment.
#   - Sources r_app/parameters_project.R and r_app/30_growth_model_utils.R.
#   - Calls save_growth_model() to write
#     "All_pivots_Cumulative_CI_quadrant_year_v2.rds" into
#     setup$cumulative_ci_vals_dir when interpolation produced rows.
#
# Errors:
#   Any failure is logged and then re-raised as the ORIGINAL condition object,
#   so its class and call remain available to callers.
#
# Returns:
#   The value of the final safe_log() call; callers should not rely on it.
main <- function() {
  # STEP 1: Resolve relative paths from the project root rather than r_app/.
  if (basename(getwd()) == "r_app") {
    setwd("..")
  }

  # STEP 2: Read the project name first; parameters_project.R expects to find
  # `project_dir` in the global environment when it is sourced.
  args <- commandArgs(trailingOnly = TRUE)
  project_dir <- if (length(args) >= 1 && nzchar(args[1])) args[1] else "angata"
  assign("project_dir", project_dir, envir = .GlobalEnv)

  # Source the utility scripts in order (project parameters first, then the
  # growth model helpers). cat() is used for the failure message because the
  # logging helper is presumably not defined until these files are loaded.
  for (util_file in c("parameters_project.R", "30_growth_model_utils.R")) {
    tryCatch(
      source(file.path("r_app", util_file)),
      error = function(e) {
        cat(sprintf("Error loading %s: %s\n", util_file, conditionMessage(e)))
        stop(e)
      }
    )
  }

  # STEP 3: Utilities are available from here on.
  safe_log("=== Script 30: Growth Model Interpolation ===")
  safe_log(sprintf("Project: %s", project_dir))

  # Directory layout for this project, as provided by parameters_project.R.
  setup <- setup_project_directories(project_dir)

  # Per-field architecture: Script 20 writes daily values per field
  # (daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds) and they are read from here.
  daily_vals_dir <- setup$daily_ci_vals_dir
  safe_log(sprintf("Using per-field daily CI directory: %s", daily_vals_dir))
  safe_log("Starting CI growth model interpolation")

  tryCatch(
    {
      CI_data <- load_combined_ci_data(daily_vals_dir)

      # Harvest records drive the growth model phase assignment. A load
      # failure is downgraded to a warning here and handled by the
      # validation below. NOTE(review): load_harvesting_data() is expected to
      # replace NA season_end with Sys.Date() - confirm in the utils file.
      harvesting_data <- tryCatch(
        load_harvesting_data(setup$data_dir),
        error = function(e) {
          safe_log(
            paste("Error loading harvest data:", conditionMessage(e)),
            "WARNING"
          )
          NULL
        }
      )

      if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
        safe_log("No harvesting data available", "ERROR")
        stop("No harvesting data available")
      }

      # Years that have at least one record with a known season start.
      years <- harvesting_data %>%
        filter(!is.na(season_start)) %>%
        distinct(year) %>%
        pull(year)

      if (length(years) == 0) {
        safe_log("No years with a season_start found in harvesting data", "WARNING")
      }
      safe_log(paste("Processing data for years:", paste(years, collapse = ", ")))

      CI_all <- generate_interpolated_ci_data(years, harvesting_data, CI_data)

      # isTRUE() keeps a NULL (or otherwise row-less) result on the warning
      # path instead of failing with "argument is of length zero".
      if (isTRUE(nrow(CI_all) > 0)) {
        CI_all_with_metrics <- calculate_growth_metrics(CI_all)
        save_growth_model(
          CI_all_with_metrics,
          setup$cumulative_ci_vals_dir,
          "All_pivots_Cumulative_CI_quadrant_year_v2.rds"
        )
      } else {
        safe_log("No CI data was generated after interpolation", "WARNING")
      }

      # NOTE(review): this is also logged when nothing was generated above.
      safe_log("Growth model interpolation completed successfully")
    },
    error = function(e) {
      safe_log(
        paste("Error in growth model interpolation:", conditionMessage(e)),
        "ERROR"
      )
      # Re-signal the condition itself rather than only its message text.
      stop(e)
    }
  )
}
# Entry point: sys.nframe() is 0 only when this file is evaluated at top level
# (e.g. launched with Rscript), so main() is not run automatically when the
# file is evaluated from inside another call.
if (sys.nframe() == 0L) main()