SmartCane/r_app/30_interpolate_growth_model.R

167 lines
6.3 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# ============================================================================
# SCRIPT 30: Growth Model Interpolation (CI Time Series)
# ============================================================================
# PURPOSE:
# Interpolate Canopy Index (CI) values across time to create continuous
# growth curves. Fills gaps in measurement dates, applies smoothing via
# LOESS, and generates daily CI estimates and cumulative statistics for
# each field. Enables downstream yield prediction and trend analysis.
#
# INPUT DATA:
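# - Source: per-field daily CI values written by Script 20, read from
#   setup$daily_vals_per_field_dir (layout: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds)
#   and combined by load_combined_ci_data()
# - Earlier reference: laravel_app/storage/app/{project}/combined_CI/combined_CI_data.rds
#   (RDS, wide format: fields x dates with CI values); this path is not
#   referenced directly in this script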
# - Requirement: Field boundaries (pivot.geojson) and harvest data (harvest.xlsx)
#
# OUTPUT DATA:
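# - Destination: setup$cumulative_CI_vals_dir (this header previously listed
#   laravel_app/storage/app/{project}/interpolated_ci/ -- confirm against
#   setup_project_directories())
# - Format: RDS written via save_growth_model() as
#   All_pivots_Cumulative_CI_quadrant_year_v2.rds
# - Contents: interpolated CI values with daily and cumulative growth metrics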
#
# USAGE:
# Rscript 30_interpolate_growth_model.R [project]
#
# Example (Windows PowerShell):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
#
# PARAMETERS:
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
#
# CLIENT TYPES:
# - cane_supply (ANGATA): Yes - core growth monitoring
# - agronomic_support (AURA): Yes - field health trend analysis
#
# DEPENDENCIES:
# - Packages: here, tidyverse, lubridate
# - Utils files: parameters_project.R, 00_common_utils.R, 30_growth_model_utils.R
# - External data: Field boundaries (pivot.geojson), harvest data (harvest.xlsx)
# - Input data: per-field daily CI .rds files from Script 20 (combined at load time)
# - Data directories: interpolated_ci/ (created if missing)
#
# NOTES:
# - Interpolation method: LOESS smoothing with span = 0.3 (sensitive to local trends)
# - Gap-filling: Assumes continuous growth between measurements; skips clouds
# - Cumulative CI: Sum of daily interpolated values from planting to current date
# - Used by: Script 80 (KPI trends) and Script 12 (yield forecasting)
# - Critical for 8-week CV trend calculation and 4-week growth categorization
#
# RELATED ISSUES:
# SC-112: Utilities restructuring
# SC-108: Core pipeline improvements
#
# ============================================================================
# 1. Load required packages
# -----------------------
suppressPackageStartupMessages({
# File path handling
library(here) # For relative path resolution (platform-independent file paths)
# Data manipulation
library(tidyverse) # For dplyr (data wrangling, grouping, mutating)
library(lubridate) # For date/time operations (date arithmetic, ISO week extraction)
})
# =============================================================================
# Load configuration and utility functions
# =============================================================================
source(here::here("r_app", "parameters_project.R"))
source(here::here("r_app", "00_common_utils.R"))
source(here::here("r_app", "30_growth_model_utils.R"))
# =============================================================================
# Main Processing
# =============================================================================
#' Run the Script 30 growth-model interpolation for one project.
#'
#' Takes the project name from the first trailing command-line argument
#' (falling back to "angata"), publishes it as `project_dir` in the global
#' environment, re-sources parameters_project.R, loads the per-field daily CI
#' data, interpolates it for every harvest year and saves the result together
#' with its growth metrics.
#'
#' Depends on objects that are not defined in this file: `safe_log()`,
#' `setup_project_directories()`, `load_combined_ci_data()`,
#' `generate_interpolated_ci_data()`, `calculate_growth_metrics()`,
#' `save_growth_model()` and the global `harvesting_data` (presumably created
#' by the sourced parameter/utility scripts -- confirm there).
#'
#' @return Called for its side effects (logging, possibly changing the working
#'   directory, assigning `project_dir` globally, saving the RDS output). The
#'   value of the final logging call is returned and is not meant to be used.
#' @section Errors:
#'   Any failure is logged at "ERROR" level and the original condition is
#'   re-signalled, so callers still see its class and call.
main <- function() {
  # When launched from inside r_app/, step up to the project root. This only
  # changes the working directory (relevant to any cwd-relative paths used by
  # helpers); it is intentionally not restored, as in the original script.
  if (basename(getwd()) == "r_app") {
    setwd("..")
  }

  # Parse command-line arguments: first argument is the project name.
  cli_args <- commandArgs(trailingOnly = TRUE)
  project_dir <- if (length(cli_args) >= 1 && nzchar(cli_args[1])) {
    cli_args[1]
  } else {
    "angata"
  }

  # parameters_project.R reads `project_dir` from the global environment.
  assign("project_dir", project_dir, envir = .GlobalEnv)

  safe_log("=== Script 30: Growth Model Interpolation ===")
  safe_log(sprintf("Project: %s", project_dir))

  # 1. Load parameters (includes field boundaries setup)
  # ---------------------------------------------------
  # Re-sourced now that `project_dir` is set. Uses the same here::here() path
  # as the top-of-file source() call so it does not depend on the working
  # directory.
  tryCatch({
    source(here::here("r_app", "parameters_project.R"))
    safe_log("Loaded parameters_project.R")
  }, error = function(e) {
    safe_log(
      sprintf("Error loading parameters: %s", conditionMessage(e)),
      "ERROR"
    )
    stop(e)
  })

  # 2. Set up directory paths from parameters
  # -----------------------------------------
  setup <- setup_project_directories(project_dir)

  # Per-field architecture: read the Script 20 per-field output directory.
  daily_vals_dir <- setup$daily_vals_per_field_dir
  safe_log(sprintf("Using per-field daily CI directory: %s", daily_vals_dir))
  safe_log("Starting CI growth model interpolation")

  # 3. Load and process the data
  # ----------------------------
  tryCatch({
    # Script 20 per-field outputs: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
    CI_data <- load_combined_ci_data(daily_vals_dir)

    # Validate harvesting data
    if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
      safe_log("No harvesting data available", "ERROR")
      stop("No harvesting data available")
    }

    # Distinct years that have a known season start
    years <- harvesting_data %>%
      filter(!is.na(season_start)) %>%
      distinct(year) %>%
      pull(year)
    safe_log(paste("Processing data for years:", paste(years, collapse = ", ")))

    # Generate interpolated CI data for each year and field
    CI_all <- generate_interpolated_ci_data(years, harvesting_data, CI_data)

    # CI_all <- CI_all %>%
    # group_by(Date, field, season) %>%
    # filter(!(field == "00F25" & season == 2023 & duplicated(DOY)))

    # isTRUE() keeps a NULL (or otherwise row-less) result on the WARNING
    # branch instead of failing with "argument is of length zero".
    if (isTRUE(nrow(CI_all) > 0)) {
      # Add daily and cumulative metrics
      CI_all_with_metrics <- calculate_growth_metrics(CI_all)

      # Save the processed data to the cumulative_vals directory
      save_growth_model(
        CI_all_with_metrics,
        setup$cumulative_CI_vals_dir,
        "All_pivots_Cumulative_CI_quadrant_year_v2.rds"
      )
    } else {
      safe_log("No CI data was generated after interpolation", "WARNING")
    }

    safe_log("Growth model interpolation completed successfully")
  }, error = function(e) {
    safe_log(
      paste("Error in growth model interpolation:", conditionMessage(e)),
      "ERROR"
    )
    # Re-signal the original condition (not just its message) so its class and
    # call are preserved, matching the parameter-loading handler above.
    stop(e)
  })
}
# Script entry point: sys.nframe() is 0 only at top level (no function call on
# the stack), so main() runs under Rscript but not when this file is source()d.
if (identical(sys.nframe(), 0L)) main()