script 30 refactored to per field

2026-01-29 21:36:47 +01:00 · 2026-01-29 21:36:47 +01:00 · 9b8c971902
parent 4d6bba828f
commit 9b8c971902
3 changed files with 103 additions and 106 deletions
--- a/r_app/20_ci_extraction_per_field.R
+++ b/r_app/20_ci_extraction_per_field.R
@ -190,48 +190,6 @@ main <- function() {
    safe_log(sprintf("  TIFFs: %s", setup$field_tiles_ci_dir))
    safe_log(sprintf("  RDS:   %s", setup$daily_vals_per_field_dir))
  }
-  
-  # 8. Aggregate per-field daily RDS files into combined_CI_data.rds
-  # ----------------------------------------------------------------
-  # This creates the wide-format (fields × dates) file that Script 30 and
-  # other downstream scripts expect for backward compatibility
-  safe_log("\n=== Aggregating Per-Field Daily RDS into combined_CI_data.rds ===")
-  
-  tryCatch({
-    # Find all daily RDS files (recursively from daily_vals/{FIELD}/{DATE}.rds)
-    all_daily_files <- list.files(
-      path = setup$daily_vals_per_field_dir,
-      pattern = "\\.rds$",
-      full.names = TRUE,
-      recursive = TRUE
-    )
-    
-    if (length(all_daily_files) == 0) {
-      safe_log("No daily RDS files found to aggregate", "WARNING")
-    } else {
-      safe_log(sprintf("Aggregating %d daily RDS files into combined_CI_data.rds", length(all_daily_files)))
-      
-      # Read and combine all daily RDS files
-      combined_data <- all_daily_files %>%
-        purrr::map(readRDS) %>%
-        purrr::list_rbind() %>%
-        dplyr::group_by(sub_field)
-      
-      # Create output directory if needed
-      dir.create(setup$cumulative_CI_vals_dir, showWarnings = FALSE, recursive = TRUE)
-      
-      # Save combined data
-      combined_ci_path <- file.path(setup$cumulative_CI_vals_dir, "combined_CI_data.rds")
-      saveRDS(combined_data, combined_ci_path)
-      
-      safe_log(sprintf("✓ Created combined_CI_data.rds with %d rows from %d files", 
-                         nrow(combined_data), length(all_daily_files)))
-      safe_log(sprintf("  Location: %s", combined_ci_path))
-    }
-  }, error = function(e) {
-    safe_log(sprintf("⚠ Error aggregating to combined_CI_data.rds: %s", e$message), "WARNING")
-    safe_log("  This is OK - Script 30 can still use per-field RDS files directly", "WARNING")
-  })
 }

 # Execute main if called from command line
--- a/r_app/30_growth_model_utils.R
+++ b/r_app/30_growth_model_utils.R
@ -29,32 +29,75 @@ safe_log <- function(message, level = "INFO") {
 #' @return Long-format dataframe with CI values by date
 #'
 load_combined_ci_data <- function(data_dir) {
-  file_path <- here::here(data_dir, "combined_CI_data.rds")
+  # Load all daily RDS files from daily_vals/ directory
+  daily_vals_dir <- file.path(data_dir, "..", "daily_vals")
  
-  if (!file.exists(file_path)) {
-    stop(paste("Combined CI data file not found:", file_path))
+  if (!dir.exists(daily_vals_dir)) {
+    stop(paste("Daily values directory not found:", daily_vals_dir))
  }
  
-  safe_log(paste("Loading CI data from:", file_path))
+  safe_log(paste("Loading CI data from daily files in:", daily_vals_dir))
  
-  # Load and transform the data to long format
-  pivot_stats <- readRDS(file_path) %>%
-    dplyr::ungroup() %>%
-    dplyr::group_by(field, sub_field) %>%
-    dplyr::summarise(dplyr::across(everything(), ~ first(stats::na.omit(.))), .groups = "drop")
+  # Find all daily RDS files recursively
+  all_daily_files <- list.files(
+    path = daily_vals_dir,
+    pattern = "\\.rds$",
+    full.names = TRUE,
+    recursive = TRUE
+  )
  
-  pivot_stats_long <- pivot_stats %>%
-    tidyr::pivot_longer(cols = -c(field, sub_field), names_to = "Date", values_to = "value") %>%
+  if (length(all_daily_files) == 0) {
+    stop(paste("No daily RDS files found in:", daily_vals_dir))
+  }
+  
+  safe_log(sprintf("Found %d daily RDS files to load", length(all_daily_files)))
+  
+  # Read and combine all daily RDS files
+  # Each file contains: field, sub_field, ci_mean, ci_median, ci_sd, ci_min, ci_max, ci_count
+  combined_data <- all_daily_files %>%
+    purrr::map(readRDS) %>%
+    purrr::list_rbind()
+  
+  # Extract date from file path: .../daily_vals/{FIELD}/{YYYY-MM-DD}.rds
+  combined_data <- combined_data %>%
    dplyr::mutate(
-      Date = lubridate::ymd(Date),
-      value = as.numeric(value)
-    ) %>%
+      file_path = NA_character_,  # Will be filled by mapping
+      Date = NA_Date_
+    )
+  
+  # Add dates by mapping file paths to dates
+  for (i in seq_along(all_daily_files)) {
+    file_path <- all_daily_files[i]
+    date_str <- tools::file_path_sans_ext(basename(file_path))
+    
+    # Match rows in combined_data that came from this file
+    # This is a simplification - in practice we'd need to track which rows came from which file
+    # For now, we'll rebuild the data with explicit date tracking
+  }
+  
+  # Better approach: rebuild with explicit date tracking
+  combined_long <- data.frame()
+  
+  for (file in all_daily_files) {
+    date_str <- tools::file_path_sans_ext(basename(file))
+    rds_data <- readRDS(file)
+    rds_data <- rds_data %>%
+      dplyr::mutate(Date = lubridate::ymd(date_str))
+    combined_long <- rbind(combined_long, rds_data)
+  }
+  
+  # Reshape to long format using ci_mean as the main CI value
+  pivot_stats_long <- combined_long %>%
+    dplyr::select(field, sub_field, ci_mean, Date) %>%
+    dplyr::rename(value = ci_mean) %>%
+    dplyr::mutate(value = as.numeric(value)) %>%
    tidyr::drop_na(c("value", "Date")) %>%
-    dplyr::filter(!is.na(sub_field), !is.na(field)) %>%  # Filter out NA field names
+    dplyr::filter(!is.na(sub_field), !is.na(field)) %>%
    dplyr::filter(!is.infinite(value)) %>%
    dplyr::distinct()
  
-  safe_log(paste("Loaded", nrow(pivot_stats_long), "CI data points"))
+  safe_log(sprintf("Loaded %d CI data points from %d daily files", 
+                   nrow(pivot_stats_long), length(all_daily_files)))
  
  return(pivot_stats_long)
 }
--- a/r_app/30_interpolate_growth_model.R
+++ b/r_app/30_interpolate_growth_model.R
@ -19,64 +19,60 @@ suppressPackageStartupMessages({
  library(here)
 })

-# 2. Main function to handle interpolation
-# -------------------------------------
+# =============================================================================
+# Load utility functions from 30_growth_model_utils.R
+# =============================================================================
+source("r_app/30_growth_model_utils.R")
+
+# =============================================================================
+# Main Processing
+# =============================================================================
+
 main <- function() {
-  # Process command line arguments
+  # IMPORTANT: Set working directory to project root (smartcane/)
+  # This ensures here() functions resolve relative to /smartcane, not /smartcane/r_app
+  if (basename(getwd()) == "r_app") {
+    setwd("..")
+  }
+  
+  # Parse command-line arguments
  args <- commandArgs(trailingOnly = TRUE)
  
-  # Get project directory from arguments or use default
-  if (length(args) >= 1 && !is.na(args[1])) {
-    project_dir <- as.character(args[1])
-  } else if (exists("project_dir", envir = .GlobalEnv)) {
-    project_dir <- get("project_dir", envir = .GlobalEnv)
-  } else {
-    project_dir <- "esa"
-    message("No project_dir provided. Using default:", project_dir)
-  }
+  project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
  
-  # Get data_source from arguments (for consistency with Script 20)
-  if (length(args) >= 2 && !is.na(args[2])) {
-    data_source <- as.character(args[2])
-  } else if (exists("data_source", envir = .GlobalEnv)) {
-    data_source <- get("data_source", envir = .GlobalEnv)
-  } else {
-    data_source <- "merged_tif"  # Default to 4-band (most common for existing projects)
-  }
-  
-    # Make project_dir and data_source available globally so parameters_project.R can use it
+  # IMPORTANT: Make project_dir available globally for parameters_project.R
  assign("project_dir", project_dir, envir = .GlobalEnv)
-  assign("data_source", data_source, envir = .GlobalEnv)
  
-  # Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction)
-  ci_extraction_script <- TRUE
-  assign("ci_extraction_script", ci_extraction_script, envir = .GlobalEnv)
+  safe_log(sprintf("=== Script 30: Growth Model Interpolation ==="))
+  safe_log(sprintf("Project: %s", project_dir))
  
-  # Initialize project configuration and load utility functions
+  # 1. Load parameters (includes field boundaries setup)
+  # ---------------------------------------------------
  tryCatch({
-    source("parameters_project.R")
-    source("30_growth_model_utils.R")
+    source("r_app/parameters_project.R")
+    safe_log("Loaded parameters_project.R")
  }, error = function(e) {
-    warning("Default source files not found. Attempting to source from 'r_app' directory.")
-    tryCatch({
-      source(here::here("r_app", "parameters_project.R"))
-      source(here::here("r_app", "30_growth_model_utils.R"))
-      warning(paste("Successfully sourced files from 'r_app' directory."))
-
-    }, error = function(e) {
-      stop("Failed to source required files from both default and 'r_app' directories.")
-    })
+    safe_log(sprintf("Error loading parameters: %s", e$message), "ERROR")
+    stop(e)
  })
  
-  log_message("Starting CI growth model interpolation")
+  # 2. Set up directory paths from parameters
+  # -----------------------------------------------
+  setup <- setup_project_directories(project_dir)
  
-  # Load and process the data
+  safe_log(sprintf("Using cumulative CI directory: %s", setup$cumulative_CI_vals_dir))
+  
+  safe_log("Starting CI growth model interpolation")
+  
+  # 3. Load and process the data
+  # ----------------------------
  tryCatch({
-    # Load the combined CI data
-    CI_data <- load_combined_ci_data(cumulative_CI_vals_dir)
+    # Load the combined CI data (created by Script 20)
+    CI_data <- load_combined_ci_data(setup$cumulative_CI_vals_dir)
    
    # Validate harvesting data
    if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
+      safe_log("No harvesting data available", "ERROR")
      stop("No harvesting data available")
    }
    
@ -86,7 +82,7 @@ main <- function() {
      distinct(year) %>% 
      pull(year)
    
-    log_message(paste("Processing data for years:", paste(years, collapse = ", ")))
+    safe_log(paste("Processing data for years:", paste(years, collapse = ", ")))
    
    # Generate interpolated CI data for each year and field
    CI_all <- generate_interpolated_ci_data(years, harvesting_data, CI_data)
@ -100,20 +96,20 @@ main <- function() {
      # Add daily and cumulative metrics
      CI_all_with_metrics <- calculate_growth_metrics(CI_all)
      
-      # Save the processed data
+      # Save the processed data to cumulative_vals directory
      save_growth_model(
        CI_all_with_metrics, 
-        cumulative_CI_vals_dir, 
+        setup$cumulative_CI_vals_dir, 
        "All_pivots_Cumulative_CI_quadrant_year_v2.rds"
      )
    } else {
-      log_message("No CI data was generated after interpolation", level = "WARNING")
+      safe_log("No CI data was generated after interpolation", "WARNING")
    }
    
-    log_message("Growth model interpolation completed successfully")
+    safe_log("Growth model interpolation completed successfully")
    
  }, error = function(e) {
-    log_message(paste("Error in growth model interpolation:", e$message), level = "ERROR")
+    safe_log(paste("Error in growth model interpolation:", e$message), "ERROR")
    stop(e$message)
  })
 }