script 30 refactored to per field

2026-01-29 21:36:47 +01:00 · 2026-01-29 21:36:47 +01:00 · 9b8c971902
parent 4d6bba828f
commit 9b8c971902
3 changed files with 103 additions and 106 deletions
--- a/r_app/20_ci_extraction_per_field.R
+++ b/r_app/20_ci_extraction_per_field.R
@ -190,48 +190,6 @@ main <- function() {
    safe_log(sprintf("  TIFFs: %s", setup$field_tiles_ci_dir))
    safe_log(sprintf("  RDS:   %s", setup$daily_vals_per_field_dir))
  }
  # 8. Aggregate per-field daily RDS files into combined_CI_data.rds
  # ----------------------------------------------------------------
  # This creates the wide-format (fields × dates) file that Script 30 and
  # other downstream scripts expect for backward compatibility
  safe_log("\n=== Aggregating Per-Field Daily RDS into combined_CI_data.rds ===")
  tryCatch({
    # Find all daily RDS files (recursively from daily_vals/{FIELD}/{DATE}.rds)
    all_daily_files <- list.files(
      path = setup$daily_vals_per_field_dir,
      pattern = "\\.rds$",
      full.names = TRUE,
      recursive = TRUE
    )
    if (length(all_daily_files) == 0) {
      safe_log("No daily RDS files found to aggregate", "WARNING")
    } else {
      safe_log(sprintf("Aggregating %d daily RDS files into combined_CI_data.rds", length(all_daily_files)))
      # Read and combine all daily RDS files
      combined_data <- all_daily_files %>%
        purrr::map(readRDS) %>%
        purrr::list_rbind() %>%
        dplyr::group_by(sub_field)
      # Create output directory if needed
      dir.create(setup$cumulative_CI_vals_dir, showWarnings = FALSE, recursive = TRUE)
      # Save combined data
      combined_ci_path <- file.path(setup$cumulative_CI_vals_dir, "combined_CI_data.rds")
      saveRDS(combined_data, combined_ci_path)
      safe_log(sprintf("✓ Created combined_CI_data.rds with %d rows from %d files", 
                         nrow(combined_data), length(all_daily_files)))
      safe_log(sprintf("  Location: %s", combined_ci_path))
    }
  }, error = function(e) {
    safe_log(sprintf("⚠ Error aggregating to combined_CI_data.rds: %s", e$message), "WARNING")
    safe_log("  This is OK - Script 30 can still use per-field RDS files directly", "WARNING")
  })
 }
 # Execute main if called from command line
--- a/r_app/30_growth_model_utils.R
+++ b/r_app/30_growth_model_utils.R
@ -29,32 +29,75 @@ safe_log <- function(message, level = "INFO") {
 #' @return Long-format dataframe with CI values by date
 #'
 load_combined_ci_data <- function(data_dir) {
-  file_path <- here::here(data_dir, "combined_CI_data.rds")
+  # Load all daily RDS files from daily_vals/ directory
  daily_vals_dir <- file.path(data_dir, "..", "daily_vals")
-  if (!file.exists(file_path)) {
+  if (!dir.exists(daily_vals_dir)) {
-    stop(paste("Combined CI data file not found:", file_path))
+    stop(paste("Daily values directory not found:", daily_vals_dir))
  }
-  safe_log(paste("Loading CI data from:", file_path))
+  safe_log(paste("Loading CI data from daily files in:", daily_vals_dir))
-  # Load and transform the data to long format
+  # Find all daily RDS files recursively
-  pivot_stats <- readRDS(file_path) %>%
+  all_daily_files <- list.files(
-    dplyr::ungroup() %>%
+    path = daily_vals_dir,
-    dplyr::group_by(field, sub_field) %>%
+    pattern = "\\.rds$",
-    dplyr::summarise(dplyr::across(everything(), ~ first(stats::na.omit(.))), .groups = "drop")
+    full.names = TRUE,
    recursive = TRUE
  )
-  pivot_stats_long <- pivot_stats %>%
+  if (length(all_daily_files) == 0) {
-    tidyr::pivot_longer(cols = -c(field, sub_field), names_to = "Date", values_to = "value") %>%
+    stop(paste("No daily RDS files found in:", daily_vals_dir))
  }
  safe_log(sprintf("Found %d daily RDS files to load", length(all_daily_files)))
  # Read and combine all daily RDS files
  # Each file contains: field, sub_field, ci_mean, ci_median, ci_sd, ci_min, ci_max, ci_count
  combined_data <- all_daily_files %>%
    purrr::map(readRDS) %>%
    purrr::list_rbind()
  # Extract date from file path: .../daily_vals/{FIELD}/{YYYY-MM-DD}.rds
  combined_data <- combined_data %>%
    dplyr::mutate(
-      Date = lubridate::ymd(Date),
+      file_path = NA_character_,  # Will be filled by mapping
-      value = as.numeric(value)
+      Date = NA_Date_
-    ) %>%
+    )
  # Add dates by mapping file paths to dates
  for (i in seq_along(all_daily_files)) {
    file_path <- all_daily_files[i]
    date_str <- tools::file_path_sans_ext(basename(file_path))
    # Match rows in combined_data that came from this file
    # This is a simplification - in practice we'd need to track which rows came from which file
    # For now, we'll rebuild the data with explicit date tracking
  }
  # Better approach: rebuild with explicit date tracking
  combined_long <- data.frame()
  for (file in all_daily_files) {
    date_str <- tools::file_path_sans_ext(basename(file))
    rds_data <- readRDS(file)
    rds_data <- rds_data %>%
      dplyr::mutate(Date = lubridate::ymd(date_str))
    combined_long <- rbind(combined_long, rds_data)
  }
  # Reshape to long format using ci_mean as the main CI value
  pivot_stats_long <- combined_long %>%
    dplyr::select(field, sub_field, ci_mean, Date) %>%
    dplyr::rename(value = ci_mean) %>%
    dplyr::mutate(value = as.numeric(value)) %>%
    tidyr::drop_na(c("value", "Date")) %>%
-    dplyr::filter(!is.na(sub_field), !is.na(field)) %>%  # Filter out NA field names
+    dplyr::filter(!is.na(sub_field), !is.na(field)) %>%
    dplyr::filter(!is.infinite(value)) %>%
    dplyr::distinct()
-  safe_log(paste("Loaded", nrow(pivot_stats_long), "CI data points"))
+  safe_log(sprintf("Loaded %d CI data points from %d daily files", 
                   nrow(pivot_stats_long), length(all_daily_files)))
  return(pivot_stats_long)
 }
--- a/r_app/30_interpolate_growth_model.R
+++ b/r_app/30_interpolate_growth_model.R
@ -19,64 +19,60 @@ suppressPackageStartupMessages({
  library(here)
 })
-# 2. Main function to handle interpolation
+# =============================================================================
-# -------------------------------------
+# Load utility functions from 30_growth_model_utils.R
 # =============================================================================
 source("r_app/30_growth_model_utils.R")
 # =============================================================================
 # Main Processing
 # =============================================================================
 main <- function() {
-  # Process command line arguments
+  # IMPORTANT: Set working directory to project root (smartcane/)
  # This ensures here() functions resolve relative to /smartcane, not /smartcane/r_app
  if (basename(getwd()) == "r_app") {
    setwd("..")
  }
  # Parse command-line arguments
  args <- commandArgs(trailingOnly = TRUE)
-  # Get project directory from arguments or use default
+  project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
  if (length(args) >= 1 && !is.na(args[1])) {
    project_dir <- as.character(args[1])
  } else if (exists("project_dir", envir = .GlobalEnv)) {
    project_dir <- get("project_dir", envir = .GlobalEnv)
  } else {
    project_dir <- "esa"
    message("No project_dir provided. Using default:", project_dir)
  }
-  # Get data_source from arguments (for consistency with Script 20)
+  # IMPORTANT: Make project_dir available globally for parameters_project.R
  if (length(args) >= 2 && !is.na(args[2])) {
    data_source <- as.character(args[2])
  } else if (exists("data_source", envir = .GlobalEnv)) {
    data_source <- get("data_source", envir = .GlobalEnv)
  } else {
    data_source <- "merged_tif"  # Default to 4-band (most common for existing projects)
  }
    # Make project_dir and data_source available globally so parameters_project.R can use it
  assign("project_dir", project_dir, envir = .GlobalEnv)
  assign("data_source", data_source, envir = .GlobalEnv)
-  # Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction)
+  safe_log(sprintf("=== Script 30: Growth Model Interpolation ==="))
-  ci_extraction_script <- TRUE
+  safe_log(sprintf("Project: %s", project_dir))
  assign("ci_extraction_script", ci_extraction_script, envir = .GlobalEnv)
-  # Initialize project configuration and load utility functions
+  # 1. Load parameters (includes field boundaries setup)
  # ---------------------------------------------------
  tryCatch({
-    source("parameters_project.R")
+    source("r_app/parameters_project.R")
-    source("30_growth_model_utils.R")
+    safe_log("Loaded parameters_project.R")
  }, error = function(e) {
-    warning("Default source files not found. Attempting to source from 'r_app' directory.")
+    safe_log(sprintf("Error loading parameters: %s", e$message), "ERROR")
-    tryCatch({
+    stop(e)
      source(here::here("r_app", "parameters_project.R"))
      source(here::here("r_app", "30_growth_model_utils.R"))
      warning(paste("Successfully sourced files from 'r_app' directory."))
    }, error = function(e) {
      stop("Failed to source required files from both default and 'r_app' directories.")
    })
  })
-  log_message("Starting CI growth model interpolation")
+  # 2. Set up directory paths from parameters
  # -----------------------------------------------
  setup <- setup_project_directories(project_dir)
-  # Load and process the data
+  safe_log(sprintf("Using cumulative CI directory: %s", setup$cumulative_CI_vals_dir))
  safe_log("Starting CI growth model interpolation")
  # 3. Load and process the data
  # ----------------------------
  tryCatch({
-    # Load the combined CI data
+    # Load the combined CI data (created by Script 20)
-    CI_data <- load_combined_ci_data(cumulative_CI_vals_dir)
+    CI_data <- load_combined_ci_data(setup$cumulative_CI_vals_dir)
    # Validate harvesting data
    if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
      safe_log("No harvesting data available", "ERROR")
      stop("No harvesting data available")
    }
@ -86,7 +82,7 @@ main <- function() {
      distinct(year) %>% 
      pull(year)
-    log_message(paste("Processing data for years:", paste(years, collapse = ", ")))
+    safe_log(paste("Processing data for years:", paste(years, collapse = ", ")))
    # Generate interpolated CI data for each year and field
    CI_all <- generate_interpolated_ci_data(years, harvesting_data, CI_data)
@ -100,20 +96,20 @@ main <- function() {
      # Add daily and cumulative metrics
      CI_all_with_metrics <- calculate_growth_metrics(CI_all)
-      # Save the processed data
+      # Save the processed data to cumulative_vals directory
      save_growth_model(
        CI_all_with_metrics, 
-        cumulative_CI_vals_dir, 
+        setup$cumulative_CI_vals_dir, 
        "All_pivots_Cumulative_CI_quadrant_year_v2.rds"
      )
    } else {
-      log_message("No CI data was generated after interpolation", level = "WARNING")
+      safe_log("No CI data was generated after interpolation", "WARNING")
    }
-    log_message("Growth model interpolation completed successfully")
+    safe_log("Growth model interpolation completed successfully")
  }, error = function(e) {
-    log_message(paste("Error in growth model interpolation:", e$message), level = "ERROR")
+    safe_log(paste("Error in growth model interpolation:", e$message), "ERROR")
    stop(e$message)
  })
 }