diff --git a/r_app/20_ci_extraction_per_field.R b/r_app/20_ci_extraction_per_field.R index 1847a59..dc93ede 100644 --- a/r_app/20_ci_extraction_per_field.R +++ b/r_app/20_ci_extraction_per_field.R @@ -190,48 +190,6 @@ main <- function() { safe_log(sprintf(" TIFFs: %s", setup$field_tiles_ci_dir)) safe_log(sprintf(" RDS: %s", setup$daily_vals_per_field_dir)) } - - # 8. Aggregate per-field daily RDS files into combined_CI_data.rds - # ---------------------------------------------------------------- - # This creates the wide-format (fields × dates) file that Script 30 and - # other downstream scripts expect for backward compatibility - safe_log("\n=== Aggregating Per-Field Daily RDS into combined_CI_data.rds ===") - - tryCatch({ - # Find all daily RDS files (recursively from daily_vals/{FIELD}/{DATE}.rds) - all_daily_files <- list.files( - path = setup$daily_vals_per_field_dir, - pattern = "\\.rds$", - full.names = TRUE, - recursive = TRUE - ) - - if (length(all_daily_files) == 0) { - safe_log("No daily RDS files found to aggregate", "WARNING") - } else { - safe_log(sprintf("Aggregating %d daily RDS files into combined_CI_data.rds", length(all_daily_files))) - - # Read and combine all daily RDS files - combined_data <- all_daily_files %>% - purrr::map(readRDS) %>% - purrr::list_rbind() %>% - dplyr::group_by(sub_field) - - # Create output directory if needed - dir.create(setup$cumulative_CI_vals_dir, showWarnings = FALSE, recursive = TRUE) - - # Save combined data - combined_ci_path <- file.path(setup$cumulative_CI_vals_dir, "combined_CI_data.rds") - saveRDS(combined_data, combined_ci_path) - - safe_log(sprintf("✓ Created combined_CI_data.rds with %d rows from %d files", - nrow(combined_data), length(all_daily_files))) - safe_log(sprintf(" Location: %s", combined_ci_path)) - } - }, error = function(e) { - safe_log(sprintf("⚠ Error aggregating to combined_CI_data.rds: %s", e$message), "WARNING") - safe_log(" This is OK - Script 30 can still use per-field RDS files directly", "WARNING") - }) } # Execute main if called from command line diff --git a/r_app/30_growth_model_utils.R b/r_app/30_growth_model_utils.R index 5281c02..b37bbed 100644 --- a/r_app/30_growth_model_utils.R +++ b/r_app/30_growth_model_utils.R @@ -29,32 +29,75 @@ safe_log <- function(message, level = "INFO") { #' @return Long-format dataframe with CI values by date #' load_combined_ci_data <- function(data_dir) { - file_path <- here::here(data_dir, "combined_CI_data.rds") + # Load all daily RDS files from daily_vals/ directory + daily_vals_dir <- file.path(data_dir, "..", "daily_vals") - if (!file.exists(file_path)) { - stop(paste("Combined CI data file not found:", file_path)) + if (!dir.exists(daily_vals_dir)) { + stop(paste("Daily values directory not found:", daily_vals_dir)) } - safe_log(paste("Loading CI data from:", file_path)) + safe_log(paste("Loading CI data from daily files in:", daily_vals_dir)) - # Load and transform the data to long format - pivot_stats <- readRDS(file_path) %>% - dplyr::ungroup() %>% - dplyr::group_by(field, sub_field) %>% - dplyr::summarise(dplyr::across(everything(), ~ first(stats::na.omit(.))), .groups = "drop") + # Find all daily RDS files recursively + all_daily_files <- list.files( + path = daily_vals_dir, + pattern = "\\.rds$", + full.names = TRUE, + recursive = TRUE + ) - pivot_stats_long <- pivot_stats %>% - tidyr::pivot_longer(cols = -c(field, sub_field), names_to = "Date", values_to = "value") %>% + if (length(all_daily_files) == 0) { + stop(paste("No daily RDS files found in:", daily_vals_dir)) + } + + safe_log(sprintf("Found %d daily RDS files to load", length(all_daily_files))) + + # Read and combine all daily RDS files + # Each file contains: field, sub_field, ci_mean, ci_median, ci_sd, ci_min, ci_max, ci_count + combined_data <- all_daily_files %>% + purrr::map(readRDS) %>% + purrr::list_rbind() + + # Extract date from file path: .../daily_vals/{FIELD}/{YYYY-MM-DD}.rds + combined_data <- combined_data %>% dplyr::mutate( - Date = lubridate::ymd(Date), - value = as.numeric(value) - ) %>% + file_path = NA_character_, # Will be filled by mapping + Date = NA_Date_ + ) + + # Add dates by mapping file paths to dates + for (i in seq_along(all_daily_files)) { + file_path <- all_daily_files[i] + date_str <- tools::file_path_sans_ext(basename(file_path)) + + # Match rows in combined_data that came from this file + # This is a simplification - in practice we'd need to track which rows came from which file + # For now, we'll rebuild the data with explicit date tracking + } + + # Better approach: rebuild with explicit date tracking + combined_long <- data.frame() + + for (file in all_daily_files) { + date_str <- tools::file_path_sans_ext(basename(file)) + rds_data <- readRDS(file) + rds_data <- rds_data %>% + dplyr::mutate(Date = lubridate::ymd(date_str)) + combined_long <- rbind(combined_long, rds_data) + } + + # Reshape to long format using ci_mean as the main CI value + pivot_stats_long <- combined_long %>% + dplyr::select(field, sub_field, ci_mean, Date) %>% + dplyr::rename(value = ci_mean) %>% + dplyr::mutate(value = as.numeric(value)) %>% tidyr::drop_na(c("value", "Date")) %>% - dplyr::filter(!is.na(sub_field), !is.na(field)) %>% # Filter out NA field names + dplyr::filter(!is.na(sub_field), !is.na(field)) %>% dplyr::filter(!is.infinite(value)) %>% dplyr::distinct() - safe_log(paste("Loaded", nrow(pivot_stats_long), "CI data points")) + safe_log(sprintf("Loaded %d CI data points from %d daily files", + nrow(pivot_stats_long), length(all_daily_files))) return(pivot_stats_long) } diff --git a/r_app/30_interpolate_growth_model.R b/r_app/30_interpolate_growth_model.R index 05b54b0..5ef5011 100644 --- a/r_app/30_interpolate_growth_model.R +++ b/r_app/30_interpolate_growth_model.R @@ -19,64 +19,60 @@ suppressPackageStartupMessages({ library(here) }) -# 2. Main function to handle interpolation -# ------------------------------------- +# ============================================================================= +# Load utility functions from 30_growth_model_utils.R +# ============================================================================= +source("r_app/30_growth_model_utils.R") + +# ============================================================================= +# Main Processing +# ============================================================================= + main <- function() { - # Process command line arguments + # IMPORTANT: Set working directory to project root (smartcane/) + # This ensures here() functions resolve relative to /smartcane, not /smartcane/r_app + if (basename(getwd()) == "r_app") { + setwd("..") + } + + # Parse command-line arguments args <- commandArgs(trailingOnly = TRUE) - # Get project directory from arguments or use default - if (length(args) >= 1 && !is.na(args[1])) { - project_dir <- as.character(args[1]) - } else if (exists("project_dir", envir = .GlobalEnv)) { - project_dir <- get("project_dir", envir = .GlobalEnv) - } else { - project_dir <- "esa" - message("No project_dir provided. Using default:", project_dir) - } + project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata" - # Get data_source from arguments (for consistency with Script 20) - if (length(args) >= 2 && !is.na(args[2])) { - data_source <- as.character(args[2]) - } else if (exists("data_source", envir = .GlobalEnv)) { - data_source <- get("data_source", envir = .GlobalEnv) - } else { - data_source <- "merged_tif" # Default to 4-band (most common for existing projects) - } - - # Make project_dir and data_source available globally so parameters_project.R can use it + # IMPORTANT: Make project_dir available globally for parameters_project.R assign("project_dir", project_dir, envir = .GlobalEnv) - assign("data_source", data_source, envir = .GlobalEnv) - # Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction) - ci_extraction_script <- TRUE - assign("ci_extraction_script", ci_extraction_script, envir = .GlobalEnv) + safe_log(sprintf("=== Script 30: Growth Model Interpolation ===")) + safe_log(sprintf("Project: %s", project_dir)) - # Initialize project configuration and load utility functions + # 1. Load parameters (includes field boundaries setup) + # --------------------------------------------------- tryCatch({ - source("parameters_project.R") - source("30_growth_model_utils.R") + source("r_app/parameters_project.R") + safe_log("Loaded parameters_project.R") }, error = function(e) { - warning("Default source files not found. Attempting to source from 'r_app' directory.") - tryCatch({ - source(here::here("r_app", "parameters_project.R")) - source(here::here("r_app", "30_growth_model_utils.R")) - warning(paste("Successfully sourced files from 'r_app' directory.")) - - }, error = function(e) { - stop("Failed to source required files from both default and 'r_app' directories.") - }) + safe_log(sprintf("Error loading parameters: %s", e$message), "ERROR") + stop(e) }) - log_message("Starting CI growth model interpolation") + # 2. Set up directory paths from parameters + # ----------------------------------------------- + setup <- setup_project_directories(project_dir) - # Load and process the data + safe_log(sprintf("Using cumulative CI directory: %s", setup$cumulative_CI_vals_dir)) + + safe_log("Starting CI growth model interpolation") + + # 3. Load and process the data + # ---------------------------- tryCatch({ - # Load the combined CI data - CI_data <- load_combined_ci_data(cumulative_CI_vals_dir) + # Load the combined CI data (created by Script 20) + CI_data <- load_combined_ci_data(setup$cumulative_CI_vals_dir) # Validate harvesting data if (is.null(harvesting_data) || nrow(harvesting_data) == 0) { + safe_log("No harvesting data available", "ERROR") stop("No harvesting data available") } @@ -86,7 +82,7 @@ main <- function() { distinct(year) %>% pull(year) - log_message(paste("Processing data for years:", paste(years, collapse = ", "))) + safe_log(paste("Processing data for years:", paste(years, collapse = ", "))) # Generate interpolated CI data for each year and field CI_all <- generate_interpolated_ci_data(years, harvesting_data, CI_data) @@ -100,20 +96,20 @@ main <- function() { # Add daily and cumulative metrics CI_all_with_metrics <- calculate_growth_metrics(CI_all) - # Save the processed data + # Save the processed data to cumulative_vals directory save_growth_model( CI_all_with_metrics, - cumulative_CI_vals_dir, + setup$cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds" ) } else { - log_message("No CI data was generated after interpolation", level = "WARNING") + safe_log("No CI data was generated after interpolation", "WARNING") } - log_message("Growth model interpolation completed successfully") + safe_log("Growth model interpolation completed successfully") }, error = function(e) { - log_message(paste("Error in growth model interpolation:", e$message), level = "ERROR") + safe_log(paste("Error in growth model interpolation:", e$message), "ERROR") stop(e$message) }) }