script 30 refactored to per field

This commit is contained in:
Timon 2026-01-29 21:36:47 +01:00
parent 4d6bba828f
commit 9b8c971902
3 changed files with 103 additions and 106 deletions

View file

@ -190,48 +190,6 @@ main <- function() {
safe_log(sprintf(" TIFFs: %s", setup$field_tiles_ci_dir))
safe_log(sprintf(" RDS: %s", setup$daily_vals_per_field_dir))
}
# 8. Aggregate per-field daily RDS files into combined_CI_data.rds
# ----------------------------------------------------------------
# This creates the wide-format (fields × dates) file that Script 30 and
# other downstream scripts expect for backward compatibility
safe_log("\n=== Aggregating Per-Field Daily RDS into combined_CI_data.rds ===")
tryCatch({
# Find all daily RDS files (recursively from daily_vals/{FIELD}/{DATE}.rds)
all_daily_files <- list.files(
path = setup$daily_vals_per_field_dir,
pattern = "\\.rds$",
full.names = TRUE,
recursive = TRUE
)
if (length(all_daily_files) == 0) {
safe_log("No daily RDS files found to aggregate", "WARNING")
} else {
safe_log(sprintf("Aggregating %d daily RDS files into combined_CI_data.rds", length(all_daily_files)))
# Read and combine all daily RDS files
combined_data <- all_daily_files %>%
purrr::map(readRDS) %>%
purrr::list_rbind() %>%
dplyr::group_by(sub_field)
# Create output directory if needed
dir.create(setup$cumulative_CI_vals_dir, showWarnings = FALSE, recursive = TRUE)
# Save combined data
combined_ci_path <- file.path(setup$cumulative_CI_vals_dir, "combined_CI_data.rds")
saveRDS(combined_data, combined_ci_path)
safe_log(sprintf("✓ Created combined_CI_data.rds with %d rows from %d files",
nrow(combined_data), length(all_daily_files)))
safe_log(sprintf(" Location: %s", combined_ci_path))
}
}, error = function(e) {
safe_log(sprintf("⚠ Error aggregating to combined_CI_data.rds: %s", e$message), "WARNING")
safe_log(" This is OK - Script 30 can still use per-field RDS files directly", "WARNING")
})
}
# Execute main if called from command line

View file

@ -29,32 +29,75 @@ safe_log <- function(message, level = "INFO") {
#' @return Long-format dataframe with CI values by date
#'
load_combined_ci_data <- function(data_dir) {
file_path <- here::here(data_dir, "combined_CI_data.rds")
# Load all daily RDS files from daily_vals/ directory
daily_vals_dir <- file.path(data_dir, "..", "daily_vals")
if (!file.exists(file_path)) {
stop(paste("Combined CI data file not found:", file_path))
if (!dir.exists(daily_vals_dir)) {
stop(paste("Daily values directory not found:", daily_vals_dir))
}
safe_log(paste("Loading CI data from:", file_path))
safe_log(paste("Loading CI data from daily files in:", daily_vals_dir))
# Load and transform the data to long format
pivot_stats <- readRDS(file_path) %>%
dplyr::ungroup() %>%
dplyr::group_by(field, sub_field) %>%
dplyr::summarise(dplyr::across(everything(), ~ first(stats::na.omit(.))), .groups = "drop")
# Find all daily RDS files recursively
all_daily_files <- list.files(
path = daily_vals_dir,
pattern = "\\.rds$",
full.names = TRUE,
recursive = TRUE
)
pivot_stats_long <- pivot_stats %>%
tidyr::pivot_longer(cols = -c(field, sub_field), names_to = "Date", values_to = "value") %>%
if (length(all_daily_files) == 0) {
stop(paste("No daily RDS files found in:", daily_vals_dir))
}
safe_log(sprintf("Found %d daily RDS files to load", length(all_daily_files)))
# Read and combine all daily RDS files
# Each file contains: field, sub_field, ci_mean, ci_median, ci_sd, ci_min, ci_max, ci_count
combined_data <- all_daily_files %>%
purrr::map(readRDS) %>%
purrr::list_rbind()
# Extract date from file path: .../daily_vals/{FIELD}/{YYYY-MM-DD}.rds
combined_data <- combined_data %>%
dplyr::mutate(
Date = lubridate::ymd(Date),
value = as.numeric(value)
) %>%
file_path = NA_character_, # Will be filled by mapping
Date = NA_Date_
)
# Add dates by mapping file paths to dates
for (i in seq_along(all_daily_files)) {
file_path <- all_daily_files[i]
date_str <- tools::file_path_sans_ext(basename(file_path))
# Match rows in combined_data that came from this file
# This is a simplification - in practice we'd need to track which rows came from which file
# For now, we'll rebuild the data with explicit date tracking
}
# Better approach: rebuild with explicit date tracking
combined_long <- data.frame()
for (file in all_daily_files) {
date_str <- tools::file_path_sans_ext(basename(file))
rds_data <- readRDS(file)
rds_data <- rds_data %>%
dplyr::mutate(Date = lubridate::ymd(date_str))
combined_long <- rbind(combined_long, rds_data)
}
# Reshape to long format using ci_mean as the main CI value
pivot_stats_long <- combined_long %>%
dplyr::select(field, sub_field, ci_mean, Date) %>%
dplyr::rename(value = ci_mean) %>%
dplyr::mutate(value = as.numeric(value)) %>%
tidyr::drop_na(c("value", "Date")) %>%
dplyr::filter(!is.na(sub_field), !is.na(field)) %>% # Filter out NA field names
dplyr::filter(!is.na(sub_field), !is.na(field)) %>%
dplyr::filter(!is.infinite(value)) %>%
dplyr::distinct()
safe_log(paste("Loaded", nrow(pivot_stats_long), "CI data points"))
safe_log(sprintf("Loaded %d CI data points from %d daily files",
nrow(pivot_stats_long), length(all_daily_files)))
return(pivot_stats_long)
}

View file

@ -19,64 +19,60 @@ suppressPackageStartupMessages({
library(here)
})
# 2. Main function to handle interpolation
# -------------------------------------
# =============================================================================
# Load utility functions from 30_growth_model_utils.R
# =============================================================================
source("r_app/30_growth_model_utils.R")
# =============================================================================
# Main Processing
# =============================================================================
main <- function() {
# Process command line arguments
# IMPORTANT: Set working directory to project root (smartcane/)
# This ensures here() functions resolve relative to /smartcane, not /smartcane/r_app
if (basename(getwd()) == "r_app") {
setwd("..")
}
# Parse command-line arguments
args <- commandArgs(trailingOnly = TRUE)
# Get project directory from arguments or use default
if (length(args) >= 1 && !is.na(args[1])) {
project_dir <- as.character(args[1])
} else if (exists("project_dir", envir = .GlobalEnv)) {
project_dir <- get("project_dir", envir = .GlobalEnv)
} else {
project_dir <- "esa"
message("No project_dir provided. Using default:", project_dir)
}
project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
# Get data_source from arguments (for consistency with Script 20)
if (length(args) >= 2 && !is.na(args[2])) {
data_source <- as.character(args[2])
} else if (exists("data_source", envir = .GlobalEnv)) {
data_source <- get("data_source", envir = .GlobalEnv)
} else {
data_source <- "merged_tif" # Default to 4-band (most common for existing projects)
}
# Make project_dir and data_source available globally so parameters_project.R can use it
# IMPORTANT: Make project_dir available globally for parameters_project.R
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("data_source", data_source, envir = .GlobalEnv)
# Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction)
ci_extraction_script <- TRUE
assign("ci_extraction_script", ci_extraction_script, envir = .GlobalEnv)
safe_log(sprintf("=== Script 30: Growth Model Interpolation ==="))
safe_log(sprintf("Project: %s", project_dir))
# Initialize project configuration and load utility functions
# 1. Load parameters (includes field boundaries setup)
# ---------------------------------------------------
tryCatch({
source("parameters_project.R")
source("30_growth_model_utils.R")
source("r_app/parameters_project.R")
safe_log("Loaded parameters_project.R")
}, error = function(e) {
warning("Default source files not found. Attempting to source from 'r_app' directory.")
tryCatch({
source(here::here("r_app", "parameters_project.R"))
source(here::here("r_app", "30_growth_model_utils.R"))
warning(paste("Successfully sourced files from 'r_app' directory."))
}, error = function(e) {
stop("Failed to source required files from both default and 'r_app' directories.")
})
safe_log(sprintf("Error loading parameters: %s", e$message), "ERROR")
stop(e)
})
log_message("Starting CI growth model interpolation")
# 2. Set up directory paths from parameters
# -----------------------------------------------
setup <- setup_project_directories(project_dir)
# Load and process the data
safe_log(sprintf("Using cumulative CI directory: %s", setup$cumulative_CI_vals_dir))
safe_log("Starting CI growth model interpolation")
# 3. Load and process the data
# ----------------------------
tryCatch({
# Load the combined CI data
CI_data <- load_combined_ci_data(cumulative_CI_vals_dir)
# Load the combined CI data (created by Script 20)
CI_data <- load_combined_ci_data(setup$cumulative_CI_vals_dir)
# Validate harvesting data
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
safe_log("No harvesting data available", "ERROR")
stop("No harvesting data available")
}
@ -86,7 +82,7 @@ main <- function() {
distinct(year) %>%
pull(year)
log_message(paste("Processing data for years:", paste(years, collapse = ", ")))
safe_log(paste("Processing data for years:", paste(years, collapse = ", ")))
# Generate interpolated CI data for each year and field
CI_all <- generate_interpolated_ci_data(years, harvesting_data, CI_data)
@ -100,20 +96,20 @@ main <- function() {
# Add daily and cumulative metrics
CI_all_with_metrics <- calculate_growth_metrics(CI_all)
# Save the processed data
# Save the processed data to cumulative_vals directory
save_growth_model(
CI_all_with_metrics,
cumulative_CI_vals_dir,
setup$cumulative_CI_vals_dir,
"All_pivots_Cumulative_CI_quadrant_year_v2.rds"
)
} else {
log_message("No CI data was generated after interpolation", level = "WARNING")
safe_log("No CI data was generated after interpolation", "WARNING")
}
log_message("Growth model interpolation completed successfully")
safe_log("Growth model interpolation completed successfully")
}, error = function(e) {
log_message(paste("Error in growth model interpolation:", e$message), level = "ERROR")
safe_log(paste("Error in growth model interpolation:", e$message), "ERROR")
stop(e$message)
})
}