script 30 refactored to per field
This commit is contained in:
parent
4d6bba828f
commit
9b8c971902
|
|
@ -190,48 +190,6 @@ main <- function() {
|
|||
safe_log(sprintf(" TIFFs: %s", setup$field_tiles_ci_dir))
|
||||
safe_log(sprintf(" RDS: %s", setup$daily_vals_per_field_dir))
|
||||
}
|
||||
|
||||
# 8. Aggregate per-field daily RDS files into combined_CI_data.rds
|
||||
# ----------------------------------------------------------------
|
||||
# This creates the wide-format (fields × dates) file that Script 30 and
|
||||
# other downstream scripts expect for backward compatibility
|
||||
safe_log("\n=== Aggregating Per-Field Daily RDS into combined_CI_data.rds ===")
|
||||
|
||||
tryCatch({
|
||||
# Find all daily RDS files (recursively from daily_vals/{FIELD}/{DATE}.rds)
|
||||
all_daily_files <- list.files(
|
||||
path = setup$daily_vals_per_field_dir,
|
||||
pattern = "\\.rds$",
|
||||
full.names = TRUE,
|
||||
recursive = TRUE
|
||||
)
|
||||
|
||||
if (length(all_daily_files) == 0) {
|
||||
safe_log("No daily RDS files found to aggregate", "WARNING")
|
||||
} else {
|
||||
safe_log(sprintf("Aggregating %d daily RDS files into combined_CI_data.rds", length(all_daily_files)))
|
||||
|
||||
# Read and combine all daily RDS files
|
||||
combined_data <- all_daily_files %>%
|
||||
purrr::map(readRDS) %>%
|
||||
purrr::list_rbind() %>%
|
||||
dplyr::group_by(sub_field)
|
||||
|
||||
# Create output directory if needed
|
||||
dir.create(setup$cumulative_CI_vals_dir, showWarnings = FALSE, recursive = TRUE)
|
||||
|
||||
# Save combined data
|
||||
combined_ci_path <- file.path(setup$cumulative_CI_vals_dir, "combined_CI_data.rds")
|
||||
saveRDS(combined_data, combined_ci_path)
|
||||
|
||||
safe_log(sprintf("✓ Created combined_CI_data.rds with %d rows from %d files",
|
||||
nrow(combined_data), length(all_daily_files)))
|
||||
safe_log(sprintf(" Location: %s", combined_ci_path))
|
||||
}
|
||||
}, error = function(e) {
|
||||
safe_log(sprintf("⚠ Error aggregating to combined_CI_data.rds: %s", e$message), "WARNING")
|
||||
safe_log(" This is OK - Script 30 can still use per-field RDS files directly", "WARNING")
|
||||
})
|
||||
}
|
||||
|
||||
# Execute main if called from command line
|
||||
|
|
|
|||
|
|
@ -29,32 +29,75 @@ safe_log <- function(message, level = "INFO") {
|
|||
#' @return Long-format dataframe with CI values by date
|
||||
#'
|
||||
load_combined_ci_data <- function(data_dir) {
|
||||
file_path <- here::here(data_dir, "combined_CI_data.rds")
|
||||
# Load all daily RDS files from daily_vals/ directory
|
||||
daily_vals_dir <- file.path(data_dir, "..", "daily_vals")
|
||||
|
||||
if (!file.exists(file_path)) {
|
||||
stop(paste("Combined CI data file not found:", file_path))
|
||||
if (!dir.exists(daily_vals_dir)) {
|
||||
stop(paste("Daily values directory not found:", daily_vals_dir))
|
||||
}
|
||||
|
||||
safe_log(paste("Loading CI data from:", file_path))
|
||||
safe_log(paste("Loading CI data from daily files in:", daily_vals_dir))
|
||||
|
||||
# Load and transform the data to long format
|
||||
pivot_stats <- readRDS(file_path) %>%
|
||||
dplyr::ungroup() %>%
|
||||
dplyr::group_by(field, sub_field) %>%
|
||||
dplyr::summarise(dplyr::across(everything(), ~ first(stats::na.omit(.))), .groups = "drop")
|
||||
# Find all daily RDS files recursively
|
||||
all_daily_files <- list.files(
|
||||
path = daily_vals_dir,
|
||||
pattern = "\\.rds$",
|
||||
full.names = TRUE,
|
||||
recursive = TRUE
|
||||
)
|
||||
|
||||
pivot_stats_long <- pivot_stats %>%
|
||||
tidyr::pivot_longer(cols = -c(field, sub_field), names_to = "Date", values_to = "value") %>%
|
||||
if (length(all_daily_files) == 0) {
|
||||
stop(paste("No daily RDS files found in:", daily_vals_dir))
|
||||
}
|
||||
|
||||
safe_log(sprintf("Found %d daily RDS files to load", length(all_daily_files)))
|
||||
|
||||
# Read and combine all daily RDS files
|
||||
# Each file contains: field, sub_field, ci_mean, ci_median, ci_sd, ci_min, ci_max, ci_count
|
||||
combined_data <- all_daily_files %>%
|
||||
purrr::map(readRDS) %>%
|
||||
purrr::list_rbind()
|
||||
|
||||
# Extract date from file path: .../daily_vals/{FIELD}/{YYYY-MM-DD}.rds
|
||||
combined_data <- combined_data %>%
|
||||
dplyr::mutate(
|
||||
Date = lubridate::ymd(Date),
|
||||
value = as.numeric(value)
|
||||
) %>%
|
||||
file_path = NA_character_, # Will be filled by mapping
|
||||
Date = NA_Date_
|
||||
)
|
||||
|
||||
# Add dates by mapping file paths to dates
|
||||
for (i in seq_along(all_daily_files)) {
|
||||
file_path <- all_daily_files[i]
|
||||
date_str <- tools::file_path_sans_ext(basename(file_path))
|
||||
|
||||
# Match rows in combined_data that came from this file
|
||||
# This is a simplification - in practice we'd need to track which rows came from which file
|
||||
# For now, we'll rebuild the data with explicit date tracking
|
||||
}
|
||||
|
||||
# Better approach: rebuild with explicit date tracking
|
||||
combined_long <- data.frame()
|
||||
|
||||
for (file in all_daily_files) {
|
||||
date_str <- tools::file_path_sans_ext(basename(file))
|
||||
rds_data <- readRDS(file)
|
||||
rds_data <- rds_data %>%
|
||||
dplyr::mutate(Date = lubridate::ymd(date_str))
|
||||
combined_long <- rbind(combined_long, rds_data)
|
||||
}
|
||||
|
||||
# Reshape to long format using ci_mean as the main CI value
|
||||
pivot_stats_long <- combined_long %>%
|
||||
dplyr::select(field, sub_field, ci_mean, Date) %>%
|
||||
dplyr::rename(value = ci_mean) %>%
|
||||
dplyr::mutate(value = as.numeric(value)) %>%
|
||||
tidyr::drop_na(c("value", "Date")) %>%
|
||||
dplyr::filter(!is.na(sub_field), !is.na(field)) %>% # Filter out NA field names
|
||||
dplyr::filter(!is.na(sub_field), !is.na(field)) %>%
|
||||
dplyr::filter(!is.infinite(value)) %>%
|
||||
dplyr::distinct()
|
||||
|
||||
safe_log(paste("Loaded", nrow(pivot_stats_long), "CI data points"))
|
||||
safe_log(sprintf("Loaded %d CI data points from %d daily files",
|
||||
nrow(pivot_stats_long), length(all_daily_files)))
|
||||
|
||||
return(pivot_stats_long)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,64 +19,60 @@ suppressPackageStartupMessages({
|
|||
library(here)
|
||||
})
|
||||
|
||||
# 2. Main function to handle interpolation
|
||||
# -------------------------------------
|
||||
# =============================================================================
|
||||
# Load utility functions from 30_growth_model_utils.R
|
||||
# =============================================================================
|
||||
source("r_app/30_growth_model_utils.R")
|
||||
|
||||
# =============================================================================
|
||||
# Main Processing
|
||||
# =============================================================================
|
||||
|
||||
main <- function() {
|
||||
# Process command line arguments
|
||||
# IMPORTANT: Set working directory to project root (smartcane/)
|
||||
# This ensures here() functions resolve relative to /smartcane, not /smartcane/r_app
|
||||
if (basename(getwd()) == "r_app") {
|
||||
setwd("..")
|
||||
}
|
||||
|
||||
# Parse command-line arguments
|
||||
args <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
# Get project directory from arguments or use default
|
||||
if (length(args) >= 1 && !is.na(args[1])) {
|
||||
project_dir <- as.character(args[1])
|
||||
} else if (exists("project_dir", envir = .GlobalEnv)) {
|
||||
project_dir <- get("project_dir", envir = .GlobalEnv)
|
||||
} else {
|
||||
project_dir <- "esa"
|
||||
message("No project_dir provided. Using default:", project_dir)
|
||||
}
|
||||
project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
|
||||
|
||||
# Get data_source from arguments (for consistency with Script 20)
|
||||
if (length(args) >= 2 && !is.na(args[2])) {
|
||||
data_source <- as.character(args[2])
|
||||
} else if (exists("data_source", envir = .GlobalEnv)) {
|
||||
data_source <- get("data_source", envir = .GlobalEnv)
|
||||
} else {
|
||||
data_source <- "merged_tif" # Default to 4-band (most common for existing projects)
|
||||
}
|
||||
|
||||
# Make project_dir and data_source available globally so parameters_project.R can use it
|
||||
# IMPORTANT: Make project_dir available globally for parameters_project.R
|
||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||
assign("data_source", data_source, envir = .GlobalEnv)
|
||||
|
||||
# Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction)
|
||||
ci_extraction_script <- TRUE
|
||||
assign("ci_extraction_script", ci_extraction_script, envir = .GlobalEnv)
|
||||
safe_log(sprintf("=== Script 30: Growth Model Interpolation ==="))
|
||||
safe_log(sprintf("Project: %s", project_dir))
|
||||
|
||||
# Initialize project configuration and load utility functions
|
||||
# 1. Load parameters (includes field boundaries setup)
|
||||
# ---------------------------------------------------
|
||||
tryCatch({
|
||||
source("parameters_project.R")
|
||||
source("30_growth_model_utils.R")
|
||||
source("r_app/parameters_project.R")
|
||||
safe_log("Loaded parameters_project.R")
|
||||
}, error = function(e) {
|
||||
warning("Default source files not found. Attempting to source from 'r_app' directory.")
|
||||
tryCatch({
|
||||
source(here::here("r_app", "parameters_project.R"))
|
||||
source(here::here("r_app", "30_growth_model_utils.R"))
|
||||
warning(paste("Successfully sourced files from 'r_app' directory."))
|
||||
|
||||
}, error = function(e) {
|
||||
stop("Failed to source required files from both default and 'r_app' directories.")
|
||||
})
|
||||
safe_log(sprintf("Error loading parameters: %s", e$message), "ERROR")
|
||||
stop(e)
|
||||
})
|
||||
|
||||
log_message("Starting CI growth model interpolation")
|
||||
# 2. Set up directory paths from parameters
|
||||
# -----------------------------------------------
|
||||
setup <- setup_project_directories(project_dir)
|
||||
|
||||
# Load and process the data
|
||||
safe_log(sprintf("Using cumulative CI directory: %s", setup$cumulative_CI_vals_dir))
|
||||
|
||||
safe_log("Starting CI growth model interpolation")
|
||||
|
||||
# 3. Load and process the data
|
||||
# ----------------------------
|
||||
tryCatch({
|
||||
# Load the combined CI data
|
||||
CI_data <- load_combined_ci_data(cumulative_CI_vals_dir)
|
||||
# Load the combined CI data (created by Script 20)
|
||||
CI_data <- load_combined_ci_data(setup$cumulative_CI_vals_dir)
|
||||
|
||||
# Validate harvesting data
|
||||
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
|
||||
safe_log("No harvesting data available", "ERROR")
|
||||
stop("No harvesting data available")
|
||||
}
|
||||
|
||||
|
|
@ -86,7 +82,7 @@ main <- function() {
|
|||
distinct(year) %>%
|
||||
pull(year)
|
||||
|
||||
log_message(paste("Processing data for years:", paste(years, collapse = ", ")))
|
||||
safe_log(paste("Processing data for years:", paste(years, collapse = ", ")))
|
||||
|
||||
# Generate interpolated CI data for each year and field
|
||||
CI_all <- generate_interpolated_ci_data(years, harvesting_data, CI_data)
|
||||
|
|
@ -100,20 +96,20 @@ main <- function() {
|
|||
# Add daily and cumulative metrics
|
||||
CI_all_with_metrics <- calculate_growth_metrics(CI_all)
|
||||
|
||||
# Save the processed data
|
||||
# Save the processed data to cumulative_vals directory
|
||||
save_growth_model(
|
||||
CI_all_with_metrics,
|
||||
cumulative_CI_vals_dir,
|
||||
setup$cumulative_CI_vals_dir,
|
||||
"All_pivots_Cumulative_CI_quadrant_year_v2.rds"
|
||||
)
|
||||
} else {
|
||||
log_message("No CI data was generated after interpolation", level = "WARNING")
|
||||
safe_log("No CI data was generated after interpolation", "WARNING")
|
||||
}
|
||||
|
||||
log_message("Growth model interpolation completed successfully")
|
||||
safe_log("Growth model interpolation completed successfully")
|
||||
|
||||
}, error = function(e) {
|
||||
log_message(paste("Error in growth model interpolation:", e$message), level = "ERROR")
|
||||
safe_log(paste("Error in growth model interpolation:", e$message), "ERROR")
|
||||
stop(e$message)
|
||||
})
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue