script 30 refactored to per field
This commit is contained in:
parent
4d6bba828f
commit
9b8c971902
|
|
@ -190,48 +190,6 @@ main <- function() {
|
||||||
safe_log(sprintf(" TIFFs: %s", setup$field_tiles_ci_dir))
|
safe_log(sprintf(" TIFFs: %s", setup$field_tiles_ci_dir))
|
||||||
safe_log(sprintf(" RDS: %s", setup$daily_vals_per_field_dir))
|
safe_log(sprintf(" RDS: %s", setup$daily_vals_per_field_dir))
|
||||||
}
|
}
|
||||||
|
|
||||||
# 8. Aggregate per-field daily RDS files into combined_CI_data.rds
|
|
||||||
# ----------------------------------------------------------------
|
|
||||||
# This creates the wide-format (fields × dates) file that Script 30 and
|
|
||||||
# other downstream scripts expect for backward compatibility
|
|
||||||
safe_log("\n=== Aggregating Per-Field Daily RDS into combined_CI_data.rds ===")
|
|
||||||
|
|
||||||
tryCatch({
|
|
||||||
# Find all daily RDS files (recursively from daily_vals/{FIELD}/{DATE}.rds)
|
|
||||||
all_daily_files <- list.files(
|
|
||||||
path = setup$daily_vals_per_field_dir,
|
|
||||||
pattern = "\\.rds$",
|
|
||||||
full.names = TRUE,
|
|
||||||
recursive = TRUE
|
|
||||||
)
|
|
||||||
|
|
||||||
if (length(all_daily_files) == 0) {
|
|
||||||
safe_log("No daily RDS files found to aggregate", "WARNING")
|
|
||||||
} else {
|
|
||||||
safe_log(sprintf("Aggregating %d daily RDS files into combined_CI_data.rds", length(all_daily_files)))
|
|
||||||
|
|
||||||
# Read and combine all daily RDS files
|
|
||||||
combined_data <- all_daily_files %>%
|
|
||||||
purrr::map(readRDS) %>%
|
|
||||||
purrr::list_rbind() %>%
|
|
||||||
dplyr::group_by(sub_field)
|
|
||||||
|
|
||||||
# Create output directory if needed
|
|
||||||
dir.create(setup$cumulative_CI_vals_dir, showWarnings = FALSE, recursive = TRUE)
|
|
||||||
|
|
||||||
# Save combined data
|
|
||||||
combined_ci_path <- file.path(setup$cumulative_CI_vals_dir, "combined_CI_data.rds")
|
|
||||||
saveRDS(combined_data, combined_ci_path)
|
|
||||||
|
|
||||||
safe_log(sprintf("✓ Created combined_CI_data.rds with %d rows from %d files",
|
|
||||||
nrow(combined_data), length(all_daily_files)))
|
|
||||||
safe_log(sprintf(" Location: %s", combined_ci_path))
|
|
||||||
}
|
|
||||||
}, error = function(e) {
|
|
||||||
safe_log(sprintf("⚠ Error aggregating to combined_CI_data.rds: %s", e$message), "WARNING")
|
|
||||||
safe_log(" This is OK - Script 30 can still use per-field RDS files directly", "WARNING")
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Execute main if called from command line
|
# Execute main if called from command line
|
||||||
|
|
|
||||||
|
|
@ -29,32 +29,75 @@ safe_log <- function(message, level = "INFO") {
|
||||||
#' @return Long-format dataframe with CI values by date
|
#' @return Long-format dataframe with CI values by date
|
||||||
#'
|
#'
|
||||||
load_combined_ci_data <- function(data_dir) {
|
load_combined_ci_data <- function(data_dir) {
|
||||||
file_path <- here::here(data_dir, "combined_CI_data.rds")
|
# Load all daily RDS files from daily_vals/ directory
|
||||||
|
daily_vals_dir <- file.path(data_dir, "..", "daily_vals")
|
||||||
|
|
||||||
if (!file.exists(file_path)) {
|
if (!dir.exists(daily_vals_dir)) {
|
||||||
stop(paste("Combined CI data file not found:", file_path))
|
stop(paste("Daily values directory not found:", daily_vals_dir))
|
||||||
}
|
}
|
||||||
|
|
||||||
safe_log(paste("Loading CI data from:", file_path))
|
safe_log(paste("Loading CI data from daily files in:", daily_vals_dir))
|
||||||
|
|
||||||
# Load and transform the data to long format
|
# Find all daily RDS files recursively
|
||||||
pivot_stats <- readRDS(file_path) %>%
|
all_daily_files <- list.files(
|
||||||
dplyr::ungroup() %>%
|
path = daily_vals_dir,
|
||||||
dplyr::group_by(field, sub_field) %>%
|
pattern = "\\.rds$",
|
||||||
dplyr::summarise(dplyr::across(everything(), ~ first(stats::na.omit(.))), .groups = "drop")
|
full.names = TRUE,
|
||||||
|
recursive = TRUE
|
||||||
|
)
|
||||||
|
|
||||||
pivot_stats_long <- pivot_stats %>%
|
if (length(all_daily_files) == 0) {
|
||||||
tidyr::pivot_longer(cols = -c(field, sub_field), names_to = "Date", values_to = "value") %>%
|
stop(paste("No daily RDS files found in:", daily_vals_dir))
|
||||||
|
}
|
||||||
|
|
||||||
|
safe_log(sprintf("Found %d daily RDS files to load", length(all_daily_files)))
|
||||||
|
|
||||||
|
# Read and combine all daily RDS files
|
||||||
|
# Each file contains: field, sub_field, ci_mean, ci_median, ci_sd, ci_min, ci_max, ci_count
|
||||||
|
combined_data <- all_daily_files %>%
|
||||||
|
purrr::map(readRDS) %>%
|
||||||
|
purrr::list_rbind()
|
||||||
|
|
||||||
|
# Extract date from file path: .../daily_vals/{FIELD}/{YYYY-MM-DD}.rds
|
||||||
|
combined_data <- combined_data %>%
|
||||||
dplyr::mutate(
|
dplyr::mutate(
|
||||||
Date = lubridate::ymd(Date),
|
file_path = NA_character_, # Will be filled by mapping
|
||||||
value = as.numeric(value)
|
Date = NA_Date_
|
||||||
) %>%
|
)
|
||||||
|
|
||||||
|
# Add dates by mapping file paths to dates
|
||||||
|
for (i in seq_along(all_daily_files)) {
|
||||||
|
file_path <- all_daily_files[i]
|
||||||
|
date_str <- tools::file_path_sans_ext(basename(file_path))
|
||||||
|
|
||||||
|
# Match rows in combined_data that came from this file
|
||||||
|
# This is a simplification - in practice we'd need to track which rows came from which file
|
||||||
|
# For now, we'll rebuild the data with explicit date tracking
|
||||||
|
}
|
||||||
|
|
||||||
|
# Better approach: rebuild with explicit date tracking
|
||||||
|
combined_long <- data.frame()
|
||||||
|
|
||||||
|
for (file in all_daily_files) {
|
||||||
|
date_str <- tools::file_path_sans_ext(basename(file))
|
||||||
|
rds_data <- readRDS(file)
|
||||||
|
rds_data <- rds_data %>%
|
||||||
|
dplyr::mutate(Date = lubridate::ymd(date_str))
|
||||||
|
combined_long <- rbind(combined_long, rds_data)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Reshape to long format using ci_mean as the main CI value
|
||||||
|
pivot_stats_long <- combined_long %>%
|
||||||
|
dplyr::select(field, sub_field, ci_mean, Date) %>%
|
||||||
|
dplyr::rename(value = ci_mean) %>%
|
||||||
|
dplyr::mutate(value = as.numeric(value)) %>%
|
||||||
tidyr::drop_na(c("value", "Date")) %>%
|
tidyr::drop_na(c("value", "Date")) %>%
|
||||||
dplyr::filter(!is.na(sub_field), !is.na(field)) %>% # Filter out NA field names
|
dplyr::filter(!is.na(sub_field), !is.na(field)) %>%
|
||||||
dplyr::filter(!is.infinite(value)) %>%
|
dplyr::filter(!is.infinite(value)) %>%
|
||||||
dplyr::distinct()
|
dplyr::distinct()
|
||||||
|
|
||||||
safe_log(paste("Loaded", nrow(pivot_stats_long), "CI data points"))
|
safe_log(sprintf("Loaded %d CI data points from %d daily files",
|
||||||
|
nrow(pivot_stats_long), length(all_daily_files)))
|
||||||
|
|
||||||
return(pivot_stats_long)
|
return(pivot_stats_long)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -19,64 +19,60 @@ suppressPackageStartupMessages({
|
||||||
library(here)
|
library(here)
|
||||||
})
|
})
|
||||||
|
|
||||||
# 2. Main function to handle interpolation
|
# =============================================================================
|
||||||
# -------------------------------------
|
# Load utility functions from 30_growth_model_utils.R
|
||||||
|
# =============================================================================
|
||||||
|
source("r_app/30_growth_model_utils.R")
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Main Processing
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
main <- function() {
|
main <- function() {
|
||||||
# Process command line arguments
|
# IMPORTANT: Set working directory to project root (smartcane/)
|
||||||
|
# This ensures here() functions resolve relative to /smartcane, not /smartcane/r_app
|
||||||
|
if (basename(getwd()) == "r_app") {
|
||||||
|
setwd("..")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Parse command-line arguments
|
||||||
args <- commandArgs(trailingOnly = TRUE)
|
args <- commandArgs(trailingOnly = TRUE)
|
||||||
|
|
||||||
# Get project directory from arguments or use default
|
project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
|
||||||
if (length(args) >= 1 && !is.na(args[1])) {
|
|
||||||
project_dir <- as.character(args[1])
|
|
||||||
} else if (exists("project_dir", envir = .GlobalEnv)) {
|
|
||||||
project_dir <- get("project_dir", envir = .GlobalEnv)
|
|
||||||
} else {
|
|
||||||
project_dir <- "esa"
|
|
||||||
message("No project_dir provided. Using default:", project_dir)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Get data_source from arguments (for consistency with Script 20)
|
# IMPORTANT: Make project_dir available globally for parameters_project.R
|
||||||
if (length(args) >= 2 && !is.na(args[2])) {
|
|
||||||
data_source <- as.character(args[2])
|
|
||||||
} else if (exists("data_source", envir = .GlobalEnv)) {
|
|
||||||
data_source <- get("data_source", envir = .GlobalEnv)
|
|
||||||
} else {
|
|
||||||
data_source <- "merged_tif" # Default to 4-band (most common for existing projects)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Make project_dir and data_source available globally so parameters_project.R can use it
|
|
||||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||||
assign("data_source", data_source, envir = .GlobalEnv)
|
|
||||||
|
|
||||||
# Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction)
|
safe_log(sprintf("=== Script 30: Growth Model Interpolation ==="))
|
||||||
ci_extraction_script <- TRUE
|
safe_log(sprintf("Project: %s", project_dir))
|
||||||
assign("ci_extraction_script", ci_extraction_script, envir = .GlobalEnv)
|
|
||||||
|
|
||||||
# Initialize project configuration and load utility functions
|
# 1. Load parameters (includes field boundaries setup)
|
||||||
|
# ---------------------------------------------------
|
||||||
tryCatch({
|
tryCatch({
|
||||||
source("parameters_project.R")
|
source("r_app/parameters_project.R")
|
||||||
source("30_growth_model_utils.R")
|
safe_log("Loaded parameters_project.R")
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
warning("Default source files not found. Attempting to source from 'r_app' directory.")
|
safe_log(sprintf("Error loading parameters: %s", e$message), "ERROR")
|
||||||
tryCatch({
|
stop(e)
|
||||||
source(here::here("r_app", "parameters_project.R"))
|
|
||||||
source(here::here("r_app", "30_growth_model_utils.R"))
|
|
||||||
warning(paste("Successfully sourced files from 'r_app' directory."))
|
|
||||||
|
|
||||||
}, error = function(e) {
|
|
||||||
stop("Failed to source required files from both default and 'r_app' directories.")
|
|
||||||
})
|
|
||||||
})
|
})
|
||||||
|
|
||||||
log_message("Starting CI growth model interpolation")
|
# 2. Set up directory paths from parameters
|
||||||
|
# -----------------------------------------------
|
||||||
|
setup <- setup_project_directories(project_dir)
|
||||||
|
|
||||||
# Load and process the data
|
safe_log(sprintf("Using cumulative CI directory: %s", setup$cumulative_CI_vals_dir))
|
||||||
|
|
||||||
|
safe_log("Starting CI growth model interpolation")
|
||||||
|
|
||||||
|
# 3. Load and process the data
|
||||||
|
# ----------------------------
|
||||||
tryCatch({
|
tryCatch({
|
||||||
# Load the combined CI data
|
# Load the combined CI data (created by Script 20)
|
||||||
CI_data <- load_combined_ci_data(cumulative_CI_vals_dir)
|
CI_data <- load_combined_ci_data(setup$cumulative_CI_vals_dir)
|
||||||
|
|
||||||
# Validate harvesting data
|
# Validate harvesting data
|
||||||
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
|
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
|
||||||
|
safe_log("No harvesting data available", "ERROR")
|
||||||
stop("No harvesting data available")
|
stop("No harvesting data available")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -86,7 +82,7 @@ main <- function() {
|
||||||
distinct(year) %>%
|
distinct(year) %>%
|
||||||
pull(year)
|
pull(year)
|
||||||
|
|
||||||
log_message(paste("Processing data for years:", paste(years, collapse = ", ")))
|
safe_log(paste("Processing data for years:", paste(years, collapse = ", ")))
|
||||||
|
|
||||||
# Generate interpolated CI data for each year and field
|
# Generate interpolated CI data for each year and field
|
||||||
CI_all <- generate_interpolated_ci_data(years, harvesting_data, CI_data)
|
CI_all <- generate_interpolated_ci_data(years, harvesting_data, CI_data)
|
||||||
|
|
@ -100,20 +96,20 @@ main <- function() {
|
||||||
# Add daily and cumulative metrics
|
# Add daily and cumulative metrics
|
||||||
CI_all_with_metrics <- calculate_growth_metrics(CI_all)
|
CI_all_with_metrics <- calculate_growth_metrics(CI_all)
|
||||||
|
|
||||||
# Save the processed data
|
# Save the processed data to cumulative_vals directory
|
||||||
save_growth_model(
|
save_growth_model(
|
||||||
CI_all_with_metrics,
|
CI_all_with_metrics,
|
||||||
cumulative_CI_vals_dir,
|
setup$cumulative_CI_vals_dir,
|
||||||
"All_pivots_Cumulative_CI_quadrant_year_v2.rds"
|
"All_pivots_Cumulative_CI_quadrant_year_v2.rds"
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
log_message("No CI data was generated after interpolation", level = "WARNING")
|
safe_log("No CI data was generated after interpolation", "WARNING")
|
||||||
}
|
}
|
||||||
|
|
||||||
log_message("Growth model interpolation completed successfully")
|
safe_log("Growth model interpolation completed successfully")
|
||||||
|
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
log_message(paste("Error in growth model interpolation:", e$message), level = "ERROR")
|
safe_log(paste("Error in growth model interpolation:", e$message), "ERROR")
|
||||||
stop(e$message)
|
stop(e$message)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue