# 02b_CONVERT_CI_RDS_TO_CSV.R
# ============================
# Convert combined_CI_data.rds (output of script 02) to CSV format for Python
# This script runs AFTER script 02 (CI extraction) and creates a CSV that Python
# can use for harvest date detection WITHOUT requiring the 'model' column (which
# comes from script 03 after interpolation and harvest dates are known).
#
# Usage: Rscript 02b_convert_ci_rds_to_csv.R [project_dir]
#   - project_dir: Project directory name (e.g., "esa", "chemba", "angata")
#
# Output: CSV file at laravel_app/storage/app/{project_dir}/Data/extracted_ci/cumulative_vals/ci_data_for_python.csv
#   Columns: field, sub_field, Date, FitData, DOY, value (alias for FitData)
#

suppressPackageStartupMessages({
  library(tidyverse)
  library(lubridate)
  library(here)
})

#' Convert the combined CI RDS file of a project into a CSV for Python
#'
#' Resolves the project directory (first command-line argument, else an
#' existing global `project_dir`, else "esa"), sources
#' `parameters_project.R`, reads `combined_CI_data.rds` from the project's
#' `cumulative_vals` directory, keeps the columns `field`, `sub_field`,
#' `Date`, `FitData`, `DOY` plus `value` (a copy of `FitData`), sorts by
#' `field` and `Date`, and writes `ci_data_for_python.csv` next to the input.
#' Progress and a short validation summary are printed with `cat()`.
#'
#' Side effects: assigns `project_dir` in the global environment, sources
#' `parameters_project.R`, and writes the CSV file.
#'
#' @return `NULL`, invisibly (the value of the final `cat()` call).
#' @details Stops with an error when `parameters_project.R` cannot be
#'   sourced from either location, when the input RDS file does not exist,
#'   or when the loaded data lacks one of the required columns.
main <- function() {
  # ---- Resolve project directory ----
  args <- commandArgs(trailingOnly = TRUE)

  if (length(args) >= 1 && !is.na(args[1])) {
    project_dir <- as.character(args[1])
  } else if (exists("project_dir", envir = .GlobalEnv)) {
    project_dir <- get("project_dir", envir = .GlobalEnv)
  } else {
    project_dir <- "esa"
  }

  # Published globally before sourcing the project parameters.
  # NOTE(review): presumably parameters_project.R reads `project_dir` from
  # the global environment -- confirm against that script.
  assign("project_dir", project_dir, envir = .GlobalEnv)

  cat(sprintf("Converting CI RDS to CSV: project=%s\n", project_dir))

  # ---- Initialize project configuration ----
  # Try the working directory first, then the 'r_app' directory. The
  # underlying error messages are kept so that a failure *inside*
  # parameters_project.R is not mistaken for a missing file.
  tryCatch(
    source("parameters_project.R"),
    error = function(e) {
      warning(
        "Default parameters_project.R not found. Attempting from 'r_app' directory.",
        " Cause: ", conditionMessage(e)
      )
      tryCatch(
        source(here::here("r_app", "parameters_project.R")),
        error = function(e_fallback) {
          stop(
            "Failed to source parameters_project.R from both default and 'r_app' directories.",
            " Default attempt: ", conditionMessage(e), ";",
            " 'r_app' attempt: ", conditionMessage(e_fallback),
            call. = FALSE
          )
        }
      )
    }
  )

  # ---- Define paths ----
  ci_data_dir <- here::here(
    "laravel_app", "storage", "app", project_dir,
    "Data", "extracted_ci", "cumulative_vals"
  )
  input_file <- file.path(ci_data_dir, "combined_CI_data.rds")
  output_file <- file.path(ci_data_dir, "ci_data_for_python.csv")

  if (!file.exists(input_file)) {
    stop(paste("Input file not found:", input_file))
  }

  # ---- Load RDS file ----
  cat(sprintf("Loading: %s\n", input_file))

  ci_data <- readRDS(input_file) %>%
    as_tibble()

  cat(sprintf(" Loaded %d rows\n", nrow(ci_data)))
  cat(sprintf(" Columns: %s\n", paste(names(ci_data), collapse = ", ")))

  # ---- Prepare data for Python ----
  # Fail early, naming every missing column, instead of relying on a
  # no-op rename() to raise an error for the first missing one.
  required_cols <- c("field", "sub_field", "Date", "FitData", "DOY")
  missing_cols <- setdiff(required_cols, names(ci_data))
  if (length(missing_cols) > 0) {
    stop(
      "Input data is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  ci_data_python <- ci_data %>%
    # Add 'value' as an alias for FitData (sometimes needed)
    mutate(value = FitData) %>%
    # Keep only necessary columns, in the documented order
    select(all_of(required_cols), value) %>%
    # Sort by field and date
    arrange(field, Date)

  # ---- Validate data ----
  cat("\nValidation:\n")
  cat(sprintf(" Unique fields: %d\n", n_distinct(ci_data_python$field)))

  # min()/max() with na.rm = TRUE warn and return Inf/-Inf when nothing is
  # left to summarise, so only compute ranges when a value is present.
  if (any(!is.na(ci_data_python$Date))) {
    cat(sprintf(
      " Date range: %s to %s\n",
      min(ci_data_python$Date, na.rm = TRUE),
      max(ci_data_python$Date, na.rm = TRUE)
    ))
  } else {
    cat(" Date range: NA (no non-missing dates)\n")
  }

  if (any(!is.na(ci_data_python$FitData))) {
    cat(sprintf(
      " FitData range: %.2f to %.2f\n",
      min(ci_data_python$FitData, na.rm = TRUE),
      max(ci_data_python$FitData, na.rm = TRUE)
    ))
  } else {
    cat(" FitData range: NA (no non-missing values)\n")
  }

  cat(sprintf(
    " Missing FitData: %d rows\n",
    sum(is.na(ci_data_python$FitData))
  ))

  # ---- Save to CSV ----
  cat(sprintf("\nSaving to: %s\n", output_file))

  write_csv(ci_data_python, output_file)

  cat(sprintf(
    "✓ Successfully created CSV with %d rows\n",
    nrow(ci_data_python)
  ))
  cat("\nNext steps for Python harvest detection:\n")
  cat(" 1. Read this CSV file in Python\n")
  cat(" 2. Group by field to identify seasons\n")
  cat(" 3. Run LSTM model to detect harvest dates\n")
  cat(" 4. Save predicted harvest dates to Excel\n")
  cat(" 5. Use output in script 03 for interpolation\n")
}

# Run only when executed as a script (top level), not when sourced from
# inside another function call.
if (sys.nframe() == 0) {
  main()
}