SmartCane/r_app/02b_convert_ci_rds_to_csv.R
2026-01-06 14:17:37 +01:00

115 lines
3.9 KiB
R

# 02b_CONVERT_CI_RDS_TO_CSV.R
# ============================
# Convert combined_CI_data.rds (output of script 02) to CSV format for Python
# This script runs AFTER script 02 (CI extraction) and creates a CSV that Python
# can use for harvest date detection WITHOUT requiring the 'model' column (which
# comes from script 03 after interpolation and harvest dates are known).
#
# Usage: Rscript 02b_convert_ci_rds_to_csv.R [project_dir]
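# - project_dir: Project directory name (e.g., "esa", "chemba", "angata").
#   Optional: when omitted, a `project_dir` variable already defined in the
#   global environment is used if present; otherwise it defaults to "esa".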
#
# Output: CSV file at laravel_app/storage/app/{project_dir}/Data/extracted_ci/cumulative_vals/ci_data_for_python.csv
# Columns: field, sub_field, Date, FitData, DOY, value (alias for FitData)
#
suppressPackageStartupMessages({
library(tidyverse)
library(lubridate)
library(here)
})
#' Convert combined_CI_data.rds into a CSV for the Python harvest detector.
#'
#' Resolves the project directory (first command line argument, then an
#' existing global `project_dir`, then "esa"), sources
#' `parameters_project.R`, reads `combined_CI_data.rds` from the project's
#' `cumulative_vals` directory, keeps the columns field, sub_field, Date,
#' FitData and DOY, adds `value` as a copy of FitData, sorts by field and
#' Date, prints a short validation summary and writes
#' `ci_data_for_python.csv` next to the input file.
#'
#' Side effects: assigns `project_dir` in the global environment, sources
#' `parameters_project.R`, prints progress via `cat()`, writes the CSV.
#'
#' Stops with an error when `parameters_project.R` cannot be sourced from
#' either location, when the input RDS file does not exist, or when the
#' loaded data lacks one of the required columns.
#'
#' @return `NULL`, invisibly (the value of the final `cat()` call).
main <- function() {
  # ---- Resolve the project directory --------------------------------------
  args <- commandArgs(trailingOnly = TRUE)
  if (length(args) >= 1 && !is.na(args[1])) {
    project_dir <- as.character(args[1])
  } else if (exists("project_dir", envir = .GlobalEnv)) {
    project_dir <- get("project_dir", envir = .GlobalEnv)
  } else {
    project_dir <- "esa"
  }

  # Published globally before parameters_project.R is sourced.
  # NOTE(review): presumably that script reads `project_dir` from the global
  # environment - confirm against parameters_project.R.
  assign("project_dir", project_dir, envir = .GlobalEnv)
  cat(sprintf("Converting CI RDS to CSV: project=%s\n", project_dir))

  # ---- Initialize project configuration -----------------------------------
  # Try the working directory first, then <project root>/r_app. The original
  # error messages are carried along so a failure inside the script is not
  # mistaken for a missing file.
  tryCatch({
    source("parameters_project.R")
  }, error = function(e_default) {
    warning(
      "Could not source parameters_project.R from the working directory (",
      conditionMessage(e_default),
      "). Attempting from 'r_app' directory.",
      call. = FALSE
    )
    tryCatch({
      source(here::here("r_app", "parameters_project.R"))
    }, error = function(e_fallback) {
      stop(
        "Failed to source parameters_project.R from both default and ",
        "'r_app' directories.",
        "\n  default: ", conditionMessage(e_default),
        "\n  r_app: ", conditionMessage(e_fallback),
        call. = FALSE
      )
    })
  })

  # ---- Define paths -------------------------------------------------------
  ci_data_dir <- here::here(
    "laravel_app", "storage", "app", project_dir,
    "Data", "extracted_ci", "cumulative_vals"
  )
  input_file <- file.path(ci_data_dir, "combined_CI_data.rds")
  output_file <- file.path(ci_data_dir, "ci_data_for_python.csv")

  if (!file.exists(input_file)) {
    stop("Input file not found: ", input_file, call. = FALSE)
  }

  # ---- Load ---------------------------------------------------------------
  cat(sprintf("Loading: %s\n", input_file))
  ci_data <- readRDS(input_file) %>%
    as_tibble()
  cat(sprintf(" Loaded %d rows\n", nrow(ci_data)))
  cat(sprintf(" Columns: %s\n", paste(names(ci_data), collapse = ", ")))

  # ---- Check the expected schema ------------------------------------------
  # Fail early with a readable message instead of an obscure tidyselect
  # error further down the pipeline.
  required_cols <- c("field", "sub_field", "Date", "FitData", "DOY")
  missing_cols <- setdiff(required_cols, names(ci_data))
  if (length(missing_cols) > 0) {
    stop(
      sprintf(
        "Input data is missing required column(s): %s. Available columns: %s",
        paste(missing_cols, collapse = ", "),
        paste(names(ci_data), collapse = ", ")
      ),
      call. = FALSE
    )
  }

  # ---- Prepare data for Python --------------------------------------------
  ci_data_python <- ci_data %>%
    # 'value' is an alias for FitData (sometimes needed downstream)
    mutate(value = FitData) %>%
    # Keep only the necessary columns, in a fixed order
    select(all_of(c(required_cols, "value"))) %>%
    arrange(field, Date)

  # ---- Validation summary -------------------------------------------------
  cat("\nValidation:\n")
  cat(sprintf(" Unique fields: %d\n", n_distinct(ci_data_python$field)))

  # Guard the range calls: min()/max() with na.rm = TRUE on an all-NA (or
  # empty) vector warn and return Inf/-Inf.
  if (any(!is.na(ci_data_python$Date))) {
    date_range <- range(ci_data_python$Date, na.rm = TRUE)
    cat(sprintf(" Date range: %s to %s\n", date_range[1], date_range[2]))
  } else {
    cat(" Date range: n/a (no non-missing dates)\n")
  }

  if (any(!is.na(ci_data_python$FitData))) {
    fit_range <- range(ci_data_python$FitData, na.rm = TRUE)
    cat(sprintf(" FitData range: %.2f to %.2f\n", fit_range[1], fit_range[2]))
  } else {
    cat(" FitData range: n/a (no non-missing values)\n")
  }

  cat(sprintf(
    " Missing FitData: %d rows\n",
    sum(is.na(ci_data_python$FitData))
  ))

  # ---- Save to CSV --------------------------------------------------------
  cat(sprintf("\nSaving to: %s\n", output_file))
  write_csv(ci_data_python, output_file)
  cat(sprintf(
    "✓ Successfully created CSV with %d rows\n",
    nrow(ci_data_python)
  ))

  cat("\nNext steps for Python harvest detection:\n")
  cat(" 1. Read this CSV file in Python\n")
  cat(" 2. Group by field to identify seasons\n")
  cat(" 3. Run LSTM model to detect harvest dates\n")
  cat(" 4. Save predicted harvest dates to Excel\n")
  cat(" 5. Use output in script 03 for interpolation\n")
}
# Script entry point: call main() only when this code is evaluated with no
# enclosing function call on the stack (sys.nframe() is 0), e.g. when the
# file is run directly with Rscript.
if (sys.nframe() == 0L) main()