SmartCane/r_app/extract_rds_only.R
2026-01-06 14:17:37 +01:00

105 lines
3.2 KiB
R

# EXTRACT_RDS_ONLY.R
# ===================
# Extract and combine daily CI values into combined_CI_data.rds
# Skips raster processing - assumes daily extracted files already exist
#
# Usage: Rscript r_app/extract_rds_only.R [project_dir]
# - project_dir: Optional project directory name (e.g., "angata", "aura", "chemba");
#   defaults to "angata" when omitted
#
# Example:
# Rscript r_app/extract_rds_only.R angata
suppressPackageStartupMessages({
library(tidyverse)
library(here)
})
# Combine the daily extracted CI files of one project into a single RDS file.
#
# Reads the optional first command-line argument as the project directory name
# (default "angata"), sources parameters_project.R, row-binds every
# extracted_*.rds file found in the project's daily_vals directory, and saves
# the result (grouped by sub_field) as combined_CI_data.rds in the
# cumulative_vals directory, which is created if it does not exist.
#
# Stops with an error if the configuration cannot be sourced, if the daily
# directory is missing, or if it contains no matching files.
# Called for its side effects (console output and the written RDS file).
main <- function() {
  # Capture command line arguments
  args <- commandArgs(trailingOnly = TRUE)

  # Use the first argument as the project name; fall back to the default when
  # it is absent or empty (an empty name would yield a malformed data path).
  if (length(args) >= 1 && !is.na(args[1]) && nzchar(args[1])) {
    project_dir <- as.character(args[1])
  } else {
    project_dir <- "angata"
  }
  cat(sprintf("RDS Extraction: project=%s\n", project_dir))

  # Source configuration: try the working directory first, then 'r_app'.
  # NOTE(review): source() evaluates in the global environment by default,
  # whereas project_dir is local to main() - confirm that
  # parameters_project.R does not expect project_dir to be defined globally.
  tryCatch(
    source("parameters_project.R"),
    error = function(first_error) {
      warning("Default source files not found. Attempting to source from 'r_app' directory.")
      tryCatch({
        source("r_app/parameters_project.R")
        # Success is informational, so report it as a message, not a warning.
        message("Successfully sourced files from 'r_app' directory.")
      }, error = function(second_error) {
        # Surface both underlying errors so that a failure raised inside
        # parameters_project.R is not mistaken for a missing file.
        stop(
          "Failed to source parameters_project.R from both default and 'r_app' directories.",
          "\n  default: ", conditionMessage(first_error),
          "\n  r_app:   ", conditionMessage(second_error),
          call. = FALSE
        )
      })
    }
  )

  # Define paths for CI data
  daily_CI_vals_dir <- file.path(
    "laravel_app/storage/app", project_dir,
    "Data/extracted_ci/daily_vals"
  )
  cumulative_CI_vals_dir <- file.path(
    "laravel_app/storage/app", project_dir,
    "Data/extracted_ci/cumulative_vals"
  )
  cat(sprintf("Daily CI values dir: %s\n", daily_CI_vals_dir))
  cat(sprintf("Cumulative CI values dir: %s\n\n", cumulative_CI_vals_dir))

  # Check if daily CI directory exists and has files
  if (!dir.exists(daily_CI_vals_dir)) {
    stop(sprintf("ERROR: Daily CI directory not found: %s", daily_CI_vals_dir))
  }

  # List RDS files
  files <- list.files(
    path = daily_CI_vals_dir,
    pattern = "^extracted_.*\\.rds$",
    full.names = TRUE
  )
  if (length(files) == 0) {
    stop(sprintf("ERROR: No extracted CI values found in %s", daily_CI_vals_dir))
  }
  cat(sprintf("Found %d daily CI RDS files\n\n", length(files)))

  # Create cumulative directory if it doesn't exist
  if (!dir.exists(cumulative_CI_vals_dir)) {
    dir.create(cumulative_CI_vals_dir, recursive = TRUE)
    cat(sprintf("Created directory: %s\n\n", cumulative_CI_vals_dir))
  }

  # Combine all RDS files. The result is intentionally left grouped by
  # sub_field, so the saved object is a grouped data frame.
  cat("Combining daily RDS files...\n")
  combined_data <- files %>%
    purrr::map(readRDS) %>%
    purrr::list_rbind() %>%
    dplyr::group_by(sub_field)

  # Save combined data
  output_path <- file.path(cumulative_CI_vals_dir, "combined_CI_data.rds")
  saveRDS(combined_data, output_path)
  cat(sprintf("✓ Combined %d daily files\n", length(files)))
  cat(sprintf("✓ Total rows: %d\n", nrow(combined_data)))
  cat(sprintf("✓ Saved to: %s\n\n", output_path))

  # Summary
  cat("Summary:\n")
  cat(sprintf(" Fields: %d\n", n_distinct(combined_data$field, na.rm = TRUE)))
  cat(sprintf(" Sub-fields: %d\n", n_distinct(combined_data$sub_field, na.rm = TRUE)))
  cat(sprintf(" Total measurements: %d\n\n", nrow(combined_data)))
  cat("✓ RDS extraction complete!\n")
  cat("Next: Run 02b_convert_rds_to_csv.R to convert to CSV\n")
}
# Entry point: invoke main() only when no function call is active, i.e. the
# file is being executed at top level (as with Rscript) rather than evaluated
# from inside another call such as source().
if (sys.nframe() == 0L) {
  main()
}