# EXTRACT_RDS_ONLY.R
# ===================
# Extract and combine daily CI values into combined_CI_data.rds
# Skips raster processing - assumes daily extracted files already exist
#
# Usage: Rscript r_app/extract_rds_only.R [project_dir]
#   - project_dir: Project directory name (e.g., "angata", "aura", "chemba")
#
# Example:
#   Rscript r_app/extract_rds_only.R angata

# Attach the packages this script relies on; startup banners are suppressed
# so they do not clutter the script's console output.
suppressPackageStartupMessages({
  library(tidyverse)
  library(here)
})

# Combine the daily extracted CI files of one project into a single RDS file.
#
# Reads the optional first command-line argument as the project directory
# name (falls back to "angata"), sources parameters_project.R, reads every
# extracted_*.rds file under
#   laravel_app/storage/app/<project_dir>/Data/extracted_ci/daily_vals
# row-binds them, groups the result by `sub_field`, and saves it as
#   laravel_app/storage/app/<project_dir>/Data/extracted_ci/cumulative_vals/
#     combined_CI_data.rds
#
# Stops with an error when parameters_project.R cannot be sourced from either
# location, when the daily directory does not exist, or when it contains no
# matching files. Called for its side effects (console output and the file
# written to disk); takes no arguments.
main <- function() {
  # Capture command line arguments
  args <- commandArgs(trailingOnly = TRUE)

  # Process project_dir argument; default to "angata" when none is supplied
  if (length(args) >= 1 && !is.na(args[1])) {
    project_dir <- as.character(args[1])
  } else {
    project_dir <- "angata"
  }

  cat(sprintf("RDS Extraction: project=%s\n", project_dir))

  # Source configuration: try the working directory first, then r_app/.
  # NOTE(review): source() evaluates in the global environment by default, so
  # the sourced file cannot see the local `project_dir` defined above.
  # Confirm that parameters_project.R does not rely on it.
  tryCatch({
    source("parameters_project.R")
  }, error = function(e) {
    warning("Default source files not found. Attempting to source from 'r_app' directory.")
    tryCatch({
      source("r_app/parameters_project.R")
      # Informational only: the fallback worked, so this is not a warning.
      message("Successfully sourced files from 'r_app' directory.")
    }, error = function(e_fallback) {
      # Surface both underlying causes; a runtime error inside the sourced
      # file would otherwise be indistinguishable from a missing file.
      stop(
        "Failed to source parameters_project.R from both default and 'r_app' directories.",
        " Default location: ", conditionMessage(e),
        "; 'r_app' location: ", conditionMessage(e_fallback),
        call. = FALSE
      )
    })
  })

  # Define paths for CI data
  daily_CI_vals_dir <- file.path(
    "laravel_app/storage/app", project_dir,
    "Data/extracted_ci/daily_vals"
  )

  cumulative_CI_vals_dir <- file.path(
    "laravel_app/storage/app", project_dir,
    "Data/extracted_ci/cumulative_vals"
  )

  cat(sprintf("Daily CI values dir: %s\n", daily_CI_vals_dir))
  cat(sprintf("Cumulative CI values dir: %s\n\n", cumulative_CI_vals_dir))

  # Check if daily CI directory exists and has files
  if (!dir.exists(daily_CI_vals_dir)) {
    stop(sprintf("ERROR: Daily CI directory not found: %s", daily_CI_vals_dir))
  }

  # List RDS files (only names of the form extracted_*.rds are picked up)
  files <- list.files(
    path = daily_CI_vals_dir,
    pattern = "^extracted_.*\\.rds$",
    full.names = TRUE
  )

  if (length(files) == 0) {
    stop(sprintf("ERROR: No extracted CI values found in %s", daily_CI_vals_dir))
  }

  cat(sprintf("Found %d daily CI RDS files\n\n", length(files)))

  # Create cumulative directory if it doesn't exist
  if (!dir.exists(cumulative_CI_vals_dir)) {
    dir.create(cumulative_CI_vals_dir, recursive = TRUE)
    cat(sprintf("Created directory: %s\n\n", cumulative_CI_vals_dir))
  }

  # Combine all RDS files: read each one, row-bind them, and keep the result
  # grouped by sub_field (the grouping is stored in the saved object).
  cat("Combining daily RDS files...\n")
  combined_data <- files %>%
    purrr::map(readRDS) %>%
    purrr::list_rbind() %>%
    dplyr::group_by(sub_field)

  # Save combined data
  output_path <- file.path(cumulative_CI_vals_dir, "combined_CI_data.rds")
  saveRDS(combined_data, output_path)

  cat(sprintf("✓ Combined %d daily files\n", length(files)))
  cat(sprintf("✓ Total rows: %d\n", nrow(combined_data)))
  cat(sprintf("✓ Saved to: %s\n\n", output_path))

  # Summary
  cat("Summary:\n")
  cat(sprintf(" Fields: %d\n", n_distinct(combined_data$field, na.rm = TRUE)))
  cat(sprintf(" Sub-fields: %d\n", n_distinct(combined_data$sub_field, na.rm = TRUE)))
  cat(sprintf(" Total measurements: %d\n\n", nrow(combined_data)))

  cat("✓ RDS extraction complete!\n")
  cat("Next: Run 02b_convert_rds_to_csv.R to convert to CSV\n")
}

# Script entry point: call main() only when no function call is active
# (sys.nframe() == 0), i.e. when this file is executed at top level rather
# than evaluated from inside another call.
if (sys.nframe() == 0) {
  main()
}