separate scripts work for angata, except for the word doc.
This commit is contained in:
parent
3ee3f9e31c
commit
bfd56ccd16
|
|
@ -19,13 +19,15 @@
|
||||||
# - Naming: Per-field GeoTIFFs organized by field and date
|
# - Naming: Per-field GeoTIFFs organized by field and date
|
||||||
#
|
#
|
||||||
# USAGE:
|
# USAGE:
|
||||||
# Rscript 10_create_per_field_tiffs.R [project]
|
# Rscript 10_create_per_field_tiffs.R [project] [end_date] [offset]
|
||||||
#
|
#
|
||||||
# Example (Windows PowerShell):
|
# Example (Windows PowerShell):
|
||||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 7
|
||||||
#
|
#
|
||||||
# PARAMETERS:
|
# PARAMETERS:
|
||||||
# - project: Project name (character) - angata, chemba, xinavane, esa, simba
|
# - project: Project name (character) - angata, chemba, xinavane, esa, simba (default: angata)
|
||||||
|
# - end_date: End date for processing (YYYY-MM-DD format, default: today)
|
||||||
|
# - offset: Days to look back (numeric, default: 7)
|
||||||
#
|
#
|
||||||
# CLIENT TYPES:
|
# CLIENT TYPES:
|
||||||
# - cane_supply (ANGATA): Yes - primary data organization script
|
# - cane_supply (ANGATA): Yes - primary data organization script
|
||||||
|
|
@ -70,10 +72,16 @@ main <- function() {
|
||||||
|
|
||||||
# STEP 2: Parse command-line arguments FIRST (needed by parameters_project.R)
|
# STEP 2: Parse command-line arguments FIRST (needed by parameters_project.R)
|
||||||
args <- commandArgs(trailingOnly = TRUE)
|
args <- commandArgs(trailingOnly = TRUE)
|
||||||
project_dir <- if (length(args) == 0) "angata" else args[1]
|
|
||||||
|
|
||||||
# Make project_dir available to sourced files (they execute in global scope)
|
# Parse arguments: [project] [end_date] [offset]
|
||||||
|
project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
|
||||||
|
end_date_arg <- if (length(args) >= 2 && args[2] != "") as.Date(args[2], format = "%Y-%m-%d") else Sys.Date()
|
||||||
|
offset_arg <- if (length(args) >= 3 && !is.na(as.numeric(args[3]))) as.numeric(args[3]) else 7
|
||||||
|
|
||||||
|
# Make variables available to sourced files (they execute in global scope)
|
||||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||||
|
assign("end_date", end_date_arg, envir = .GlobalEnv)
|
||||||
|
assign("offset", offset_arg, envir = .GlobalEnv)
|
||||||
|
|
||||||
# STEP 3: SOURCE ALL UTILITY SCRIPTS (now that project_dir is defined)
|
# STEP 3: SOURCE ALL UTILITY SCRIPTS (now that project_dir is defined)
|
||||||
# Load parameters_project.R (provides safe_log, setup_project_directories, etc.)
|
# Load parameters_project.R (provides safe_log, setup_project_directories, etc.)
|
||||||
|
|
@ -97,7 +105,7 @@ main <- function() {
|
||||||
# Window: end_date - offset days to end_date
|
# Window: end_date - offset days to end_date
|
||||||
# Always coerce to correct types to avoid issues with lingering/inherited values
|
# Always coerce to correct types to avoid issues with lingering/inherited values
|
||||||
if (!exists("end_date") || !inherits(end_date, "Date")) {
|
if (!exists("end_date") || !inherits(end_date, "Date")) {
|
||||||
end_date <- as.Date("2026-02-04")
|
end_date <- Sys.Date()
|
||||||
safe_log(paste("Using default end_date:", end_date), "INFO")
|
safe_log(paste("Using default end_date:", end_date), "INFO")
|
||||||
}
|
}
|
||||||
if (!exists("offset") || !is.numeric(offset)) {
|
if (!exists("offset") || !is.numeric(offset)) {
|
||||||
|
|
|
||||||
|
|
@ -127,91 +127,96 @@ main <- function() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Process each DATE (OPTIMIZED: load TIFF once, process all fields)
|
# Process each DATE (load merged TIFF once, extract all fields from it)
|
||||||
total_success <- 0
|
total_success <- 0
|
||||||
total_error <- 0
|
total_error <- 0
|
||||||
ci_results_by_date <- list()
|
|
||||||
|
|
||||||
for (date_str in dates_filter) {
|
for (date_str in dates_filter) {
|
||||||
# Load the merged TIFF ONCE for this date
|
# Load the MERGED TIFF (farm-wide) ONCE for this date
|
||||||
merged_tif_path <- file.path(setup$field_tiles_dir, fields[1], sprintf("%s.tif", date_str))
|
input_tif_merged <- file.path(setup$merged_tif_folder, sprintf("%s.tif", date_str))
|
||||||
|
|
||||||
# Find the actual TIFF path (it's in the first field that has it)
|
if (!file.exists(input_tif_merged)) {
|
||||||
input_tif_full <- NULL
|
safe_log(sprintf(" %s: merged_tif not found (skipping)", date_str))
|
||||||
for (field in fields) {
|
total_error <<- total_error + 1
|
||||||
candidate_path <- file.path(setup$field_tiles_dir, field, sprintf("%s.tif", date_str))
|
|
||||||
if (file.exists(candidate_path)) {
|
|
||||||
input_tif_full <- candidate_path
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (is.null(input_tif_full)) {
|
|
||||||
safe_log(sprintf(" %s: Input TIFF not found (skipping)", date_str))
|
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
|
||||||
tryCatch({
|
tryCatch({
|
||||||
# Load TIFF ONCE
|
# Load 4-band TIFF ONCE
|
||||||
raster_4band <- terra::rast(input_tif_full)
|
raster_4band <- terra::rast(input_tif_merged)
|
||||||
|
safe_log(sprintf(" %s: Loaded merged TIFF, processing %d fields...", date_str, length(fields)))
|
||||||
|
|
||||||
|
# Calculate CI from 4-band
|
||||||
|
ci_raster <- calc_ci_from_raster(raster_4band)
|
||||||
|
|
||||||
|
# Create 5-band (R, G, B, NIR, CI)
|
||||||
|
five_band <- c(raster_4band, ci_raster)
|
||||||
|
|
||||||
|
# Now process all fields from this single merged TIFF
|
||||||
|
fields_processed_this_date <- 0
|
||||||
|
|
||||||
# Now process all fields from this single TIFF
|
|
||||||
for (field in fields) {
|
for (field in fields) {
|
||||||
field_ci_path <- file.path(setup$field_tiles_ci_dir, field)
|
field_ci_path <- file.path(setup$field_tiles_ci_dir, field)
|
||||||
field_daily_vals_path <- file.path(setup$daily_ci_vals_dir, field)
|
field_daily_vals_path <- file.path(setup$daily_ci_vals_dir, field)
|
||||||
|
|
||||||
|
# Pre-create output directories
|
||||||
|
dir.create(field_ci_path, showWarnings = FALSE, recursive = TRUE)
|
||||||
|
dir.create(field_daily_vals_path, showWarnings = FALSE, recursive = TRUE)
|
||||||
|
|
||||||
output_tif <- file.path(field_ci_path, sprintf("%s.tif", date_str))
|
output_tif <- file.path(field_ci_path, sprintf("%s.tif", date_str))
|
||||||
output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str))
|
output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str))
|
||||||
|
|
||||||
# MODE 3: Skip if both outputs already exist
|
# MODE 3: Skip if both outputs already exist
|
||||||
if (file.exists(output_tif) && file.exists(output_rds)) {
|
if (file.exists(output_tif) && file.exists(output_rds)) {
|
||||||
next # Skip to next field
|
next
|
||||||
}
|
}
|
||||||
|
|
||||||
# MODE 2: Regeneration mode - RDS missing but CI TIFF exists
|
# MODE 2: Regeneration mode - RDS missing but CI TIFF exists
|
||||||
if (file.exists(output_tif) && !file.exists(output_rds)) {
|
if (file.exists(output_tif) && !file.exists(output_rds)) {
|
||||||
tryCatch({
|
tryCatch({
|
||||||
extract_rds_from_ci_tiff(output_tif, output_rds, field_boundaries_sf, field)
|
extract_rds_from_ci_tiff(output_tif, output_rds, field_boundaries_sf, field)
|
||||||
total_success <<- total_success + 1
|
fields_processed_this_date <- fields_processed_this_date + 1
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
total_error <<- total_error + 1
|
# Continue to next field
|
||||||
})
|
})
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
|
||||||
# MODE 1: Normal mode - calculate CI from 4-band input
|
# MODE 1: Normal mode - crop 5-band TIFF to field boundary and save
|
||||||
tryCatch({
|
tryCatch({
|
||||||
# Calculate CI
|
# Crop 5-band TIFF to field boundary
|
||||||
ci_raster <- calc_ci_from_raster(raster_4band)
|
field_geom <- field_boundaries_sf %>% filter(field == !!field)
|
||||||
|
five_band_cropped <- terra::crop(five_band, field_geom, mask = TRUE)
|
||||||
|
|
||||||
# Create 5-band TIFF (R, G, B, NIR, CI)
|
# Save 5-band field TIFF
|
||||||
five_band <- c(raster_4band, ci_raster)
|
terra::writeRaster(five_band_cropped, output_tif, overwrite = TRUE)
|
||||||
|
|
||||||
# Save 5-band TIFF
|
# Extract CI statistics by sub_field (from cropped CI raster)
|
||||||
terra::writeRaster(five_band, output_tif, overwrite = TRUE)
|
ci_cropped <- five_band_cropped[[5]] # 5th band is CI
|
||||||
|
ci_stats <- extract_ci_by_subfield(ci_cropped, field_boundaries_sf, field)
|
||||||
# Extract CI statistics by sub_field
|
|
||||||
ci_stats <- extract_ci_by_subfield(ci_raster, field_boundaries_sf, field)
|
|
||||||
|
|
||||||
# Save RDS
|
# Save RDS
|
||||||
if (!is.null(ci_stats) && nrow(ci_stats) > 0) {
|
if (!is.null(ci_stats) && nrow(ci_stats) > 0) {
|
||||||
saveRDS(ci_stats, output_rds)
|
saveRDS(ci_stats, output_rds)
|
||||||
|
|
||||||
# Store for daily aggregation
|
|
||||||
ci_stats_with_date <- ci_stats %>% mutate(date = date_str)
|
|
||||||
key <- sprintf("%s_%s", field, date_str)
|
|
||||||
ci_results_by_date[[key]] <<- ci_stats_with_date
|
|
||||||
}
|
}
|
||||||
|
|
||||||
total_success <<- total_success + 1
|
fields_processed_this_date <- fields_processed_this_date + 1
|
||||||
|
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
total_error <<- total_error + 1
|
# Error in individual field, continue to next
|
||||||
|
safe_log(sprintf(" Error processing field %s: %s", field, e$message), "WARNING")
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Increment success counter if at least one field succeeded
|
||||||
|
if (fields_processed_this_date > 0) {
|
||||||
|
total_success <<- total_success + 1
|
||||||
|
safe_log(sprintf(" %s: Processed %d fields", date_str, fields_processed_this_date))
|
||||||
|
}
|
||||||
|
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
safe_log(sprintf(" %s: ✗ Error loading TIFF - %s", date_str, e$message), "ERROR")
|
|
||||||
total_error <<- total_error + 1
|
total_error <<- total_error + 1
|
||||||
|
safe_log(sprintf(" %s: Error loading or processing merged TIFF - %s", date_str, e$message), "ERROR")
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,13 +4,22 @@
|
||||||
# ===================
|
# ===================
|
||||||
# Utility functions for growth model interpolation and manipulation.
|
# Utility functions for growth model interpolation and manipulation.
|
||||||
# These functions support the creation of continuous growth models from point measurements.
|
# These functions support the creation of continuous growth models from point measurements.
|
||||||
|
#
|
||||||
|
# PERFORMANCE OPTIMIZATION:
|
||||||
|
# - Parallel file I/O: Reads 450k+ RDS files using furrr::future_map_dfr()
|
||||||
|
# - Parallel field interpolation: Processes fields in parallel (1 core per ~100 fields)
|
||||||
|
# - Dynamic CPU detection: Allocates workers based on available cores
|
||||||
|
# - Windows compatible: Uses furrr with plan(multisession) for cross-platform support
|
||||||
|
|
||||||
#' Load and prepare the combined CI data (Per-Field Architecture)
|
#' Load and prepare the combined CI data (Per-Field Architecture)
|
||||||
|
#' OPTIMIZE: Filters by date during load (skip unnecessary date ranges)
|
||||||
|
#' PARALLELIZE: Reads 450k+ RDS files in parallel using furrr::future_map_dfr()
|
||||||
#'
|
#'
|
||||||
#' @param daily_vals_dir Directory containing per-field daily RDS files (Data/extracted_ci/daily_vals)
|
#' @param daily_vals_dir Directory containing per-field daily RDS files (Data/extracted_ci/daily_vals)
|
||||||
|
#' @param harvesting_data Optional: Dataframe with season dates. If provided, only loads files within season ranges (major speedup)
|
||||||
#' @return Long-format dataframe with CI values by date and field
|
#' @return Long-format dataframe with CI values by date and field
|
||||||
#'
|
#'
|
||||||
load_combined_ci_data <- function(daily_vals_dir) {
|
load_combined_ci_data <- function(daily_vals_dir, harvesting_data = NULL) {
|
||||||
# For per-field architecture: daily_vals_dir = Data/extracted_ci/daily_vals
|
# For per-field architecture: daily_vals_dir = Data/extracted_ci/daily_vals
|
||||||
# Structure: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
|
# Structure: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
|
||||||
|
|
||||||
|
|
@ -20,6 +29,17 @@ load_combined_ci_data <- function(daily_vals_dir) {
|
||||||
|
|
||||||
safe_log(paste("Loading per-field CI data from:", daily_vals_dir))
|
safe_log(paste("Loading per-field CI data from:", daily_vals_dir))
|
||||||
|
|
||||||
|
# OPTIMIZATION: If harvest data provided, extract date range to avoid loading unnecessary dates
|
||||||
|
date_filter_min <- NULL
|
||||||
|
date_filter_max <- NULL
|
||||||
|
if (!is.null(harvesting_data) && nrow(harvesting_data) > 0) {
|
||||||
|
date_filter_min <- min(harvesting_data$season_start, na.rm = TRUE)
|
||||||
|
date_filter_max <- max(harvesting_data$season_end, na.rm = TRUE)
|
||||||
|
safe_log(sprintf("Pre-filtering by harvest season dates: %s to %s",
|
||||||
|
format(date_filter_min, "%Y-%m-%d"),
|
||||||
|
format(date_filter_max, "%Y-%m-%d")))
|
||||||
|
}
|
||||||
|
|
||||||
# Find all daily RDS files recursively (per-field structure)
|
# Find all daily RDS files recursively (per-field structure)
|
||||||
# IMPORTANT: Only load files matching the per-field format YYYY-MM-DD.rds in field subdirectories
|
# IMPORTANT: Only load files matching the per-field format YYYY-MM-DD.rds in field subdirectories
|
||||||
all_daily_files <- list.files(
|
all_daily_files <- list.files(
|
||||||
|
|
@ -37,71 +57,87 @@ load_combined_ci_data <- function(daily_vals_dir) {
|
||||||
stop(paste("No per-field daily RDS files found in:", daily_vals_dir))
|
stop(paste("No per-field daily RDS files found in:", daily_vals_dir))
|
||||||
}
|
}
|
||||||
|
|
||||||
safe_log(sprintf("Found %d per-field daily RDS files to load (filtered from legacy format)", length(all_daily_files)))
|
safe_log(sprintf("Found %d per-field daily RDS files (filtered from legacy format)", length(all_daily_files)))
|
||||||
|
|
||||||
# Rebuild with explicit date and field tracking
|
# OPTIMIZATION: Filter files by filename date BEFORE parallel loading
|
||||||
# File structure: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
|
# Skip files outside harvest season (can save 60-80% of I/O on large datasets)
|
||||||
combined_long <- data.frame()
|
if (!is.null(date_filter_min) && !is.null(date_filter_max)) {
|
||||||
|
all_daily_files <- all_daily_files[
|
||||||
|
{
|
||||||
|
dates <- as.Date(tools::file_path_sans_ext(basename(all_daily_files)), format = "%Y-%m-%d")
|
||||||
|
!is.na(dates) & dates >= date_filter_min & dates <= date_filter_max
|
||||||
|
}
|
||||||
|
]
|
||||||
|
safe_log(sprintf("Filtered to %d files within harvest season date range", length(all_daily_files)))
|
||||||
|
}
|
||||||
|
|
||||||
for (file in all_daily_files) {
|
# Set up parallel future plan (Windows PSOCK multisession; Mac/Linux can use forking)
|
||||||
tryCatch({
|
# Automatically detect available cores and limit to reasonable number
|
||||||
|
n_cores <- min(parallel::detectCores() - 1, 8) # Use max 8 cores (diminishing returns after)
|
||||||
|
future::plan(strategy = future::multisession, workers = n_cores)
|
||||||
|
safe_log(sprintf("Using %d parallel workers for file I/O", n_cores))
|
||||||
|
|
||||||
|
# Parallel file reading: future_map_dfr processes each file in parallel
|
||||||
|
# Returns combined dataframe directly (no need to rbind)
|
||||||
|
combined_long <- furrr::future_map_dfr(
|
||||||
|
all_daily_files,
|
||||||
|
.progress = TRUE,
|
||||||
|
.options = furrr::furrr_options(seed = TRUE),
|
||||||
|
function(file) {
|
||||||
# Extract date from filename: {YYYY-MM-DD}.rds
|
# Extract date from filename: {YYYY-MM-DD}.rds
|
||||||
filename <- basename(file)
|
filename <- basename(file)
|
||||||
date_str <- tools::file_path_sans_ext(filename)
|
date_str <- tools::file_path_sans_ext(filename)
|
||||||
|
|
||||||
# Parse date - handle various formats
|
# Parse date
|
||||||
parsed_date <- NA
|
|
||||||
if (nchar(date_str) == 10 && grepl("^\\d{4}-\\d{2}-\\d{2}$", date_str)) {
|
if (nchar(date_str) == 10 && grepl("^\\d{4}-\\d{2}-\\d{2}$", date_str)) {
|
||||||
parsed_date <- as.Date(date_str, format = "%Y-%m-%d")
|
parsed_date <- as.Date(date_str, format = "%Y-%m-%d")
|
||||||
} else {
|
} else {
|
||||||
safe_log(sprintf("Warning: Could not parse date from filename: %s", filename), "WARNING")
|
return(data.frame()) # Return empty dataframe if parse fails
|
||||||
next
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is.na(parsed_date)) {
|
if (is.na(parsed_date)) {
|
||||||
safe_log(sprintf("Warning: Invalid date parsed from: %s", filename), "WARNING")
|
return(data.frame())
|
||||||
next
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Read RDS file
|
# Read RDS file
|
||||||
rds_data <- tryCatch({
|
tryCatch({
|
||||||
readRDS(file)
|
rds_data <- readRDS(file)
|
||||||
}, error = function(e) {
|
|
||||||
safe_log(sprintf("Error reading RDS file %s: %s", file, e$message), "WARNING")
|
|
||||||
return(NULL)
|
|
||||||
})
|
|
||||||
|
|
||||||
if (is.null(rds_data) || nrow(rds_data) == 0) {
|
if (is.null(rds_data) || nrow(rds_data) == 0) {
|
||||||
next
|
return(data.frame())
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add date column to the data
|
# Add date column to the data
|
||||||
rds_data <- rds_data %>%
|
rds_data %>%
|
||||||
dplyr::mutate(Date = parsed_date)
|
dplyr::mutate(Date = parsed_date)
|
||||||
|
|
||||||
combined_long <- rbind(combined_long, rds_data)
|
|
||||||
|
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
safe_log(sprintf("Error processing file %s: %s", file, e$message), "WARNING")
|
return(data.frame()) # Return empty dataframe on error
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Return to sequential processing to avoid nested parallelism
|
||||||
|
future::plan(future::sequential)
|
||||||
|
|
||||||
if (nrow(combined_long) == 0) {
|
if (nrow(combined_long) == 0) {
|
||||||
safe_log("Warning: No valid CI data loaded from daily files", "WARNING")
|
safe_log("Warning: No valid CI data loaded from daily files", "WARNING")
|
||||||
return(data.frame())
|
return(data.frame())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# OPTIMIZATION: Use data.table for fast filtering (10-20x faster than dplyr on large datasets)
|
||||||
# Reshape to long format using ci_mean as the main CI value
|
# Reshape to long format using ci_mean as the main CI value
|
||||||
# Only keep rows where ci_mean has valid data
|
DT <- data.table::as.data.table(combined_long)
|
||||||
pivot_stats_long <- combined_long %>%
|
DT <- DT[, .(field, sub_field, ci_mean, Date)]
|
||||||
dplyr::select(field, sub_field, ci_mean, Date) %>%
|
DT[, c("value") := list(as.numeric(ci_mean))]
|
||||||
dplyr::rename(value = ci_mean) %>%
|
DT[, ci_mean := NULL]
|
||||||
dplyr::mutate(value = as.numeric(value)) %>%
|
|
||||||
# Keep rows even if ci_mean is NA or 0 (might be valid), but drop if Date is missing
|
# Fast filtering without dplyr::distinct() (which is slow on large datasets)
|
||||||
tidyr::drop_na(Date) %>%
|
# Keep rows where Date is valid, field/sub_field exist, and value is finite
|
||||||
dplyr::filter(!is.na(sub_field), !is.na(field)) %>%
|
DT <- DT[!is.na(Date) & !is.na(sub_field) & !is.na(field) & is.finite(value)]
|
||||||
dplyr::filter(!is.infinite(value)) %>%
|
|
||||||
dplyr::distinct()
|
# Convert back to tibble for compatibility with rest of pipeline
|
||||||
|
pivot_stats_long <- dplyr::as_tibble(DT)
|
||||||
|
|
||||||
safe_log(sprintf("Loaded %d CI data points from %d daily files",
|
safe_log(sprintf("Loaded %d CI data points from %d daily files",
|
||||||
nrow(pivot_stats_long), length(all_daily_files)))
|
nrow(pivot_stats_long), length(all_daily_files)))
|
||||||
|
|
@ -194,6 +230,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season,
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Generate interpolated CI data for all fields and seasons
|
#' Generate interpolated CI data for all fields and seasons
|
||||||
|
#' PARALLELIZE: Processes fields in parallel using furrr::future_map()
|
||||||
#'
|
#'
|
||||||
#' @param years Vector of years to process
|
#' @param years Vector of years to process
|
||||||
#' @param harvesting_data Dataframe with harvesting information
|
#' @param harvesting_data Dataframe with harvesting information
|
||||||
|
|
@ -227,40 +264,50 @@ generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
|
||||||
return(data.frame())
|
return(data.frame())
|
||||||
}
|
}
|
||||||
|
|
||||||
# Initialize progress bar for this year
|
|
||||||
total_fields <<- total_fields + length(valid_sub_fields)
|
total_fields <<- total_fields + length(valid_sub_fields)
|
||||||
pb <- txtProgressBar(min = 0, max = length(valid_sub_fields), style = 3, width = 50)
|
safe_log(sprintf("Year %d: Processing %d fields in parallel", yr, length(valid_sub_fields)))
|
||||||
counter <- 0
|
|
||||||
|
|
||||||
# Extract and interpolate data for each valid field with progress bar
|
# Set up parallel future plan for field interpolation
|
||||||
result_list <- list()
|
# Allocate 1 core per ~100 fields (with minimum 2 cores)
|
||||||
for (field in valid_sub_fields) {
|
n_cores <- max(2, min(parallel::detectCores() - 1, ceiling(length(valid_sub_fields) / 100)))
|
||||||
counter <- counter + 1
|
future::plan(strategy = future::multisession, workers = n_cores)
|
||||||
setTxtProgressBar(pb, counter)
|
|
||||||
|
|
||||||
# Call with verbose=FALSE to suppress warnings during progress bar iteration
|
# PARALLELIZE: Process all fields in parallel (each extracts & interpolates independently)
|
||||||
field_result <- extract_CI_data(field,
|
result_list <- furrr::future_map(
|
||||||
|
valid_sub_fields,
|
||||||
|
.progress = TRUE,
|
||||||
|
.options = furrr::furrr_options(seed = TRUE),
|
||||||
|
function(field) {
|
||||||
|
# Call with verbose=FALSE to suppress warnings during parallel iteration
|
||||||
|
extract_CI_data(field,
|
||||||
harvesting_data = harvesting_data,
|
harvesting_data = harvesting_data,
|
||||||
field_CI_data = ci_data,
|
field_CI_data = ci_data,
|
||||||
season = yr,
|
season = yr,
|
||||||
verbose = FALSE)
|
verbose = FALSE)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Return to sequential processing
|
||||||
|
future::plan(future::sequential)
|
||||||
|
|
||||||
|
# Process results and tracking
|
||||||
|
for (i in seq_along(result_list)) {
|
||||||
|
field_result <- result_list[[i]]
|
||||||
|
field_name <- valid_sub_fields[i]
|
||||||
|
|
||||||
if (nrow(field_result) > 0) {
|
if (nrow(field_result) > 0) {
|
||||||
successful_fields <<- successful_fields + 1
|
successful_fields <<- successful_fields + 1
|
||||||
result_list[[field]] <- field_result
|
|
||||||
} else {
|
} else {
|
||||||
# Track failed field
|
|
||||||
failed_fields[[length(failed_fields) + 1]] <<- list(
|
failed_fields[[length(failed_fields) + 1]] <<- list(
|
||||||
field = field,
|
field = field_name,
|
||||||
season = yr,
|
season = yr,
|
||||||
reason = "Unable to generate interpolated data"
|
reason = "Unable to generate interpolated data"
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
close(pb)
|
|
||||||
cat("\n") # Newline after progress bar
|
|
||||||
|
|
||||||
# Combine all results for this year
|
# Combine all results for this year
|
||||||
|
result_list <- result_list[sapply(result_list, nrow) > 0] # Keep only non-empty
|
||||||
if (length(result_list) > 0) {
|
if (length(result_list) > 0) {
|
||||||
purrr::list_rbind(result_list)
|
purrr::list_rbind(result_list)
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -60,6 +60,12 @@ suppressPackageStartupMessages({
|
||||||
library(tidyverse) # For dplyr (data wrangling, grouping, mutating)
|
library(tidyverse) # For dplyr (data wrangling, grouping, mutating)
|
||||||
library(lubridate) # For date/time operations (date arithmetic, ISO week extraction)
|
library(lubridate) # For date/time operations (date arithmetic, ISO week extraction)
|
||||||
library(readxl) # For reading harvest.xlsx (harvest dates for growth model phases)
|
library(readxl) # For reading harvest.xlsx (harvest dates for growth model phases)
|
||||||
|
|
||||||
|
# Parallel processing (Windows PSOCK + Mac/Linux fork-safe)
|
||||||
|
library(future) # For setting up parallel execution plans
|
||||||
|
library(furrr) # For future_map_dfr (parallel file I/O and field processing)
|
||||||
|
library(parallel) # For detectCores (automatic CPU detection)
|
||||||
|
library(data.table) # For fast filtering on large datasets
|
||||||
})
|
})
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
@ -110,23 +116,24 @@ main <- function() {
|
||||||
|
|
||||||
safe_log("Starting CI growth model interpolation")
|
safe_log("Starting CI growth model interpolation")
|
||||||
|
|
||||||
|
# Set up data directory paths
|
||||||
|
data_dir <- setup$data_dir
|
||||||
|
|
||||||
# Load and process the data
|
# Load and process the data
|
||||||
tryCatch({
|
tryCatch({
|
||||||
# Load the combined CI data (created by Script 20 per-field)
|
# Load the combined CI data (created by Script 20 per-field)
|
||||||
# Script 20 per-field outputs: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
|
# Script 20 per-field outputs: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
|
||||||
CI_data <- load_combined_ci_data(daily_vals_dir)
|
# OPTIMIZATION: Pass harvest data to pre-filter by date range (skip unnecessary files)
|
||||||
|
|
||||||
# Load harvesting data from harvest.xlsx for growth model phase assignment
|
|
||||||
# Use the centralized load_harvesting_data() function which handles NA season_end values
|
|
||||||
# by setting them to Sys.Date() (field is still in current growing season)
|
|
||||||
data_dir <- setup$data_dir
|
|
||||||
harvesting_data <- tryCatch({
|
harvesting_data <- tryCatch({
|
||||||
load_harvesting_data(data_dir)
|
load_harvesting_data(data_dir)
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
safe_log(paste("Error loading harvest data:", e$message), "WARNING")
|
safe_log(paste("Error loading harvest data for pre-filtering:", e$message), "WARNING")
|
||||||
NULL
|
NULL
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Load CI data with date range pre-filtering
|
||||||
|
CI_data <- load_combined_ci_data(daily_vals_dir, harvesting_data = harvesting_data)
|
||||||
|
|
||||||
# Validate harvesting data
|
# Validate harvesting data
|
||||||
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
|
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
|
||||||
safe_log("No harvesting data available", "ERROR")
|
safe_log("No harvesting data available", "ERROR")
|
||||||
|
|
|
||||||
|
|
@ -139,6 +139,7 @@ suppressPackageStartupMessages({
|
||||||
library(readr) # For reading CSV files (harvest predictions from Python)
|
library(readr) # For reading CSV files (harvest predictions from Python)
|
||||||
library(readxl) # For reading harvest.xlsx (harvest dates for field mapping)
|
library(readxl) # For reading harvest.xlsx (harvest dates for field mapping)
|
||||||
library(writexl) # For writing Excel outputs (KPI summary tables)
|
library(writexl) # For writing Excel outputs (KPI summary tables)
|
||||||
|
library(progress) # For progress bars during field processing
|
||||||
|
|
||||||
# ML/Analysis (optional - only for harvest model inference)
|
# ML/Analysis (optional - only for harvest model inference)
|
||||||
tryCatch({
|
tryCatch({
|
||||||
|
|
@ -573,8 +574,10 @@ main <- function() {
|
||||||
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
|
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
|
||||||
|
|
||||||
# Load weekly harvest probabilities from script 31 (if available)
|
# Load weekly harvest probabilities from script 31 (if available)
|
||||||
|
# Note: Script 31 saves to reports/kpis/field_stats/ (not field_level)
|
||||||
message("\n4. Loading harvest probabilities from script 31...")
|
message("\n4. Loading harvest probabilities from script 31...")
|
||||||
harvest_prob_file <- file.path(reports_dir, "kpis", "field_stats",
|
harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats")
|
||||||
|
harvest_prob_file <- file.path(harvest_prob_dir,
|
||||||
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, year))
|
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, year))
|
||||||
message(paste(" Looking for:", harvest_prob_file))
|
message(paste(" Looking for:", harvest_prob_file))
|
||||||
|
|
||||||
|
|
@ -846,7 +849,7 @@ main <- function() {
|
||||||
total_acreage = sum(field_data$Acreage, na.rm = TRUE),
|
total_acreage = sum(field_data$Acreage, na.rm = TRUE),
|
||||||
mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2),
|
mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2),
|
||||||
median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2),
|
median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2),
|
||||||
mean_cv = round(mean(field_data$CI_CV, na.rm = TRUE), 4),
|
mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4),
|
||||||
week = current_week,
|
week = current_week,
|
||||||
year = year,
|
year = year,
|
||||||
date = as.character(end_date)
|
date = as.character(end_date)
|
||||||
|
|
|
||||||
|
|
@ -605,7 +605,7 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
|
||||||
NULL
|
NULL
|
||||||
}
|
}
|
||||||
|
|
||||||
output_subdir <- file.path(reports_dir, "kpis", "field_analysis")
|
output_subdir <- file.path(reports_dir, "field_analysis")
|
||||||
if (!dir.exists(output_subdir)) {
|
if (!dir.exists(output_subdir)) {
|
||||||
dir.create(output_subdir, recursive = TRUE)
|
dir.create(output_subdir, recursive = TRUE)
|
||||||
}
|
}
|
||||||
|
|
@ -637,7 +637,7 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
|
||||||
)
|
)
|
||||||
|
|
||||||
rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds")
|
rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds")
|
||||||
rds_path <- file.path(reports_dir, "kpis", rds_filename)
|
rds_path <- file.path(reports_dir, rds_filename)
|
||||||
|
|
||||||
saveRDS(kpi_data, rds_path)
|
saveRDS(kpi_data, rds_path)
|
||||||
message(paste("✓ Field analysis RDS exported to:", rds_path))
|
message(paste("✓ Field analysis RDS exported to:", rds_path))
|
||||||
|
|
@ -683,8 +683,16 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
|
||||||
message(paste(" Found", length(per_field_files), "per-field mosaic file(s) for week", week_num))
|
message(paste(" Found", length(per_field_files), "per-field mosaic file(s) for week", week_num))
|
||||||
results_list <- list()
|
results_list <- list()
|
||||||
|
|
||||||
|
# Initialize progress bar
|
||||||
|
pb <- progress::progress_bar$new(
|
||||||
|
format = " [:bar] :percent | Field :current/:total",
|
||||||
|
total = length(per_field_files),
|
||||||
|
width = 60
|
||||||
|
)
|
||||||
|
|
||||||
# Process each field's mosaic
|
# Process each field's mosaic
|
||||||
for (field_idx in seq_along(per_field_files)) {
|
for (field_idx in seq_along(per_field_files)) {
|
||||||
|
pb$tick() # Update progress bar
|
||||||
field_name <- names(per_field_files)[field_idx]
|
field_name <- names(per_field_files)[field_idx]
|
||||||
field_file <- per_field_files[[field_name]]
|
field_file <- per_field_files[[field_name]]
|
||||||
|
|
||||||
|
|
@ -751,8 +759,6 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
|
||||||
stringsAsFactors = FALSE
|
stringsAsFactors = FALSE
|
||||||
)
|
)
|
||||||
|
|
||||||
message(paste(" Field", field_idx, "of", length(per_field_files), "processed"))
|
|
||||||
|
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
message(paste(" [ERROR] Field", field_name, ":", e$message))
|
message(paste(" [ERROR] Field", field_name, ":", e$message))
|
||||||
})
|
})
|
||||||
|
|
@ -773,7 +779,7 @@ load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_bo
|
||||||
mosaic_dir, reports_dir, report_date = Sys.Date()) {
|
mosaic_dir, reports_dir, report_date = Sys.Date()) {
|
||||||
|
|
||||||
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, week_num, year)
|
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, week_num, year)
|
||||||
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename)
|
rds_path <- file.path(reports_dir, "field_stats", rds_filename)
|
||||||
|
|
||||||
if (file.exists(rds_path)) {
|
if (file.exists(rds_path)) {
|
||||||
message(paste("Loading cached statistics from:", basename(rds_path)))
|
message(paste("Loading cached statistics from:", basename(rds_path)))
|
||||||
|
|
@ -783,7 +789,7 @@ load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_bo
|
||||||
message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num))
|
message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num))
|
||||||
stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year, mosaic_dir, report_date)
|
stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year, mosaic_dir, report_date)
|
||||||
|
|
||||||
output_dir <- file.path(reports_dir, "kpis", "field_stats")
|
output_dir <- file.path(reports_dir, "field_stats")
|
||||||
if (!dir.exists(output_dir)) {
|
if (!dir.exists(output_dir)) {
|
||||||
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
|
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
|
||||||
}
|
}
|
||||||
|
|
@ -812,7 +818,7 @@ load_historical_field_data <- function(project_dir, current_week, current_year,
|
||||||
target_year <- target$year
|
target_year <- target$year
|
||||||
|
|
||||||
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", target_week, target_year), ".csv")
|
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", target_week, target_year), ".csv")
|
||||||
csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename)
|
csv_path <- file.path(reports_dir, "field_analysis", csv_filename)
|
||||||
|
|
||||||
if (file.exists(csv_path)) {
|
if (file.exists(csv_path)) {
|
||||||
tryCatch({
|
tryCatch({
|
||||||
|
|
@ -867,7 +873,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
|
||||||
prev_field_analysis <- NULL
|
prev_field_analysis <- NULL
|
||||||
|
|
||||||
tryCatch({
|
tryCatch({
|
||||||
analysis_dir <- file.path(reports_dir, "kpis", "field_analysis")
|
analysis_dir <- file.path(reports_dir, "field_analysis")
|
||||||
if (dir.exists(analysis_dir)) {
|
if (dir.exists(analysis_dir)) {
|
||||||
analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE)
|
analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE)
|
||||||
if (length(analysis_files) > 0) {
|
if (length(analysis_files) > 0) {
|
||||||
|
|
@ -899,7 +905,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
|
||||||
}
|
}
|
||||||
|
|
||||||
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
|
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
|
||||||
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename)
|
rds_path <- file.path(reports_dir, "field_stats", rds_filename)
|
||||||
|
|
||||||
if (file.exists(rds_path)) {
|
if (file.exists(rds_path)) {
|
||||||
tryCatch({
|
tryCatch({
|
||||||
|
|
@ -920,7 +926,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
|
||||||
}
|
}
|
||||||
|
|
||||||
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
|
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
|
||||||
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename)
|
rds_path <- file.path(reports_dir, "field_stats", rds_filename)
|
||||||
|
|
||||||
if (file.exists(rds_path)) {
|
if (file.exists(rds_path)) {
|
||||||
tryCatch({
|
tryCatch({
|
||||||
|
|
|
||||||
|
|
@ -76,12 +76,19 @@
|
||||||
# python 00_download_8band_pu_optimized.py angata --date 2026-02-04 --resolution 3 --cleanup
|
# python 00_download_8band_pu_optimized.py angata --date 2026-02-04 --resolution 3 --cleanup
|
||||||
#
|
#
|
||||||
# COMMAND #2 - Batch Download (Multiple Dates):
|
# COMMAND #2 - Batch Download (Multiple Dates):
|
||||||
|
# For date ranges, you MUST use download_planet_missing_dates.py (not Script 00)
|
||||||
#
|
#
|
||||||
# python download_planet_missing_dates.py --start [START_DATE] --end [END_DATE] --project [PROJECT]
|
# python download_planet_missing_dates.py --start [START_DATE] --end [END_DATE] --project [PROJECT]
|
||||||
#
|
#
|
||||||
# Example:
|
# Example:
|
||||||
# python download_planet_missing_dates.py --start 2026-01-28 --end 2026-02-04 --project angata
|
# python download_planet_missing_dates.py --start 2026-01-28 --end 2026-02-04 --project angata
|
||||||
#
|
#
|
||||||
|
# IMPORTANT DISTINCTION:
|
||||||
|
# - Script 00 (00_download_8band_pu_optimized.py): Only supports --date flag for SINGLE dates
|
||||||
|
# - Script download_planet_missing_dates.py: Supports --start/--end for DATE RANGES
|
||||||
|
# Script 00 does NOT have --start/--end flags, despite what the documentation suggests
|
||||||
|
# Use the correct script for your use case!
|
||||||
|
#
|
||||||
# EXPECTED OUTPUT:
|
# EXPECTED OUTPUT:
|
||||||
# laravel_app/storage/app/angata/merged_tif/{YYYY-MM-DD}.tif (~150-300 MB per file)
|
# laravel_app/storage/app/angata/merged_tif/{YYYY-MM-DD}.tif (~150-300 MB per file)
|
||||||
#
|
#
|
||||||
|
|
@ -110,15 +117,27 @@
|
||||||
# - One TIFF per field per date (1185 fields × N dates in Angata)
|
# - One TIFF per field per date (1185 fields × N dates in Angata)
|
||||||
#
|
#
|
||||||
# PARAMETERS:
|
# PARAMETERS:
|
||||||
# PROJECT: angata, chemba, xinavane, esa, simba
|
# PROJECT: angata, chemba, xinavane, esa, simba (default: angata)
|
||||||
|
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-09, default: today)
|
||||||
|
# OFFSET: Days to look back (e.g., 7 for one week, default: 7)
|
||||||
#
|
#
|
||||||
# COMMAND:
|
# COMMAND #1 - Default (project only; end date = today, 7-day window):
|
||||||
#
|
#
|
||||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R [PROJECT]
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
|
||||||
#
|
#
|
||||||
# Example:
|
# Example:
|
||||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
|
||||||
#
|
#
|
||||||
|
# COMMAND #2 - Specific Date Range:
|
||||||
|
#
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R [PROJECT] [END_DATE] [OFFSET]
|
||||||
|
#
|
||||||
|
# Example (one week back from 2026-02-09):
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 7
|
||||||
|
#
|
||||||
|
# Example (two weeks back from 2026-02-09):
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 14
|
||||||
|
#
|
||||||
# EXPECTED OUTPUT:
|
# EXPECTED OUTPUT:
|
||||||
# Total files created: #fields × #dates (e.g., 1185 × 8 = 9,480 files)
|
# Total files created: #fields × #dates (e.g., 1185 × 8 = 9,480 files)
|
||||||
# Storage location: laravel_app/storage/app/angata/field_tiles/
|
# Storage location: laravel_app/storage/app/angata/field_tiles/
|
||||||
|
|
@ -157,7 +176,7 @@
|
||||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R [PROJECT] [END_DATE] [OFFSET]
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R [PROJECT] [END_DATE] [OFFSET]
|
||||||
#
|
#
|
||||||
# Example:
|
# Example:
|
||||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-09 7
|
||||||
#
|
#
|
||||||
# EXPECTED OUTPUT:
|
# EXPECTED OUTPUT:
|
||||||
# Total files created: #fields × #dates in both field_tiles_CI/ and daily_vals/
|
# Total files created: #fields × #dates in both field_tiles_CI/ and daily_vals/
|
||||||
|
|
@ -170,12 +189,6 @@
|
||||||
# Example: END_DATE=2026-02-04, OFFSET=7 → processes 2026-01-28 to 2026-02-04 (8 dates)
|
# Example: END_DATE=2026-02-04, OFFSET=7 → processes 2026-01-28 to 2026-02-04 (8 dates)
|
||||||
# To process all existing merged_tif files: Use large OFFSET (e.g., 365)
|
# To process all existing merged_tif files: Use large OFFSET (e.g., 365)
|
||||||
#
|
#
|
||||||
# TROUBLESHOOTING:
|
|
||||||
# ❌ If field_tiles_CI has fewer files than field_tiles:
|
|
||||||
# - Check if all field_tiles/{FIELD}/{DATE}.tif files exist
|
|
||||||
# - Script 20 may be skipping due to incomplete source files
|
|
||||||
# - Solution: Delete problematic files from field_tiles and re-run Script 10
|
|
||||||
#
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -208,7 +221,6 @@
|
||||||
# EXPECTED OUTPUT:
|
# EXPECTED OUTPUT:
|
||||||
# File: All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
# File: All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||||||
# Contains: Interpolated CI data for all fields (wide format)
|
# Contains: Interpolated CI data for all fields (wide format)
|
||||||
# Script execution time: 5-15 minutes
|
|
||||||
#
|
#
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
|
|
@ -243,7 +255,6 @@
|
||||||
# EXPECTED OUTPUT:
|
# EXPECTED OUTPUT:
|
||||||
# File: ci_data_for_python.csv (~5-10 MB)
|
# File: ci_data_for_python.csv (~5-10 MB)
|
||||||
# Rows: #fields × #dates (e.g., 1185 × 100 = ~118,500 rows)
|
# Rows: #fields × #dates (e.g., 1185 × 100 = ~118,500 rows)
|
||||||
# Script execution time: 1-2 minutes
|
|
||||||
#
|
#
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
|
|
@ -283,7 +294,6 @@
|
||||||
# EXPECTED OUTPUT:
|
# EXPECTED OUTPUT:
|
||||||
# File: {PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
|
# File: {PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
|
||||||
# Rows: One per field (e.g., 1185 rows for Angata)
|
# Rows: One per field (e.g., 1185 rows for Angata)
|
||||||
# Script execution time: 2-5 minutes
|
|
||||||
#
|
#
|
||||||
# NOTE: Skip this step if harvest.xlsx doesn't exist or is incomplete
|
# NOTE: Skip this step if harvest.xlsx doesn't exist or is incomplete
|
||||||
#
|
#
|
||||||
|
|
@ -319,9 +329,6 @@
|
||||||
# Example (one week window):
|
# Example (one week window):
|
||||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
|
||||||
#
|
#
|
||||||
# Example (two week window):
|
|
||||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 14 angata
|
|
||||||
#
|
|
||||||
# EXPECTED OUTPUT:
|
# EXPECTED OUTPUT:
|
||||||
# Location: laravel_app/storage/app/angata/weekly_mosaic/
|
# Location: laravel_app/storage/app/angata/weekly_mosaic/
|
||||||
# Directory structure: weekly_mosaic/{FIELD_ID}/week_06_2026.tif
|
# Directory structure: weekly_mosaic/{FIELD_ID}/week_06_2026.tif
|
||||||
|
|
@ -360,23 +367,23 @@
|
||||||
# - 21 columns with field-level KPIs and alerts
|
# - 21 columns with field-level KPIs and alerts
|
||||||
#
|
#
|
||||||
# PARAMETERS:
|
# PARAMETERS:
|
||||||
# PROJECT: angata, chemba, xinavane, esa, simba
|
# END_DATE: Report date in YYYY-MM-DD format (default: today)
|
||||||
# WEEK: ISO week number (1-53, optional - default current week)
|
# PROJECT: Project name: angata, chemba, xinavane, esa, simba (default: angata)
|
||||||
# YEAR: ISO year (optional - default current year)
|
# OFFSET: Days to look back for historical comparison (default: 7, for backward compatibility)
|
||||||
#
|
#
|
||||||
# COMMAND #1 - Current Week (Auto-detects from TODAY):
|
# COMMAND #1 - Current Date & Default Project (Auto-detects TODAY):
|
||||||
#
|
#
|
||||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [PROJECT]
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R
|
||||||
#
|
#
|
||||||
# Example:
|
# Example:
|
||||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R
|
||||||
#
|
#
|
||||||
# COMMAND #2 - Specific Week & Year:
|
# COMMAND #2 - Specific Date & Project:
|
||||||
#
|
#
|
||||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [PROJECT] [WEEK] [YEAR]
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [END_DATE] [PROJECT] [OFFSET]
|
||||||
#
|
#
|
||||||
# Example (Week 5, Year 2026):
|
# Example (2026-02-09, angata, 7-day lookback):
|
||||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata 5 2026
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-02-09 angata 7
|
||||||
#
|
#
|
||||||
# EXPECTED OUTPUT:
|
# EXPECTED OUTPUT:
|
||||||
# File: {PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
|
# File: {PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
|
||||||
|
|
@ -390,6 +397,11 @@
|
||||||
# tcch_forecast, growth_4wk, growth_8wk, trend_indicator, weed_presence,
|
# tcch_forecast, growth_4wk, growth_8wk, trend_indicator, weed_presence,
|
||||||
# spatial_cluster, alert_urgency, alert_type, alert_message, etc.
|
# spatial_cluster, alert_urgency, alert_type, alert_message, etc.
|
||||||
#
|
#
|
||||||
|
# CRITICAL DIFFERENCE - R80 Uses Different Argument Order Than R40:
|
||||||
|
# R40 order: [END_DATE] [OFFSET] [PROJECT]
|
||||||
|
# R80 order: [END_DATE] [PROJECT] [OFFSET]
|
||||||
|
# These are NOT the same! Ensure correct order for each script.
|
||||||
|
#
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -469,12 +481,15 @@
|
||||||
#
|
#
|
||||||
# Steps:
|
# Steps:
|
||||||
# 1. SKIP Python download (if you already have data)
|
# 1. SKIP Python download (if you already have data)
|
||||||
# 2. Run R10: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
|
# 2. Run R10: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-04 7
|
||||||
|
# (Argument order: [PROJECT] [END_DATE] [OFFSET])
|
||||||
# 3. Run R20: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7
|
# 3. Run R20: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7
|
||||||
# 4. Run R30: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
|
# 4. Run R30: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
|
||||||
# 5. Run R21: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata
|
# 5. Run R21: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata
|
||||||
# 6. Run R40: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
|
# 6. Run R40: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
|
||||||
# 7. Run R80: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata
|
# (Argument order: [END_DATE] [OFFSET] [PROJECT])
|
||||||
|
# 7. Run R80: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-02-04 angata 7
|
||||||
|
# (Argument order: [END_DATE] [PROJECT] [OFFSET] - DIFFERENT from R40!)
|
||||||
# 8. OPTIONAL R91 (Cane Supply) - Use automated runner:
|
# 8. OPTIONAL R91 (Cane Supply) - Use automated runner:
|
||||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R
|
||||||
# OR from R console:
|
# OR from R console:
|
||||||
|
|
@ -492,7 +507,9 @@
|
||||||
#
|
#
|
||||||
# Steps:
|
# Steps:
|
||||||
# 1. Python download (your entire date range)
|
# 1. Python download (your entire date range)
|
||||||
# 2. Run R10 once (processes all dates)
|
# 2. Run R10 with large offset to process all historical dates:
|
||||||
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-04 365
|
||||||
|
# (This processes from 2025-02-04 to 2026-02-04, covering entire year)
|
||||||
# 3. Run R20 with large offset to process all historical dates:
|
# 3. Run R20 with large offset to process all historical dates:
|
||||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 365
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 365
|
||||||
# (This processes from 2025-02-04 to 2026-02-04, covering entire year)
|
# (This processes from 2025-02-04 to 2026-02-04, covering entire year)
|
||||||
|
|
@ -611,3 +628,4 @@
|
||||||
# laravel_app/storage/app/{PROJECT}/output/SmartCane_Report_week{WW}_{YYYY}.docx
|
# laravel_app/storage/app/{PROJECT}/output/SmartCane_Report_week{WW}_{YYYY}.docx
|
||||||
#
|
#
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
Loading…
Reference in a new issue