separate scripts work for angata, except for the word doc.

This commit is contained in:
Timon 2026-02-09 20:34:11 +01:00
parent 3ee3f9e31c
commit bfd56ccd16
7 changed files with 250 additions and 156 deletions

View file

@ -19,13 +19,15 @@
# - Naming: Per-field GeoTIFFs organized by field and date # - Naming: Per-field GeoTIFFs organized by field and date
# #
# USAGE: # USAGE:
# Rscript 10_create_per_field_tiffs.R [project] # Rscript 10_create_per_field_tiffs.R [project] [end_date] [offset]
# #
# Example (Windows PowerShell): # Example (Windows PowerShell):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 7
# #
# PARAMETERS: # PARAMETERS:
# - project: Project name (character) - angata, chemba, xinavane, esa, simba # - project: Project name (character) - angata, chemba, xinavane, esa, simba (default: angata)
# - end_date: End date for processing (YYYY-MM-DD format, default: today)
# - offset: Days to look back (numeric, default: 7)
# #
# CLIENT TYPES: # CLIENT TYPES:
# - cane_supply (ANGATA): Yes - primary data organization script # - cane_supply (ANGATA): Yes - primary data organization script
@ -70,10 +72,16 @@ main <- function() {
# STEP 2: Parse command-line arguments FIRST (needed by parameters_project.R) # STEP 2: Parse command-line arguments FIRST (needed by parameters_project.R)
args <- commandArgs(trailingOnly = TRUE) args <- commandArgs(trailingOnly = TRUE)
project_dir <- if (length(args) == 0) "angata" else args[1]
# Make project_dir available to sourced files (they execute in global scope) # Parse arguments: [project] [end_date] [offset]
project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
end_date_arg <- if (length(args) >= 2 && args[2] != "") as.Date(args[2], format = "%Y-%m-%d") else Sys.Date()
offset_arg <- if (length(args) >= 3 && !is.na(as.numeric(args[3]))) as.numeric(args[3]) else 7
# Make variables available to sourced files (they execute in global scope)
assign("project_dir", project_dir, envir = .GlobalEnv) assign("project_dir", project_dir, envir = .GlobalEnv)
assign("end_date", end_date_arg, envir = .GlobalEnv)
assign("offset", offset_arg, envir = .GlobalEnv)
# STEP 3: SOURCE ALL UTILITY SCRIPTS (now that project_dir is defined) # STEP 3: SOURCE ALL UTILITY SCRIPTS (now that project_dir is defined)
# Load parameters_project.R (provides safe_log, setup_project_directories, etc.) # Load parameters_project.R (provides safe_log, setup_project_directories, etc.)
@ -97,7 +105,7 @@ main <- function() {
# Window: end_date - offset days to end_date # Window: end_date - offset days to end_date
# Always coerce to correct types to avoid issues with lingering/inherited values # Always coerce to correct types to avoid issues with lingering/inherited values
if (!exists("end_date") || !inherits(end_date, "Date")) { if (!exists("end_date") || !inherits(end_date, "Date")) {
end_date <- as.Date("2026-02-04") end_date <- Sys.Date()
safe_log(paste("Using default end_date:", end_date), "INFO") safe_log(paste("Using default end_date:", end_date), "INFO")
} }
if (!exists("offset") || !is.numeric(offset)) { if (!exists("offset") || !is.numeric(offset)) {

View file

@ -127,91 +127,96 @@ main <- function() {
} }
} }
# Process each DATE (OPTIMIZED: load TIFF once, process all fields) # Process each DATE (load merged TIFF once, extract all fields from it)
total_success <- 0 total_success <- 0
total_error <- 0 total_error <- 0
ci_results_by_date <- list()
for (date_str in dates_filter) { for (date_str in dates_filter) {
# Load the merged TIFF ONCE for this date # Load the MERGED TIFF (farm-wide) ONCE for this date
merged_tif_path <- file.path(setup$field_tiles_dir, fields[1], sprintf("%s.tif", date_str)) input_tif_merged <- file.path(setup$merged_tif_folder, sprintf("%s.tif", date_str))
# Find the actual TIFF path (it's in the first field that has it) if (!file.exists(input_tif_merged)) {
input_tif_full <- NULL safe_log(sprintf(" %s: merged_tif not found (skipping)", date_str))
for (field in fields) { total_error <<- total_error + 1
candidate_path <- file.path(setup$field_tiles_dir, field, sprintf("%s.tif", date_str))
if (file.exists(candidate_path)) {
input_tif_full <- candidate_path
break
}
}
if (is.null(input_tif_full)) {
safe_log(sprintf(" %s: Input TIFF not found (skipping)", date_str))
next next
} }
tryCatch({ tryCatch({
# Load TIFF ONCE # Load 4-band TIFF ONCE
raster_4band <- terra::rast(input_tif_full) raster_4band <- terra::rast(input_tif_merged)
safe_log(sprintf(" %s: Loaded merged TIFF, processing %d fields...", date_str, length(fields)))
# Calculate CI from 4-band
ci_raster <- calc_ci_from_raster(raster_4band)
# Create 5-band (R, G, B, NIR, CI)
five_band <- c(raster_4band, ci_raster)
# Now process all fields from this single merged TIFF
fields_processed_this_date <- 0
# Now process all fields from this single TIFF
for (field in fields) { for (field in fields) {
field_ci_path <- file.path(setup$field_tiles_ci_dir, field) field_ci_path <- file.path(setup$field_tiles_ci_dir, field)
field_daily_vals_path <- file.path(setup$daily_ci_vals_dir, field) field_daily_vals_path <- file.path(setup$daily_ci_vals_dir, field)
# Pre-create output directories
dir.create(field_ci_path, showWarnings = FALSE, recursive = TRUE)
dir.create(field_daily_vals_path, showWarnings = FALSE, recursive = TRUE)
output_tif <- file.path(field_ci_path, sprintf("%s.tif", date_str)) output_tif <- file.path(field_ci_path, sprintf("%s.tif", date_str))
output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str)) output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str))
# MODE 3: Skip if both outputs already exist # MODE 3: Skip if both outputs already exist
if (file.exists(output_tif) && file.exists(output_rds)) { if (file.exists(output_tif) && file.exists(output_rds)) {
next # Skip to next field next
} }
# MODE 2: Regeneration mode - RDS missing but CI TIFF exists # MODE 2: Regeneration mode - RDS missing but CI TIFF exists
if (file.exists(output_tif) && !file.exists(output_rds)) { if (file.exists(output_tif) && !file.exists(output_rds)) {
tryCatch({ tryCatch({
extract_rds_from_ci_tiff(output_tif, output_rds, field_boundaries_sf, field) extract_rds_from_ci_tiff(output_tif, output_rds, field_boundaries_sf, field)
total_success <<- total_success + 1 fields_processed_this_date <- fields_processed_this_date + 1
}, error = function(e) { }, error = function(e) {
total_error <<- total_error + 1 # Continue to next field
}) })
next next
} }
# MODE 1: Normal mode - calculate CI from 4-band input # MODE 1: Normal mode - crop 5-band TIFF to field boundary and save
tryCatch({ tryCatch({
# Calculate CI # Crop 5-band TIFF to field boundary
ci_raster <- calc_ci_from_raster(raster_4band) field_geom <- field_boundaries_sf %>% filter(field == !!field)
five_band_cropped <- terra::crop(five_band, field_geom, mask = TRUE)
# Create 5-band TIFF (R, G, B, NIR, CI) # Save 5-band field TIFF
five_band <- c(raster_4band, ci_raster) terra::writeRaster(five_band_cropped, output_tif, overwrite = TRUE)
# Save 5-band TIFF # Extract CI statistics by sub_field (from cropped CI raster)
terra::writeRaster(five_band, output_tif, overwrite = TRUE) ci_cropped <- five_band_cropped[[5]] # 5th band is CI
ci_stats <- extract_ci_by_subfield(ci_cropped, field_boundaries_sf, field)
# Extract CI statistics by sub_field
ci_stats <- extract_ci_by_subfield(ci_raster, field_boundaries_sf, field)
# Save RDS # Save RDS
if (!is.null(ci_stats) && nrow(ci_stats) > 0) { if (!is.null(ci_stats) && nrow(ci_stats) > 0) {
saveRDS(ci_stats, output_rds) saveRDS(ci_stats, output_rds)
# Store for daily aggregation
ci_stats_with_date <- ci_stats %>% mutate(date = date_str)
key <- sprintf("%s_%s", field, date_str)
ci_results_by_date[[key]] <<- ci_stats_with_date
} }
total_success <<- total_success + 1 fields_processed_this_date <- fields_processed_this_date + 1
}, error = function(e) { }, error = function(e) {
total_error <<- total_error + 1 # Error in individual field, continue to next
safe_log(sprintf(" Error processing field %s: %s", field, e$message), "WARNING")
}) })
} }
# Increment success counter if at least one field succeeded
if (fields_processed_this_date > 0) {
total_success <<- total_success + 1
safe_log(sprintf(" %s: Processed %d fields", date_str, fields_processed_this_date))
}
}, error = function(e) { }, error = function(e) {
safe_log(sprintf(" %s: ✗ Error loading TIFF - %s", date_str, e$message), "ERROR")
total_error <<- total_error + 1 total_error <<- total_error + 1
safe_log(sprintf(" %s: Error loading or processing merged TIFF - %s", date_str, e$message), "ERROR")
}) })
} }

View file

@ -4,13 +4,22 @@
# =================== # ===================
# Utility functions for growth model interpolation and manipulation. # Utility functions for growth model interpolation and manipulation.
# These functions support the creation of continuous growth models from point measurements. # These functions support the creation of continuous growth models from point measurements.
#
# PERFORMANCE OPTIMIZATION:
# - Parallel file I/O: Reads 450k+ RDS files using furrr::future_map_dfr()
# - Parallel field interpolation: Processes fields in parallel (1 core per ~100 fields)
# - Dynamic CPU detection: Allocates workers based on available cores
# - Windows compatible: Uses furrr with plan(multisession) for cross-platform support
#' Load and prepare the combined CI data (Per-Field Architecture) #' Load and prepare the combined CI data (Per-Field Architecture)
#' OPTIMIZE: Filters by date during load (skip unnecessary date ranges)
#' PARALLELIZE: Reads 450k+ RDS files in parallel using furrr::future_map_dfr()
#' #'
#' @param daily_vals_dir Directory containing per-field daily RDS files (Data/extracted_ci/daily_vals) #' @param daily_vals_dir Directory containing per-field daily RDS files (Data/extracted_ci/daily_vals)
#' @param harvesting_data Optional: Dataframe with season dates. If provided, only loads files within season ranges (major speedup)
#' @return Long-format dataframe with CI values by date and field #' @return Long-format dataframe with CI values by date and field
#' #'
load_combined_ci_data <- function(daily_vals_dir) { load_combined_ci_data <- function(daily_vals_dir, harvesting_data = NULL) {
# For per-field architecture: daily_vals_dir = Data/extracted_ci/daily_vals # For per-field architecture: daily_vals_dir = Data/extracted_ci/daily_vals
# Structure: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds # Structure: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
@ -20,6 +29,17 @@ load_combined_ci_data <- function(daily_vals_dir) {
safe_log(paste("Loading per-field CI data from:", daily_vals_dir)) safe_log(paste("Loading per-field CI data from:", daily_vals_dir))
# OPTIMIZATION: If harvest data provided, extract date range to avoid loading unnecessary dates
date_filter_min <- NULL
date_filter_max <- NULL
if (!is.null(harvesting_data) && nrow(harvesting_data) > 0) {
date_filter_min <- min(harvesting_data$season_start, na.rm = TRUE)
date_filter_max <- max(harvesting_data$season_end, na.rm = TRUE)
safe_log(sprintf("Pre-filtering by harvest season dates: %s to %s",
format(date_filter_min, "%Y-%m-%d"),
format(date_filter_max, "%Y-%m-%d")))
}
# Find all daily RDS files recursively (per-field structure) # Find all daily RDS files recursively (per-field structure)
# IMPORTANT: Only load files matching the per-field format YYYY-MM-DD.rds in field subdirectories # IMPORTANT: Only load files matching the per-field format YYYY-MM-DD.rds in field subdirectories
all_daily_files <- list.files( all_daily_files <- list.files(
@ -37,71 +57,87 @@ load_combined_ci_data <- function(daily_vals_dir) {
stop(paste("No per-field daily RDS files found in:", daily_vals_dir)) stop(paste("No per-field daily RDS files found in:", daily_vals_dir))
} }
safe_log(sprintf("Found %d per-field daily RDS files to load (filtered from legacy format)", length(all_daily_files))) safe_log(sprintf("Found %d per-field daily RDS files (filtered from legacy format)", length(all_daily_files)))
# Rebuild with explicit date and field tracking # OPTIMIZATION: Filter files by filename date BEFORE parallel loading
# File structure: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds # Skip files outside harvest season (can save 60-80% of I/O on large datasets)
combined_long <- data.frame() if (!is.null(date_filter_min) && !is.null(date_filter_max)) {
all_daily_files <- all_daily_files[
{
dates <- as.Date(tools::file_path_sans_ext(basename(all_daily_files)), format = "%Y-%m-%d")
!is.na(dates) & dates >= date_filter_min & dates <= date_filter_max
}
]
safe_log(sprintf("Filtered to %d files within harvest season date range", length(all_daily_files)))
}
for (file in all_daily_files) { # Set up parallel future plan (Windows PSOCK multisession; Mac/Linux can use forking)
tryCatch({ # Automatically detect available cores and limit to reasonable number
n_cores <- min(parallel::detectCores() - 1, 8) # Use max 8 cores (diminishing returns after)
future::plan(strategy = future::multisession, workers = n_cores)
safe_log(sprintf("Using %d parallel workers for file I/O", n_cores))
# Parallel file reading: future_map_dfr processes each file in parallel
# Returns combined dataframe directly (no need to rbind)
combined_long <- furrr::future_map_dfr(
all_daily_files,
.progress = TRUE,
.options = furrr::furrr_options(seed = TRUE),
function(file) {
# Extract date from filename: {YYYY-MM-DD}.rds # Extract date from filename: {YYYY-MM-DD}.rds
filename <- basename(file) filename <- basename(file)
date_str <- tools::file_path_sans_ext(filename) date_str <- tools::file_path_sans_ext(filename)
# Parse date - handle various formats # Parse date
parsed_date <- NA
if (nchar(date_str) == 10 && grepl("^\\d{4}-\\d{2}-\\d{2}$", date_str)) { if (nchar(date_str) == 10 && grepl("^\\d{4}-\\d{2}-\\d{2}$", date_str)) {
parsed_date <- as.Date(date_str, format = "%Y-%m-%d") parsed_date <- as.Date(date_str, format = "%Y-%m-%d")
} else { } else {
safe_log(sprintf("Warning: Could not parse date from filename: %s", filename), "WARNING") return(data.frame()) # Return empty dataframe if parse fails
next
} }
if (is.na(parsed_date)) { if (is.na(parsed_date)) {
safe_log(sprintf("Warning: Invalid date parsed from: %s", filename), "WARNING") return(data.frame())
next
} }
# Read RDS file # Read RDS file
rds_data <- tryCatch({ tryCatch({
readRDS(file) rds_data <- readRDS(file)
if (is.null(rds_data) || nrow(rds_data) == 0) {
return(data.frame())
}
# Add date column to the data
rds_data %>%
dplyr::mutate(Date = parsed_date)
}, error = function(e) { }, error = function(e) {
safe_log(sprintf("Error reading RDS file %s: %s", file, e$message), "WARNING") return(data.frame()) # Return empty dataframe on error
return(NULL)
}) })
}
if (is.null(rds_data) || nrow(rds_data) == 0) { )
next
} # Return to sequential processing to avoid nested parallelism
future::plan(future::sequential)
# Add date column to the data
rds_data <- rds_data %>%
dplyr::mutate(Date = parsed_date)
combined_long <- rbind(combined_long, rds_data)
}, error = function(e) {
safe_log(sprintf("Error processing file %s: %s", file, e$message), "WARNING")
})
}
if (nrow(combined_long) == 0) { if (nrow(combined_long) == 0) {
safe_log("Warning: No valid CI data loaded from daily files", "WARNING") safe_log("Warning: No valid CI data loaded from daily files", "WARNING")
return(data.frame()) return(data.frame())
} }
# OPTIMIZATION: Use data.table for fast filtering (10-20x faster than dplyr on large datasets)
# Reshape to long format using ci_mean as the main CI value # Reshape to long format using ci_mean as the main CI value
# Only keep rows where ci_mean has valid data DT <- data.table::as.data.table(combined_long)
pivot_stats_long <- combined_long %>% DT <- DT[, .(field, sub_field, ci_mean, Date)]
dplyr::select(field, sub_field, ci_mean, Date) %>% DT[, c("value") := list(as.numeric(ci_mean))]
dplyr::rename(value = ci_mean) %>% DT[, ci_mean := NULL]
dplyr::mutate(value = as.numeric(value)) %>%
# Keep rows even if ci_mean is NA or 0 (might be valid), but drop if Date is missing # Fast filtering without .distinct() (which is slow on large datasets)
tidyr::drop_na(Date) %>% # Keep rows where Date is valid, field/sub_field exist, and value is finite
dplyr::filter(!is.na(sub_field), !is.na(field)) %>% DT <- DT[!is.na(Date) & !is.na(sub_field) & !is.na(field) & is.finite(value)]
dplyr::filter(!is.infinite(value)) %>%
dplyr::distinct() # Convert back to tibble for compatibility with rest of pipeline
pivot_stats_long <- dplyr::as_tibble(DT)
safe_log(sprintf("Loaded %d CI data points from %d daily files", safe_log(sprintf("Loaded %d CI data points from %d daily files",
nrow(pivot_stats_long), length(all_daily_files))) nrow(pivot_stats_long), length(all_daily_files)))
@ -194,6 +230,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season,
} }
#' Generate interpolated CI data for all fields and seasons #' Generate interpolated CI data for all fields and seasons
#' PARALLELIZE: Processes fields in parallel using furrr::future_map_df()
#' #'
#' @param years Vector of years to process #' @param years Vector of years to process
#' @param harvesting_data Dataframe with harvesting information #' @param harvesting_data Dataframe with harvesting information
@ -227,40 +264,50 @@ generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
return(data.frame()) return(data.frame())
} }
# Initialize progress bar for this year
total_fields <<- total_fields + length(valid_sub_fields) total_fields <<- total_fields + length(valid_sub_fields)
pb <- txtProgressBar(min = 0, max = length(valid_sub_fields), style = 3, width = 50) safe_log(sprintf("Year %d: Processing %d fields in parallel", yr, length(valid_sub_fields)))
counter <- 0
# Extract and interpolate data for each valid field with progress bar # Set up parallel future plan for field interpolation
result_list <- list() # Allocate 1 core per ~100 fields (with minimum 2 cores)
for (field in valid_sub_fields) { n_cores <- max(2, min(parallel::detectCores() - 1, ceiling(length(valid_sub_fields) / 100)))
counter <- counter + 1 future::plan(strategy = future::multisession, workers = n_cores)
setTxtProgressBar(pb, counter)
# PARALLELIZE: Process all fields in parallel (each extracts & interpolates independently)
# Call with verbose=FALSE to suppress warnings during progress bar iteration result_list <- furrr::future_map(
field_result <- extract_CI_data(field, valid_sub_fields,
harvesting_data = harvesting_data, .progress = TRUE,
field_CI_data = ci_data, .options = furrr::furrr_options(seed = TRUE),
season = yr, function(field) {
verbose = FALSE) # Call with verbose=FALSE to suppress warnings during parallel iteration
extract_CI_data(field,
harvesting_data = harvesting_data,
field_CI_data = ci_data,
season = yr,
verbose = FALSE)
}
)
# Return to sequential processing
future::plan(future::sequential)
# Process results and tracking
for (i in seq_along(result_list)) {
field_result <- result_list[[i]]
field_name <- valid_sub_fields[i]
if (nrow(field_result) > 0) { if (nrow(field_result) > 0) {
successful_fields <<- successful_fields + 1 successful_fields <<- successful_fields + 1
result_list[[field]] <- field_result
} else { } else {
# Track failed field
failed_fields[[length(failed_fields) + 1]] <<- list( failed_fields[[length(failed_fields) + 1]] <<- list(
field = field, field = field_name,
season = yr, season = yr,
reason = "Unable to generate interpolated data" reason = "Unable to generate interpolated data"
) )
} }
} }
close(pb)
cat("\n") # Newline after progress bar
# Combine all results for this year # Combine all results for this year
result_list <- result_list[sapply(result_list, nrow) > 0] # Keep only non-empty
if (length(result_list) > 0) { if (length(result_list) > 0) {
purrr::list_rbind(result_list) purrr::list_rbind(result_list)
} else { } else {

View file

@ -60,6 +60,12 @@ suppressPackageStartupMessages({
library(tidyverse) # For dplyr (data wrangling, grouping, mutating) library(tidyverse) # For dplyr (data wrangling, grouping, mutating)
library(lubridate) # For date/time operations (date arithmetic, ISO week extraction) library(lubridate) # For date/time operations (date arithmetic, ISO week extraction)
library(readxl) # For reading harvest.xlsx (harvest dates for growth model phases) library(readxl) # For reading harvest.xlsx (harvest dates for growth model phases)
# Parallel processing (Windows PSOCK + Mac/Linux fork-safe)
library(future) # For setting up parallel execution plans
library(furrr) # For future_map_dfr (parallel file I/O and field processing)
library(parallel) # For detectCores (automatic CPU detection)
library(data.table) # For fast filtering on large datasets
}) })
# ============================================================================= # =============================================================================
@ -110,23 +116,24 @@ main <- function() {
safe_log("Starting CI growth model interpolation") safe_log("Starting CI growth model interpolation")
# Set up data directory paths
data_dir <- setup$data_dir
# Load and process the data # Load and process the data
tryCatch({ tryCatch({
# Load the combined CI data (created by Script 20 per-field) # Load the combined CI data (created by Script 20 per-field)
# Script 20 per-field outputs: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds # Script 20 per-field outputs: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
CI_data <- load_combined_ci_data(daily_vals_dir) # OPTIMIZATION: Pass harvest data to pre-filter by date range (skip unnecessary files)
# Load harvesting data from harvest.xlsx for growth model phase assignment
# Use the centralized load_harvesting_data() function which handles NA season_end values
# by setting them to Sys.Date() (field is still in current growing season)
data_dir <- setup$data_dir
harvesting_data <- tryCatch({ harvesting_data <- tryCatch({
load_harvesting_data(data_dir) load_harvesting_data(data_dir)
}, error = function(e) { }, error = function(e) {
safe_log(paste("Error loading harvest data:", e$message), "WARNING") safe_log(paste("Error loading harvest data for pre-filtering:", e$message), "WARNING")
NULL NULL
}) })
# Load CI data with date range pre-filtering
CI_data <- load_combined_ci_data(daily_vals_dir, harvesting_data = harvesting_data)
# Validate harvesting data # Validate harvesting data
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) { if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
safe_log("No harvesting data available", "ERROR") safe_log("No harvesting data available", "ERROR")

View file

@ -139,6 +139,7 @@ suppressPackageStartupMessages({
library(readr) # For reading CSV files (harvest predictions from Python) library(readr) # For reading CSV files (harvest predictions from Python)
library(readxl) # For reading harvest.xlsx (harvest dates for field mapping) library(readxl) # For reading harvest.xlsx (harvest dates for field mapping)
library(writexl) # For writing Excel outputs (KPI summary tables) library(writexl) # For writing Excel outputs (KPI summary tables)
library(progress) # For progress bars during field processing
# ML/Analysis (optional - only for harvest model inference) # ML/Analysis (optional - only for harvest model inference)
tryCatch({ tryCatch({
@ -573,8 +574,10 @@ main <- function() {
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed")) message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
# Load weekly harvest probabilities from script 31 (if available) # Load weekly harvest probabilities from script 31 (if available)
# Note: Script 31 saves to reports/kpis/field_stats/ (not field_level)
message("\n4. Loading harvest probabilities from script 31...") message("\n4. Loading harvest probabilities from script 31...")
harvest_prob_file <- file.path(reports_dir, "kpis", "field_stats", harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats")
harvest_prob_file <- file.path(harvest_prob_dir,
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, year)) sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, year))
message(paste(" Looking for:", harvest_prob_file)) message(paste(" Looking for:", harvest_prob_file))
@ -846,7 +849,7 @@ main <- function() {
total_acreage = sum(field_data$Acreage, na.rm = TRUE), total_acreage = sum(field_data$Acreage, na.rm = TRUE),
mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2), mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2),
median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2), median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2),
mean_cv = round(mean(field_data$CI_CV, na.rm = TRUE), 4), mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4),
week = current_week, week = current_week,
year = year, year = year,
date = as.character(end_date) date = as.character(end_date)

View file

@ -605,7 +605,7 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
NULL NULL
} }
output_subdir <- file.path(reports_dir, "kpis", "field_analysis") output_subdir <- file.path(reports_dir, "field_analysis")
if (!dir.exists(output_subdir)) { if (!dir.exists(output_subdir)) {
dir.create(output_subdir, recursive = TRUE) dir.create(output_subdir, recursive = TRUE)
} }
@ -637,7 +637,7 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
) )
rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds") rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds")
rds_path <- file.path(reports_dir, "kpis", rds_filename) rds_path <- file.path(reports_dir, rds_filename)
saveRDS(kpi_data, rds_path) saveRDS(kpi_data, rds_path)
message(paste("✓ Field analysis RDS exported to:", rds_path)) message(paste("✓ Field analysis RDS exported to:", rds_path))
@ -683,8 +683,16 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
message(paste(" Found", length(per_field_files), "per-field mosaic file(s) for week", week_num)) message(paste(" Found", length(per_field_files), "per-field mosaic file(s) for week", week_num))
results_list <- list() results_list <- list()
# Initialize progress bar
pb <- progress::progress_bar$new(
format = " [:bar] :percent | Field :current/:total",
total = length(per_field_files),
width = 60
)
# Process each field's mosaic # Process each field's mosaic
for (field_idx in seq_along(per_field_files)) { for (field_idx in seq_along(per_field_files)) {
pb$tick() # Update progress bar
field_name <- names(per_field_files)[field_idx] field_name <- names(per_field_files)[field_idx]
field_file <- per_field_files[[field_name]] field_file <- per_field_files[[field_name]]
@ -751,8 +759,6 @@ calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
stringsAsFactors = FALSE stringsAsFactors = FALSE
) )
message(paste(" Field", field_idx, "of", length(per_field_files), "processed"))
}, error = function(e) { }, error = function(e) {
message(paste(" [ERROR] Field", field_name, ":", e$message)) message(paste(" [ERROR] Field", field_name, ":", e$message))
}) })
@ -773,7 +779,7 @@ load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_bo
mosaic_dir, reports_dir, report_date = Sys.Date()) { mosaic_dir, reports_dir, report_date = Sys.Date()) {
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, week_num, year) rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, week_num, year)
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) rds_path <- file.path(reports_dir, "field_stats", rds_filename)
if (file.exists(rds_path)) { if (file.exists(rds_path)) {
message(paste("Loading cached statistics from:", basename(rds_path))) message(paste("Loading cached statistics from:", basename(rds_path)))
@ -783,7 +789,7 @@ load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_bo
message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num)) message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num))
stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year, mosaic_dir, report_date) stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year, mosaic_dir, report_date)
output_dir <- file.path(reports_dir, "kpis", "field_stats") output_dir <- file.path(reports_dir, "field_stats")
if (!dir.exists(output_dir)) { if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
} }
@ -812,7 +818,7 @@ load_historical_field_data <- function(project_dir, current_week, current_year,
target_year <- target$year target_year <- target$year
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", target_week, target_year), ".csv") csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", target_week, target_year), ".csv")
csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename) csv_path <- file.path(reports_dir, "field_analysis", csv_filename)
if (file.exists(csv_path)) { if (file.exists(csv_path)) {
tryCatch({ tryCatch({
@ -867,7 +873,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
prev_field_analysis <- NULL prev_field_analysis <- NULL
tryCatch({ tryCatch({
analysis_dir <- file.path(reports_dir, "kpis", "field_analysis") analysis_dir <- file.path(reports_dir, "field_analysis")
if (dir.exists(analysis_dir)) { if (dir.exists(analysis_dir)) {
analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE) analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE)
if (length(analysis_files) > 0) { if (length(analysis_files) > 0) {
@ -899,7 +905,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
} }
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year) rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) rds_path <- file.path(reports_dir, "field_stats", rds_filename)
if (file.exists(rds_path)) { if (file.exists(rds_path)) {
tryCatch({ tryCatch({
@ -920,7 +926,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
} }
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year) rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) rds_path <- file.path(reports_dir, "field_stats", rds_filename)
if (file.exists(rds_path)) { if (file.exists(rds_path)) {
tryCatch({ tryCatch({

View file

@ -76,12 +76,19 @@
# python 00_download_8band_pu_optimized.py angata --date 2026-02-04 --resolution 3 --cleanup # python 00_download_8band_pu_optimized.py angata --date 2026-02-04 --resolution 3 --cleanup
# #
# COMMAND #2 - Batch Download (Multiple Dates): # COMMAND #2 - Batch Download (Multiple Dates):
# For date ranges, MUST use download_planet_missing_dates.py (not Script 00)
# #
# python download_planet_missing_dates.py --start [START_DATE] --end [END_DATE] --project [PROJECT] # python download_planet_missing_dates.py --start [START_DATE] --end [END_DATE] --project [PROJECT]
# #
# Example: # Example:
# python download_planet_missing_dates.py --start 2026-01-28 --end 2026-02-04 --project angata # python download_planet_missing_dates.py --start 2026-01-28 --end 2026-02-04 --project angata
# #
# IMPORTANT DISTINCTION:
# - Script 00 (00_download_8band_pu_optimized.py): Only supports --date flag for SINGLE dates
# - Script download_planet_missing_dates.py: Supports --start/--end for DATE RANGES
# Script 00 does NOT have --start/--end flags, despite what the documentation may suggest
# Use the correct script for your use case!
#
# EXPECTED OUTPUT: # EXPECTED OUTPUT:
# laravel_app/storage/app/angata/merged_tif/{YYYY-MM-DD}.tif (~150-300 MB per file) # laravel_app/storage/app/angata/merged_tif/{YYYY-MM-DD}.tif (~150-300 MB per file)
# #
@ -110,15 +117,27 @@
# - One TIFF per field per date (1185 fields × N dates in Angata) # - One TIFF per field per date (1185 fields × N dates in Angata)
# #
# PARAMETERS: # PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba # PROJECT: angata, chemba, xinavane, esa, simba (default: angata)
# END_DATE: YYYY-MM-DD format (e.g., 2026-02-09, default: today)
# OFFSET: Days to look back (e.g., 7 for one week, default: 7)
# #
# COMMAND: # COMMAND #1 - Default (All dates, current date, 7-day window):
# #
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R [PROJECT] # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
# #
# Example: # Example:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
# #
# COMMAND #2 - Specific Date Range:
#
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R [PROJECT] [END_DATE] [OFFSET]
#
# Example (one week back from 2026-02-09):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 7
#
# Example (two weeks back from 2026-02-09):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-09 14
#
# EXPECTED OUTPUT: # EXPECTED OUTPUT:
# Total files created: #fields × #dates (e.g., 1185 × 8 = 9,480 files) # Total files created: #fields × #dates (e.g., 1185 × 8 = 9,480 files)
# Storage location: laravel_app/storage/app/angata/field_tiles/ # Storage location: laravel_app/storage/app/angata/field_tiles/
@ -157,7 +176,7 @@
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R [PROJECT] [END_DATE] [OFFSET] # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R [PROJECT] [END_DATE] [OFFSET]
# #
# Example: # Example:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7 # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-09 7
# #
# EXPECTED OUTPUT: # EXPECTED OUTPUT:
# Total files created: #fields × #dates in both field_tiles_CI/ and daily_vals/ # Total files created: #fields × #dates in both field_tiles_CI/ and daily_vals/
@ -170,12 +189,6 @@
# Example: END_DATE=2026-02-04, OFFSET=7 → processes 2026-01-28 to 2026-02-04 (8 dates) # Example: END_DATE=2026-02-04, OFFSET=7 → processes 2026-01-28 to 2026-02-04 (8 dates)
# To process all existing merged_tif files: Use large OFFSET (e.g., 365) # To process all existing merged_tif files: Use large OFFSET (e.g., 365)
# #
# TROUBLESHOOTING:
# ❌ If field_tiles_CI has fewer files than field_tiles:
# - Check if all field_tiles/{FIELD}/{DATE}.tif files exist
# - Script 20 may be skipping due to incomplete source files
# - Solution: Delete problematic files from field_tiles and re-run Script 10
#
# ============================================================================ # ============================================================================
@ -208,7 +221,6 @@
# EXPECTED OUTPUT: # EXPECTED OUTPUT:
# File: All_pivots_Cumulative_CI_quadrant_year_v2.rds # File: All_pivots_Cumulative_CI_quadrant_year_v2.rds
# Contains: Interpolated CI data for all fields (wide format) # Contains: Interpolated CI data for all fields (wide format)
# Script execution time: 5-15 minutes
# #
# ============================================================================ # ============================================================================
@ -243,7 +255,6 @@
# EXPECTED OUTPUT: # EXPECTED OUTPUT:
# File: ci_data_for_python.csv (~5-10 MB) # File: ci_data_for_python.csv (~5-10 MB)
# Rows: #fields × #dates (e.g., 1185 × 100 = ~118,500 rows) # Rows: #fields × #dates (e.g., 1185 × 100 = ~118,500 rows)
# Script execution time: 1-2 minutes
# #
# ============================================================================ # ============================================================================
@ -283,7 +294,6 @@
# EXPECTED OUTPUT: # EXPECTED OUTPUT:
# File: {PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv # File: {PROJECT}_harvest_imminent_week_{WW}_{YYYY}.csv
# Rows: One per field (e.g., 1185 rows for Angata) # Rows: One per field (e.g., 1185 rows for Angata)
# Script execution time: 2-5 minutes
# #
# NOTE: Skip this step if harvest.xlsx doesn't exist or is incomplete # NOTE: Skip this step if harvest.xlsx doesn't exist or is incomplete
# #
@ -319,9 +329,6 @@
# Example (one week window): # Example (one week window):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
# #
# Example (two week window):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 14 angata
#
# EXPECTED OUTPUT: # EXPECTED OUTPUT:
# Location: laravel_app/storage/app/angata/weekly_mosaic/ # Location: laravel_app/storage/app/angata/weekly_mosaic/
# Directory structure: weekly_mosaic/{FIELD_ID}/week_06_2026.tif # Directory structure: weekly_mosaic/{FIELD_ID}/week_06_2026.tif
@ -360,23 +367,23 @@
# - 21 columns with field-level KPIs and alerts # - 21 columns with field-level KPIs and alerts
# #
# PARAMETERS: # PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba # END_DATE: Report date in YYYY-MM-DD format (default: today)
# WEEK: ISO week number (1-53, optional - default current week) # PROJECT: Project name: angata, chemba, xinavane, esa, simba (default: angata)
# YEAR: ISO year (optional - default current year) # OFFSET: Days to look back for historical comparison (default: 7; parameter kept for backward compatibility)
# #
# COMMAND #1 - Current Week (Auto-detects from TODAY): # COMMAND #1 - Current Date & Default Project (Auto-detects TODAY):
# #
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [PROJECT] # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R
# #
# Example: # Example:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R
# #
# COMMAND #2 - Specific Week & Year: # COMMAND #2 - Specific Date & Project:
# #
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [PROJECT] [WEEK] [YEAR] # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [END_DATE] [PROJECT] [OFFSET]
# #
# Example (Week 5, Year 2026): # Example (2026-02-09, angata, 7-day lookback):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata 5 2026 # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-02-09 angata 7
# #
# EXPECTED OUTPUT: # EXPECTED OUTPUT:
# File: {PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx # File: {PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
@ -390,6 +397,11 @@
# tcch_forecast, growth_4wk, growth_8wk, trend_indicator, weed_presence, # tcch_forecast, growth_4wk, growth_8wk, trend_indicator, weed_presence,
# spatial_cluster, alert_urgency, alert_type, alert_message, etc. # spatial_cluster, alert_urgency, alert_type, alert_message, etc.
# #
# CRITICAL DIFFERENCE - R80 Uses Different Argument Order Than R40:
# R40 order: [END_DATE] [OFFSET] [PROJECT]
# R80 order: [END_DATE] [PROJECT] [OFFSET]
# These are NOT the same! Ensure correct order for each script.
#
# ============================================================================ # ============================================================================
@ -469,12 +481,15 @@
# #
# Steps: # Steps:
# 1. SKIP Python download (if you already have data) # 1. SKIP Python download (if you already have data)
# 2. Run R10: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata # 2. Run R10: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-04 7
# (Argument order: [PROJECT] [END_DATE] [OFFSET])
# 3. Run R20: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7 # 3. Run R20: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 7
# 4. Run R30: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata # 4. Run R30: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/30_interpolate_growth_model.R angata
# 5. Run R21: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata # 5. Run R21: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/21_convert_ci_rds_to_csv.R angata
# 6. Run R40: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata # 6. Run R40: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation_per_field.R 2026-02-04 7 angata
# 7. Run R80: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R angata # (Argument order: [END_DATE] [OFFSET] [PROJECT])
# 7. Run R80: & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-02-04 angata 7
# (Argument order: [END_DATE] [PROJECT] [OFFSET] - DIFFERENT from R40!)
# 8. OPTIONAL R91 (Cane Supply) - Use automated runner: # 8. OPTIONAL R91 (Cane Supply) - Use automated runner:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/run_full_pipeline.R
# OR from R console: # OR from R console:
@ -492,7 +507,9 @@
# #
# Steps: # Steps:
# 1. Python download (your entire date range) # 1. Python download (your entire date range)
# 2. Run R10 once (processes all dates) # 2. Run R10 with large offset to process all historical dates:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata 2026-02-04 365
# (This processes from 2025-02-04 to 2026-02-04, covering the entire year)
# 3. Run R20 with large offset to process all historical dates: # 3. Run R20 with large offset to process all historical dates:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 365 # & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/20_ci_extraction_per_field.R angata 2026-02-04 365
# (This processes from 2025-02-04 to 2026-02-04, covering entire year) # (This processes from 2025-02-04 to 2026-02-04, covering entire year)
@ -611,3 +628,4 @@
# laravel_app/storage/app/{PROJECT}/output/SmartCane_Report_week{WW}_{YYYY}.docx # laravel_app/storage/app/{PROJECT}/output/SmartCane_Report_week{WW}_{YYYY}.docx
# #
# ============================================================================== # ==============================================================================