works for angata and aura

This commit is contained in:
Timon 2026-01-29 16:04:04 +01:00
parent dd915f9b9e
commit 4445f72e6f
11 changed files with 2566 additions and 273 deletions

View file

@ -123,9 +123,72 @@ main <- function() {
# 4. Generate date list for processing
# ---------------------------------
dates <- date_list(end_date, 7)
dates <- date_list(end_date, offset)
log_message(paste("Processing data for week", dates$week, "of", dates$year))
# 4a. CHECK DAILY CI EXTRACTION - Skip dates that already have extracted files
# -------------------------------------------------------------------------
log_message("\n===== CHECKING DAILY CI EXTRACTION STATUS =====")
# Check which dates already have extracted CI files
already_extracted <- c()
missing_extraction <- c()
if (dir.exists(daily_CI_vals_dir)) {
existing_ci_files <- list.files(daily_CI_vals_dir, pattern = "^extracted_.*\\.rds$")
# Extract dates from filenames like "extracted_2025-12-31_quadrant.rds"
already_extracted <- sub("^extracted_(.+)_.*\\.rds$", "\\1", existing_ci_files)
}
# Find which dates in our processing range need extraction
missing_extraction <- dates$days_filter[!(dates$days_filter %in% already_extracted)]
cat(sprintf("[CI CHECK] Already extracted: %d dates\n", length(already_extracted)))
cat(sprintf("[CI CHECK] Need extraction: %d dates (from %s to %s)\n",
length(missing_extraction),
if(length(missing_extraction) > 0) min(missing_extraction) else "N/A",
if(length(missing_extraction) > 0) max(missing_extraction) else "N/A"))
# If any dates need extraction, we'll extract them
# If NO dates need extraction, we'll skip extraction but ALWAYS rebuild combined_CI_data.rds
skip_extraction <- (length(missing_extraction) == 0)
if (skip_extraction) {
log_message("✓ All dates in processing range already have extracted CI files - skipping extraction")
log_message("⚠ Will rebuild combined_CI_data.rds to ensure completeness")
}
# 4b. CHECK SOURCE DATA AVAILABILITY
# ---------------------------------------------------------------
# Verify that source data exists for dates we're going to extract
# If a date is missing from source, we'll skip it gracefully
log_message("\n===== CHECKING SOURCE DATA AVAILABILITY =====")
dates_with_source <- c()
dates_missing_source <- c()
if (!skip_extraction && length(missing_extraction) > 0) {
# Check which source dates are actually available
for (date_str in missing_extraction) {
# Look for the date in merged_tif directory
source_file_pattern <- sprintf("%s\\.tif$", date_str)
files_for_date <- list.files(planet_tif_folder, pattern = source_file_pattern)
if (length(files_for_date) > 0) {
dates_with_source <- c(dates_with_source, date_str)
} else {
dates_missing_source <- c(dates_missing_source, date_str)
}
}
cat(sprintf("[SOURCE CHECK] Dates with available source data: %d\n", length(dates_with_source)))
cat(sprintf("[SOURCE CHECK] Dates missing from source (will skip): %d\n", length(dates_missing_source)))
if (length(dates_missing_source) > 0) {
log_message(paste("⚠ Skipping extraction for missing source dates:", paste(dates_missing_source, collapse = ", ")))
}
}
# 5. Find and filter raster files by date - with grid size detection
# -----------------------------------
log_message("Searching for raster files")
@ -159,7 +222,9 @@ main <- function() {
assign("grid_size", grid_size, envir = .GlobalEnv)
tryCatch({
if (use_tiles) {
if (skip_extraction) {
log_message("\n===== SKIPPING CI EXTRACTION (all dates already processed) =====")
} else if (use_tiles) {
# Use tile-based processing
log_message(paste("Tile folder detected at", tile_folder))
log_message("Using tile-based CI extraction")
@ -196,6 +261,56 @@ main <- function() {
log_message(paste("Error in main processing:", e$message), level = "ERROR")
stop(e$message)
})
# 6. REBUILD combined_CI_data.rds from ALL daily extracted files
# -----------------------------------------------
# This ensures the combined file is complete and up-to-date
# even if extraction was skipped (because dates already existed)
# NOTE: Only rebuild if new dates were successfully extracted
# If all dates were missing from source, skip this step to avoid corrupting the file
log_message("\n===== HANDLING combined_CI_data.rds =====")
if (length(dates_with_source) == 0 && length(missing_extraction) > 0) {
# All missing dates had no source data - skip combined_CI_data.rds update
log_message("⚠ No new dates extracted (all source data missing) - skipping combined_CI_data.rds update")
} else if (skip_extraction) {
# All dates already extracted - optionally rebuild for consistency
log_message("✓ All dates already extracted - combined_CI_data.rds is up-to-date")
} else {
# New dates were extracted - rebuild combined_CI_data.rds from ALL daily files
log_message("Rebuilding combined_CI_data.rds from all daily extracted files...")
tryCatch({
if (!dir.exists(daily_CI_vals_dir)) {
log_message("Daily CI directory does not exist yet", level = "WARNING")
} else {
# List ALL daily CI files (not just new ones)
all_daily_files <- list.files(path = daily_CI_vals_dir, pattern = "^extracted_.*\\.rds$", full.names = TRUE)
if (length(all_daily_files) == 0) {
log_message("No daily CI files found to combine", level = "WARNING")
} else {
log_message(paste("Combining all", length(all_daily_files), "daily CI files into combined_CI_data.rds"))
# Load and combine ALL daily files (creates complete dataset)
combined_ci_path <- file.path(cumulative_CI_vals_dir, "combined_CI_data.rds")
combined_data <- all_daily_files %>%
purrr::map(readRDS) %>%
purrr::list_rbind() %>%
dplyr::group_by(sub_field)
# Save the rebuilt combined data
saveRDS(combined_data, combined_ci_path)
log_message(paste("✓ Rebuilt combined_CI_data.rds with", nrow(combined_data), "total rows"))
}
}
}, error = function(e) {
log_message(paste("⚠ Error rebuilding combined_CI_data.rds (will skip):", e$message), level = "WARNING")
log_message(" Note: This is OK - Script 30 will use growth model RDS instead", level = "WARNING")
})
}
}
if (sys.nframe() == 0) {

View file

@ -6,9 +6,10 @@
# to create a continuous growth model. It generates daily values and cumulative
# CI statistics for each field.
#
# Usage: Rscript interpolate_growth_model.R [project_dir]
# Usage: Rscript interpolate_growth_model.R [project_dir] [data_source]
# - project_dir: Project directory name (e.g., "chemba")
# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/30_interpolate_growth_model.R angata
# - data_source: (Optional) Data source directory - "merged_tif" (default), "merged_tif_8b"
# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/30_interpolate_growth_model.R angata merged_tif
# 1. Load required packages
# -----------------------
@ -34,8 +35,18 @@ main <- function() {
message("No project_dir provided. Using default:", project_dir)
}
# Make project_dir available globally so parameters_project.R can use it
# Get data_source from arguments (for consistency with Script 20)
if (length(args) >= 2 && !is.na(args[2])) {
data_source <- as.character(args[2])
} else if (exists("data_source", envir = .GlobalEnv)) {
data_source <- get("data_source", envir = .GlobalEnv)
} else {
data_source <- "merged_tif" # Default to 4-band (most common for existing projects)
}
# Make project_dir and data_source available globally so parameters_project.R can use it
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("data_source", data_source, envir = .GlobalEnv)
# Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction)
ci_extraction_script <- TRUE

View file

@ -5,17 +5,17 @@
# This script creates weekly mosaics from daily satellite imagery.
# It handles command-line arguments and initiates the mosaic creation process.
#
# Usage: Rscript mosaic_creation.R [end_date] [offset] [project_dir] [file_name] [use_tiles] [tile_size]
# Usage: Rscript mosaic_creation.R [end_date] [offset] [project_dir] [file_name] [data_source]
# - end_date: End date for processing (YYYY-MM-DD format)
# - offset: Number of days to look back from end_date
# - project_dir: Project directory name (e.g., "chemba")
# - file_name: Optional custom output file name
# - use_tiles: Use tile-based processing for memory efficiency (TRUE/FALSE, default: FALSE)
# - tile_size: Tile size in km (default: 5, only used if use_tiles=TRUE)
# - offset: Number of days to look back from end_date (typically 7 for one week)
# - project_dir: Project directory name (e.g., "aura", "angata", "chemba", "esa")
# - file_name: Optional custom output file name (leave empty "" to use default: week_WW_YYYY.tif)
# - data_source: Optional data source folder (e.g., "merged_tif" or "merged_tif_8b")
# If not provided, auto-detects which folder contains actual data
#
# Examples:
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation.R 2026-01-12 7 angata
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation.R 2026-01-12 7 aura
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation.R 2025-12-24 7 aura "" "merged_tif"
#
# 1. Load required packages
@ -77,20 +77,57 @@ main <- function() {
message("No offset provided. Using default:", offset, "days")
}
# Process data_source argument (optional, passed from pipeline)
# If provided, use it; otherwise auto-detect
data_source_from_args <- NULL
if (length(args) >= 5 && !is.na(args[5]) && nchar(args[5]) > 0) {
data_source_from_args <- as.character(args[5])
message("Data source explicitly provided via arguments: ", data_source_from_args)
}
# 3. Initialize project configuration
# --------------------------------
# Detect which data source directory exists (merged_tif or merged_tif_8b)
# IMPORTANT: Only consider a folder as valid if it contains actual files
laravel_storage <- here::here("laravel_app/storage/app", project_dir)
data_source <- if (dir.exists(file.path(laravel_storage, "merged_tif_8b"))) {
message("Detected data source: merged_tif_8b (8-band optimized)")
"merged_tif_8b"
} else if (dir.exists(file.path(laravel_storage, "merged_tif"))) {
message("Detected data source: merged_tif (legacy 4-band)")
"merged_tif"
} else {
message("Warning: No data source found. Using default: merged_tif_8b")
"merged_tif_8b"
# If data_source was explicitly provided from pipeline, validate it; otherwise auto-detect
if (!is.null(data_source_from_args)) {
# Use the provided data_source, but verify it has data
proposed_path <- file.path(laravel_storage, data_source_from_args)
has_data <- dir.exists(proposed_path) && length(list.files(proposed_path, pattern = "\\.tif$")) > 0
if (has_data) {
data_source <- data_source_from_args
message("✓ Using provided data source '", data_source, "' - contains files")
} else {
message("WARNING: Provided data source '", data_source_from_args, "' is empty or doesn't exist. Auto-detecting...")
data_source_from_args <- NULL # Fall through to auto-detection
}
}
# Auto-detect if no valid data_source was provided
if (is.null(data_source_from_args)) {
# Check merged_tif_8b - only if it exists AND contains files
merged_tif_8b_path <- file.path(laravel_storage, "merged_tif_8b")
has_8b_data <- dir.exists(merged_tif_8b_path) && length(list.files(merged_tif_8b_path, pattern = "\\.tif$")) > 0
# Check merged_tif - only if it exists AND contains files
merged_tif_path <- file.path(laravel_storage, "merged_tif")
has_legacy_data <- dir.exists(merged_tif_path) && length(list.files(merged_tif_path, pattern = "\\.tif$")) > 0
# Select data source based on what has actual data
data_source <- if (has_8b_data) {
message("Auto-detected data source: merged_tif_8b (8-band optimized) - contains files")
"merged_tif_8b"
} else if (has_legacy_data) {
message("Auto-detected data source: merged_tif (legacy 4-band) - contains files")
"merged_tif"
} else {
message("Warning: No valid data source found (both folders empty or missing). Using default: merged_tif")
"merged_tif"
}
}
# Set global data_source for parameters_project.R
@ -112,13 +149,30 @@ main <- function() {
})
})
# Extract path variables from global environment (set by parameters_project.R)
merged_final <- if (exists("merged_final", envir = .GlobalEnv)) {
get("merged_final", envir = .GlobalEnv)
} else {
file.path(laravel_storage, "merged_final_tif")
}
daily_vrt <- if (exists("daily_vrt", envir = .GlobalEnv)) {
get("daily_vrt", envir = .GlobalEnv)
} else {
file.path(laravel_storage, "Data", "vrt")
}
safe_log(paste("Using merged_final_tif directory:", merged_final))
safe_log(paste("Using daily VRT directory:", daily_vrt))
# 4. Generate date range for processing
# ---------------------------------
dates <- date_list(end_date, offset)
safe_log(paste("Processing data for week", dates$week, "of", dates$year))
# Create output filename
file_name_tif <- if (length(args) >= 4 && !is.na(args[4])) {
# Only use custom filename if explicitly provided (not empty string)
file_name_tif <- if (length(args) >= 4 && !is.na(args[4]) && nchar(args[4]) > 0) {
as.character(args[4])
} else {
paste0("week_", sprintf("%02d", dates$week), "_", dates$year, ".tif")

View file

@ -157,7 +157,9 @@ create_weekly_mosaic <- function(dates, field_boundaries, daily_vrt_dir,
#'
find_vrt_files <- function(vrt_directory, dates) {
# Get all VRT files in directory
vrt_files <- list.files(here::here(vrt_directory), full.names = TRUE)
# Note: vrt_directory is already a full/relative path from parameters_project.R
# Don't wrap it in here::here() again - that would create an incorrect path
vrt_files <- list.files(vrt_directory, full.names = TRUE)
if (length(vrt_files) == 0) {
warning("No VRT files found in directory: ", vrt_directory)

View file

@ -228,7 +228,7 @@ main <- function() {
message(strrep("=", 70))
message("Date:", format(end_date, "%Y-%m-%d"))
message("Project:", project_dir)
message("Mode: Per-field analysis (SC-64) + Farm-level KPIs")
message("Mode: Conditional KPI calculation based on client type")
message("")
# Load configuration and utilities
@ -240,6 +240,14 @@ main <- function() {
stop("Error loading parameters_project.R: ", e$message)
})
# DETERMINE CLIENT TYPE AND KPI CONFIGURATION
client_type <- get_client_type(project_dir)
client_config <- get_client_kpi_config(client_type)
message("Client Type:", client_type)
message("KPI Calculations:", paste(client_config$kpi_calculations, collapse = ", "))
message("Output Formats:", paste(client_config$outputs, collapse = ", "))
# Define paths for mosaic detection (used in PHASE 1)
base_project_path <- file.path("laravel_app", "storage", "app", project_dir)
weekly_tile_max <- file.path(base_project_path, "weekly_tile_max")
@ -251,18 +259,77 @@ main <- function() {
warning("30_growth_model_utils.R not found - yield prediction KPI will use placeholder data")
})
# ========== PER-FIELD ANALYSIS (SC-64) ==========
# CONDITIONAL EXECUTION BASED ON CLIENT TYPE
# ============================================
message("\n", strrep("-", 70))
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)")
message(strrep("-", 70))
if (client_config$script_90_compatible && "kpi_summary_tables" %in% client_config$outputs) {
# AURA WORKFLOW: Run 6 farm-level KPIs for Script 90 compatibility
message("\n", strrep("=", 70))
message("AURA WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)")
message(strrep("=", 70))
# Load 80_kpi_utils.R with all 6 KPI functions
# (Note: 80_kpi_utils.R includes all necessary helper functions from crop_messaging_utils.R)
tryCatch({
source(here("r_app", "80_kpi_utils.R"))
}, error = function(e) {
stop("Error loading 80_kpi_utils.R: ", e$message)
})
# Prepare inputs for KPI calculation
reports_dir_kpi <- file.path(base_project_path, "reports", "kpis")
if (!dir.exists(reports_dir_kpi)) {
dir.create(reports_dir_kpi, recursive = TRUE)
}
cumulative_CI_vals_dir <- file.path(base_project_path, "combined_CI")
# Load field boundaries and harvesting data (already loaded by parameters_project.R)
if (!exists("field_boundaries_sf")) {
stop("field_boundaries_sf not loaded. Check parameters_project.R initialization.")
}
if (!exists("harvesting_data")) {
warning("harvesting_data not loaded. TCH KPI will use placeholder values.")
harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric())
}
# Calculate all 6 KPIs
kpi_results <- calculate_all_kpis(
report_date = end_date,
output_dir = reports_dir_kpi,
field_boundaries_sf = field_boundaries_sf,
harvesting_data = harvesting_data,
cumulative_CI_vals_dir = cumulative_CI_vals_dir,
weekly_CI_mosaic = weekly_mosaic,
reports_dir = reports_dir_kpi,
project_dir = project_dir
)
cat("\n=== AURA KPI CALCULATION COMPLETE ===\n")
cat("Summary tables saved for Script 90 integration\n")
cat("Output directory:", reports_dir_kpi, "\n\n")
} else if (client_config$script_91_compatible && "field_analysis_excel" %in% client_config$outputs) {
# CANE_SUPPLY WORKFLOW: Run per-field analysis with phase assignment
message("\n", strrep("=", 70))
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
message(strrep("=", 70))
# Continue with existing per-field analysis code below
message("\n", strrep("-", 70))
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)")
message(strrep("-", 70))
current_week <- as.numeric(format(end_date, "%V")) # ISO week number (1-53)
year <- as.numeric(format(end_date, "%G")) # Use ISO week year (%G) to match Script 40's mosaic naming
current_week <- as.numeric(format(end_date, "%V"))
year <- as.numeric(format(end_date, "%Y"))
previous_week <- current_week - 1
if (previous_week < 1) previous_week <- 52
# Calculate previous week using authoritative helper (handles year boundaries correctly)
source("r_app/80_weekly_stats_utils.R") # Load helper function
previous_info <- calculate_target_week_and_year(current_week, year, offset_weeks = 1)
previous_week <- previous_info$week
previous_year <- previous_info$year
message(paste("Week:", current_week, "/ Year:", year))
message(paste("Week:", current_week, "/ Year (ISO):", year))
# Find mosaic files - support both tile-based AND single-file approaches
message("Finding mosaic files...")
@ -337,7 +404,7 @@ main <- function() {
# Only auto-generate on first call (not in recursive calls from within load_historical_field_data)
allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv)
historical_data <- load_historical_field_data(project_dir, current_week, reports_dir,
historical_data <- load_historical_field_data(project_dir, current_week, year, reports_dir,
num_weeks = num_weeks_to_load,
auto_generate = allow_auto_gen,
field_boundaries_sf = field_boundaries_sf)
@ -437,7 +504,7 @@ main <- function() {
prev_stats <- load_or_calculate_weekly_stats(
week_num = previous_week,
year = year,
year = previous_year,
project_dir = project_dir,
field_boundaries_sf = field_boundaries_sf,
mosaic_dir = tile_grid$mosaic_dir,
@ -780,6 +847,14 @@ main <- function() {
cat(" - Per-field data exported\n")
cat(" - Farm-level KPIs calculated\n")
cat(" - All outputs in:", reports_dir, "\n\n")
} else {
# Unknown client type - log warning and exit
warning(sprintf("Unknown client type: %s - no workflow matched", client_type))
cat("\n⚠ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "")
cat("Expected: 'agronomic_support' (aura) or 'cane_supply' (angata, etc.)\n")
cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n")
}
}
if (sys.nframe() == 0) {

1417
r_app/80_kpi_utils.R Normal file

File diff suppressed because it is too large Load diff

View file

@ -13,6 +13,41 @@
# Used by: 80_calculate_kpis.R, run_full_pipeline.R, other reporting scripts
# ============================================================================
# ============================================================================
# WEEK/YEAR CALCULATION HELPERS (Consistent across all scripts)
# ============================================================================
#' Calculate week and year for a given lookback offset
#' This function handles ISO 8601 week numbering with proper year wrapping
#' when crossing year boundaries (e.g., week 01/2026 -> week 52/2025)
#'
#' @param current_week ISO week number (1-53)
#' @param current_year ISO week year (from format(..., "%G"))
#' @param offset_weeks Number of weeks to go back (0 = current week, 1 = previous week, etc.)
#'
#' @return List with: week (ISO week number), year (ISO week year)
#'
#' @details
#' This is the authoritative week/year calculation function.
#' Used by:
#' - load_historical_field_data() - to find RDS/CSV files for 4-week lookback
#' - Script 80 main - to calculate previous week with year wrapping
#' - Any other script needing to walk backwards through weeks
#'
#' Example: Week 01/2026, offset=1 -> returns list(week=52, year=2025)
calculate_target_week_and_year <- function(current_week, current_year, offset_weeks = 0) {
  # Walk backwards offset_weeks ISO weeks from current_week / current_year.
  # Returns list(week = ISO week number, year = ISO week-year).
  target_week <- current_week - offset_weeks
  target_year <- current_year
  # Handle wrapping across year boundaries.
  # NOTE: an ISO 8601 week-year has either 52 or 53 weeks, so a fixed
  # "+ 52" is off by one whenever the previous year is a long year
  # (e.g. 2015, 2020 each have 53 ISO weeks).  December 28 always falls
  # in the LAST ISO week of its year, so format it with "%V" to get the
  # true number of weeks in the year we wrap into.
  while (target_week < 1) {
    target_year <- target_year - 1
    weeks_in_year <- as.integer(format(as.Date(sprintf("%d-12-28", target_year)), "%V"))
    target_week <- target_week + weeks_in_year
  }
  return(list(week = target_week, year = target_year))
}
# ============================================================================
# TILE-AWARE HELPER FUNCTIONS
# ============================================================================
@ -720,16 +755,19 @@ load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_bo
return(stats_df)
}
load_historical_field_data <- function(project_dir, current_week, reports_dir, num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL) {
load_historical_field_data <- function(project_dir, current_week, current_year, reports_dir, num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL) {
historical_data <- list()
loaded_weeks <- c()
missing_weeks <- c()
for (lookback in 0:(num_weeks - 1)) {
target_week <- current_week - lookback
if (target_week < 1) target_week <- target_week + 52
# Calculate target week and year using authoritative helper (handles year boundaries)
target <- calculate_target_week_and_year(current_week, current_year, offset_weeks = lookback)
target_week <- target$week
target_year <- target$year
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", target_week), ".csv")
# Construct filename with BOTH week and year (proper ISO format)
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", target_week, target_year), ".csv")
csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename)
if (file.exists(csv_path)) {
@ -737,15 +775,16 @@ load_historical_field_data <- function(project_dir, current_week, reports_dir, n
data <- read_csv(csv_path, show_col_types = FALSE)
historical_data[[lookback + 1]] <- list(
week = target_week,
year = target_year,
data = data
)
loaded_weeks <- c(loaded_weeks, target_week)
loaded_weeks <- c(loaded_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year)))
}, error = function(e) {
message(paste(" Warning: Could not load week", target_week, ":", e$message))
missing_weeks <<- c(missing_weeks, target_week)
message(paste(" Warning: Could not load week", target_week, "/", target_year, ":", e$message))
missing_weeks <<- c(missing_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year)))
})
} else {
missing_weeks <- c(missing_weeks, target_week)
missing_weeks <- c(missing_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year)))
}
}
@ -788,7 +827,7 @@ load_historical_field_data <- function(project_dir, current_week, reports_dir, n
expected_weeks <- data.frame(
date = target_dates,
week = as.numeric(format(target_dates, "%V")),
year = as.numeric(format(target_dates, "%Y")),
year = as.numeric(format(target_dates, "%G")),
stringsAsFactors = FALSE
)
expected_weeks <- unique(expected_weeks)

View file

@ -120,7 +120,8 @@ date_suffix <- format(as.Date(report_date), "%Y%m%d")
# Calculate current week from report_date using ISO 8601 week numbering
current_week <- as.numeric(format(as.Date(report_date), "%V"))
week_suffix <- paste0("week", current_week)
current_year <- as.numeric(format(as.Date(report_date), "%G"))
week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year)
# Candidate filenames we expect (exact and common variants)
expected_summary_names <- c(

View file

@ -112,15 +112,22 @@ safe_log(paste("report_date params:", params$report_date))
safe_log(paste("mail_day variable:", mail_day))
```
```{r load_kpi_data, message=FALSE, warning=FALSE, include=FALSE}
```{r load_kpi_data, message=FALSE, warning=FALSE}
## SIMPLE KPI LOADING - robust lookup with fallbacks
# First, show working directory for debugging
cat("\n=== DEBUG: R Markdown Working Directory ===\n")
cat(paste("getwd():", getwd(), "\n"))
cat(paste("Expected knit_dir from R Markdown:", knitr::opts_knit$get("root.dir"), "\n\n"))
# Primary expected directory inside the laravel storage
kpi_data_dir <- file.path("..", "laravel_app", "storage", "app", project_dir, "reports", "kpis")
date_suffix <- format(as.Date(report_date), "%Y%m%d")
# Calculate current week from report_date using ISO 8601 week numbering
current_week <- as.numeric(format(as.Date(report_date), "%V"))
week_suffix <- paste0("week", current_week)
current_year <- as.numeric(format(as.Date(report_date), "%G"))
week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year)
# Candidate filenames we expect (exact and common variants)
expected_summary_names <- c(
@ -171,30 +178,69 @@ if (is.null(summary_file) || is.null(field_details_file)) {
# Final checks and load with safe error messages
kpi_files_exist <- FALSE
# Debug: log what we're looking for
cat("\n=== KPI LOADING DEBUG ===\n")
cat(paste("Working directory:", getwd(), "\n"))
cat(paste("project_dir:", project_dir, "\n"))
cat(paste("report_date:", report_date, "\n"))
cat(paste("Calculated week:", current_week, "year:", current_year, "\n"))
cat(paste("Looking for KPI files in:", kpi_data_dir, "\n"))
cat(paste("Directory exists:", dir.exists(kpi_data_dir), "\n"))
cat(paste("Expected filenames to match:\n"))
for (name in expected_summary_names) cat(paste(" -", name, "\n"))
# List what's actually in the directory
if (dir.exists(kpi_data_dir)) {
actual_files <- list.files(kpi_data_dir, pattern = ".*\\.rds$", full.names = FALSE)
cat(paste("Files in KPI directory (", length(actual_files), " total):\n"))
for (f in actual_files) cat(paste(" -", f, "\n"))
} else {
cat("KPI directory does NOT exist!\n")
}
if (!is.null(summary_file) && file.exists(summary_file)) {
safe_log(paste("Loading KPI summary from:", summary_file))
summary_data <- tryCatch(readRDS(summary_file), error = function(e) { safe_log(paste("Failed to read summary RDS:", e$message), "ERROR"); NULL })
cat(paste("✓ FOUND summary file:", summary_file, "\n"))
cat(paste(" File size:", file.size(summary_file), "bytes\n"))
summary_data <- tryCatch(readRDS(summary_file), error = function(e) { cat(paste("ERROR reading RDS:", e$message, "\n")); NULL })
# Convert new RDS structure (field_analysis, field_analysis_summary) to legacy summary_tables format
if (!is.null(summary_data)) {
cat(paste(" ✓ Loaded successfully. Class:", class(summary_data), "\n"))
if (is.list(summary_data)) {
cat(paste(" List names:", paste(names(summary_data), collapse = ", "), "\n"))
}
# Convert new RDS structure (field_analysis, field_analysis_summary) to legacy summary_tables format
if (is.list(summary_data) && !is.data.frame(summary_data)) {
# New format from 09_field_analysis_weekly.R - just pass it through
if ("field_analysis_summary" %in% names(summary_data)) {
cat(" ✓ Found field_analysis_summary in list - will use this structure\n")
# Keep the new structure intact - combined_kpi_table will use it directly
kpi_files_exist <- TRUE
} else {
cat(" ! Old format detected\n")
# Old format - keep as is
summary_tables <- summary_data
if (!is.null(summary_tables)) kpi_files_exist <- TRUE
}
} else {
cat(" ! Data frame format\n")
# Data frame format or direct tables
summary_tables <- summary_data
if (!is.null(summary_tables)) kpi_files_exist <- TRUE
}
} else {
cat(" ✗ Failed to load RDS - summary_data is NULL\n")
}
} else {
safe_log(paste("KPI summary file not found. Searched:", paste(expected_summary_names, collapse=", ")), "WARNING")
safe_log(paste("Attempted directory:", kpi_data_dir), "WARNING")
# Try searching the entire workspace as fallback
files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "kpi.*\\.rds$", recursive = TRUE, full.names = TRUE)
safe_log(paste("Found", length(files), "KPI RDS files in workspace"), "INFO")
if (length(files) > 0) {
safe_log(paste("Available files:", paste(basename(files), collapse = ", ")), "INFO")
}
}
if (!is.null(field_details_file) && file.exists(field_details_file)) {
@ -508,125 +554,185 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
## Executive Summary - Key Performance Indicators
```{r combined_kpi_table, echo=FALSE}
```{r combined_kpi_table, echo=TRUE}
# Debug: check what variables exist
cat("\n=== DEBUG: combined_kpi_table chunk ===\n")
cat(paste("exists('summary_data'):", exists("summary_data"), "\n"))
cat(paste("exists('kpi_files_exist'):", exists("kpi_files_exist"), "\n"))
if (exists("kpi_files_exist")) {
cat(paste("kpi_files_exist value:", kpi_files_exist, "\n"))
}
if (exists("summary_data")) {
cat(paste("summary_data class:", class(summary_data), "\n"))
if (is.list(summary_data)) {
cat(paste("summary_data names:", paste(names(summary_data), collapse = ", "), "\n"))
cat(paste("has field_analysis_summary:", "field_analysis_summary" %in% names(summary_data), "\n"))
}
} else {
cat("summary_data DOES NOT EXIST in this chunk's environment!\n")
}
cat("\n")
# Create summary KPI table from field_analysis_summary data
# This shows: Phases, Triggers, Area Change, and Total Farm acreage
if (exists("summary_data") && !is.null(summary_data) && "field_analysis_summary" %in% names(summary_data)) {
field_analysis_summary <- summary_data$field_analysis_summary
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
# Load field analysis data
field_analysis_df <- summary_data$field_analysis
# If field_analysis_summary is NULL or doesn't exist, create it from field_analysis_df
if (is.null(summary_data$field_analysis_summary) || !("field_analysis_summary" %in% names(summary_data)) ||
!is.data.frame(summary_data$field_analysis_summary)) {
cat("\nNote: field_analysis_summary not in RDS, creating from field_analysis...\n")
# Create summary by aggregating by Status_Alert and Phase categories
# This groups fields by their phase and status to show distribution
phase_summary <- field_analysis_df %>%
filter(!is.na(Phase)) %>%
group_by(Phase) %>%
summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>%
mutate(Category = Phase) %>%
select(Category, Acreage)
# Try to create Status trigger summary - use Status_Alert if available, otherwise use empty
trigger_summary <- tryCatch({
field_analysis_df %>%
filter(!is.na(Status_Alert), Status_Alert != "") %>%
group_by(Status_Alert) %>%
summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>%
mutate(Category = Status_Alert) %>%
select(Category, Acreage)
}, error = function(e) {
cat("Could not create trigger summary:", e$message, "\n")
data.frame(Category = character(), Acreage = numeric())
})
# Combine into summary
field_analysis_summary <- bind_rows(phase_summary, trigger_summary)
cat(paste("Created summary with", nrow(field_analysis_summary), "category rows\n"))
} else {
# Use existing summary from RDS
field_analysis_summary <- summary_data$field_analysis_summary
}
# Phase names and trigger names to extract from summary
phase_names <- c("Germination", "Tillering", "Grand Growth", "Maturation", "Unknown Phase")
trigger_names <- c("Harvest Ready", "Strong Recovery", "Growth On Track", "Stress Detected",
"Germination Complete", "Germination Started", "No Active Trigger")
"Germination Complete", "Germination Started", "No Active Trigger",
"Ready for harvest-check", "Strong decline in crop health", "Harvested/bare")
# Extract phase distribution - match on category names directly
phase_rows <- field_analysis_summary %>%
filter(Category %in% phase_names) %>%
select(Category, Acreage) %>%
mutate(KPI_Group = "PHASE DISTRIBUTION", .before = 1)
if (!is.null(field_analysis_summary) && nrow(field_analysis_summary) > 0) {
phase_rows <- field_analysis_summary %>%
filter(Category %in% phase_names) %>%
select(Category, Acreage) %>%
mutate(KPI_Group = "PHASE DISTRIBUTION", .before = 1)
# Extract status triggers - match on category names directly
trigger_rows <- field_analysis_summary %>%
filter(Category %in% trigger_names) %>%
select(Category, Acreage) %>%
mutate(KPI_Group = "STATUS TRIGGERS", .before = 1)
# Extract status triggers - match on category names directly
trigger_rows <- field_analysis_summary %>%
filter(Category %in% trigger_names) %>%
select(Category, Acreage) %>%
mutate(KPI_Group = "STATUS TRIGGERS", .before = 1)
# Calculate area change from field_analysis data
total_acreage <- sum(field_analysis_df$Acreage, na.rm = TRUE)
# Parse Weekly_ci_change to determine improvement/decline
parse_ci_change <- function(change_str) {
if (is.na(change_str)) return(NA)
match <- regexpr("^[+-]?[0-9]+\\.?[0-9]*", change_str)
if (match > 0) {
return(as.numeric(substr(change_str, match, attr(match, "match.length"))))
# Calculate area change from field_analysis data
total_acreage <- sum(field_analysis_df$Acreage, na.rm = TRUE)
# Parse Weekly_ci_change to determine improvement/decline
parse_ci_change <- function(change_str) {
if (is.na(change_str)) return(NA)
match <- regexpr("^[+-]?[0-9]+\\.?[0-9]*", change_str)
if (match > 0) {
return(as.numeric(substr(change_str, match, attr(match, "match.length"))))
}
return(NA)
}
return(NA)
field_analysis_df$ci_change_numeric <- sapply(field_analysis_df$Weekly_ci_change, parse_ci_change)
improving_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric > 0.2], na.rm = TRUE)
declining_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric < -0.2], na.rm = TRUE)
stable_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric >= -0.2 &
field_analysis_df$ci_change_numeric <= 0.2], na.rm = TRUE)
improving_pct <- ifelse(total_acreage > 0, round(improving_acreage / total_acreage * 100, 1), 0)
declining_pct <- ifelse(total_acreage > 0, round(declining_acreage / total_acreage * 100, 1), 0)
stable_pct <- ifelse(total_acreage > 0, round(stable_acreage / total_acreage * 100, 1), 0)
# Calculate percentages for phases and triggers
phase_pcts <- phase_rows %>%
mutate(Percent = paste0(round(Acreage / total_acreage * 100, 1), "%"))
trigger_pcts <- trigger_rows %>%
mutate(Percent = paste0(round(Acreage / total_acreage * 100, 1), "%"))
area_change_rows <- data.frame(
KPI_Group = "AREA CHANGE",
Category = c("Improving", "Stable", "Declining"),
Acreage = c(round(improving_acreage, 2), round(stable_acreage, 2), round(declining_acreage, 2)),
Percent = c(paste0(improving_pct, "%"), paste0(stable_pct, "%"), paste0(declining_pct, "%")),
stringsAsFactors = FALSE
)
# Total farm row
total_row <- data.frame(
KPI_Group = "TOTAL FARM",
Category = "Total Acreage",
Acreage = round(total_acreage, 2),
Percent = "100%",
stringsAsFactors = FALSE
)
# Combine all rows with percentages for all
combined_df <- bind_rows(
phase_pcts,
trigger_pcts,
area_change_rows,
total_row
)
# Create grouped display where KPI_Group name appears only once per group
combined_df <- combined_df %>%
group_by(KPI_Group) %>%
mutate(
KPI_display = if_else(row_number() == 1, KPI_Group, "")
) %>%
ungroup() %>%
select(KPI_display, Category, Acreage, Percent)
# Render as flextable with merged cells
ft <- flextable(combined_df) %>%
set_header_labels(
KPI_display = "KPI Category",
Category = "Item",
Acreage = "Acreage",
Percent = "Percent"
) %>%
merge_v(j = "KPI_display") %>%
autofit()
# Add horizontal lines after each KPI group (at cumulative row positions)
# Calculate row positions: row 1 is header, then data rows follow
phase_count <- nrow(phase_rows)
trigger_count <- nrow(trigger_rows)
area_count <- nrow(area_change_rows)
# Add lines after phases, triggers, and area change groups (before totals)
if (phase_count > 0) {
ft <- ft %>% hline(i = phase_count, border = officer::fp_border(width = 1))
}
if (trigger_count > 0) {
ft <- ft %>% hline(i = phase_count + trigger_count, border = officer::fp_border(width = 1))
}
if (area_count > 0) {
ft <- ft %>% hline(i = phase_count + trigger_count + area_count, border = officer::fp_border(width = 1))
}
ft
} else {
cat("KPI summary data available but is empty/invalid.\n")
}
field_analysis_df$ci_change_numeric <- sapply(field_analysis_df$Weekly_ci_change, parse_ci_change)
improving_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric > 0.2], na.rm = TRUE)
declining_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric < -0.2], na.rm = TRUE)
stable_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric >= -0.2 &
field_analysis_df$ci_change_numeric <= 0.2], na.rm = TRUE)
improving_pct <- ifelse(total_acreage > 0, round(improving_acreage / total_acreage * 100, 1), 0)
declining_pct <- ifelse(total_acreage > 0, round(declining_acreage / total_acreage * 100, 1), 0)
stable_pct <- ifelse(total_acreage > 0, round(stable_acreage / total_acreage * 100, 1), 0)
# Calculate percentages for phases and triggers
phase_pcts <- phase_rows %>%
mutate(Percent = paste0(round(Acreage / total_acreage * 100, 1), "%"))
trigger_pcts <- trigger_rows %>%
mutate(Percent = paste0(round(Acreage / total_acreage * 100, 1), "%"))
area_change_rows <- data.frame(
KPI_Group = "AREA CHANGE",
Category = c("Improving", "Stable", "Declining"),
Acreage = c(round(improving_acreage, 2), round(stable_acreage, 2), round(declining_acreage, 2)),
Percent = c(paste0(improving_pct, "%"), paste0(stable_pct, "%"), paste0(declining_pct, "%")),
stringsAsFactors = FALSE
)
# Total farm row
total_row <- data.frame(
KPI_Group = "TOTAL FARM",
Category = "Total Acreage",
Acreage = round(total_acreage, 2),
Percent = "100%",
stringsAsFactors = FALSE
)
# Combine all rows with percentages for all
combined_df <- bind_rows(
phase_pcts,
trigger_pcts,
area_change_rows,
total_row
)
# Create grouped display where KPI_Group name appears only once per group
combined_df <- combined_df %>%
group_by(KPI_Group) %>%
mutate(
KPI_display = if_else(row_number() == 1, KPI_Group, "")
) %>%
ungroup() %>%
select(KPI_display, Category, Acreage, Percent)
# Render as flextable with merged cells
ft <- flextable(combined_df) %>%
set_header_labels(
KPI_display = "KPI Category",
Category = "Item",
Acreage = "Acreage",
Percent = "Percent"
) %>%
merge_v(j = "KPI_display") %>%
autofit()
# Add horizontal lines after each KPI group (at cumulative row positions)
# Calculate row positions: row 1 is header, then data rows follow
phase_count <- nrow(phase_rows)
trigger_count <- nrow(trigger_rows)
area_count <- nrow(area_change_rows)
# Add lines after phases, triggers, and area change groups (before totals)
if (phase_count > 0) {
ft <- ft %>% hline(i = phase_count, border = officer::fp_border(width = 1))
}
if (trigger_count > 0) {
ft <- ft %>% hline(i = phase_count + trigger_count, border = officer::fp_border(width = 1))
}
if (area_count > 0) {
ft <- ft %>% hline(i = phase_count + trigger_count + area_count, border = officer::fp_border(width = 1))
}
ft
} else {
cat("KPI summary data not available.\n")
}
@ -679,11 +785,11 @@ if (cloud_coverage_available && !is.null(per_field_cloud_coverage)) {
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
field_analysis_table <- summary_data$field_analysis
# Extract fields with status triggers (non-null)
# Extract fields with status alerts (non-null) - use Status_Alert column (not Status_trigger)
alerts_data <- field_analysis_table %>%
filter(!is.na(Status_trigger), Status_trigger != "") %>%
select(Field_id, Status_trigger) %>%
rename(Field = Field_id, Alert = Status_trigger)
filter(!is.na(Status_Alert), Status_Alert != "") %>%
select(Field_id, Status_Alert) %>%
rename(Field = Field_id, Alert = Status_Alert)
if (nrow(alerts_data) > 0) {
# Format alert messages for display
@ -717,21 +823,50 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
# The report renders KPI tables and field summaries from that data
```
```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE}
# Load field boundaries from parameters
```{r load_field_boundaries, message=FALSE, warning=FALSE, include=FALSE}
# Load field boundaries from parameters (with fallback if geometry is invalid)
field_boundaries_loaded <- FALSE
tryCatch({
AllPivots0 <- field_boundaries_sf %>%
dplyr::filter(!is.na(field), !is.na(sub_field)) # Filter out NA field names
safe_log("Successfully loaded field boundaries")
# Prepare merged field list for use in summaries
AllPivots_merged <- AllPivots0 %>%
dplyr::filter(!is.na(field), !is.na(sub_field)) %>% # Filter out NA field names
dplyr::group_by(field) %>%
dplyr::summarise(.groups = 'drop')
# Try to load and validate the field boundaries
if (exists("field_boundaries_sf") && !is.null(field_boundaries_sf)) {
# Try to filter - this will trigger geometry validation
AllPivots0 <- field_boundaries_sf %>%
dplyr::filter(!is.na(field), !is.na(sub_field))
# If successful, also create merged field list
AllPivots_merged <- AllPivots0 %>%
dplyr::filter(!is.na(field), !is.na(sub_field)) %>%
dplyr::group_by(field) %>%
dplyr::summarise(.groups = 'drop')
field_boundaries_loaded <- TRUE
safe_log("✓ Successfully loaded field boundaries")
} else {
safe_log("⚠ field_boundaries_sf not found in environment")
}
}, error = function(e) {
stop("Error loading field boundaries: ", e$message)
# If geometry is invalid, try to fix or skip
safe_log(paste("⚠ Error loading field boundaries:", e$message), "WARNING")
safe_log("Attempting to fix invalid geometries using st_make_valid()...", "WARNING")
tryCatch({
# Try to repair invalid geometries
field_boundaries_sf_fixed <<- sf::st_make_valid(field_boundaries_sf)
AllPivots0 <<- field_boundaries_sf_fixed %>%
dplyr::filter(!is.na(field), !is.na(sub_field))
AllPivots_merged <<- AllPivots0 %>%
dplyr::filter(!is.na(field), !is.na(sub_field)) %>%
dplyr::group_by(field) %>%
dplyr::summarise(.groups = 'drop')
field_boundaries_loaded <<- TRUE
safe_log("✓ Fixed invalid geometries and loaded field boundaries")
}, error = function(e2) {
safe_log(paste("⚠ Could not repair geometries:", e2$message), "WARNING")
safe_log("Continuing without field boundary data", "WARNING")
})
})
```
\newpage

View file

@ -43,6 +43,75 @@ get_client_type <- function(project_name) {
return(client_type)
}
# 2b. Client-specific KPI configurations
# ----------------------------------------
# Defines which KPIs and outputs are required for each client type
# This enables Script 80 to conditionally calculate only relevant metrics
#
# Structure:
# - kpi_calculations: Vector of KPI types to calculate for this client
# - outputs: Vector of output formats to generate (determines RDS/Excel naming)
# - requires_harvest_data: Boolean - whether Script 31 harvest predictions are needed
# - script_90_compatible: Boolean - whether output should match Script 90 expectations
# - script_91_compatible: Boolean - whether output should match Script 91 expectations
#
CLIENT_TYPE_CONFIGS <- list(
# Aura (agronomic_support): Farm-level KPI summaries for weekly reports to agronomists
"agronomic_support" = list(
client_type = "agronomic_support",
description = "Farm-level KPI summaries for agronomic decision support",
kpi_calculations = c(
"field_uniformity",
"area_change",
"tch_forecasted",
"growth_decline",
"weed_presence",
"gap_filling"
),
outputs = c(
"kpi_summary_tables", # Summary statistics for Script 90 report front page
"field_details" # Detailed field table for Script 90 report end section
),
requires_harvest_data = FALSE, # Script 31 predictions not used
script_90_compatible = TRUE, # Output format matches Script 90 expectations
script_91_compatible = FALSE
),
# Cane Supply (cane_supply): Per-field analysis with harvest timing prediction
"cane_supply" = list(
client_type = "cane_supply",
description = "Per-field analysis with harvest prediction and phase assignment",
kpi_calculations = c(
"per_field_analysis", # Use 80_weekly_stats_utils.R for field-level statistics
"phase_assignment", # Assign growth phases (Germination, Tillering, Grand Growth, Maturation)
"harvest_prediction", # Include Script 31 harvest age predictions if available
"status_triggers" # Calculate field status (Normal, Monitor, Alert, Urgent)
),
outputs = c(
"field_analysis_excel", # Excel file with per-field metrics
"field_analysis_summary" # Summary RDS for Script 91 report
),
requires_harvest_data = TRUE, # harvest.xlsx is required for phase assignment
script_90_compatible = FALSE,
script_91_compatible = TRUE
)
)
#' Get KPI configuration for a specific client type
#' @param client_type Character string of client type (e.g., "agronomic_support", "cane_supply")
#' @return List containing configuration for that client type
get_client_kpi_config <- function(client_type) {
config <- CLIENT_TYPE_CONFIGS[[client_type]]
if (is.null(config)) {
warning(sprintf("Client type '%s' not in CLIENT_TYPE_CONFIGS - defaulting to 'cane_supply'", client_type))
return(CLIENT_TYPE_CONFIGS[["cane_supply"]])
}
return(config)
}
# 3. Smart detection for tile-based vs single-file mosaic approach
# ----------------------------------------------------------------
detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) {

View file

@ -30,9 +30,8 @@
# ==============================================================================
# *** EDIT THESE VARIABLES ***
end_date <- as.Date("2025-12-31") # or specify: as.Date("2026-01-27") , Sys.Date()
offset <- 7 # days to look back
project_dir <- "aura" # project name: "esa", "aura", "angata", "chemba"
end_date <- as.Date("2026-01-07") # or specify: as.Date("2026-01-27") , Sys.Date()
project_dir <- "angata" # project name: "esa", "aura", "angata", "chemba"
data_source <- if (project_dir == "angata") "merged_tif_8b" else "merged_tif"
force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist
# ***************************
@ -42,12 +41,233 @@ source("r_app/parameters_project.R")
client_type <- get_client_type(project_dir)
cat(sprintf("\nProject: %s → Client Type: %s\n", project_dir, client_type))
# ==============================================================================
# DETECT WHICH DATA SOURCE IS AVAILABLE (merged_tif vs merged_tif_8b)
# ==============================================================================
# Check which merged_tif folder actually has files for this project
laravel_storage_dir <- file.path("laravel_app", "storage", "app", project_dir)
merged_tif_path <- file.path(laravel_storage_dir, "merged_tif")
merged_tif_8b_path <- file.path(laravel_storage_dir, "merged_tif_8b")
data_source_used <- "merged_tif_8b" # Default
if (dir.exists(merged_tif_path)) {
tif_files <- list.files(merged_tif_path, pattern = "\\.tif$")
if (length(tif_files) > 0) {
data_source_used <- "merged_tif"
cat(sprintf("[INFO] Detected data source: %s (%d TIF files)\n", data_source_used, length(tif_files)))
} else if (dir.exists(merged_tif_8b_path)) {
tif_files_8b <- list.files(merged_tif_8b_path, pattern = "\\.tif$")
if (length(tif_files_8b) > 0) {
data_source_used <- "merged_tif_8b"
cat(sprintf("[INFO] Detected data source: %s (%d TIF files)\n", data_source_used, length(tif_files_8b)))
}
}
} else if (dir.exists(merged_tif_8b_path)) {
tif_files_8b <- list.files(merged_tif_8b_path, pattern = "\\.tif$")
if (length(tif_files_8b) > 0) {
data_source_used <- "merged_tif_8b"
cat(sprintf("[INFO] Detected data source: %s (%d TIF files)\n", data_source_used, length(tif_files_8b)))
}
}
# ==============================================================================
# DETERMINE REPORTING WINDOW (auto-calculated based on KPI requirements)
# ==============================================================================
# Script 80 (KPIs) needs N weeks of historical data for trend analysis and reporting
# We calculate this automatically based on client type
reporting_weeks_needed <- 4 # Default: KPIs need current week + 3 weeks history for trends
offset <- (reporting_weeks_needed - 1) * 7 # Convert weeks to days
cat(sprintf("\n[INFO] Reporting window: %d weeks (%d days of data)\n", reporting_weeks_needed, offset))
cat(sprintf(" Running week: %02d / %d\n", as.numeric(format(end_date, "%V")), as.numeric(format(end_date, "%Y"))))
cat(sprintf(" Date range: %s to %s\n", format(end_date - offset, "%Y-%m-%d"), format(end_date, "%Y-%m-%d")))
# Format dates
end_date_str <- format(as.Date(end_date), "%Y-%m-%d")
# Track success of pipeline
pipeline_success <- TRUE
# ==============================================================================
# EARLY PREREQ CHECK: Verify mosaic requirements BEFORE any downloads
# ==============================================================================
# This determines if we need more weeks of data than the initial reporting window
# Run this BEFORE downloads so we can download ONLY missing dates upfront
cat("\n========== EARLY CHECK: MOSAIC REQUIREMENTS FOR REPORTING WINDOW ==========\n")
# Detect mosaic mode early (before full checking section)
detect_mosaic_mode_early <- function(project_dir) {
weekly_tile_max <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max")
if (dir.exists(weekly_tile_max)) {
subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE)
grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
if (length(grid_patterns) > 0) {
return("tiled")
}
}
weekly_mosaic <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic")
if (dir.exists(weekly_mosaic)) {
files <- list.files(weekly_mosaic, pattern = "^week_.*\\.tif$")
if (length(files) > 0) {
return("single-file")
}
}
return("unknown")
}
mosaic_mode <- detect_mosaic_mode_early(project_dir)
# Check what mosaics we NEED
weeks_needed <- data.frame()
for (weeks_back in 0:(reporting_weeks_needed - 1)) {
check_date <- end_date - (weeks_back * 7)
week_num <- as.numeric(format(check_date, "%V"))
year_num <- as.numeric(format(check_date, "%G")) # %G = ISO week year (not calendar year %Y)
weeks_needed <- rbind(weeks_needed, data.frame(week = week_num, year = year_num, date = check_date))
}
missing_weeks_dates <- c() # Will store the earliest date of missing weeks
earliest_missing_date <- end_date # Start with end_date, go back if needed
missing_weeks <- data.frame() # Track ALL missing weeks for later processing by Script 40
for (i in 1:nrow(weeks_needed)) {
week_num <- weeks_needed[i, "week"]
year_num <- weeks_needed[i, "year"]
check_date <- weeks_needed[i, "date"]
# Pattern must be flexible to match both:
# - Single-file: week_51_2025.tif
# - Tiled: week_51_2025_01.tif, week_51_2025_02.tif, etc.
week_pattern_check <- sprintf("week_%02d_%d", week_num, year_num)
files_this_week <- c()
if (mosaic_mode == "tiled") {
mosaic_dir_check <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5")
if (dir.exists(mosaic_dir_check)) {
files_this_week <- list.files(mosaic_dir_check, pattern = week_pattern_check)
}
} else if (mosaic_mode == "single-file") {
mosaic_dir_check <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic")
if (dir.exists(mosaic_dir_check)) {
files_this_week <- list.files(mosaic_dir_check, pattern = week_pattern_check)
}
}
cat(sprintf(" Week %02d/%d (%s): %s\n", week_num, year_num, format(check_date, "%Y-%m-%d"),
if(length(files_this_week) > 0) "✓ EXISTS" else "✗ MISSING"))
# If week is missing, track its date range for downloading/processing
if (length(files_this_week) == 0) {
week_start <- check_date - 6 # Monday of that week
if (week_start < earliest_missing_date) {
earliest_missing_date <- week_start
}
# Add to missing_weeks dataframe - Script 40 will process these
missing_weeks <- rbind(missing_weeks, data.frame(week = week_num, year = year_num, week_end_date = check_date))
}
}
# Calculate dynamic offset for preprocessing: only process from earliest missing week to end_date
if (earliest_missing_date < end_date) {
cat(sprintf("\n[INFO] Missing week(s) detected - need to fill from %s onwards\n", format(earliest_missing_date, "%Y-%m-%d")))
# Adjust offset to cover only the gap (from earliest missing week to end_date)
dynamic_offset <- as.numeric(end_date - earliest_missing_date)
cat(sprintf("[INFO] Will download/process ONLY missing dates: %d days (from %s to %s)\n",
dynamic_offset, format(earliest_missing_date, "%Y-%m-%d"), format(end_date, "%Y-%m-%d")))
# Use dynamic offset for data generation scripts (10, 20, 30, 40)
# But Script 80 still uses full reporting_weeks_needed offset for KPI calculations
data_generation_offset <- dynamic_offset
force_data_generation <- TRUE
} else {
cat("\n[INFO] ✓ All required mosaics exist - using normal reporting window\n")
data_generation_offset <- offset # Use default reporting window offset
force_data_generation <- FALSE
}
# ==============================================================================
# CHECK KPI REQUIREMENTS FOR REPORTING WINDOW
# ==============================================================================
# Scripts 90 (Word report) and 91 (Excel report) require KPIs for full reporting window
# Script 80 ALWAYS runs and will CALCULATE missing KPIs, so this is just for visibility
cat("\n========== KPI REQUIREMENT CHECK ==========\n")
cat(sprintf("KPIs needed for reporting: %d weeks (current week + %d weeks history)\n",
reporting_weeks_needed, reporting_weeks_needed - 1))
# Determine KPI directory based on client type
# - agronomic_support: field_level/ (6 farm-level KPIs)
# - cane_supply: field_analysis/ (per-field analysis)
kpi_subdir <- if (client_type == "agronomic_support") "field_level" else "field_analysis"
kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", kpi_subdir)
# Create KPI directory if it doesn't exist
if (!dir.exists(kpi_dir)) {
dir.create(kpi_dir, recursive = TRUE, showWarnings = FALSE)
cat(sprintf("[KPI_DIR_CREATED] Created directory: %s\n", kpi_dir))
}
kpis_needed <- data.frame()
kpis_missing_count <- 0
# Debug: Check if KPI directory exists
if (dir.exists(kpi_dir)) {
cat(sprintf("[KPI_DIR_EXISTS] %s\n", kpi_dir))
all_kpi_files <- list.files(kpi_dir)
cat(sprintf("[KPI_DEBUG] Total files in directory: %d\n", length(all_kpi_files)))
if (length(all_kpi_files) > 0) {
cat(sprintf("[KPI_DEBUG] Sample files: %s\n", paste(head(all_kpi_files, 3), collapse = ", ")))
}
} else {
cat(sprintf("[KPI_DIR_MISSING] Directory does not exist: %s\n", kpi_dir))
}
for (weeks_back in 0:(reporting_weeks_needed - 1)) {
check_date <- end_date - (weeks_back * 7)
week_num <- as.numeric(format(check_date, "%V"))
year_num <- as.numeric(format(check_date, "%G"))
# Check for any KPI file from that week - use more flexible pattern matching
week_pattern <- sprintf("week%02d_%d", week_num, year_num)
kpi_files_this_week <- c()
if (dir.exists(kpi_dir)) {
# List all files and manually check for pattern match
all_files <- list.files(kpi_dir, pattern = "\\.csv$|\\.json$")
kpi_files_this_week <- all_files[grepl(week_pattern, all_files, fixed = TRUE)]
# Debug output for first week
if (weeks_back == 0) {
cat(sprintf("[KPI_DEBUG_W%02d_%d] Pattern: '%s' | Found: %d files\n",
week_num, year_num, week_pattern, length(kpi_files_this_week)))
if (length(kpi_files_this_week) > 0) {
cat(sprintf("[KPI_DEBUG_W%02d_%d] Files: %s\n",
week_num, year_num, paste(kpi_files_this_week, collapse = ", ")))
}
}
}
has_kpis <- length(kpi_files_this_week) > 0
kpis_needed <- rbind(kpis_needed, data.frame(
week = week_num,
year = year_num,
date = check_date,
has_kpis = has_kpis
))
if (!has_kpis) {
kpis_missing_count <- kpis_missing_count + 1
}
cat(sprintf(" Week %02d/%d (%s): %s\n",
week_num, year_num, format(check_date, "%Y-%m-%d"),
if(has_kpis) "✓ EXISTS" else "✗ WILL BE CALCULATED"))
}
cat(sprintf("\nKPI Summary: %d/%d weeks exist, %d week(s) will be calculated by Script 80\n",
nrow(kpis_needed) - kpis_missing_count, nrow(kpis_needed), kpis_missing_count))
# Define conditional script execution based on client type
# Client types:
# - "cane_supply": Runs Scripts 20,21,22,23,30,31,80,91 (full pipeline with Excel output)
@ -137,31 +357,14 @@ cat(sprintf("Script 20: %d CI daily RDS files exist\n", length(ci_files)))
# For now, just note that CSV is time-dependent, not a good skip indicator
cat("Script 21: CSV file exists but gets overwritten - will run if Script 20 runs\n")
# Check Script 40 outputs (mosaics) - check for THIS WEEK's mosaic specifically
# (important for Script 80, which needs the current week's mosaic)
current_week <- as.numeric(format(end_date, "%V"))
current_year <- as.numeric(format(end_date, "%Y"))
week_mosaic_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year)
# Check Script 40 outputs (mosaics) - check which weeks are missing (not just current week)
# The early check section already identified missing_weeks, so we use that
skip_40 <- (nrow(missing_weeks) == 0 && !force_rerun) # Only skip if NO missing weeks AND not forcing rerun
cat(sprintf("Script 40: %d missing week(s) to create\n", nrow(missing_weeks)))
mosaic_files <- c()
if (mosaic_mode == "tiled") {
# For tile-based: look in weekly_tile_max/{grid_size}/ for this week's file
weekly_tile_max <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max")
subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE)
grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
if (length(grid_patterns) > 0) {
mosaic_dir <- file.path(weekly_tile_max, grid_patterns[1])
mosaic_files <- list.files(mosaic_dir, pattern = week_mosaic_pattern)
}
} else if (mosaic_mode == "single-file") {
# For single-file: look in weekly_mosaic/ for this week's file
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic")
mosaic_files <- list.files(mosaic_dir, pattern = week_mosaic_pattern)
}
cat(sprintf("Script 40: %d mosaic files exist for week %02d\n", length(mosaic_files), current_week))
# Check Script 80 outputs (KPIs in reports/kpis/field_stats)
kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats")
# Check Script 80 outputs (KPIs in reports/kpis/{field_level|field_analysis})
# Use the same kpi_subdir logic to find the right directory
kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", kpi_subdir)
kpi_files <- if (dir.exists(kpi_dir)) {
list.files(kpi_dir, pattern = "\\.csv$|\\.json$")
} else {
@ -170,15 +373,15 @@ kpi_files <- if (dir.exists(kpi_dir)) {
cat(sprintf("Script 80: %d KPI files exist\n", length(kpi_files)))
# Determine if scripts should run based on outputs AND client type
skip_10 <- (length(tiles_dates) > 0 && !force_rerun) # Always check tiles
skip_10 <- (length(tiles_dates) > 0 && !force_rerun && !force_data_generation) # Force Script 10 if missing weeks detected
skip_20 <- FALSE # Script 20 ALWAYS runs for all client types - processes new downloaded data
skip_21 <- skip_cane_supply_only # Script 21 runs ONLY for cane_supply clients (CI→CSV conversion)
skip_22 <- skip_cane_supply_only # Script 22 runs ONLY for cane_supply clients
skip_23 <- skip_cane_supply_only # Script 23 runs ONLY for cane_supply clients
skip_30 <- FALSE # Script 30 ALWAYS runs for all client types
skip_31 <- skip_cane_supply_only # Script 31 runs ONLY for cane_supply clients
skip_40 <- (length(mosaic_files) > 0 && !force_rerun) # Always check mosaics
skip_80 <- FALSE # Script 80 ALWAYS runs for all client types - calculates KPIs for current week
skip_40 <- (nrow(missing_weeks) == 0 && !force_rerun) # Skip Script 40 only if NO missing weeks
skip_80 <- (kpis_missing_count == 0 && !force_rerun) # Skip Script 80 only if ALL KPIs exist AND not forcing rerun
cat("\nSkipping decisions (based on outputs AND client type):\n")
cat(sprintf(" Script 10: %s\n", if(skip_10) "SKIP" else "RUN"))
@ -188,7 +391,7 @@ cat(sprintf(" Script 22: %s %s\n", if(skip_22) "SKIP" else "RUN", if(skip_cane_
cat(sprintf(" Script 23: %s %s\n", if(skip_23) "SKIP" else "RUN", if(skip_cane_supply_only) "(non-cane_supply client)" else ""))
cat(sprintf(" Script 30: %s (always runs)\n", if(skip_30) "SKIP" else "RUN"))
cat(sprintf(" Script 31: %s %s\n", if(skip_31) "SKIP" else "RUN", if(skip_cane_supply_only) "(non-cane_supply client)" else ""))
cat(sprintf(" Script 40: %s %s\n", if(skip_40) "SKIP" else "RUN", if(!skip_40) "" else "(mosaics exist)"))
cat(sprintf(" Script 40: %s (looping through %d missing weeks)\n", if(skip_40) "SKIP" else "RUN", nrow(missing_weeks)))
cat(sprintf(" Script 80: %s (always runs)\n", if(skip_80) "SKIP" else "RUN"))
cat(sprintf(" Script 90: %s %s\n", if(!run_legacy_report) "SKIP" else "RUN", if(run_legacy_report) "(agronomic_support legacy report)" else ""))
cat(sprintf(" Script 91: %s %s\n", if(!run_modern_report) "SKIP" else "RUN", if(run_modern_report) "(cane_supply modern report)" else ""))
# NOTE(review): diff-artifact hunk header neutralized below; its context text
# was "tryCatch({" - reconcile this block's structure with version control.
# @ -216,7 +419,7 @@ tryCatch({
}
# Find missing dates in the window
# NOTE(review): removed a dead assignment ("start_date <- end_date - offset")
# that was immediately overwritten by the data_generation_offset line below.
start_date <- end_date - data_generation_offset
date_seq <- seq(start_date, end_date, by = "day")
target_dates <- format(date_seq, "%Y-%m-%d")
# NOTE(review): the next line is a diff-artifact hunk header fused with the
# section opener "if (pipeline_success && !skip_10) {" - reconcile before use.
@ -278,12 +481,14 @@ if (pipeline_success && !skip_10) {
tryCatch({
# CRITICAL: Save global variables before sourcing Script 10 (it overwrites end_date, offset, etc.)
saved_end_date <- end_date
# NOTE(review): duplicated assignment - the line below repeats it verbatim;
# one of the two is a flattened-diff leftover and can be dropped.
saved_offset <- offset
saved_offset <- offset # Use FULL offset for tiling (not dynamic_offset)
saved_project_dir <- project_dir
saved_data_source <- data_source
# Set environment variables for the script (Script 10 uses these for filtering)
assign("PROJECT", project_dir, envir = .GlobalEnv)
assign("end_date", end_date, envir = .GlobalEnv)
assign("offset", offset, envir = .GlobalEnv) # Full reporting window
# Suppress verbose per-date output, show only summary
# NOTE(review): sink(nullfile()) redirects console output; the matching sink()
# restore is not visible in this chunk - confirm it survives error paths too,
# otherwise all later console output stays suppressed.
sink(nullfile())
# NOTE(review): the next line is a diff-artifact hunk header fused with the
# section opener "if (pipeline_success && !skip_20) {" - reconcile before use.
@ -321,6 +526,7 @@ if (pipeline_success && !skip_20) {
tryCatch({
# Run Script 20 via system() to pass command-line args just like from terminal
# Arguments: end_date offset project_dir data_source
# Use FULL offset so CI extraction covers entire reporting window (not just new data)
# NOTE(review): hard-coded absolute Rscript path pins this pipeline to one
# machine and R version - consider deriving it from R.home("bin") or config.
cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/20_ci_extraction.R "%s" %d "%s" "%s"',
format(end_date, "%Y-%m-%d"), offset, project_dir, data_source)
# Child-process exit status; no check of `result` is visible before the
# Script 30 section in this chunk - confirm the non-zero-exit handling
# survived the merge.
result <- system(cmd)
# NOTE(review): the next line is a diff-artifact hunk header fused with the
# section opener "if (pipeline_success && !skip_30) {" - reconcile before use.
@ -382,9 +588,10 @@ if (pipeline_success && !skip_30) {
cat("\n========== RUNNING SCRIPT 30: INTERPOLATE GROWTH MODEL ==========\n")
tryCatch({
# Run Script 30 via system() to pass command-line args just like from terminal
# NOTE(review): the next three lines are the OLD revision of this call (one
# argument only); they assign `cmd` and are immediately overwritten by the
# two-argument version below - dead code from a flattened diff.
# Script 30 expects: project_dir as first argument only
cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/30_interpolate_growth_model.R "%s"',
project_dir)
# Script 30 expects: project_dir data_source as arguments
# Pass the same data_source that Script 20 is using
# NOTE(review): this passes `data_source_used` while Script 20 above passes
# `data_source` - presumably intentional (resolved vs requested source), but
# confirm both names exist and agree at this point.
cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/30_interpolate_growth_model.R "%s" "%s"',
project_dir, data_source_used)
result <- system(cmd)
if (result != 0) {
# NOTE(review): hunk header below cuts off the body of this error check;
# the stop() call is missing from this chunk.
@ -442,85 +649,253 @@ if (pipeline_success && !skip_31) {
}
# ==============================================================================
# NOTE(review): duplicate section titles below = old + new revision of the same
# comment from a flattened diff; keep one after reconciling with history.
# SCRIPT 40: MOSAIC CREATION
# SCRIPT 40: MOSAIC CREATION (LOOP THROUGH MISSING WEEKS)
# ==============================================================================
if (pipeline_success && !skip_40) {
cat("\n========== RUNNING SCRIPT 40: MOSAIC CREATION ==========\n")
tryCatch({
# NOTE(review): the single-run invocation below (cmd/system with full offset)
# is the OLD revision of this section; the per-week loop that follows is the
# NEW one. Both are present because the diff was flattened - reconcile.
# Run Script 40 via system() to pass command-line args just like from terminal
# Use full path and --vanilla to avoid renv/environment issues
# Arguments: end_date offset project_dir (file_name_tif is auto-generated from dates)
cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/40_mosaic_creation.R "%s" %d "%s"',
format(end_date, "%Y-%m-%d"), offset, project_dir)
result <- system(cmd)
# If there are missing weeks, process them one at a time
if (nrow(missing_weeks) > 0) {
cat(sprintf("Found %d missing week(s) - running Script 40 once per week\n\n", nrow(missing_weeks)))
# NOTE(review): this error check is detached from the `result` it tests (old
# revision) and its closing brace is missing here - structural damage from
# the flattened diff.
if (result != 0) {
stop("Script 40 exited with error code:", result)
# Loop through missing weeks in reverse chronological order (oldest first)
# One Script 40 invocation per missing week.
# NOTE(review): iterates rows last-to-first; whether that is really "oldest
# first" depends on how missing_weeks is sorted upstream - confirm.
for (week_idx in nrow(missing_weeks):1) {
missing_week <- missing_weeks[week_idx, ]
week_num <- missing_week$week
year_num <- missing_week$year
week_end_date <- as.Date(missing_week$week_end_date)
cat(sprintf("--- Creating mosaic for week %02d/%d (ending %s) ---\n",
week_num, year_num, format(week_end_date, "%Y-%m-%d")))
tryCatch({
# Run Script 40 with offset=7 (one week only) for this specific week
# The end_date is the last day of the week, and offset=7 covers the full 7-day week
# IMPORTANT: Pass data_source so Script 40 uses the correct folder (not auto-detect which can be wrong)
cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/40_mosaic_creation.R "%s" 7 "%s" "" "%s"',
format(week_end_date, "%Y-%m-%d"), project_dir, data_source)
result <- system(cmd)
if (result != 0) {
stop("Script 40 exited with error code:", result)
}
# Verify mosaic was created for this specific week
mosaic_created <- FALSE
if (mosaic_mode == "tiled") {
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5")
if (dir.exists(mosaic_dir)) {
week_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year_num)
# NOTE(review): this clobbers the script-level `mosaic_files` used earlier
# for the (stale) skip decision - rename locally to avoid confusion.
mosaic_files <- list.files(mosaic_dir, pattern = week_pattern)
mosaic_created <- length(mosaic_files) > 0
}
} else {
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic")
if (dir.exists(mosaic_dir)) {
week_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year_num)
mosaic_files <- list.files(mosaic_dir, pattern = week_pattern)
mosaic_created <- length(mosaic_files) > 0
}
}
if (mosaic_created) {
cat(sprintf("✓ Week %02d/%d mosaic created successfully\n\n", week_num, year_num))
} else {
cat(sprintf("✓ Week %02d/%d processing completed (verify output)\n\n", week_num, year_num))
}
}, error = function(e) {
# NOTE(review): sprintf already ends with "\n"; the trailing "\n" argument
# adds a second (space-prefixed) newline. Also, setting pipeline_success
# here does NOT stop the loop - remaining weeks are still attempted.
cat(sprintf("✗ Error creating mosaic for week %02d/%d: %s\n", week_num, year_num, e$message), "\n")
pipeline_success <<- FALSE
})
}
# Verify mosaic output - check based on mosaic mode (tiled vs single-file)
# NOTE(review): this verification block mixes the OLD (current-week check +
# "(mosaics exist)" skip message) and NEW ("all %d missing week mosaics")
# revisions; brace balance is broken around the else branch below - reconcile
# with version control before running.
mosaic_files_check <- c()
if (mosaic_mode == "tiled") {
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max", "5x5")
if (dir.exists(mosaic_dir)) {
# Check for current week's file only
current_week_check <- as.numeric(format(end_date, "%V"))
# NOTE(review): uses calendar year %Y with ISO week %V; the KPI check later
# uses ISO year %G - these disagree at year boundaries. Confirm which is
# intended and use it consistently.
current_year_check <- as.numeric(format(end_date, "%Y"))
week_pattern_check <- sprintf("week_%02d_%d\\.tif", current_week_check, current_year_check)
mosaic_files_check <- list.files(mosaic_dir, pattern = week_pattern_check)
}
} else {
mosaic_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic")
if (dir.exists(mosaic_dir)) {
# Check for current week's file only
current_week_check <- as.numeric(format(end_date, "%V"))
current_year_check <- as.numeric(format(end_date, "%Y"))
week_pattern_check <- sprintf("week_%02d_%d\\.tif", current_week_check, current_year_check)
mosaic_files_check <- list.files(mosaic_dir, pattern = week_pattern_check)
}
if (pipeline_success) {
cat(sprintf("✓ Script 40 completed - created all %d missing week mosaics\n", nrow(missing_weeks)))
}
# NOTE(review): `current_week` is not defined in this chunk (only
# current_week_check is) - presumably set earlier in the file; verify.
if (length(mosaic_files_check) > 0) {
cat(sprintf("✓ Script 40 completed - created mosaic for week %02d\n", current_week))
} else {
cat("✓ Script 40 completed\n")
}
}, error = function(e) {
cat("✗ Error in Script 40:", e$message, "\n")
pipeline_success <<- FALSE
})
} else {
cat("No missing weeks detected - skipping Script 40\n")
skip_40 <- TRUE
}
} else if (skip_40) {
cat("\n========== SKIPPING SCRIPT 40 (mosaics already created) ==========\n")
}
# ==============================================================================
# NOTE(review): duplicate titles = old + new revision from a flattened diff.
# SCRIPT 80: CALCULATE KPIs
# SCRIPT 80: CALCULATE KPIs (LOOP THROUGH REPORTING WINDOW)
# ==============================================================================
# NOTE(review): the OLD single-run Script 80 block (unconditional, full-offset
# call, its result check, and its verification) is interleaved with the NEW
# per-week loop through line ~1448; only one revision should survive.
if (pipeline_success) { # Always run Script 80 - it calculates KPIs for the current week
cat("\n========== RUNNING SCRIPT 80: CALCULATE KPIs ==========\n")
tryCatch({
# Run Script 80 via system() to pass command-line args just like from terminal
# Use full path and --vanilla to avoid renv/environment issues
cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/80_calculate_kpis.R "%s" %d "%s" "%s"',
format(end_date, "%Y-%m-%d"), offset, project_dir, data_source)
result <- system(cmd)
if (pipeline_success && !skip_80) {
cat("\n========== RUNNING SCRIPT 80: CALCULATE KPIs FOR REPORTING WINDOW ==========\n")
# Build list of weeks that NEED calculation (missing KPIs)
weeks_to_calculate <- kpis_needed[!kpis_needed$has_kpis, ] # Only weeks WITHOUT KPIs
if (nrow(weeks_to_calculate) > 0) {
# Sort by date (oldest to newest) for sequential processing
weeks_to_calculate <- weeks_to_calculate[order(weeks_to_calculate$date), ]
# NOTE(review): detached result check from the OLD revision.
if (result != 0) {
stop("Script 80 exited with error code:", result)
}
cat(sprintf("Looping through %d missing week(s) in reporting window (from %s back to %s):\n\n",
nrow(weeks_to_calculate),
format(max(weeks_to_calculate$date), "%Y-%m-%d"),
format(min(weeks_to_calculate$date), "%Y-%m-%d")))
# NOTE(review): OLD-revision verification block (hard-coded "field_stats"
# subdir); the NEW one below uses kpi_subdir.
# Verify KPI output
kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats")
if (dir.exists(kpi_dir)) {
files <- list.files(kpi_dir, pattern = "\\.csv$|\\.json$")
cat(sprintf("✓ Script 80 completed - generated %d KPI files\n", length(files)))
} else {
cat("✓ Script 80 completed\n")
tryCatch({
# One Script 80 run per missing week, oldest first (sorted above).
for (week_idx in 1:nrow(weeks_to_calculate)) {
week_row <- weeks_to_calculate[week_idx, ]
calc_date <- week_row$date
# Run Script 80 for this specific week with offset=7 (one week only)
# This ensures Script 80 calculates KPIs for THIS week with proper trend data
# NOTE(review): argument order here is end_date, project_dir, offset - the
# old single-run call used end_date, offset, project_dir, data_source.
# Confirm Script 80's CLI contract; note this loop passes no data_source.
cmd <- sprintf('"C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" --vanilla r_app/80_calculate_kpis.R "%s" "%s" %d',
format(calc_date, "%Y-%m-%d"), project_dir, 7) # offset=7 for single week
cat(sprintf(" [Week %02d/%d] Running Script 80 with end_date=%s...\n",
week_row$week, week_row$year, format(calc_date, "%Y-%m-%d")))
# Child output is suppressed; only the exit code is inspected. A non-zero
# exit is reported but does NOT stop the loop or fail the pipeline.
result <- system(cmd, ignore.stdout = TRUE, ignore.stderr = TRUE)
if (result == 0) {
cat(sprintf(" ✓ KPIs calculated for week %02d/%d\n", week_row$week, week_row$year))
} else {
cat(sprintf(" ✗ Error calculating KPIs for week %02d/%d (exit code: %d)\n",
week_row$week, week_row$year, result))
}
}
# Verify total KPI output
# NOTE(review): kpi_dir assigned here is also read by the verification
# section after this block - if this branch is skipped, kpi_dir may be the
# old "field_stats" value or unset; confirm.
kpi_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", kpi_subdir)
if (dir.exists(kpi_dir)) {
files <- list.files(kpi_dir, pattern = "\\.csv$|\\.json$")
cat(sprintf("\n✓ Script 80 loop completed - total %d KPI files in %s/\n", length(files), kpi_subdir))
} else {
cat("\n✓ Script 80 loop completed\n")
}
}, error = function(e) {
cat("✗ Error in Script 80 loop:", e$message, "\n")
pipeline_success <<- FALSE
})
} else {
cat(sprintf("✓ All %d weeks already have KPIs - skipping calculation\n", nrow(kpis_needed)))
}
} else if (skip_80) {
cat("\n========== SKIPPING SCRIPT 80 (all KPIs already exist) ==========\n")
}
# ==============================================================================
# VERIFY KPI COMPLETION AFTER SCRIPT 80
# ==============================================================================
# Recheck if all KPIs are now available (Script 80 should have calculated any missing ones)
cat("\n========== VERIFYING KPI COMPLETION ==========\n")
# Gate flag read by the Script 90/91 report sections below.
kpis_complete <- TRUE
# NOTE(review): if kpi_dir does not exist, the loop is skipped and
# kpis_complete stays TRUE - reports would then run with no KPIs at all.
# Consider setting kpis_complete <- FALSE in an else branch.
if (dir.exists(kpi_dir)) {
for (weeks_back in 0:(reporting_weeks_needed - 1)) {
check_date <- end_date - (weeks_back * 7)
week_num <- as.numeric(format(check_date, "%V"))
year_num <- as.numeric(format(check_date, "%G"))
# Check for any KPI file from that week
week_pattern <- sprintf("week%02d_%d", week_num, year_num)
kpi_files_this_week <- list.files(kpi_dir, pattern = week_pattern)
if (length(kpi_files_this_week) == 0) {
kpis_complete <- FALSE
cat(sprintf(" Week %02d/%d: ✗ KPIs not found\n", week_num, year_num))
}
# NOTE(review): the four lines below ("}, error = function(e) ...") are a
# stray error handler from the OLD Script 80 block - they do not belong to
# this for-loop and break brace balance; remove after reconciling.
}, error = function(e) {
cat("✗ Error in Script 80:", e$message, "\n")
pipeline_success <<- FALSE
})
}
}
if (kpis_complete) {
cat("✓ All KPIs available - reports can be generated\n")
} else {
cat("⚠ Some KPIs still missing - reports will be skipped\n")
}
# ==============================================================================
# SCRIPT 90: LEGACY WORD REPORT (agronomic_support clients)
# ==============================================================================
# Renders the legacy RMarkdown report. Runs only while the pipeline is healthy
# AND the full KPI reporting window is available; otherwise explains the skip.
if (pipeline_success && run_legacy_report) {
  cat("\n========== RUNNING SCRIPT 90: LEGACY WORD REPORT ==========\n")
  if (kpis_complete) {
    tryCatch({
      # Script 90 is an RMarkdown file, so it is compiled with
      # rmarkdown::render() rather than sourced like the other scripts.
      report_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports")
      if (!dir.exists(report_dir)) {
        # First run for this project: create the reports folder (and parents).
        dir.create(report_dir, recursive = TRUE, showWarnings = FALSE)
      }
      # ISO week number (%V) paired with ISO year (%G) keeps the file name
      # consistent across year boundaries.
      iso_week <- as.numeric(format(end_date, "%V"))
      iso_year <- as.numeric(format(end_date, "%G"))
      report_file <- sprintf("CI_report_week%02d_%d.docx", iso_week, iso_year)
      rmarkdown::render(
        input = "r_app/90_CI_report_with_kpis_simple.Rmd",
        output_dir = report_dir,
        output_file = report_file,
        params = list(
          report_date = format(end_date, "%Y-%m-%d"),
          data_dir = project_dir
        ),
        quiet = TRUE
      )
      cat(sprintf("✓ Script 90 completed - generated Word report: %s\n", report_file))
    }, error = function(e) {
      # A failed render fails the whole pipeline (<<- updates the
      # script-level flag from inside the handler).
      cat("✗ Error in Script 90:", e$message, "\n")
      pipeline_success <<- FALSE
    })
  } else {
    cat("⚠ Skipping Script 90 - KPIs not available for full reporting window\n")
  }
} else if (run_legacy_report) {
  cat("\n========== SKIPPING SCRIPT 90 (pipeline error or KPIs incomplete) ==========\n")
}
# ==============================================================================
# SCRIPT 91: MODERN WORD REPORT (cane_supply clients)
# ==============================================================================
# Renders the modern (Angata) RMarkdown report. Runs only while the pipeline is
# healthy AND the full KPI reporting window is available; otherwise explains why.
if (pipeline_success && run_modern_report) {
  cat("\n========== RUNNING SCRIPT 91: MODERN WORD REPORT ==========\n")
  if (kpis_complete) {
    tryCatch({
      # Script 91 is an RMarkdown file, so it is compiled with
      # rmarkdown::render() rather than sourced like the other scripts.
      report_dir <- file.path("laravel_app", "storage", "app", project_dir, "reports")
      if (!dir.exists(report_dir)) {
        # First run for this project: create the reports folder (and parents).
        dir.create(report_dir, recursive = TRUE, showWarnings = FALSE)
      }
      # ISO week number (%V) paired with ISO year (%G) keeps the file name
      # consistent across year boundaries.
      iso_week <- as.numeric(format(end_date, "%V"))
      iso_year <- as.numeric(format(end_date, "%G"))
      report_file <- sprintf("CI_report_week%02d_%d.docx", iso_week, iso_year)
      rmarkdown::render(
        input = "r_app/91_CI_report_with_kpis_Angata.Rmd",
        output_dir = report_dir,
        output_file = report_file,
        params = list(
          report_date = format(end_date, "%Y-%m-%d"),
          data_dir = project_dir
        ),
        quiet = TRUE
      )
      cat(sprintf("✓ Script 91 completed - generated Word report: %s\n", report_file))
    }, error = function(e) {
      # A failed render fails the whole pipeline (<<- updates the
      # script-level flag from inside the handler).
      cat("✗ Error in Script 91:", e$message, "\n")
      pipeline_success <<- FALSE
    })
  } else {
    cat("⚠ Skipping Script 91 - KPIs not available for full reporting window\n")
  }
} else if (run_modern_report) {
  cat("\n========== SKIPPING SCRIPT 91 (pipeline error or KPIs incomplete) ==========\n")
}
# ==============================================================================
# NOTE(review): diff-artifact hunk header neutralized below; its context text
# was "if (pipeline_success) {" - the success branch of this summary is not
# visible in this chunk; reconcile with version control.
# @ -535,4 +910,4 @@ if (pipeline_success) {
} else {
cat("Status: ✗ Pipeline failed - check errors above\n")
}
# NOTE(review): removed the stale pre-update sequence line (ending at "R 80")
# that duplicated the cat() below; the current pipeline ends at R 90/91.
cat("Pipeline sequence: Python Download → R 10 → R 20 → R 21 → R 30 → Python 31 → R 40 → R 80 → R 90/91\n")