aura until word creation works. Word creation itself needs more work.

This commit is contained in:
Timon 2026-02-04 12:24:02 +01:00
parent 5c29c9b549
commit e16677eb78
15 changed files with 2590 additions and 1436 deletions

View file

@ -319,4 +319,4 @@ After each major stage, verify:
---
_For detailed system architecture, see `r_app/system_architecture/system_architecture.md`. For related Linear issues (code quality, architecture docs), see SC-59, SC-60, SC-61._
_For detailed system architecture, see `r_app/system_architecture/system_architecture.md`.

View file

@ -1,500 +1,356 @@
# ==============================================================================
# 00_COMMON_UTILS.R
# # 00_COMMON_UTILS.R
# ==============================================================================
# GENERIC UTILITY FUNCTIONS FOR SMARTCANE PIPELINE
#
# PURPOSE:
# Centralized location for foundational utilities used across multiple scripts.
# These functions have NO project knowledge, NO client-type dependencies,
# NO domain-specific logic.
#
# USAGE:
# All scripts (10, 20, 21, 30, 40, 80, 90, 91) should source this file:
#
# source(here::here("r_app", "parameters_project.R")) # Config first
# source(here::here("r_app", "00_common_utils.R")) # Then common utilities
#
# FUNCTIONS:
# 1. safe_log() — Generic logging with [LEVEL] prefix
# 2. smartcane_debug() — Conditional debug logging
# 3. smartcane_warn() — Convenience wrapper for WARN-level messages
# 4. date_list() — Generate date sequences for processing windows
# 5. get_iso_week() — Extract ISO week number from date
# 6. get_iso_year() — Extract ISO year from date
# 7. get_iso_week_year() — Extract both ISO week and year as list
# 8. format_week_label() — Format date as week/year label (e.g., "week01_2025")
# 9. load_field_boundaries() — Load field geometries from GeoJSON
# 10. load_harvesting_data() — Load harvest schedule from Excel
#
# # GENERIC UTILITY FUNCTIONS FOR SMARTCANE PIPELINE
# #
# # PURPOSE:
# # Centralized location for foundational utilities used across multiple scripts.
# # These functions have NO project knowledge, NO client-type dependencies,
# # NO domain-specific logic.
# #
# # USAGE:
# # All scripts (10, 20, 21, 30, 40, 80, 90, 91) should source this file:
# #
# # source(here::here("r_app", "parameters_project.R")) # Config first
# # source(here::here("r_app", "00_common_utils.R")) # Then common utilities
# #
# # FUNCTIONS:
# # 1. safe_log() — Generic logging with [LEVEL] prefix
# # 2. smartcane_debug() — Conditional debug logging
# # 3. smartcane_warn() — Convenience wrapper for WARN-level messages
# # 4. date_list() — Generate date sequences for processing windows
# # 5. load_field_boundaries() — Load field geometries from GeoJSON
# # 6. repair_geojson_geometries() — Fix invalid geometries in GeoJSON objects
# #
# # DATE FUNCTIONS (now in parameters_project.R):
# # - get_iso_week() — Extract ISO week number from date
# # - get_iso_year() — Extract ISO year from date
# # - get_iso_week_year() — Extract both ISO week and year as list
# # - format_week_label() — Format date as week/year label
# # - load_harvesting_data() — Load harvest schedule from Excel
# #
# ==============================================================================
#' Safe Logging Function
#'
#' Generic logging with a [LEVEL] prefix. Works standalone without any
#' framework and follows the SmartCane logging standard.
#'
#' @param message The message to log
#' @param level The log level (default: "INFO"). Options: "INFO", "WARNING", "ERROR", "DEBUG"
#' @return NULL (invisible, used for side effects)
#'
#' @examples
#' safe_log("Processing started", "INFO")
#' safe_log("Check input file", "WARNING")
#' safe_log("Failed to load data", "ERROR")
#'
safe_log <- function(message, level = "INFO") {
  # Emit a single "[LEVEL] message" line to stdout
  cat(sprintf("[%s] %s\n", level, message))
}
# # Source parameters first to get shared date utility functions
# source("parameters_project.R")
#' SmartCane Debug Logging (Conditional)
#'
#' Emits a DEBUG-level message, but only when verbose = TRUE or the
#' SMARTCANE_DEBUG environment variable is set to "TRUE". Keeps normal runs
#' quiet while still allowing detailed tracing during development.
#'
#' @param message The message to log
#' @param verbose Whether to output regardless of SMARTCANE_DEBUG (default: FALSE)
#' @return NULL (invisible, used for side effects)
#'
#' @examples
#' smartcane_debug("Processing field 1", verbose = FALSE) # Only if SMARTCANE_DEBUG=TRUE
#' smartcane_debug("Detailed state info", verbose = TRUE) # Always outputs
#'
smartcane_debug <- function(message, verbose = FALSE) {
  # Debug output is enabled either explicitly (verbose) or via the env flag
  debug_on <- verbose || identical(Sys.getenv("SMARTCANE_DEBUG"), "TRUE")
  if (!debug_on) {
    return(invisible(NULL))
  }
  safe_log(message, level = "DEBUG")
}
# #' Safe Logging Function
# #'
# #' Generic logging with [LEVEL] prefix. Works standalone without any framework.
# #' Consistent with SmartCane logging standard.
# #'
# #' @param message The message to log
# #' @param level The log level (default: "INFO"). Options: "INFO", "WARNING", "ERROR", "DEBUG"
# #' @return NULL (invisible, used for side effects)
# #'
# #' @examples
# #' safe_log("Processing started", "INFO")
# #' safe_log("Check input file", "WARNING")
# #' safe_log("Failed to load data", "ERROR")
# #'
# safe_log <- function(message, level = "INFO") {
# prefix <- sprintf("[%s]", level)
# cat(sprintf("%s %s\n", prefix, message))
# }
#' SmartCane Warning Logging
#'
#' Convenience wrapper that routes a message through safe_log() at WARN level.
#'
#' @param message The message to log
#' @return NULL (invisible, used for side effects)
#'
#' @examples
#' smartcane_warn("Check data format before proceeding")
#'
smartcane_warn <- function(message) {
  # Delegate to the shared logger with a fixed WARN level
  safe_log(message = message, level = "WARN")
}
# #' SmartCane Debug Logging (Conditional)
# #'
# #' Logs DEBUG-level messages only if verbose=TRUE or SMARTCANE_DEBUG env var is set.
# #' Useful for development/troubleshooting without cluttering normal output.
# #'
# #' @param message The message to log
# #' @param verbose Whether to output regardless of SMARTCANE_DEBUG (default: FALSE)
# #' @return NULL (invisible, used for side effects)
# #'
# #' @examples
# #' smartcane_debug("Processing field 1", verbose = FALSE) # Only if SMARTCANE_DEBUG=TRUE
# #' smartcane_debug("Detailed state info", verbose = TRUE) # Always outputs
# #'
# smartcane_debug <- function(message, verbose = FALSE) {
# if (!verbose && Sys.getenv("SMARTCANE_DEBUG") != "TRUE") {
# return(invisible(NULL))
# }
# safe_log(message, level = "DEBUG")
# }
#' Extract ISO Week Number from Date
#'
#' Returns the ISO-8601 week number (1-53) for a date via the "%V" format
#' specifier. Under the ISO standard, week 1 starts on Monday.
#'
#' @param date A Date object or string convertible to Date
#' @return Numeric: ISO week number (1-53)
#'
#' @examples
#' get_iso_week(as.Date("2025-01-15")) # Returns: 3
#'
get_iso_week <- function(date) {
  week_text <- format(date, "%V")
  as.numeric(week_text)
}
# #' SmartCane Warning Logging
# #'
# #' Logs WARN-level messages. Convenience wrapper around safe_log().
# #'
# #' @param message The message to log
# #' @return NULL (invisible, used for side effects)
# #'
# #' @examples
# #' smartcane_warn("Check data format before proceeding")
# #'
# smartcane_warn <- function(message) {
# safe_log(message, level = "WARN")
# }
#' Extract ISO Year from Date
#'
#' Returns the ISO-8601 year for a date via the "%G" format specifier.
#' The ISO year can differ from the calendar year around year boundaries.
#'
#' @param date A Date object or string convertible to Date
#' @return Numeric: ISO year
#'
#' @examples
#' get_iso_year(as.Date("2025-01-01")) # Returns: 2025
#'
get_iso_year <- function(date) {
  year_text <- format(date, "%G")
  as.numeric(year_text)
}
# #' Load Field Boundaries from GeoJSON
# #'
# #' Loads field polygon geometries from GeoJSON file (pivot.geojson or pivot_2.geojson).
# #' Handles CRS validation and column standardization.
# #'
# #' @param data_dir Directory containing GeoJSON file
# #' @return List with elements:
# #' - field_boundaries_sf: sf (Simple Features) object
# #' - field_boundaries: terra SpatVect object (if conversion successful, else sf fallback)
# #'
# #' @details
# #' Automatically selects pivot_2.geojson for ESA project during CI extraction,
# #' otherwise uses pivot.geojson. Handles both multi-polygon and simple polygon geometries.
# #'
# #' @examples
# #' boundaries <- load_field_boundaries("laravel_app/storage/app/angata")
# #' head(boundaries$field_boundaries_sf)
# #'
# load_field_boundaries <- function(data_dir) {
# # Choose field boundaries file based on project and script type
# # ESA project uses pivot_2.geojson ONLY for scripts 02-03 (CI extraction & growth model)
# # All other scripts (including 04-mosaic, 09-KPIs, 10-reports) use pivot.geojson
# use_pivot_2 <- exists("project_dir") && project_dir == "esa" &&
# exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03
#' Extract ISO Week and Year as List
#'
#' Returns both the ISO week number ("%V") and the ISO year ("%G")
#' for a date, bundled in a single list for convenience.
#'
#' @param date A Date object or string convertible to Date
#' @return List with elements: week (1-53), year
#'
#' @examples
#' wwy <- get_iso_week_year(as.Date("2025-01-15"))
#' # Returns: list(week = 3, year = 2025)
#'
get_iso_week_year <- function(date) {
  iso_week <- as.numeric(format(date, "%V"))
  iso_year <- as.numeric(format(date, "%G"))
  list(week = iso_week, year = iso_year)
}
# if (use_pivot_2) {
# field_boundaries_path <- file.path(data_dir, "pivot_2.geojson")
# } else {
# field_boundaries_path <- file.path(data_dir, "pivot.geojson")
# }
#' Format Date as Week/Year Label
#'
#' Builds a readable week label (e.g. "week03_2025") from a date.
#' Useful for filenames, directory names, and output identification.
#'
#' @param date A Date object or string convertible to Date
#' @param separator Separator between week number and year (default: "_")
#' @return String in format "week##_YYYY" (e.g., "week03_2025")
#'
#' @examples
#' format_week_label(as.Date("2025-01-15")) # "week03_2025"
#' format_week_label(as.Date("2025-01-15"), "-") # "week03-2025"
#'
format_week_label <- function(date, separator = "_") {
  parts <- get_iso_week_year(date)
  # Zero-pad the week to two digits, then join week/year with the separator
  paste0(sprintf("week%02d", parts$week), separator, parts$year)
}
# if (!file.exists(field_boundaries_path)) {
# stop(paste("Field boundaries file not found at path:", field_boundaries_path))
# }
#' Load Field Boundaries from GeoJSON
#'
#' Loads field polygon geometries from a GeoJSON file (pivot.geojson or
#' pivot_2.geojson). Handles CRS validation and column standardization.
#'
#' @param data_dir Directory containing the GeoJSON file
#' @return List with elements:
#'   - field_boundaries_sf: sf (Simple Features) object
#'   - field_boundaries: terra SpatVect object (if conversion successful, else sf fallback)
#'
#' @details
#' Selects pivot_2.geojson when the globals `project_dir == "esa"` and
#' `ci_extraction_script` both exist (the flag is set by scripts 02-03 during
#' CI extraction); otherwise uses pivot.geojson. Handles both multi-polygon and
#' simple polygon geometries. Stops with an error if the file is missing or
#' cannot be read.
#'
#' @examples
#' boundaries <- load_field_boundaries("laravel_app/storage/app/angata")
#' head(boundaries$field_boundaries_sf)
#'
load_field_boundaries <- function(data_dir) {
  # Choose the field boundaries file based on project and script type.
  # ESA project uses pivot_2.geojson ONLY for scripts 02-03 (CI extraction
  # & growth model); all other scripts use pivot.geojson.
  use_pivot_2 <- exists("project_dir") && project_dir == "esa" &&
    exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03

  # NOTE(review): the two branches resolve to different sub-directories
  # (pivot_2.geojson directly under data_dir, pivot.geojson under
  # data_dir/Data) -- confirm this asymmetry is intentional.
  if (use_pivot_2) {
    field_boundaries_path <- here(data_dir, "pivot_2.geojson")
  } else {
    field_boundaries_path <- here(data_dir, "Data", "pivot.geojson")
  }

  if (!file.exists(field_boundaries_path)) {
    stop(paste("Field boundaries file not found at path:", field_boundaries_path))
  }

  tryCatch({
    # Read GeoJSON with explicit CRS handling
    field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE)

    # Remove OBJECTID column immediately if it exists.
    # Qualified dplyr::select to avoid masking by terra/raster.
    if ("OBJECTID" %in% names(field_boundaries_sf)) {
      field_boundaries_sf <- field_boundaries_sf %>% dplyr::select(-OBJECTID)
    }

    # **CRITICAL**: Repair invalid geometries (degenerate vertices,
    # self-intersections, etc.) BEFORE any spatial operations (CRS transform,
    # intersect, crop) to prevent S2 validation errors downstream.
    field_boundaries_sf <- repair_geojson_geometries(field_boundaries_sf)

    # Validate and fix CRS if needed
    tryCatch({
      # Simply assign WGS84 if not already set (safe approach)
      if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) {
        st_crs(field_boundaries_sf) <- 4326
        warning("CRS was missing, assigned WGS84 (EPSG:4326)")
      }
    }, error = function(e) {
      tryCatch({
        # `<<-` reaches the enclosing function scope from this handler
        st_crs(field_boundaries_sf) <<- 4326
      }, error = function(e2) {
        warning(paste("Could not set CRS:", e2$message))
      })
    })

    # Handle column names - accommodate the optional sub_area column
    if ("sub_area" %in% names(field_boundaries_sf)) {
      field_boundaries_sf <- field_boundaries_sf %>%
        dplyr::select(field, sub_field, sub_area) %>%
        sf::st_set_geometry("geometry")
    } else {
      field_boundaries_sf <- field_boundaries_sf %>%
        dplyr::select(field, sub_field) %>%
        sf::st_set_geometry("geometry")
    }

    # Convert to a terra vector if possible, otherwise fall back to sf
    field_boundaries <- tryCatch({
      field_boundaries_terra <- terra::vect(field_boundaries_sf)
      crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL)
      crs_str <- if (!is.null(crs_value)) as.character(crs_value) else ""

      if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) {
        terra::crs(field_boundaries_terra) <- "EPSG:4326"
        warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)")
      }
      field_boundaries_terra
    }, error = function(e) {
      warning(paste("Terra conversion failed, using sf object instead:", e$message))
      field_boundaries_sf
    })

    return(list(
      field_boundaries_sf = field_boundaries_sf,
      field_boundaries = field_boundaries
    ))
  }, error = function(e) {
    cat("[DEBUG] Error in load_field_boundaries:\n")
    cat("  Message:", e$message, "\n")
    cat("  Call:", deparse(e$call), "\n")
    stop(paste("Error loading field boundaries:", e$message))
  })
}
# #' Generate a Sequence of Dates for Processing
# #'
# #' Creates a date range from start_date to end_date and extracts week/year info.
# #' Used by Scripts 20, 30, 40 to determine data processing windows.
# #'
# #' @param end_date The end date for the sequence (Date object or "YYYY-MM-DD" string)
# #' @param offset Number of days to look back from end_date (e.g., 7 for one week)
# #' @return A list containing:
# #' - week: ISO week number of start_date
# #' - year: ISO year of start_date
# #' - days_filter: Vector of dates in "YYYY-MM-DD" format
# #' - start_date: Start date as Date object
# #' - end_date: End date as Date object
# #'
# #' @details
# #' IMPORTANT: Uses `lubridate::week()` and `lubridate::year()` which return
# #' ISO week numbers (week 1 starts on Monday). For ISO week-based calculations,
# #' use `lubridate::isoweek()` and `lubridate::isoyear()` instead.
# #'
# #' @examples
# #' dates <- date_list(as.Date("2025-01-15"), offset = 7)
# #' # Returns: week=2, year=2025, days_filter = c("2025-01-09", ..., "2025-01-15")
# #'
# #' dates <- date_list("2025-12-31", offset = 14)
# #' # Handles string input and returns 14 days of data
# #'
# date_list <- function(end_date, offset) {
# # Input validation
# if (!lubridate::is.Date(end_date)) {
# end_date <- as.Date(end_date)
# if (is.na(end_date)) {
# stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.")
# }
# }
#' Load Harvesting Data from Excel
#'
#' Loads the crop harvest schedule from harvest.xlsx.
#' Handles flexible date formats (Excel numeric serials, YYYY-MM-DD,
#' DD/MM/YYYY, etc.).
#'
#' @param data_dir Directory containing harvest.xlsx file
#' @return Data frame with columns: field, sub_field, year, season_start, season_end,
#'   age (weeks), sub_area, tonnage_ha. Returns NULL if the file is not found
#'   or cannot be read.
#'
#' @details
#' Missing or future season_end values are clamped to today's date, and `age`
#' is recomputed as whole weeks between season_start and season_end.
#'
#' @examples
#' harvest <- load_harvesting_data("laravel_app/storage/app/angata")
#' head(harvest)
#'
load_harvesting_data <- function(data_dir) {
  harvest_file <- here(data_dir, "harvest.xlsx")

  if (!file.exists(harvest_file)) {
    warning(paste("Harvest data file not found at path:", harvest_file))
    return(NULL)
  }

  # Helper function to parse one date value with multiple-format detection.
  # Accepts Date/POSIXct, Excel numeric serials, and several string formats.
  parse_flexible_date <- function(x) {
    # Check NULL/empty BEFORE is.na(): is.na(NULL) is logical(0), and a
    # zero-length operand to `||` is an error in modern R.
    if (is.null(x) || length(x) == 0 || is.na(x)) return(NA_real_)
    if (inherits(x, "Date")) return(x)
    if (inherits(x, "POSIXct")) return(as.Date(x))

    # If it's numeric (Excel date serial), convert directly
    if (is.numeric(x)) {
      return(as.Date(x, origin = "1899-12-30"))
    }

    # Try character conversion with multiple formats
    x_char <- as.character(x)
    formats <- c("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y-%m-%d %H:%M:%S")
    for (fmt in formats) {
      result <- suppressWarnings(as.Date(x_char, format = fmt))
      if (!is.na(result)) return(result)
    }
    # Unparseable: return a numeric NA, consistent with the NULL/NA branch
    NA_real_
  }

  tryCatch({
    harvesting_data <- read_excel(harvest_file) %>%
      dplyr::select(
        c(
          "field",
          "sub_field",
          "year",
          "season_start",
          "season_end",
          "age",
          "sub_area",
          "tonnage_ha"
        )
      ) %>%
      mutate(
        field = as.character(field),
        sub_field = as.character(sub_field),
        year = as.numeric(year),
        # sapply() strips the Date class, so re-cast from the numeric epoch below
        season_start = sapply(season_start, parse_flexible_date),
        season_end = sapply(season_end, parse_flexible_date),
        season_start = as.Date(season_start, origin = "1970-01-01"),
        season_end = as.Date(season_end, origin = "1970-01-01"),
        age = as.numeric(age),
        sub_area = as.character(sub_area),
        tonnage_ha = as.numeric(tonnage_ha)
      ) %>%
      mutate(
        # Clamp missing or future season ends to today, then recompute age in weeks
        season_end = case_when(
          season_end > Sys.Date() ~ Sys.Date(),
          is.na(season_end) ~ Sys.Date(),
          TRUE ~ season_end
        ),
        age = round(as.numeric(season_end - season_start) / 7, 0)
      )
    return(harvesting_data)
  }, error = function(e) {
    warning(paste("Error loading harvesting data:", e$message))
    return(NULL)
  })
}
#' Generate a Sequence of Dates for Processing
#'
#' Creates a date range ending at end_date and extracts ISO week/year info.
#' Used by Scripts 20, 30, 40 to determine data processing windows.
#'
#' @param end_date The end date for the sequence (Date object or "YYYY-MM-DD" string)
#' @param offset Number of days in the window, including end_date (e.g., 7 for one week)
#' @return A list containing:
#'   - week: ISO week number of end_date (the reporting period)
#'   - year: ISO year of end_date
#'   - days_filter: Vector of dates in "YYYY-MM-DD" format
#'   - start_date: Start date as Date object
#'   - end_date: End date as Date object
#'
#' @details
#' Uses `lubridate::isoweek()` and `lubridate::isoyear()`, so week/year follow
#' ISO-8601 (week 1 starts on Monday; the ISO year can differ from the
#' calendar year near year boundaries). Note that week/year are taken from
#' end_date, not start_date.
#'
#' @examples
#' dates <- date_list(as.Date("2025-01-15"), offset = 7)
#' # Returns: week=3, year=2025, days_filter = c("2025-01-09", ..., "2025-01-15")
#'
#' dates <- date_list("2025-12-31", offset = 14)
#' # Handles string input and returns 14 days of data
#'
date_list <- function(end_date, offset) {
  # Input validation: coerce strings to Date, reject anything unparseable
  if (!lubridate::is.Date(end_date)) {
    end_date <- as.Date(end_date)
    if (is.na(end_date)) {
      stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.")
    }
  }
  offset <- as.numeric(offset)
  if (is.na(offset) || offset < 1) {
    stop("Invalid offset provided. Expected a positive number.")
  }
  # Calculate date range
  offset <- offset - 1 # Adjust offset to include end_date
  start_date <- end_date - lubridate::days(offset)
  # Extract ISO week and year information (from END date for reporting period)
  week <- lubridate::isoweek(end_date)
  year <- lubridate::isoyear(end_date)
  # Generate sequence of dates
  days_filter <- seq(from = start_date, to = end_date, by = "day")
  days_filter <- format(days_filter, "%Y-%m-%d") # Format for consistent filtering
  # Log the date range
  safe_log(paste("Date range generated from", start_date, "to", end_date))
  return(list(
    "week" = week,
    "year" = year,
    "days_filter" = days_filter,
    "start_date" = start_date,
    "end_date" = end_date
  ))
}
# return(list(
# "week" = week,
# "year" = year,
# "days_filter" = days_filter,
# "start_date" = start_date,
# "end_date" = end_date
# ))
# }
# ==============================================================================
#' Repair Invalid GeoJSON Geometries
#'
#' Fixes common geometry issues in GeoJSON/sf objects:
#' - Degenerate vertices (duplicate points)
#' - Self-intersecting polygons
#' - Invalid ring orientation
#' - Empty or NULL geometries
#'
#' Uses sf::st_make_valid(), with a row-wise retry if the bulk repair
#' changes the feature count.
#'
#' @param sf_object sf object (GeoDataFrame) with potentially invalid geometries
#' @return sf object with repaired geometries (the original object if no repair
#'   was needed, or if repair failed)
#'
#' @details
#' **Why this matters:**
#' Pivot GeoJSON files sometimes contain degenerate vertices or self-intersecting
#' rings from manual editing or GIS data sources. These cause errors when using
#' S2 geometry (strict validation) during cropping operations.
#'
#' **Repair strategy (priority order):**
#' 1. st_make_valid() on the whole object - GEOS-based repair (most reliable)
#' 2. Fallback: row-by-row st_make_valid() if the feature count changed
#' 3. Last resort: keep the original object (logged at WARNING) if repair fails
#'
#' @examples
#' \dontrun{
#' fields <- st_read("pivot.geojson")
#' fields_fixed <- repair_geojson_geometries(fields)
#' cat(paste("Fixed geometries: before=",
#'           nrow(fields[!st_is_valid(fields), ]),
#'           ", after=",
#'           nrow(fields_fixed[!st_is_valid(fields_fixed), ])))
#' }
#'
repair_geojson_geometries <- function(sf_object) {
  if (!inherits(sf_object, "sf")) {
    stop("Input must be an sf (Simple Features) object")
  }

  # Count invalid geometries BEFORE repair
  invalid_before <- sum(!sf::st_is_valid(sf_object), na.rm = TRUE)

  if (invalid_before == 0) {
    safe_log("All geometries already valid - no repair needed", "INFO")
    return(sf_object)
  }

  safe_log(paste("Found", invalid_before, "invalid geometries - attempting repair"), "WARNING")

  # STRATEGY: Apply st_make_valid() to the entire sf object (most reliable for
  # GEOS). This handles degenerate vertices, self-intersections, and invalid
  # rings while preserving all features and attributes.
  repaired <- tryCatch({
    repaired_geom <- sf::st_make_valid(sf_object)

    # st_make_valid() can split or drop features; if the row count changed,
    # fall back to repairing row-by-row to keep the original structure.
    if (nrow(repaired_geom) != nrow(sf_object)) {
      warning("st_make_valid() changed number of features - attempting row-wise repair")

      repaired_geom <- sf_object
      for (i in seq_len(nrow(sf_object))) {
        tryCatch({
          if (!sf::st_is_valid(sf_object[i, ])) {
            repaired_geom[i, ] <- sf::st_make_valid(sf_object[i, ])
          }
        }, error = function(e) {
          safe_log(paste("Could not repair row", i, "-", e$message), "WARNING")
        })
      }
    }

    safe_log("✓ st_make_valid() successfully repaired geometries", "INFO")
    repaired_geom
  }, error = function(e) {
    safe_log(paste("st_make_valid() failed:", e$message), "WARNING")
    NULL
  })

  # If repair failed entirely, keep the original object
  if (is.null(repaired)) {
    safe_log(paste("Could not repair", invalid_before, "invalid geometries - keeping original"),
             "WARNING")
    return(sf_object)
  }

  # Count invalid geometries AFTER repair and report the outcome
  invalid_after <- sum(!sf::st_is_valid(repaired), na.rm = TRUE)
  safe_log(paste("Repair complete: before =", invalid_before, ", after =", invalid_after), "INFO")

  return(repaired)
}
# return(repaired)
# }
# ==============================================================================
# END 00_COMMON_UTILS.R
# # END 00_COMMON_UTILS.R
# ==============================================================================

View file

@ -51,28 +51,49 @@
# ============================================================================
# Spatial data handling
suppressPackageStartupMessages({
library(terra) # For raster operations (reading/writing GeoTIFFs, cropping to field boundaries)
library(sf) # For spatial operations (reading field boundaries GeoJSON, masking)
library(here) # For relative path resolution
})
# ==============================================================================
# LOAD CENTRALIZED PARAMETERS & PATHS
# MAIN PROCESSING FUNCTION
# ==============================================================================
source(here::here("r_app", "parameters_project.R"))
source(here::here("r_app", "00_common_utils.R"))
source(here::here("r_app", "10_create_per_field_tiffs_utils.R"))
# Get project parameter from command line
args <- commandArgs(trailingOnly = TRUE)
if (length(args) == 0) {
PROJECT <- "angata"
} else {
PROJECT <- args[1]
main <- function() {
# STEP 1: Set working directory to project root (smartcane/)
# This ensures all relative paths resolve correctly
if (basename(getwd()) == "r_app") {
setwd("..")
}
# Load centralized path structure (creates all directories automatically)
paths <- setup_project_directories(PROJECT)
# STEP 2: SOURCE ALL UTILITY SCRIPTS (before any operations)
# Load parameters_project.R (provides safe_log, setup_project_directories, etc.)
tryCatch({
source("r_app/parameters_project.R")
}, error = function(e) {
cat(sprintf("Error loading parameters_project.R: %s\n", e$message))
stop(e)
})
safe_log(paste("Project:", PROJECT))
# Load Script 10-specific utilities
tryCatch({
source("r_app/10_create_per_field_tiffs_utils.R")
}, error = function(e) {
cat(sprintf("Error loading 10_create_per_field_tiffs_utils.R: %s\n", e$message))
stop(e)
})
# STEP 3: Parse command-line arguments
args <- commandArgs(trailingOnly = TRUE)
project_dir <- if (length(args) == 0) "angata" else args[1]
# STEP 4: Now all utilities are loaded, proceed with script logic
# Load centralized path structure (creates all directories automatically)
paths <- setup_project_directories(project_dir)
safe_log(paste("Project:", project_dir))
safe_log(paste("Base path:", paths$laravel_storage_dir))
safe_log(paste("Data dir:", paths$data_dir))
@ -98,3 +119,11 @@ safe_log(paste("Processing: created =", process_result$total_created,
", errors =", process_result$total_errors), "INFO")
safe_log("Script 10 complete", "INFO")
safe_log("========================================\n", "INFO")
quit(status = 0)
}
# Execute main if called from command line
if (sys.nframe() == 0) {
main()
}

View file

@ -25,52 +25,52 @@ suppressPackageStartupMessages({
library(here)
})
# =============================================================================
# Load utility functions from 20_ci_extraction_utils.R
# =============================================================================
source("r_app/20_ci_extraction_utils.R")
# =============================================================================
# Main Processing
# =============================================================================
main <- function() {
# IMPORTANT: Set working directory to project root (smartcane/)
# This ensures here() functions resolve relative to /smartcane, not /smartcane/r_app
# STEP 1: Set working directory to project root (smartcane/)
# This ensures all relative paths resolve correctly
if (basename(getwd()) == "r_app") {
setwd("..")
}
# Parse command-line arguments
# STEP 2: SOURCE ALL UTILITY SCRIPTS (before any operations)
# Parse command-line arguments FIRST
args <- commandArgs(trailingOnly = TRUE)
project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
end_date <- if (length(args) >= 2 && args[2] != "") as.Date(args[2]) else Sys.Date()
offset <- if (length(args) >= 3 && !is.na(as.numeric(args[3]))) as.numeric(args[3]) else 7
# IMPORTANT: Make project_dir available globally for parameters_project.R
# Make project_dir available globally for parameters_project.R
assign("project_dir", project_dir, envir = .GlobalEnv)
# Load parameters_project.R (provides safe_log, date_list, setup_project_directories, etc.)
tryCatch({
source("r_app/parameters_project.R")
}, error = function(e) {
cat(sprintf("Error loading parameters_project.R: %s\n", e$message))
stop(e)
})
# Load CI extraction utilities
tryCatch({
source("r_app/20_ci_extraction_utils.R")
}, error = function(e) {
cat(sprintf("Error loading 20_ci_extraction_utils.R: %s\n", e$message))
stop(e)
})
# STEP 3: Now all utilities are loaded, proceed with script logic
safe_log(sprintf("=== Script 20: CI Extraction Per-Field ==="))
safe_log(sprintf("Project: %s | End Date: %s | Offset: %d days",
project_dir, format(end_date, "%Y-%m-%d"), offset))
# 1. Load parameters (includes field boundaries setup)
# ---------------------------------------------------
tryCatch({
source("r_app/parameters_project.R")
safe_log("Loaded parameters_project.R")
}, error = function(e) {
safe_log(sprintf("Error loading parameters: %s", e$message), "ERROR")
stop(e)
})
# 2. Set up directory paths from parameters FIRST (before using setup$...)
# -----------------------------------------------------------------------
# Set up directory paths from parameters
setup <- setup_project_directories(project_dir)
# 3. Load field boundaries directly from field_boundaries_path in setup
# ------------------------------------------------------------------
# Load field boundaries directly from field_boundaries_path in setup
tryCatch({
field_boundaries_sf <- st_read(setup$field_boundaries_path, quiet = TRUE)
safe_log(sprintf("Loaded %d field/sub_field polygons from %s", nrow(field_boundaries_sf), setup$field_boundaries_path))
@ -79,17 +79,16 @@ main <- function() {
stop(e)
})
# 4. Get list of dates to process
# Get list of dates to process
dates <- date_list(end_date, offset)
safe_log(sprintf("Processing dates: %s to %s (%d dates)",
dates$start_date, dates$end_date, length(dates$days_filter)))
safe_log(sprintf("Input directory: %s", setup$field_tiles_dir))
safe_log(sprintf("Output TIF directory: %s", setup$field_tiles_ci_dir))
safe_log(sprintf("Output RDS directory: %s", setup$daily_vals_per_field_dir))
safe_log(sprintf("Output RDS directory: %s", setup$daily_ci_vals_dir))
# 5. Process each field
# ----------------------
# Process each field
if (!dir.exists(setup$field_tiles_dir)) {
safe_log(sprintf("Field tiles directory not found: %s", setup$field_tiles_dir), "ERROR")
stop("Script 10 output not found. Run Script 10 first.")
@ -105,14 +104,21 @@ main <- function() {
safe_log(sprintf("Found %d fields to process", length(fields)))
# DEBUG: Check what paths are available in setup
safe_log(sprintf("[DEBUG] Available setup paths: %s", paste(names(setup), collapse=", ")))
safe_log(sprintf("[DEBUG] field_tiles_ci_dir: %s", setup$field_tiles_ci_dir))
safe_log(sprintf("[DEBUG] daily_ci_vals_dir: %s", setup$daily_ci_vals_dir))
# Use daily_ci_vals_dir for per-field daily CI output
# Pre-create output subdirectories for all fields
for (field in fields) {
dir.create(file.path(field_tiles_ci_dir, field), showWarnings = FALSE, recursive = TRUE)
dir.create(file.path(setup$daily_vals_per_field_dir, field), showWarnings = FALSE, recursive = TRUE)
dir.create(file.path(setup$field_tiles_ci_dir, field), showWarnings = FALSE, recursive = TRUE)
if (!is.null(setup$daily_ci_vals_dir)) {
dir.create(file.path(setup$daily_ci_vals_dir, field), showWarnings = FALSE, recursive = TRUE)
}
}
# 6. Process each DATE (OPTIMIZED: load TIFF once, process all fields)
# -----------------------------------------------------------------------
# Process each DATE (OPTIMIZED: load TIFF once, process all fields)
total_success <- 0
total_error <- 0
ci_results_by_date <- list()
@ -124,7 +130,7 @@ main <- function() {
# Find the actual TIFF path (it's in the first field that has it)
input_tif_full <- NULL
for (field in fields) {
candidate_path <- file.path(field_tiles_dir, field, sprintf("%s.tif", date_str))
candidate_path <- file.path(setup$field_tiles_dir, field, sprintf("%s.tif", date_str))
if (file.exists(candidate_path)) {
input_tif_full <- candidate_path
break
@ -142,8 +148,8 @@ main <- function() {
# Now process all fields from this single TIFF
for (field in fields) {
field_ci_path <- file.path(field_tiles_ci_dir, field)
field_daily_vals_path <- file.path(setup$daily_vals_per_field_dir, field)
field_ci_path <- file.path(setup$field_tiles_ci_dir, field)
field_daily_vals_path <- file.path(setup$daily_ci_vals_dir, field)
output_tif <- file.path(field_ci_path, sprintf("%s.tif", date_str))
output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str))
@ -200,8 +206,7 @@ main <- function() {
})
}
# 7. Summary
# ----------
# Summary
safe_log(sprintf("\n=== Processing Complete ==="))
safe_log(sprintf("Successfully processed: %d", total_success))
safe_log(sprintf("Errors encountered: %d", total_error))
@ -209,7 +214,7 @@ main <- function() {
if (total_success > 0) {
safe_log("Output files created in:")
safe_log(sprintf(" TIFFs: %s", setup$field_tiles_ci_dir))
safe_log(sprintf(" RDS: %s", setup$daily_vals_per_field_dir))
safe_log(sprintf(" RDS: %s", setup$daily_ci_vals_dir))
}
}

View file

@ -1027,30 +1027,16 @@ calc_ci_from_raster <- function(raster_obj) {
#' @return Data frame with field, sub_field, and CI statistics; NULL if field not found
#'
extract_ci_by_subfield <- function(ci_raster, field_boundaries_sf, field_name) {
# Filter to current field
field_poly <- field_boundaries_sf %>%
filter(field == field_name)
# NOTE: Per-field TIFFs are already cropped to field boundaries by Script 10
# No need to mask again - just extract all valid pixels from the raster
if (nrow(field_poly) == 0) {
safe_log(sprintf("Field '%s' not found in boundaries", field_name), "WARNING")
return(NULL)
}
# Extract CI values by sub_field
results <- list()
# Group by sub_field within this field
for (sub_field in unique(field_poly$sub_field)) {
sub_poly <- field_poly %>% filter(sub_field == sub_field)
ci_sub <- terra::mask(ci_raster, sub_poly)
# Get statistics
ci_values <- terra::values(ci_sub, na.rm = TRUE)
# Extract ALL CI values (no masking needed for pre-cropped per-field TIFFs)
ci_values <- terra::values(ci_raster, na.rm = TRUE)
if (length(ci_values) > 0) {
result_row <- data.frame(
field = field_name,
sub_field = sub_field,
sub_field = field_name, # Use field_name as sub_field since TIFF is already field-specific
ci_mean = mean(ci_values, na.rm = TRUE),
ci_median = median(ci_values, na.rm = TRUE),
ci_sd = sd(ci_values, na.rm = TRUE),
@ -1062,7 +1048,7 @@ extract_ci_by_subfield <- function(ci_raster, field_boundaries_sf, field_name) {
} else {
result_row <- data.frame(
field = field_name,
sub_field = sub_field,
sub_field = field_name,
ci_mean = NA_real_,
ci_median = NA_real_,
ci_sd = NA_real_,
@ -1072,10 +1058,8 @@ extract_ci_by_subfield <- function(ci_raster, field_boundaries_sf, field_name) {
stringsAsFactors = FALSE
)
}
results[[length(results) + 1]] <- result_row
}
return(dplyr::bind_rows(results))
return(result_row)
}
#' Extract RDS from existing CI TIFF (Migration/Regeneration Mode)

View file

@ -31,7 +31,7 @@
#
# DEPENDENCIES:
# - Packages: tidyverse, lubridate, zoo
# - Utils files: parameters_project.R, 00_common_utils.R
# - Utils files: parameters_project.R
# - Input data: combined_CI_data.rds from Script 20
# - Data directories: extracted_ci/cumulative_vals/
#

View file

@ -272,11 +272,19 @@ calculate_growth_metrics <- function(interpolated_data) {
#' @return Path to the saved file
#'
save_growth_model <- function(data, output_dir, file_name = "All_pivots_Cumulative_CI_quadrant_year_v2.rds") {
# Validate input
if (is.null(output_dir) || !is.character(output_dir) || length(output_dir) == 0) {
stop("output_dir must be a non-empty character string")
}
# Normalize path separators for Windows compatibility
output_dir <- normalizePath(output_dir, winslash = "/", mustWork = FALSE)
# Create output directory if it doesn't exist
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
# Create full file path
file_path <- here::here(output_dir, file_name)
# Create full file path using file.path (more robust than here::here for absolute paths)
file_path <- file.path(output_dir, file_name)
# Save the data
saveRDS(data, file_path)

View file

@ -59,64 +59,74 @@ suppressPackageStartupMessages({
# Data manipulation
library(tidyverse) # For dplyr (data wrangling, grouping, mutating)
library(lubridate) # For date/time operations (date arithmetic, ISO week extraction)
library(readxl) # For reading harvest.xlsx (harvest dates for growth model phases)
})
# =============================================================================
# Load configuration and utility functions
# =============================================================================
source(here::here("r_app", "parameters_project.R"))
source(here::here("r_app", "00_common_utils.R"))
source(here::here("r_app", "30_growth_model_utils.R"))
# =============================================================================
# Main Processing
# MAIN PROCESSING FUNCTION
# =============================================================================
main <- function() {
# IMPORTANT: Set working directory to project root (smartcane/)
# This ensures here() functions resolve relative to /smartcane, not /smartcane/r_app
# STEP 1: Set working directory to project root (smartcane/)
# This ensures all relative paths resolve correctly
if (basename(getwd()) == "r_app") {
setwd("..")
}
# Parse command-line arguments
# STEP 2: SOURCE ALL UTILITY SCRIPTS (before any operations)
# Parse command-line arguments FIRST
args <- commandArgs(trailingOnly = TRUE)
project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
# IMPORTANT: Make project_dir available globally for parameters_project.R
# Make project_dir available globally for parameters_project.R
assign("project_dir", project_dir, envir = .GlobalEnv)
safe_log(sprintf("=== Script 30: Growth Model Interpolation ==="))
safe_log(sprintf("Project: %s", project_dir))
# 1. Load parameters (includes field boundaries setup)
# ---------------------------------------------------
# Load parameters_project.R (provides setup_project_directories, etc.)
tryCatch({
source("r_app/parameters_project.R")
safe_log("Loaded parameters_project.R")
}, error = function(e) {
safe_log(sprintf("Error loading parameters: %s", e$message), "ERROR")
cat(sprintf("Error loading parameters_project.R: %s\n", e$message))
stop(e)
})
# 2. Set up directory paths from parameters
# -----------------------------------------------
# Load growth model utilities
tryCatch({
source("r_app/30_growth_model_utils.R")
}, error = function(e) {
cat(sprintf("Error loading 30_growth_model_utils.R: %s\n", e$message))
stop(e)
})
# STEP 3: Now all utilities are loaded, proceed with script logic
safe_log(sprintf("=== Script 30: Growth Model Interpolation ==="))
safe_log(sprintf("Project: %s", project_dir))
# Set up directory paths from parameters
setup <- setup_project_directories(project_dir)
# For per-field architecture: read from daily_vals_per_field_dir (Script 20 per-field output)
daily_vals_dir <- setup$daily_vals_per_field_dir
# For per-field architecture: read from daily_ci_vals_dir (Script 20 per-field output)
daily_vals_dir <- setup$daily_ci_vals_dir
safe_log(sprintf("Using per-field daily CI directory: %s", daily_vals_dir))
safe_log("Starting CI growth model interpolation")
# 3. Load and process the data
# ----------------------------
# Load and process the data
tryCatch({
# Load the combined CI data (created by Script 20 per-field)
# Script 20 per-field outputs: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds
CI_data <- load_combined_ci_data(daily_vals_dir)
# Load harvesting data from harvest.xlsx for growth model phase assignment
# Use the centralized load_harvesting_data() function which handles NA season_end values
# by setting them to Sys.Date() (field is still in current growing season)
data_dir <- setup$data_dir
harvesting_data <- tryCatch({
load_harvesting_data(data_dir)
}, error = function(e) {
safe_log(paste("Error loading harvest data:", e$message), "WARNING")
NULL
})
# Validate harvesting data
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
safe_log("No harvesting data available", "ERROR")
@ -146,7 +156,7 @@ main <- function() {
# Save the processed data to cumulative_vals directory
save_growth_model(
CI_all_with_metrics,
setup$cumulative_CI_vals_dir,
setup$cumulative_ci_vals_dir,
"All_pivots_Cumulative_CI_quadrant_year_v2.rds"
)
} else {

View file

@ -33,7 +33,7 @@
#
# DEPENDENCIES:
# - Packages: terra, sf, tidyverse, lubridate
# - Utils files: parameters_project.R, 00_common_utils.R, 40_mosaic_creation_per_field_utils.R
# - Utils files: parameters_project.R, 40_mosaic_creation_per_field_utils.R
# - Input data: Daily per-field CI TIFFs from Script 20
# - Data directories: field_tiles_CI/, weekly_mosaic/
#

View file

@ -153,8 +153,9 @@ suppressPackageStartupMessages({
})
# ============================================================================
# LOAD UTILITY FUNCTIONS FROM SEPARATED MODULES
# LOAD CONFIGURATION - MUST BE DONE FIRST
# ============================================================================
# Parameters must be loaded early to determine client type and paths
tryCatch({
source(here("r_app", "parameters_project.R"))
@ -162,14 +163,19 @@ tryCatch({
stop("Error loading parameters_project.R: ", e$message)
})
tryCatch({
source(here("r_app", "00_common_utils.R"))
}, error = function(e) {
stop("Error loading 00_common_utils.R: ", e$message)
})
# Get client configuration from global project setup
# NOTE: This cannot be done until parameters_project.R is sourced
# We determine client_type from the current project_dir (if running in main() context)
# For now, set a placeholder that will be overridden in main()
if (exists("project_dir", envir = .GlobalEnv)) {
temp_client_type <- get_client_type(get("project_dir", envir = .GlobalEnv))
} else {
temp_client_type <- "cane_supply" # Safe default
}
temp_client_config <- get_client_kpi_config(temp_client_type)
# ============================================================================
# LOAD CLIENT-AWARE UTILITIES
# LOAD UTILITY FUNCTIONS FROM SEPARATED MODULES
# ============================================================================
# All clients use the common utilities (shared statistical functions, reporting)
tryCatch({
@ -178,25 +184,20 @@ tryCatch({
stop("Error loading 80_utils_common.R: ", e$message)
})
# Client-specific utilities based on client_config$script_90_compatible
# script_90_compatible = TRUE -> AURA workflow (6 KPIs)
# script_90_compatible = FALSE -> CANE_SUPPLY workflow (weekly stats + basic reporting)
if (client_config$script_90_compatible) {
message("Loading AURA client utilities (80_utils_agronomic_support.R)...")
# Load both client-specific utilities (functions will be available for both workflows)
# This avoids needing to determine client type at startup time
message("Loading client-specific utilities (80_utils_agronomic_support.R and 80_utils_cane_supply.R)...")
tryCatch({
source(here("r_app", "80_utils_agronomic_support.R"))
}, error = function(e) {
stop("Error loading 80_utils_agronomic_support.R: ", e$message)
})
} else {
message("Loading CANE_SUPPLY client utilities (80_utils_cane_supply.R)...")
tryCatch({
source(here("r_app", "80_utils_cane_supply.R"))
}, error = function(e) {
stop("Error loading 80_utils_cane_supply.R: ", e$message)
})
}
# ============================================================================
# PHASE AND STATUS TRIGGER DEFINITIONS
@ -311,6 +312,9 @@ main <- function() {
client_type <- get_client_type(project_dir)
client_config <- get_client_kpi_config(client_type)
# Assign to global environment so utilities and downstream scripts can access it
assign("client_config", client_config, envir = .GlobalEnv)
message("Client Type:", client_type)
message("KPI Calculations:", paste(client_config$kpi_calculations, collapse = ", "))
message("Output Formats:", paste(client_config$outputs, collapse = ", "))
@ -335,38 +339,49 @@ main <- function() {
message("AURA WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)")
message(strrep("=", 70))
# Load 80_kpi_utils.R with all 6 KPI functions
# (Note: 80_kpi_utils.R includes all necessary helper functions from crop_messaging_utils.R)
tryCatch({
source(here("r_app", "80_kpi_utils.R"))
}, error = function(e) {
stop("Error loading 80_kpi_utils.R: ", e$message)
})
# Prepare inputs for KPI calculation (already created by setup_project_directories)
reports_dir_kpi <- setup$kpi_reports_dir
cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir
# Load field boundaries and harvesting data (already loaded by parameters_project.R)
if (!exists("field_boundaries_sf")) {
stop("field_boundaries_sf not loaded. Check parameters_project.R initialization.")
# Load field boundaries for AURA workflow (use data_dir from setup)
message("\nLoading field boundaries for AURA KPI calculation...")
tryCatch({
boundaries_result <- load_field_boundaries(setup$data_dir)
if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
field_boundaries_sf <- boundaries_result$field_boundaries_sf
} else {
field_boundaries_sf <- boundaries_result
}
if (nrow(field_boundaries_sf) == 0) {
stop("No fields loaded from boundaries")
}
message(paste(" ✓ Loaded", nrow(field_boundaries_sf), "fields"))
}, error = function(e) {
stop("ERROR loading field boundaries: ", e$message)
})
# Load harvesting data
if (!exists("harvesting_data")) {
warning("harvesting_data not loaded. TCH KPI will use placeholder values.")
harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric())
}
# Calculate all 6 KPIs
# Extract current week/year from end_date
current_week <- as.numeric(format(end_date, "%V"))
current_year <- as.numeric(format(end_date, "%G"))
# Call with correct signature
kpi_results <- calculate_all_kpis(
report_date = end_date,
output_dir = reports_dir_kpi,
field_boundaries_sf = field_boundaries_sf,
current_week = current_week,
current_year = current_year,
current_mosaic_dir = setup$weekly_mosaic_dir,
harvesting_data = harvesting_data,
cumulative_CI_vals_dir = cumulative_CI_vals_dir,
weekly_CI_mosaic = weekly_mosaic,
reports_dir = reports_dir_kpi,
project_dir = project_dir
ci_rds_path = cumulative_CI_vals_dir,
output_dir = reports_dir_kpi
)
cat("\n=== AURA KPI CALCULATION COMPLETE ===\n")

View file

@ -109,7 +109,12 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_
morans_i <- NA_real_
if (!is.null(ci_band)) {
morans_i <- calculate_spatial_autocorrelation(ci_pixels, field_boundaries_sf[field_idx, ])
morans_result <- calculate_spatial_autocorrelation(ci_pixels, field_boundaries_sf[field_idx, ])
if (is.list(morans_result)) {
morans_i <- morans_result$morans_i
} else {
morans_i <- morans_result
}
}
# Normalize CV (0-1 scale, invert so lower CV = higher score)
@ -332,7 +337,9 @@ calculate_weed_presence_kpi <- function(ci_pixels_by_field) {
result$low_ci_percent[field_idx] <- round(low_ci_pct, 2)
result$fragmentation_index[field_idx] <- round(fragmentation, 3)
if (fragmentation > 0.15) {
if (is.na(fragmentation)) {
result$weed_pressure_risk[field_idx] <- "No data"
} else if (fragmentation > 0.15) {
result$weed_pressure_risk[field_idx] <- "High"
} else if (fragmentation > 0.08) {
result$weed_pressure_risk[field_idx] <- "Medium"
@ -354,6 +361,20 @@ calculate_weed_presence_kpi <- function(ci_pixels_by_field) {
#'
#' @return Data frame with gap-filling quality metrics
calculate_gap_filling_kpi <- function(ci_rds_path) {
# If ci_rds_path is NULL or not a valid path, return placeholder
if (is.null(ci_rds_path) || !is.character(ci_rds_path) || length(ci_rds_path) == 0) {
return(NULL)
}
# If ci_rds_path is a directory, find the cumulative CI file
if (dir.exists(ci_rds_path)) {
ci_files <- list.files(ci_rds_path, pattern = "^All_pivots.*\\.rds$", full.names = TRUE)
if (length(ci_files) == 0) {
return(NULL)
}
ci_rds_path <- ci_files[1]
}
if (!file.exists(ci_rds_path)) {
return(NULL)
}
@ -425,8 +446,12 @@ create_summary_tables <- function(all_kpis) {
weed_pressure = all_kpis$weed_presence %>%
select(field_idx, fragmentation_index, weed_pressure_risk),
gap_filling = all_kpis$gap_filling %>%
gap_filling = if (!is.null(all_kpis$gap_filling)) {
all_kpis$gap_filling %>%
select(field_idx, na_percent_pre_interpolation, gap_filling_success)
} else {
NULL
}
)
return(kpi_summary)
@ -494,13 +519,13 @@ create_field_kpi_text <- function(all_kpis) {
#'
#' @return List of output file paths
export_kpi_data <- function(all_kpis, kpi_summary, output_dir, week, year) {
kpi_subdir <- file.path(output_dir, "kpis")
if (!dir.exists(kpi_subdir)) {
dir.create(kpi_subdir, recursive = TRUE)
# Ensure output directory exists
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
# Export all KPI tables to a single Excel file
excel_file <- paste0(kpi_subdir, "/AURA_KPI_week_", sprintf("%02d_%d", week, year), ".xlsx")
excel_file <- paste0(output_dir, "/AURA_KPI_week_", sprintf("%02d_%d", week, year), ".xlsx")
sheets <- list(
"Uniformity" = as.data.frame(kpi_summary$uniformity),
@ -515,7 +540,7 @@ export_kpi_data <- function(all_kpis, kpi_summary, output_dir, week, year) {
message(paste("✓ AURA KPI data exported to:", excel_file))
# Also export to RDS for programmatic access
rds_file <- paste0(kpi_subdir, "/AURA_KPI_week_", sprintf("%02d_%d", week, year), ".rds")
rds_file <- paste0(output_dir, "/AURA_KPI_week_", sprintf("%02d_%d", week, year), ".rds")
saveRDS(all_kpis, rds_file)
message(paste("✓ AURA KPI RDS exported to:", rds_file))
@ -558,14 +583,14 @@ calculate_all_kpis <- function(
previous_mosaic_dir = NULL,
ci_rds_path = NULL,
harvesting_data = NULL,
output_dir = file.path(PROJECT_DIR, "output")
output_dir = NULL
) {
message("\n============ AURA KPI CALCULATION (6 KPIs) ============")
# Load current week mosaic
message("Loading current week mosaic...")
current_mosaic <- load_weekly_ci_mosaic(current_mosaic_dir, current_week, current_year)
current_mosaic <- load_weekly_ci_mosaic(current_week, current_year, current_mosaic_dir)
if (is.null(current_mosaic)) {
stop("Could not load current week mosaic")
@ -581,7 +606,7 @@ calculate_all_kpis <- function(
if (!is.null(previous_mosaic_dir)) {
target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1)
message(paste("Loading previous week mosaic (week", target_prev$week, target_prev$year, ")..."))
previous_mosaic <- load_weekly_ci_mosaic(previous_mosaic_dir, target_prev$week, target_prev$year)
previous_mosaic <- load_weekly_ci_mosaic(target_prev$week, target_prev$year, previous_mosaic_dir)
if (!is.null(previous_mosaic)) {
previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf)

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -30,8 +30,8 @@
# ==============================================================================
# *** EDIT THESE VARIABLES ***
end_date <- Sys.Date() # or specify: as.Date("2026-01-27") , Sys.Date()
project_dir <- "angata" # project name: "esa", "aura", "angata", "chemba"
end_date <- as.Date("2026-01-27") # or specify: as.Date("2026-01-27") , Sys.Date()
project_dir <- "aura" # project name: "esa", "aura", "angata", "chemba"
data_source <- "merged_tif" # Standard data source directory
force_rerun <- FALSE # Set to TRUE to force all scripts to run even if outputs exist
# ***************************
@ -51,8 +51,8 @@ cat(sprintf("\nProject: %s → Client Type: %s\n", project_dir, client_type))
# ==============================================================================
# Script 80 (KPIs) needs N weeks of historical data for trend analysis and reporting
# We calculate this automatically based on client type
reporting_weeks_needed <- 1 # Default: KPIs need current week of data for trends
offset <- reporting_weeks_needed * 7 # Convert weeks to days (minimum 7 days for 1 week)
reporting_weeks_needed <- 8 # CRITICAL: Need 8 weeks for 8-week trend analysis (Script 80 requirement)
offset <- reporting_weeks_needed * 7 # Convert weeks to days (8 weeks = 56 days)
cat(sprintf("\n[INFO] Reporting window: %d weeks (%d days of data)\n", reporting_weeks_needed, offset))
wwy_current <- get_iso_week_year(end_date)
@ -176,6 +176,7 @@ if (!dir.exists(kpi_dir)) {
}
# Display status for each week
if (nrow(kpis_needed) > 0) {
for (i in 1:nrow(kpis_needed)) {
row <- kpis_needed[i, ]
cat(sprintf(
@ -185,6 +186,9 @@ for (i in 1:nrow(kpis_needed)) {
row$file_count
))
}
} else {
cat(" (No weeks in reporting window)\n")
}
cat(sprintf(
"\nKPI Summary: %d/%d weeks exist, %d week(s) will be calculated by Script 80\n",
@ -263,8 +267,9 @@ cat(sprintf("Script 40: %d missing week(s) to create\n", nrow(missing_weeks)))
# Check Script 80 outputs (KPIs in reports/kpis/{field_level|field_analysis})
# kpi_dir already set by check_kpi_completeness() above
# Script 80 exports to .xlsx (Excel) and .rds (RDS) formats
kpi_files <- if (dir.exists(kpi_dir)) {
list.files(kpi_dir, pattern = "\\.csv$|\\.json$")
list.files(kpi_dir, pattern = "\\.xlsx$|\\.rds$")
} else {
c()
}
@ -317,6 +322,11 @@ tryCatch(
cat(sprintf("[DEBUG] Sample files: %s\n", paste(head(existing_tiff_files, 3), collapse=", ")))
}
# Find missing dates in the window
start_date <- end_date - data_generation_offset
date_seq <- seq(start_date, end_date, by = "day")
target_dates <- format(date_seq, "%Y-%m-%d")
# Get existing dates from tiles (better indicator of completion for tiled projects)
existing_tile_dates <- tiles_dates
@ -325,14 +335,11 @@ tryCatch(
# We don't download again if the file exists, regardless of whether tiles have been created yet
if (length(existing_tiff_dates) > 0) {
cat(sprintf("[DEBUG] Using TIFF dates for existence check (found %d existing files)\n", length(existing_tiff_dates)))
existing_tile_dates <- existing_tiff_dates
# IMPORTANT: Only consider existing TIFF dates that fall within our target window
# This prevents old 2025 data from masking missing 2026 data
existing_tile_dates <- existing_tiff_dates[existing_tiff_dates %in% target_dates]
}
# Find missing dates in the window
start_date <- end_date - data_generation_offset
date_seq <- seq(start_date, end_date, by = "day")
target_dates <- format(date_seq, "%Y-%m-%d")
# Only download if files don't exist yet (tiles for tiled projects, TIFFs for single-file)
missing_dates <- target_dates[!(target_dates %in% existing_tile_dates)]
@ -394,7 +401,7 @@ if (pipeline_success && !skip_10) {
# Run Script 10 via system() - NEW per-field version
# Arguments: project_dir
cmd <- sprintf(
'"%s" --vanilla r_app/10_create_per_field_tiffs.R "%s"',
'"%s" r_app/10_create_per_field_tiffs.R "%s"',
RSCRIPT_PATH,
project_dir
)
@ -424,6 +431,96 @@ if (pipeline_success && !skip_10) {
cat("\n========== SKIPPING SCRIPT 10 (per-field TIFFs already exist) ==========\n")
}
# ==============================================================================
# CHECK: Per-Field TIFFs Without CI Data
# ==============================================================================
# Script 10 creates per-field TIFFs for ALL dates found in merged_tif/, but
# Script 20 only processes dates within the offset window. This check finds
# dates that have per-field TIFFs but NO CI data yet, and extends the Script 20
# window so those dates are processed regardless of the configured offset.
#
# Reads globals:  paths, end_date, offset, skip_20 (defined earlier in script)
# Defines:        field_tiles_dir, field_tiles_ci_dir, ci_daily_dir,
#                 tiff_dates_all, ci_dates_all, dates_missing_ci,
#                 offset_for_ci (and may force skip_20 <- FALSE)
cat("\n========== CHECKING FOR PER-FIELD TIFFs WITHOUT CI DATA ==========\n")

field_tiles_dir <- paths$field_tiles_dir
field_tiles_ci_dir <- paths$field_tiles_ci_dir
ci_daily_dir <- paths$daily_ci_vals_dir

# Helper: collect unique "YYYY-MM-DD" date strings from files matching
# `pattern` in every immediate sub-directory (one per field) of `base_dir`.
#
# BUG FIX: the previous code used sub("_.*$", "", file) to strip the date
# suffix, which only works when the filename contains an underscore. Script 10
# writes plain "YYYY-MM-DD.tif" files, so TIFF-derived entries kept the ".tif"
# extension and could never match the clean dates derived from the RDS files —
# the "source of truth" comparison below was silently broken. The filename
# pattern guarantees the first 10 characters are the date, so substr() is
# always correct for both "YYYY-MM-DD.tif" and "YYYY-MM-DD_*.tif".
collect_field_dates <- function(base_dir, pattern) {
  if (!dir.exists(base_dir)) {
    return(character(0))
  }
  fields <- list.dirs(base_dir, full.names = FALSE, recursive = FALSE)
  fields <- fields[fields != ""]
  if (length(fields) == 0) {
    return(character(0))
  }
  dates <- unlist(lapply(fields, function(field) {
    files <- list.files(file.path(base_dir, field), pattern = pattern)
    # First 10 characters of each matching filename are the YYYY-MM-DD date
    substr(files, 1, 10)
  }))
  unique(dates)
}

# Get all dates that have per-field TIFFs
tiff_dates_all <- collect_field_dates(
  field_tiles_dir, "^\\d{4}-\\d{2}-\\d{2}.*\\.tif$"
)

# Get all dates that have CI data: per-field CI TIFFs ...
ci_dates_all <- collect_field_dates(
  field_tiles_ci_dir, "^\\d{4}-\\d{2}-\\d{2}.*\\.tif$"
)

# ... plus the extracted_ci daily RDS files (source of truth for Script 20)
ci_dates_all <- unique(c(
  ci_dates_all,
  collect_field_dates(ci_daily_dir, "^\\d{4}-\\d{2}-\\d{2}\\.rds$")
))

# Find dates with TIFFs but no CI data
dates_missing_ci <- setdiff(tiff_dates_all, ci_dates_all)

cat(sprintf("Total per-field TIFF dates: %d\n", length(tiff_dates_all)))
cat(sprintf("Total CI data dates: %d\n", length(ci_dates_all)))
cat(sprintf("Dates with TIFFs but NO CI: %d\n", length(dates_missing_ci)))

# If per-field TIFFs exist without CI, force Script 20 to run with an extended
# date range reaching back to the earliest missing date.
if (length(dates_missing_ci) > 0) {
  cat("\n⚠ Found per-field TIFFs without CI data - forcing Script 20 to process them\n")
  cat(sprintf(" Sample missing dates: %s\n", paste(head(dates_missing_ci, 3), collapse=", ")))

  # Extended window: from the earliest missing date up to end_date
  earliest_missing_tiff <- min(as.Date(dates_missing_ci))
  extended_offset <- as.numeric(end_date - earliest_missing_tiff)
  cat(sprintf(" Extended offset: %d days (from %s to %s)\n",
    extended_offset, format(earliest_missing_tiff, "%Y-%m-%d"), format(end_date, "%Y-%m-%d")))

  # Use extended offset for Script 20
  offset_for_ci <- extended_offset
  skip_20 <- FALSE # Force Script 20 to run
} else {
  cat("✓ All per-field TIFFs have corresponding CI data\n")
  offset_for_ci <- offset # Use normal offset
}
# ==============================================================================
# SCRIPT 20: CI EXTRACTION
# ==============================================================================
@@ -433,11 +530,11 @@ if (pipeline_success && !skip_20) {
{
# Run Script 20 via system() to pass command-line args just like from terminal
# Arguments: project_dir end_date offset
# Use FULL offset so CI extraction covers entire reporting window (not just new data)
# Use offset_for_ci which may have been extended if per-field TIFFs exist without CI
cmd <- sprintf(
'"%s" --vanilla r_app/20_ci_extraction_per_field.R "%s" "%s" %d',
'"%s" r_app/20_ci_extraction_per_field.R "%s" "%s" %d',
RSCRIPT_PATH,
project_dir, format(end_date, "%Y-%m-%d"), offset
project_dir, format(end_date, "%Y-%m-%d"), offset_for_ci
)
result <- system(cmd)
@@ -507,7 +604,7 @@ if (pipeline_success && !skip_30) {
# Script 30 expects: project_dir only
# Per-field version reads CI data from Script 20 per-field output location
cmd <- sprintf(
'"%s" --vanilla r_app/30_interpolate_growth_model.R "%s"',
'"%s" r_app/30_interpolate_growth_model.R "%s"',
RSCRIPT_PATH,
project_dir
)
@@ -517,11 +614,11 @@ if (pipeline_success && !skip_30) {
stop("Script 30 exited with error code:", result)
}
# Verify interpolated output
growth_dir <- paths$growth_model_interpolated_dir
if (dir.exists(growth_dir)) {
files <- list.files(growth_dir, pattern = "\\.rds$|\\.csv$")
cat(sprintf("✓ Script 30 completed - generated %d growth model files\n", length(files)))
# Verify interpolated output - Script 30 saves to cumulative_ci_vals_dir
cumulative_ci_vals_dir <- paths$cumulative_ci_vals_dir
if (dir.exists(cumulative_ci_vals_dir)) {
files <- list.files(cumulative_ci_vals_dir, pattern = "\\.rds$")
cat(sprintf("✓ Script 30 completed - generated %d interpolated RDS file(s)\n", length(files)))
} else {
cat("✓ Script 30 completed\n")
}
@@ -549,12 +646,9 @@ if (pipeline_success && !skip_31) {
if (result == 0) {
# Verify harvest output - check for THIS WEEK's specific file
wwy_current_31 <- get_iso_week_year(end_date)
expected_file <- file.path(
"laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats",
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, wwy_current_31$week, wwy_current_31$year)
)
harvest_exists <- check_harvest_output_exists(project_dir, wwy_current_31$week, wwy_current_31$year)
if (file.exists(expected_file)) {
if (harvest_exists) {
cat(sprintf("✓ Script 31 completed - generated harvest imminent file for week %02d\n", wwy_current_31$week))
} else {
cat("✓ Script 31 completed (check if harvest.xlsx is available)\n")
@@ -600,7 +694,7 @@ if (pipeline_success && !skip_40) {
# The end_date is the last day of the week, and offset=7 covers the full 7-day week
# Arguments: end_date offset project_dir
cmd <- sprintf(
'"%s" --vanilla r_app/40_mosaic_creation_per_field.R "%s" 7 "%s"',
'"%s" r_app/40_mosaic_creation_per_field.R "%s" 7 "%s"',
RSCRIPT_PATH,
format(week_end_date, "%Y-%m-%d"), project_dir
)
@@ -610,24 +704,9 @@ if (pipeline_success && !skip_40) {
stop("Script 40 exited with error code:", result)
}
# Verify mosaic was created for this specific week
mosaic_created <- FALSE
if (mosaic_mode == "tiled") {
mosaic_dir <- get_mosaic_dir(project_dir, mosaic_mode = "tiled")
if (dir.exists(mosaic_dir)) {
week_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year_num)
mosaic_files <- list.files(mosaic_dir, pattern = week_pattern)
mosaic_created <- length(mosaic_files) > 0
}
} else {
mosaic_dir <- paths$weekly_mosaic_dir
if (dir.exists(mosaic_dir)) {
week_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year_num)
# NEW: Support per-field architecture - search recursively for mosaics in field subdirectories
mosaic_files <- list.files(mosaic_dir, pattern = week_pattern, recursive = TRUE, full.names = FALSE)
mosaic_created <- length(mosaic_files) > 0
}
}
# Verify mosaic was created for this specific week (centralized helper function)
mosaic_check <- check_mosaic_exists(project_dir, week_num, year_num, mosaic_mode)
mosaic_created <- mosaic_check$created
if (mosaic_created) {
cat(sprintf("✓ Week %02d/%d mosaic created successfully\n\n", week_num, year_num))
@@ -682,7 +761,7 @@ if (pipeline_success && !skip_80) {
# Run Script 80 for this specific week with offset=7 (one week only)
# This ensures Script 80 calculates KPIs for THIS week with proper trend data
cmd <- sprintf(
'"%s" --vanilla r_app/80_calculate_kpis.R "%s" "%s" %d',
'"%s" r_app/80_calculate_kpis.R "%s" "%s" %d',
RSCRIPT_PATH,
format(calc_date, "%Y-%m-%d"), project_dir, 7
) # offset=7 for single week
@@ -692,7 +771,7 @@ if (pipeline_success && !skip_80) {
week_row$week, week_row$year, format(calc_date, "%Y-%m-%d")
))
result <- system(cmd, ignore.stdout = TRUE, ignore.stderr = TRUE)
result <- system(cmd, ignore.stdout = FALSE, ignore.stderr = FALSE)
if (result == 0) {
cat(sprintf(" ✓ KPIs calculated for week %02d/%d\n", week_row$week, week_row$year))
@@ -706,7 +785,7 @@ if (pipeline_success && !skip_80) {
# Verify total KPI output (kpi_dir defined by check_kpi_completeness() earlier)
if (dir.exists(kpi_dir)) {
files <- list.files(kpi_dir, pattern = "\\.csv$|\\.json$")
files <- list.files(kpi_dir, pattern = "\\.xlsx$|\\.rds$")
# Extract subdir name from kpi_dir path for display
subdir_name <- basename(kpi_dir)
cat(sprintf("\n✓ Script 80 loop completed - total %d KPI files in %s/\n", length(files), subdir_name))
@@ -739,12 +818,15 @@ if (dir.exists(kpi_dir)) {
week_num <- as.numeric(format(check_date, "%V"))
year_num <- as.numeric(format(check_date, "%G"))
# Check for any KPI file from that week
week_pattern <- sprintf("week%02d_%d", week_num, year_num)
# Check for any KPI file from that week (flexible pattern to match all formats)
# Matches: week_05_2026, AURA_KPI_week_05_2026, etc.
week_pattern <- sprintf("_week_%02d_%d|week_%02d_%d", week_num, year_num, week_num, year_num)
# NEW: Support per-field architecture - search recursively for KPI files in field subdirectories
kpi_files_this_week <- list.files(kpi_dir, pattern = week_pattern, recursive = TRUE, full.names = FALSE)
if (length(kpi_files_this_week) == 0) {
if (length(kpi_files_this_week) > 0) {
cat(sprintf(" Week %02d/%d: ✓ KPIs found (%d files)\n", week_num, year_num, length(kpi_files_this_week)))
} else {
kpis_complete <- FALSE
cat(sprintf(" Week %02d/%d: ✗ KPIs not found\n", week_num, year_num))
}
@@ -752,9 +834,9 @@ if (dir.exists(kpi_dir)) {
}
if (kpis_complete) {
cat("✓ All KPIs available - reports can be generated\n")
cat("✓ All KPIs available - full reporting window complete\n")
} else {
cat("Some KPIs still missing - reports will be skipped\n")
cat("Note: Some KPIs may still be missing - Script 80 calculated what was available\n")
}
# ==============================================================================
@@ -763,9 +845,6 @@ if (kpis_complete) {
if (pipeline_success && run_legacy_report) {
cat("\n========== RUNNING SCRIPT 90: LEGACY WORD REPORT ==========\n")
if (!kpis_complete) {
cat("⚠ Skipping Script 90 - KPIs not available for full reporting window\n")
} else {
tryCatch(
{
# Script 90 is an RMarkdown file - compile it with rmarkdown::render()
@@ -798,9 +877,8 @@ if (pipeline_success && run_legacy_report) {
pipeline_success <<- FALSE
}
)
}
} else if (run_legacy_report) {
cat("\n========== SKIPPING SCRIPT 90 (pipeline error or KPIs incomplete) ==========\n")
cat("\n========== SKIPPING SCRIPT 90 (pipeline error) ==========\n")
}
# ==============================================================================
@@ -809,9 +887,6 @@ if (pipeline_success && run_legacy_report) {
if (pipeline_success && run_modern_report) {
cat("\n========== RUNNING SCRIPT 91: MODERN WORD REPORT ==========\n")
if (!kpis_complete) {
cat("⚠ Skipping Script 91 - KPIs not available for full reporting window\n")
} else {
tryCatch(
{
# Script 91 is an RMarkdown file - compile it with rmarkdown::render()
@@ -844,9 +919,8 @@ if (pipeline_success && run_modern_report) {
pipeline_success <<- FALSE
}
)
}
} else if (run_modern_report) {
cat("\n========== SKIPPING SCRIPT 91 (pipeline error or KPIs incomplete) ==========\n")
cat("\n========== SKIPPING SCRIPT 91 (pipeline error) ==========\n")
}
# ==============================================================================