SmartCane/r_app/00_common_utils.R

357 lines
14 KiB
R

# ==============================================================================
# # 00_COMMON_UTILS.R
# ==============================================================================
# # GENERIC UTILITY FUNCTIONS FOR SMARTCANE PIPELINE
# #
# # PURPOSE:
# # Centralized location for foundational utilities used across multiple scripts.
# # These functions have NO project knowledge, NO client-type dependencies,
# # NO domain-specific logic.
# #
# # USAGE:
# # All scripts (10, 20, 21, 30, 40, 80, 90, 91) should source this file:
# #
# # source(here::here("r_app", "parameters_project.R")) # Config first
# # source(here::here("r_app", "00_common_utils.R")) # Then common utilities
# #
# # FUNCTIONS:
# # 1. safe_log() — Generic logging with [LEVEL] prefix
# # 2. smartcane_debug() — Conditional debug logging
# # 3. smartcane_warn() — Convenience wrapper for WARN-level messages
# # 4. date_list() — Generate date sequences for processing windows
# # 5. load_field_boundaries() — Load field geometries from GeoJSON
# # 6. repair_geojson_geometries() — Fix invalid geometries in GeoJSON objects
# #
# # DATE FUNCTIONS (now in parameters_project.R):
# # - get_iso_week() — Extract ISO week number from date
# # - get_iso_year() — Extract ISO year from date
# # - get_iso_week_year() — Extract both ISO week and year as list
# # - format_week_label() — Format date as week/year label
# # - load_harvesting_data() — Load harvest schedule from Excel
# #
# ==============================================================================
# # Source parameters first to get shared date utility functions
# source("parameters_project.R")
# #' Safe Logging Function
# #'
# #' Generic logging with [LEVEL] prefix. Works standalone without any framework.
# #' Consistent with SmartCane logging standard.
# #'
# #' @param message The message to log
# #' @param level The log level (default: "INFO"). Options: "INFO", "WARNING", "ERROR", "DEBUG"
# #' @return NULL (invisible, used for side effects)
# #'
# #' @examples
# #' safe_log("Processing started", "INFO")
# #' safe_log("Check input file", "WARNING")
# #' safe_log("Failed to load data", "ERROR")
# #'
# safe_log <- function(message, level = "INFO") {
# prefix <- sprintf("[%s]", level)
# cat(sprintf("%s %s\n", prefix, message))
# }
# #' SmartCane Debug Logging (Conditional)
# #'
# #' Logs DEBUG-level messages only if verbose=TRUE or SMARTCANE_DEBUG env var is set.
# #' Useful for development/troubleshooting without cluttering normal output.
# #'
# #' @param message The message to log
# #' @param verbose Whether to output regardless of SMARTCANE_DEBUG (default: FALSE)
# #' @return NULL (invisible, used for side effects)
# #'
# #' @examples
# #' smartcane_debug("Processing field 1", verbose = FALSE) # Only if SMARTCANE_DEBUG=TRUE
# #' smartcane_debug("Detailed state info", verbose = TRUE) # Always outputs
# #'
# smartcane_debug <- function(message, verbose = FALSE) {
# if (!verbose && Sys.getenv("SMARTCANE_DEBUG") != "TRUE") {
# return(invisible(NULL))
# }
# safe_log(message, level = "DEBUG")
# }
# #' SmartCane Warning Logging
# #'
# #' Logs WARN-level messages. Convenience wrapper around safe_log().
# #'
# #' @param message The message to log
# #' @return NULL (invisible, used for side effects)
# #'
# #' @examples
# #' smartcane_warn("Check data format before proceeding")
# #'
# smartcane_warn <- function(message) {
# safe_log(message, level = "WARN")
# }
# #' Load Field Boundaries from GeoJSON
# #'
# #' Loads field polygon geometries from GeoJSON file (pivot.geojson or pivot_2.geojson).
# #' Handles CRS validation and column standardization.
# #'
# #' @param data_dir Directory containing GeoJSON file
# #' @return List with elements:
# #' - field_boundaries_sf: sf (Simple Features) object
# #' - field_boundaries: terra SpatVect object (if conversion successful, else sf fallback)
# #'
# #' @details
# #' Automatically selects pivot_2.geojson for ESA project during CI extraction,
# #' otherwise uses pivot.geojson. Handles both multi-polygon and simple polygon geometries.
# #'
# #' @examples
# #' boundaries <- load_field_boundaries("laravel_app/storage/app/angata")
# #' head(boundaries$field_boundaries_sf)
# #'
# load_field_boundaries <- function(data_dir) {
# # Choose field boundaries file based on project and script type
# # ESA project uses pivot_2.geojson ONLY for scripts 02-03 (CI extraction & growth model)
# # All other scripts (including 04-mosaic, 09-KPIs, 10-reports) use pivot.geojson
# use_pivot_2 <- exists("project_dir") && project_dir == "esa" &&
# exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03
# if (use_pivot_2) {
# field_boundaries_path <- file.path(data_dir, "pivot_2.geojson")
# } else {
# field_boundaries_path <- file.path(data_dir, "pivot.geojson")
# }
# if (!file.exists(field_boundaries_path)) {
# stop(paste("Field boundaries file not found at path:", field_boundaries_path))
# }
# tryCatch({
# # Read GeoJSON with explicit CRS handling
# field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE)
# # Remove OBJECTID column immediately if it exists
# if ("OBJECTID" %in% names(field_boundaries_sf)) {
# field_boundaries_sf <- field_boundaries_sf %>% select(-OBJECTID)
# }
# # **CRITICAL**: Repair invalid geometries (degenerate vertices, self-intersections, etc.)
# # This must happen BEFORE any spatial operations (CRS transform, intersect, crop, etc.)
# # to prevent S2 geometry validation errors during downstream processing
# field_boundaries_sf <- repair_geojson_geometries(field_boundaries_sf)
# # Validate and fix CRS if needed
# tryCatch({
# # Simply assign WGS84 if not already set (safe approach)
# if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) {
# st_crs(field_boundaries_sf) <- 4326
# warning("CRS was missing, assigned WGS84 (EPSG:4326)")
# }
# }, error = function(e) {
# tryCatch({
# st_crs(field_boundaries_sf) <<- 4326
# }, error = function(e2) {
# warning(paste("Could not set CRS:", e2$message))
# })
# })
# # Handle column names - accommodate optional sub_area column
# if ("sub_area" %in% names(field_boundaries_sf)) {
# field_boundaries_sf <- field_boundaries_sf %>%
# dplyr::select(field, sub_field, sub_area) %>%
# sf::st_set_geometry("geometry")
# } else {
# field_boundaries_sf <- field_boundaries_sf %>%
# dplyr::select(field, sub_field) %>%
# sf::st_set_geometry("geometry")
# }
# # Convert to terra vector if possible, otherwise use sf
# field_boundaries <- tryCatch({
# field_boundaries_terra <- terra::vect(field_boundaries_sf)
# crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL)
# crs_str <- if (!is.null(crs_value)) as.character(crs_value) else ""
# if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) {
# terra::crs(field_boundaries_terra) <- "EPSG:4326"
# warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)")
# }
# field_boundaries_terra
# }, error = function(e) {
# warning(paste("Terra conversion failed, using sf object instead:", e$message))
# field_boundaries_sf
# })
# return(list(
# field_boundaries_sf = field_boundaries_sf,
# field_boundaries = field_boundaries
# ))
# }, error = function(e) {
# cat("[DEBUG] Error in load_field_boundaries:\n")
# cat(" Message:", e$message, "\n")
# cat(" Call:", deparse(e$call), "\n")
# stop(paste("Error loading field boundaries:", e$message))
# })
# }
# #' Generate a Sequence of Dates for Processing
# #'
# #' Creates a date range from start_date to end_date and extracts week/year info.
# #' Used by Scripts 20, 30, 40 to determine data processing windows.
# #'
# #' @param end_date The end date for the sequence (Date object or "YYYY-MM-DD" string)
# #' @param offset Number of days to look back from end_date (e.g., 7 for one week)
# #' @return A list containing:
# #' - week: ISO week number of start_date
# #' - year: ISO year of start_date
# #' - days_filter: Vector of dates in "YYYY-MM-DD" format
# #' - start_date: Start date as Date object
# #' - end_date: End date as Date object
# #'
# #' @details
# #' IMPORTANT: Uses `lubridate::week()` and `lubridate::year()` which return
# #' ISO week numbers (week 1 starts on Monday). For ISO week-based calculations,
# #' use `lubridate::isoweek()` and `lubridate::isoyear()` instead.
# #'
# #' @examples
# #' dates <- date_list(as.Date("2025-01-15"), offset = 7)
# #' # Returns: week=2, year=2025, days_filter = c("2025-01-09", ..., "2025-01-15")
# #'
# #' dates <- date_list("2025-12-31", offset = 14)
# #' # Handles string input and returns 14 days of data
# #'
# date_list <- function(end_date, offset) {
# # Input validation
# if (!lubridate::is.Date(end_date)) {
# end_date <- as.Date(end_date)
# if (is.na(end_date)) {
# stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.")
# }
# }
# offset <- as.numeric(offset)
# if (is.na(offset) || offset < 1) {
# stop("Invalid offset provided. Expected a positive number.")
# }
# # Calculate date range
# offset <- offset - 1 # Adjust offset to include end_date
# start_date <- end_date - lubridate::days(offset)
# # Extract ISO week and year information (from END date for reporting period)
# week <- lubridate::isoweek(end_date)
# year <- lubridate::isoyear(end_date)
# # Generate sequence of dates
# days_filter <- seq(from = start_date, to = end_date, by = "day")
# days_filter <- format(days_filter, "%Y-%m-%d") # Format for consistent filtering
# # Log the date range
# safe_log(paste("Date range generated from", start_date, "to", end_date))
# return(list(
# "week" = week,
# "year" = year,
# "days_filter" = days_filter,
# "start_date" = start_date,
# "end_date" = end_date
# ))
# }
# ==============================================================================
# #' Repair Invalid GeoJSON Geometries
# #'
# #' Fixes common geometry issues in GeoJSON/sf objects:
# #' - Degenerate vertices (duplicate points)
# #' - Self-intersecting polygons
# #' - Invalid ring orientation
# #' - Empty or NULL geometries
# #'
# #' Uses sf::st_make_valid() with buffer trick as fallback.
# #'
# #' @param sf_object sf object (GeoDataFrame) with potentially invalid geometries
# #' @return sf object with repaired geometries
# #'
# #' @details
# #' **Why this matters:**
# #' Pivot GeoJSON files sometimes contain degenerate vertices or self-intersecting
# #' rings from manual editing or GIS data sources. These cause errors when using
# #' S2 geometry (strict validation) during cropping operations.
# #'
# #' **Repair strategy (priority order):**
# #' 1. Try st_make_valid() - GEOS-based repair (most reliable)
# #' 2. Fallback: st_union() + buffer(0) - Forces polygon validity
# #' 3. Last resort: Silently keep original if repair fails
# #'
# #' @examples
# #' \dontrun{
# #' fields <- st_read("pivot.geojson")
# #' fields_fixed <- repair_geojson_geometries(fields)
# #' cat(paste("Fixed geometries: before=",
# #' nrow(fields[!st_is_valid(fields), ]),
# #' ", after=",
# #' nrow(fields_fixed[!st_is_valid(fields_fixed), ])))
# #' }
# #'
# repair_geojson_geometries <- function(sf_object) {
# if (!inherits(sf_object, "sf")) {
# stop("Input must be an sf (Simple Features) object")
# }
# # Count invalid geometries BEFORE repair
# invalid_before <- sum(!sf::st_is_valid(sf_object), na.rm = TRUE)
# if (invalid_before == 0) {
# safe_log("All geometries already valid - no repair needed", "INFO")
# return(sf_object)
# }
# safe_log(paste("Found", invalid_before, "invalid geometries - attempting repair"), "WARNING")
# # STRATEGY: Apply st_make_valid() to entire sf object (most reliable for GEOS)
# # This handles degenerate vertices, self-intersections, invalid rings while preserving all features
# repaired <- tryCatch({
# # st_make_valid() on entire sf object preserves all features and attributes
# repaired_geom <- sf::st_make_valid(sf_object)
# # Verify we still have the same number of rows
# if (nrow(repaired_geom) != nrow(sf_object)) {
# warning("st_make_valid() changed number of features - attempting row-wise repair")
# # Fallback: Repair row-by-row to maintain original structure
# repaired_geom <- sf_object
# for (i in seq_len(nrow(sf_object))) {
# tryCatch({
# if (!sf::st_is_valid(sf_object[i, ])) {
# repaired_geom[i, ] <- sf::st_make_valid(sf_object[i, ])
# }
# }, error = function(e) {
# safe_log(paste("Could not repair row", i, "-", e$message), "WARNING")
# })
# }
# }
# safe_log("✓ st_make_valid() successfully repaired geometries", "INFO")
# repaired_geom
# }, error = function(e) {
# safe_log(paste("st_make_valid() failed:", e$message), "WARNING")
# NULL
# })
# # If repair failed, keep original
# if (is.null(repaired)) {
# safe_log(paste("Could not repair", invalid_before, "invalid geometries - keeping original"),
# "WARNING")
# return(sf_object)
# }
# # Count invalid geometries AFTER repair
# invalid_after <- sum(!sf::st_is_valid(repaired), na.rm = TRUE)
# safe_log(paste("Repair complete: before =", invalid_before, ", after =", invalid_after), "INFO")
# return(repaired)
# }
# ==============================================================================
# # END 00_COMMON_UTILS.R
# ==============================================================================