# ============================================================================== # # 00_COMMON_UTILS.R # ============================================================================== # # GENERIC UTILITY FUNCTIONS FOR SMARTCANE PIPELINE # # # # PURPOSE: # # Centralized location for foundational utilities used across multiple scripts. # # These functions have NO project knowledge, NO client-type dependencies, # # NO domain-specific logic. # # # # USAGE: # # All scripts (10, 20, 21, 30, 40, 80, 90, 91) should source this file: # # # # source(here::here("r_app", "parameters_project.R")) # Config first # # source(here::here("r_app", "00_common_utils.R")) # Then common utilities # # # # FUNCTIONS: # # 1. safe_log() — Generic logging with [LEVEL] prefix # # 2. smartcane_debug() — Conditional debug logging # # 3. smartcane_warn() — Convenience wrapper for WARN-level messages # # 4. date_list() — Generate date sequences for processing windows # # 5. load_field_boundaries() — Load field geometries from GeoJSON # # 6. repair_geojson_geometries() — Fix invalid geometries in GeoJSON objects # # # # DATE FUNCTIONS (now in parameters_project.R): # # - get_iso_week() — Extract ISO week number from date # # - get_iso_year() — Extract ISO year from date # # - get_iso_week_year() — Extract both ISO week and year as list # # - format_week_label() — Format date as week/year label # # - load_harvesting_data() — Load harvest schedule from Excel # # # ============================================================================== # # Source parameters first to get shared date utility functions # source("parameters_project.R") # #' Safe Logging Function # #' # #' Generic logging with [LEVEL] prefix. Works standalone without any framework. # #' Consistent with SmartCane logging standard. # #' # #' @param message The message to log # #' @param level The log level (default: "INFO"). Options: "INFO", "WARNING", "ERROR", "DEBUG" # #' @return NULL (invisible, used for side effects) # #' # #' @examples # #' safe_log("Processing started", "INFO") # #' safe_log("Check input file", "WARNING") # #' safe_log("Failed to load data", "ERROR") # #' # safe_log <- function(message, level = "INFO") { # prefix <- sprintf("[%s]", level) # cat(sprintf("%s %s\n", prefix, message)) # } # #' SmartCane Debug Logging (Conditional) # #' # #' Logs DEBUG-level messages only if verbose=TRUE or SMARTCANE_DEBUG env var is set. # #' Useful for development/troubleshooting without cluttering normal output. # #' # #' @param message The message to log # #' @param verbose Whether to output regardless of SMARTCANE_DEBUG (default: FALSE) # #' @return NULL (invisible, used for side effects) # #' # #' @examples # #' smartcane_debug("Processing field 1", verbose = FALSE) # Only if SMARTCANE_DEBUG=TRUE # #' smartcane_debug("Detailed state info", verbose = TRUE) # Always outputs # #' # smartcane_debug <- function(message, verbose = FALSE) { # if (!verbose && Sys.getenv("SMARTCANE_DEBUG") != "TRUE") { # return(invisible(NULL)) # } # safe_log(message, level = "DEBUG") # } # #' SmartCane Warning Logging # #' # #' Logs WARN-level messages. Convenience wrapper around safe_log(). # #' # #' @param message The message to log # #' @return NULL (invisible, used for side effects) # #' # #' @examples # #' smartcane_warn("Check data format before proceeding") # #' # smartcane_warn <- function(message) { # safe_log(message, level = "WARN") # } # #' Load Field Boundaries from GeoJSON # #' # #' Loads field polygon geometries from GeoJSON file (pivot.geojson or pivot_2.geojson). # #' Handles CRS validation and column standardization. # #' # #' @param data_dir Directory containing GeoJSON file # #' @return List with elements: # #' - field_boundaries_sf: sf (Simple Features) object # #' - field_boundaries: terra SpatVect object (if conversion successful, else sf fallback) # #' # #' @details # #' Automatically selects pivot_2.geojson for ESA project during CI extraction, # #' otherwise uses pivot.geojson. Handles both multi-polygon and simple polygon geometries. # #' # #' @examples # #' boundaries <- load_field_boundaries("laravel_app/storage/app/angata") # #' head(boundaries$field_boundaries_sf) # #' # load_field_boundaries <- function(data_dir) { # # Choose field boundaries file based on project and script type # # ESA project uses pivot_2.geojson ONLY for scripts 02-03 (CI extraction & growth model) # # All other scripts (including 04-mosaic, 09-KPIs, 10-reports) use pivot.geojson # use_pivot_2 <- exists("project_dir") && project_dir == "esa" && # exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03 # if (use_pivot_2) { # field_boundaries_path <- file.path(data_dir, "pivot_2.geojson") # } else { # field_boundaries_path <- file.path(data_dir, "pivot.geojson") # } # if (!file.exists(field_boundaries_path)) { # stop(paste("Field boundaries file not found at path:", field_boundaries_path)) # } # tryCatch({ # # Read GeoJSON with explicit CRS handling # field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE) # # Remove OBJECTID column immediately if it exists # if ("OBJECTID" %in% names(field_boundaries_sf)) { # field_boundaries_sf <- field_boundaries_sf %>% select(-OBJECTID) # } # # **CRITICAL**: Repair invalid geometries (degenerate vertices, self-intersections, etc.) # # This must happen BEFORE any spatial operations (CRS transform, intersect, crop, etc.) # # to prevent S2 geometry validation errors during downstream processing # field_boundaries_sf <- repair_geojson_geometries(field_boundaries_sf) # # Validate and fix CRS if needed # tryCatch({ # # Simply assign WGS84 if not already set (safe approach) # if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) { # st_crs(field_boundaries_sf) <- 4326 # warning("CRS was missing, assigned WGS84 (EPSG:4326)") # } # }, error = function(e) { # tryCatch({ # st_crs(field_boundaries_sf) <<- 4326 # }, error = function(e2) { # warning(paste("Could not set CRS:", e2$message)) # }) # }) # # Handle column names - accommodate optional sub_area column # if ("sub_area" %in% names(field_boundaries_sf)) { # field_boundaries_sf <- field_boundaries_sf %>% # dplyr::select(field, sub_field, sub_area) %>% # sf::st_set_geometry("geometry") # } else { # field_boundaries_sf <- field_boundaries_sf %>% # dplyr::select(field, sub_field) %>% # sf::st_set_geometry("geometry") # } # # Convert to terra vector if possible, otherwise use sf # field_boundaries <- tryCatch({ # field_boundaries_terra <- terra::vect(field_boundaries_sf) # crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL) # crs_str <- if (!is.null(crs_value)) as.character(crs_value) else "" # if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) { # terra::crs(field_boundaries_terra) <- "EPSG:4326" # warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)") # } # field_boundaries_terra # }, error = function(e) { # warning(paste("Terra conversion failed, using sf object instead:", e$message)) # field_boundaries_sf # }) # return(list( # field_boundaries_sf = field_boundaries_sf, # field_boundaries = field_boundaries # )) # }, error = function(e) { # cat("[DEBUG] Error in load_field_boundaries:\n") # cat(" Message:", e$message, "\n") # cat(" Call:", deparse(e$call), "\n") # stop(paste("Error loading field boundaries:", e$message)) # }) # } # #' Generate a Sequence of Dates for Processing # #' # #' Creates a date range from start_date to end_date and extracts week/year info. # #' Used by Scripts 20, 30, 40 to determine data processing windows. # #' # #' @param end_date The end date for the sequence (Date object or "YYYY-MM-DD" string) # #' @param offset Number of days to look back from end_date (e.g., 7 for one week) # #' @return A list containing: # #' - week: ISO week number of start_date # #' - year: ISO year of start_date # #' - days_filter: Vector of dates in "YYYY-MM-DD" format # #' - start_date: Start date as Date object # #' - end_date: End date as Date object # #' # #' @details # #' IMPORTANT: Uses `lubridate::week()` and `lubridate::year()` which return # #' ISO week numbers (week 1 starts on Monday). For ISO week-based calculations, # #' use `lubridate::isoweek()` and `lubridate::isoyear()` instead. # #' # #' @examples # #' dates <- date_list(as.Date("2025-01-15"), offset = 7) # #' # Returns: week=2, year=2025, days_filter = c("2025-01-09", ..., "2025-01-15") # #' # #' dates <- date_list("2025-12-31", offset = 14) # #' # Handles string input and returns 14 days of data # #' # date_list <- function(end_date, offset) { # # Input validation # if (!lubridate::is.Date(end_date)) { # end_date <- as.Date(end_date) # if (is.na(end_date)) { # stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.") # } # } # offset <- as.numeric(offset) # if (is.na(offset) || offset < 1) { # stop("Invalid offset provided. Expected a positive number.") # } # # Calculate date range # offset <- offset - 1 # Adjust offset to include end_date # start_date <- end_date - lubridate::days(offset) # # Extract ISO week and year information (from END date for reporting period) # week <- lubridate::isoweek(end_date) # year <- lubridate::isoyear(end_date) # # Generate sequence of dates # days_filter <- seq(from = start_date, to = end_date, by = "day") # days_filter <- format(days_filter, "%Y-%m-%d") # Format for consistent filtering # # Log the date range # safe_log(paste("Date range generated from", start_date, "to", end_date)) # return(list( # "week" = week, # "year" = year, # "days_filter" = days_filter, # "start_date" = start_date, # "end_date" = end_date # )) # } # ============================================================================== # #' Repair Invalid GeoJSON Geometries # #' # #' Fixes common geometry issues in GeoJSON/sf objects: # #' - Degenerate vertices (duplicate points) # #' - Self-intersecting polygons # #' - Invalid ring orientation # #' - Empty or NULL geometries # #' # #' Uses sf::st_make_valid() with buffer trick as fallback. # #' # #' @param sf_object sf object (GeoDataFrame) with potentially invalid geometries # #' @return sf object with repaired geometries # #' # #' @details # #' **Why this matters:** # #' Pivot GeoJSON files sometimes contain degenerate vertices or self-intersecting # #' rings from manual editing or GIS data sources. These cause errors when using # #' S2 geometry (strict validation) during cropping operations. # #' # #' **Repair strategy (priority order):** # #' 1. Try st_make_valid() - GEOS-based repair (most reliable) # #' 2. Fallback: st_union() + buffer(0) - Forces polygon validity # #' 3. Last resort: Silently keep original if repair fails # #' # #' @examples # #' \dontrun{ # #' fields <- st_read("pivot.geojson") # #' fields_fixed <- repair_geojson_geometries(fields) # #' cat(paste("Fixed geometries: before=", # #' nrow(fields[!st_is_valid(fields), ]), # #' ", after=", # #' nrow(fields_fixed[!st_is_valid(fields_fixed), ]))) # #' } # #' # repair_geojson_geometries <- function(sf_object) { # if (!inherits(sf_object, "sf")) { # stop("Input must be an sf (Simple Features) object") # } # # Count invalid geometries BEFORE repair # invalid_before <- sum(!sf::st_is_valid(sf_object), na.rm = TRUE) # if (invalid_before == 0) { # safe_log("All geometries already valid - no repair needed", "INFO") # return(sf_object) # } # safe_log(paste("Found", invalid_before, "invalid geometries - attempting repair"), "WARNING") # # STRATEGY: Apply st_make_valid() to entire sf object (most reliable for GEOS) # # This handles degenerate vertices, self-intersections, invalid rings while preserving all features # repaired <- tryCatch({ # # st_make_valid() on entire sf object preserves all features and attributes # repaired_geom <- sf::st_make_valid(sf_object) # # Verify we still have the same number of rows # if (nrow(repaired_geom) != nrow(sf_object)) { # warning("st_make_valid() changed number of features - attempting row-wise repair") # # Fallback: Repair row-by-row to maintain original structure # repaired_geom <- sf_object # for (i in seq_len(nrow(sf_object))) { # tryCatch({ # if (!sf::st_is_valid(sf_object[i, ])) { # repaired_geom[i, ] <- sf::st_make_valid(sf_object[i, ]) # } # }, error = function(e) { # safe_log(paste("Could not repair row", i, "-", e$message), "WARNING") # }) # } # } # safe_log("✓ st_make_valid() successfully repaired geometries", "INFO") # repaired_geom # }, error = function(e) { # safe_log(paste("st_make_valid() failed:", e$message), "WARNING") # NULL # }) # # If repair failed, keep original # if (is.null(repaired)) { # safe_log(paste("Could not repair", invalid_before, "invalid geometries - keeping original"), # "WARNING") # return(sf_object) # } # # Count invalid geometries AFTER repair # invalid_after <- sum(!sf::st_is_valid(repaired), na.rm = TRUE) # safe_log(paste("Repair complete: before =", invalid_before, ", after =", invalid_after), "INFO") # return(repaired) # } # ============================================================================== # # END 00_COMMON_UTILS.R # ==============================================================================