# ==============================================================================
# 00_COMMON_UTILS.R
# ==============================================================================
# GENERIC UTILITY FUNCTIONS FOR SMARTCANE PIPELINE
#
# PURPOSE:
#   Centralized location for foundational utilities used across multiple scripts.
#   These functions have NO project knowledge, NO client-type dependencies,
#   NO domain-specific logic.
#
# USAGE:
#   All scripts (10, 20, 21, 30, 40, 80, 90, 91) should source this file:
#
#     source(here::here("r_app", "parameters_project.R"))  # Config first
#     source(here::here("r_app", "00_common_utils.R"))     # Then common utilities
#
# FUNCTIONS:
#   1. safe_log()              — Generic logging with [LEVEL] prefix
#   2. smartcane_debug()       — Conditional debug logging
#   3. smartcane_warn()        — Convenience wrapper for WARNING-level messages
#   4. date_list()             — Generate date sequences for processing windows
#   5. get_iso_week()          — Extract ISO week number from date
#   6. get_iso_year()          — Extract ISO year from date
#   7. get_iso_week_year()     — Extract both ISO week and year as list
#   8. format_week_label()     — Format date as week/year label (e.g., "week01_2025")
#   9. load_field_boundaries() — Load field geometries from GeoJSON
#  10. load_harvesting_data()  — Load harvest schedule from Excel
#
# ==============================================================================

#' Safe Logging Function
#'
#' Generic logging with [LEVEL] prefix. Works standalone without any framework.
#' Consistent with SmartCane logging standard.
#'
#' @param message The message to log
#' @param level The log level (default: "INFO").
#'   Options: "INFO", "WARNING", "ERROR", "DEBUG"
#' @return NULL (invisible, used for side effects)
#'
#' @examples
#' safe_log("Processing started", "INFO")
#' safe_log("Check input file", "WARNING")
#' safe_log("Failed to load data", "ERROR")
#'
safe_log <- function(message, level = "INFO") {
  prefix <- sprintf("[%s]", level)
  cat(sprintf("%s %s\n", prefix, message))
  # Return invisibly so the function pipes/composes cleanly and matches
  # the documented contract (previously returned cat()'s NULL visibly).
  invisible(NULL)
}

#' SmartCane Debug Logging (Conditional)
#'
#' Logs DEBUG-level messages only if verbose=TRUE or SMARTCANE_DEBUG env var is set.
#' Useful for development/troubleshooting without cluttering normal output.
#'
#' @param message The message to log
#' @param verbose Whether to output regardless of SMARTCANE_DEBUG (default: FALSE)
#' @return NULL (invisible, used for side effects)
#'
#' @examples
#' smartcane_debug("Processing field 1", verbose = FALSE) # Only if SMARTCANE_DEBUG=TRUE
#' smartcane_debug("Detailed state info", verbose = TRUE) # Always outputs
#'
smartcane_debug <- function(message, verbose = FALSE) {
  # Suppress output unless explicitly verbose or the env var is exactly "TRUE"
  if (!verbose && Sys.getenv("SMARTCANE_DEBUG") != "TRUE") {
    return(invisible(NULL))
  }
  safe_log(message, level = "DEBUG")
}

#' SmartCane Warning Logging
#'
#' Logs WARNING-level messages. Convenience wrapper around safe_log().
#'
#' @param message The message to log
#' @return NULL (invisible, used for side effects)
#'
#' @examples
#' smartcane_warn("Check data format before proceeding")
#'
smartcane_warn <- function(message) {
  # "WARNING" (not "WARN") to match safe_log()'s documented level set and
  # the level used elsewhere in this file (e.g. repair_geojson_geometries).
  safe_log(message, level = "WARNING")
}

#' Extract ISO Week Number from Date
#'
#' Extracts ISO week number (1-53) from a date using %V format.
#' ISO weeks follow the international standard: Week 1 starts on Monday.
#'
#' @param date A Date object or string convertible to Date
#' @return Numeric: ISO week number (1-53)
#'
#' @examples
#' get_iso_week(as.Date("2025-01-15")) # Returns: 3
#'
get_iso_week <- function(date) {
  # as.Date() is a no-op for Date input and honours the documented
  # "string convertible to Date" contract for character input.
  as.numeric(format(as.Date(date), "%V"))
}

#' Extract ISO Year from Date
#'
#' Extracts ISO year from a date using %G format.
#' ISO year can differ from calendar year around year boundaries.
#'
#' @param date A Date object or string convertible to Date
#' @return Numeric: ISO year
#'
#' @examples
#' get_iso_year(as.Date("2025-01-01")) # Returns: 2025
#'
get_iso_year <- function(date) {
  as.numeric(format(as.Date(date), "%G"))
}

#' Extract ISO Week and Year as List
#'
#' Combines get_iso_week() and get_iso_year() for convenience.
#'
#' @param date A Date object or string convertible to Date
#' @return List with elements: week (1-53), year
#'
#' @examples
#' wwy <- get_iso_week_year(as.Date("2025-01-15"))
#' # Returns: list(week = 3, year = 2025)
#'
get_iso_week_year <- function(date) {
  # Delegate to the single-value helpers so the coercion and format
  # strings live in exactly one place.
  list(
    week = get_iso_week(date),
    year = get_iso_year(date)
  )
}

#' Format Date as Week/Year Label
#'
#' Converts a date into a readable week label format.
#' Useful for filenames, directory names, and output identification.
#'
#' @param date A Date object or string convertible to Date
#' @param separator Separator between week number and year (default: "_")
#' @return String in format "week##_YYYY" (e.g., "week03_2025")
#'
#' @examples
#' format_week_label(as.Date("2025-01-15"))      # "week03_2025"
#' format_week_label(as.Date("2025-01-15"), "-") # "week03-2025"
#'
format_week_label <- function(date, separator = "_") {
  wwy <- get_iso_week_year(date)
  # %02d zero-pads the week so labels sort lexicographically within a year
  sprintf("week%02d%s%d", wwy$week, separator, wwy$year)
}

#' Load Field Boundaries from GeoJSON
#'
#' Loads field polygon geometries from GeoJSON file (pivot.geojson or pivot_2.geojson).
#' Handles CRS validation and column standardization.
#'
#' @param data_dir Directory containing GeoJSON file
#' @return List with elements:
#'   - field_boundaries_sf: sf (Simple Features) object
#'   - field_boundaries: terra SpatVect object (if conversion successful, else sf fallback)
#'
#' @details
#' Automatically selects pivot_2.geojson for ESA project during CI extraction,
#' otherwise uses pivot.geojson. Handles both multi-polygon and simple polygon geometries.
#'
#' @examples
#' boundaries <- load_field_boundaries("laravel_app/storage/app/angata")
#' head(boundaries$field_boundaries_sf)
#'
load_field_boundaries <- function(data_dir) {
  # Choose field boundaries file based on project and script type.
  # ESA project uses pivot_2.geojson ONLY for scripts 02-03 (CI extraction
  # & growth model); all other scripts (including 04-mosaic, 09-KPIs,
  # 10-reports) use pivot.geojson.
  # NOTE(review): depends on globals `project_dir` and `ci_extraction_script`
  # being set by the calling script before this function runs.
  use_pivot_2 <- exists("project_dir") && project_dir == "esa" && exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03
  if (use_pivot_2) {
    field_boundaries_path <- here(data_dir, "pivot_2.geojson")
  } else {
    # NOTE(review): pivot.geojson is looked up under a "Data" subdirectory
    # while pivot_2.geojson sits directly in data_dir — confirm intentional.
    field_boundaries_path <- here(data_dir, "Data", "pivot.geojson")
  }
  if (!file.exists(field_boundaries_path)) {
    stop(paste("Field boundaries file not found at path:", field_boundaries_path))
  }
  tryCatch({
    # Read GeoJSON with explicit CRS handling
    field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE)

    # Remove OBJECTID column immediately if it exists (GIS-export artifact)
    if ("OBJECTID" %in% names(field_boundaries_sf)) {
      field_boundaries_sf <- field_boundaries_sf %>% select(-OBJECTID)
    }

    # **CRITICAL**: Repair invalid geometries (degenerate vertices,
    # self-intersections, etc.). This must happen BEFORE any spatial
    # operations (CRS transform, intersect, crop, etc.) to prevent S2
    # geometry validation errors during downstream processing.
    field_boundaries_sf <- repair_geojson_geometries(field_boundaries_sf)

    # Validate and fix CRS if needed
    tryCatch({
      # Simply assign WGS84 if not already set (safe approach)
      if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) {
        st_crs(field_boundaries_sf) <- 4326
        warning("CRS was missing, assigned WGS84 (EPSG:4326)")
      }
    }, error = function(e) {
      # Fallback path: assign from inside the handler.
      # NOTE(review): `<<-` reaches the `field_boundaries_sf` binding in this
      # function's frame (the handler's enclosing environment) — works, but
      # is fragile if this handler is ever moved.
      tryCatch({
        st_crs(field_boundaries_sf) <<- 4326
      }, error = function(e2) {
        warning(paste("Could not set CRS:", e2$message))
      })
    })

    # Handle column names - accommodate optional sub_area column;
    # both branches standardize the geometry column name to "geometry".
    if ("sub_area" %in% names(field_boundaries_sf)) {
      field_boundaries_sf <- field_boundaries_sf %>%
        dplyr::select(field, sub_field, sub_area) %>%
        sf::st_set_geometry("geometry")
    } else {
      field_boundaries_sf <- field_boundaries_sf %>%
        dplyr::select(field, sub_field) %>%
        sf::st_set_geometry("geometry")
    }

    # Convert to terra vector if possible, otherwise fall back to the sf
    # object (so callers always get something usable in $field_boundaries).
    field_boundaries <- tryCatch({
      field_boundaries_terra <- terra::vect(field_boundaries_sf)
      # terra::crs() itself can fail on malformed objects; treat any failure
      # the same as "no CRS" and assign WGS84 below.
      crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL)
      crs_str <- if (!is.null(crs_value)) as.character(crs_value) else ""
      if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) {
        terra::crs(field_boundaries_terra) <- "EPSG:4326"
        warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)")
      }
      field_boundaries_terra
    }, error = function(e) {
      warning(paste("Terra conversion failed, using sf object instead:", e$message))
      field_boundaries_sf
    })

    return(list(
      field_boundaries_sf = field_boundaries_sf,
      field_boundaries = field_boundaries
    ))
  }, error = function(e) {
    # Surface diagnostic detail before re-raising as a single error message
    cat("[DEBUG] Error in load_field_boundaries:\n")
    cat("  Message:", e$message, "\n")
    cat("  Call:", deparse(e$call), "\n")
    stop(paste("Error loading field boundaries:", e$message))
  })
}

#' Load Harvesting Data from Excel
#'
#' Loads crop harvest schedule from harvest.xlsx file.
#' Handles flexible date formats (numeric, YYYY-MM-DD, DD/MM/YYYY, etc.).
#'
#' @param data_dir Directory containing harvest.xlsx file
#' @return Data frame with columns: field, sub_field, year, season_start, season_end,
#'   age (weeks), sub_area, tonnage_ha. Returns NULL if file not found.
#'
#' @examples
#' harvest <- load_harvesting_data("laravel_app/storage/app/angata")
#' head(harvest)
#'
load_harvesting_data <- function(data_dir) {
  harvest_file <- here(data_dir, "harvest.xlsx")

  if (!file.exists(harvest_file)) {
    # Missing file is recoverable for callers: warn and return NULL
    warning(paste("Harvest data file not found at path:", harvest_file))
    return(NULL)
  }

  # Helper: parse a single cell into a Date, trying Excel serials and
  # several text formats. Returns NA when the value cannot be parsed.
  parse_flexible_date <- function(x) {
    # FIX: NULL must be tested BEFORE is.na() — is.na(NULL) is logical(0),
    # which makes `||` error out before the is.null() guard is ever reached.
    if (is.null(x) || is.na(x)) return(NA_real_)
    if (inherits(x, "Date")) return(x)
    if (inherits(x, "POSIXct")) return(as.Date(x))
    # Numeric cells are Excel date serials (Windows epoch 1899-12-30)
    if (is.numeric(x)) {
      return(as.Date(x, origin = "1899-12-30"))
    }
    # Try character conversion with multiple formats, first match wins
    x_char <- as.character(x)
    formats <- c("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y-%m-%d %H:%M:%S")
    for (fmt in formats) {
      result <- suppressWarnings(as.Date(x_char, format = fmt))
      if (!is.na(result)) return(result)
    }
    NA
  }

  tryCatch({
    harvesting_data <- read_excel(harvest_file) %>%
      dplyr::select(
        c(
          "field", "sub_field", "year", "season_start", "season_end",
          "age", "sub_area", "tonnage_ha"
        )
      ) %>%
      mutate(
        field = as.character(field),
        sub_field = as.character(sub_field),
        year = as.numeric(year),
        # sapply() deliberately strips the Date class (elements come back as
        # numeric day counts); the as.Date(origin = "1970-01-01") calls below
        # restore proper Date columns.
        season_start = sapply(season_start, parse_flexible_date),
        season_end = sapply(season_end, parse_flexible_date),
        season_start = as.Date(season_start, origin = "1970-01-01"),
        season_end = as.Date(season_end, origin = "1970-01-01"),
        age = as.numeric(age),
        sub_area = as.character(sub_area),
        tonnage_ha = as.numeric(tonnage_ha)
      ) %>%
      mutate(
        # Clamp open/future seasons to today so age is never negative
        # and never extends past the present.
        season_end = case_when(
          season_end > Sys.Date() ~ Sys.Date(),
          is.na(season_end) ~ Sys.Date(),
          TRUE ~ season_end
        ),
        # Crop age in whole weeks, overriding the spreadsheet's age column
        age = round(as.numeric(season_end - season_start) / 7, 0)
      )
    return(harvesting_data)
  }, error = function(e) {
    warning(paste("Error loading harvesting data:", e$message))
    return(NULL)
  })
}

#' Generate a Sequence of Dates for Processing
#'
#' Creates a date range from start_date to end_date and extracts week/year info.
#' Used by Scripts 20, 30, 40 to determine data processing windows.
#'
#' @param end_date The end date for the sequence (Date object or "YYYY-MM-DD" string)
#' @param offset Number of days to look back from end_date (e.g., 7 for one week,
#'   inclusive of end_date itself)
#' @return A list containing:
#'   - week: ISO week number of end_date (the reporting period)
#'   - year: ISO year of end_date
#'   - days_filter: Vector of dates in "YYYY-MM-DD" format
#'   - start_date: Start date as Date object
#'   - end_date: End date as Date object
#'
#' @details
#' Uses `lubridate::isoweek()` and `lubridate::isoyear()`, so `week` and `year`
#' follow the ISO-8601 convention (week 1 starts on Monday) and are taken from
#' `end_date` — the reporting period — not from `start_date`.
#'
#' @examples
#' dates <- date_list(as.Date("2025-01-15"), offset = 7)
#' # Returns: week=3, year=2025, days_filter = c("2025-01-09", ..., "2025-01-15")
#'
#' dates <- date_list("2025-12-31", offset = 14)
#' # Handles string input and returns 14 days of data
#'
date_list <- function(end_date, offset) {
  # Input validation: coerce strings to Date. as.Date() errors (rather than
  # returning NA) on unparseable strings, so wrap it to make the documented
  # validation message below actually reachable.
  if (!lubridate::is.Date(end_date)) {
    end_date <- tryCatch(as.Date(end_date), error = function(e) as.Date(NA))
    if (is.na(end_date)) {
      stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.")
    }
  }
  offset <- as.numeric(offset)
  if (is.na(offset) || offset < 1) {
    stop("Invalid offset provided. Expected a positive number.")
  }

  # Calculate date range: subtract offset-1 days so the window contains
  # exactly `offset` days including end_date.
  offset <- offset - 1
  start_date <- end_date - lubridate::days(offset)

  # Extract ISO week and year information (from END date for reporting period)
  week <- lubridate::isoweek(end_date)
  year <- lubridate::isoyear(end_date)

  # Generate the day-by-day sequence, formatted for consistent filtering
  days_filter <- seq(from = start_date, to = end_date, by = "day")
  days_filter <- format(days_filter, "%Y-%m-%d")

  # Log the date range
  safe_log(paste("Date range generated from", start_date, "to", end_date))

  return(list(
    "week" = week,
    "year" = year,
    "days_filter" = days_filter,
    "start_date" = start_date,
    "end_date" = end_date
  ))
}

# ==============================================================================

#' Repair Invalid GeoJSON Geometries
#'
#' Fixes common geometry issues in GeoJSON/sf objects:
#'   - Degenerate vertices (duplicate points)
#'   - Self-intersecting polygons
#'   - Invalid ring orientation
#'   - Empty or NULL geometries
#'
#' Uses sf::st_make_valid() with a row-wise fallback.
#'
#' @param sf_object sf object (GeoDataFrame) with potentially invalid geometries
#' @return sf object with repaired geometries
#'
#' @details
#' **Why this matters:**
#' Pivot GeoJSON files sometimes contain degenerate vertices or self-intersecting
#' rings from manual editing or GIS data sources. These cause errors when using
#' S2 geometry (strict validation) during cropping operations.
#'
#' **Repair strategy (priority order):**
#'   1. st_make_valid() on the whole object - GEOS-based repair (most reliable)
#'   2. Fallback: row-by-row st_make_valid() if the feature count changed
#'   3. Last resort: keep the original object if repair fails entirely
#'
#' @examples
#' \dontrun{
#' fields <- st_read("pivot.geojson")
#' fields_fixed <- repair_geojson_geometries(fields)
#' cat(paste("Fixed geometries: before=",
#'           nrow(fields[!st_is_valid(fields), ]),
#'           ", after=",
#'           nrow(fields_fixed[!st_is_valid(fields_fixed), ])))
#' }
#'
repair_geojson_geometries <- function(sf_object) {
  if (!inherits(sf_object, "sf")) {
    stop("Input must be an sf (Simple Features) object")
  }

  # Count invalid geometries BEFORE repair
  invalid_before <- sum(!sf::st_is_valid(sf_object), na.rm = TRUE)

  if (invalid_before == 0) {
    safe_log("All geometries already valid - no repair needed", "INFO")
    return(sf_object)
  }

  safe_log(paste("Found", invalid_before, "invalid geometries - attempting repair"), "WARNING")

  # STRATEGY: Apply st_make_valid() to entire sf object (most reliable for GEOS).
  # This handles degenerate vertices, self-intersections, and invalid rings
  # while preserving all features and attributes.
  repaired <- tryCatch({
    repaired_geom <- sf::st_make_valid(sf_object)

    # st_make_valid() can occasionally split/drop features; if the row count
    # changed, repair row-by-row instead to maintain the original structure.
    if (nrow(repaired_geom) != nrow(sf_object)) {
      warning("st_make_valid() changed number of features - attempting row-wise repair")
      repaired_geom <- sf_object
      for (i in seq_len(nrow(sf_object))) {
        tryCatch({
          if (!sf::st_is_valid(sf_object[i, ])) {
            repaired_geom[i, ] <- sf::st_make_valid(sf_object[i, ])
          }
        }, error = function(e) {
          # Leave this row as-is; downstream code tolerates residual invalids
          safe_log(paste("Could not repair row", i, "-", e$message), "WARNING")
        })
      }
    }
    safe_log("✓ st_make_valid() successfully repaired geometries", "INFO")
    repaired_geom
  }, error = function(e) {
    safe_log(paste("st_make_valid() failed:", e$message), "WARNING")
    NULL
  })

  # If repair failed entirely, keep the original object (last resort)
  if (is.null(repaired)) {
    safe_log(paste("Could not repair", invalid_before, "invalid geometries - keeping original"), "WARNING")
    return(sf_object)
  }

  # Count invalid geometries AFTER repair and report the delta
  invalid_after <- sum(!sf::st_is_valid(repaired), na.rm = TRUE)
  safe_log(paste("Repair complete: before =", invalid_before, ", after =", invalid_after), "INFO")

  return(repaired)
}

# ==============================================================================
# END 00_COMMON_UTILS.R
# ==============================================================================