357 lines
14 KiB
R
357 lines
14 KiB
R
# ==============================================================================
|
|
# # 00_COMMON_UTILS.R
|
|
# ==============================================================================
|
|
# # GENERIC UTILITY FUNCTIONS FOR SMARTCANE PIPELINE
|
|
# #
|
|
# # PURPOSE:
|
|
# # Centralized location for foundational utilities used across multiple scripts.
|
|
# # These functions have NO project knowledge, NO client-type dependencies,
|
|
# # NO domain-specific logic.
|
|
# #
|
|
# # USAGE:
|
|
# # All scripts (10, 20, 21, 30, 40, 80, 90, 91) should source this file:
|
|
# #
|
|
# # source(here::here("r_app", "parameters_project.R")) # Config first
|
|
# # source(here::here("r_app", "00_common_utils.R")) # Then common utilities
|
|
# #
|
|
# # FUNCTIONS:
|
|
# # 1. safe_log() — Generic logging with [LEVEL] prefix
|
|
# # 2. smartcane_debug() — Conditional debug logging
|
|
# # 3. smartcane_warn() — Convenience wrapper for WARN-level messages
|
|
# # 4. date_list() — Generate date sequences for processing windows
|
|
# # 5. load_field_boundaries() — Load field geometries from GeoJSON
|
|
# # 6. repair_geojson_geometries() — Fix invalid geometries in GeoJSON objects
|
|
# #
|
|
# # DATE FUNCTIONS (now in parameters_project.R):
|
|
# # - get_iso_week() — Extract ISO week number from date
|
|
# # - get_iso_year() — Extract ISO year from date
|
|
# # - get_iso_week_year() — Extract both ISO week and year as list
|
|
# # - format_week_label() — Format date as week/year label
|
|
# # - load_harvesting_data() — Load harvest schedule from Excel
|
|
# #
|
|
# ==============================================================================
|
|
|
|
# # Source parameters first to get shared date utility functions
|
|
# source("parameters_project.R")
|
|
|
|
# #' Safe Logging Function
|
|
# #'
|
|
# #' Generic logging with [LEVEL] prefix. Works standalone without any framework.
|
|
# #' Consistent with SmartCane logging standard.
|
|
# #'
|
|
# #' @param message The message to log
|
|
# #' @param level The log level (default: "INFO"). Options: "INFO", "WARNING", "ERROR", "DEBUG"
|
|
# #' @return NULL (invisible, used for side effects)
|
|
# #'
|
|
# #' @examples
|
|
# #' safe_log("Processing started", "INFO")
|
|
# #' safe_log("Check input file", "WARNING")
|
|
# #' safe_log("Failed to load data", "ERROR")
|
|
# #'
|
|
# safe_log <- function(message, level = "INFO") {
|
|
# prefix <- sprintf("[%s]", level)
|
|
# cat(sprintf("%s %s\n", prefix, message))
|
|
# }
|
|
|
|
# #' SmartCane Debug Logging (Conditional)
|
|
# #'
|
|
# #' Logs DEBUG-level messages only if verbose=TRUE or SMARTCANE_DEBUG env var is set.
|
|
# #' Useful for development/troubleshooting without cluttering normal output.
|
|
# #'
|
|
# #' @param message The message to log
|
|
# #' @param verbose Whether to output regardless of SMARTCANE_DEBUG (default: FALSE)
|
|
# #' @return NULL (invisible, used for side effects)
|
|
# #'
|
|
# #' @examples
|
|
# #' smartcane_debug("Processing field 1", verbose = FALSE) # Only if SMARTCANE_DEBUG=TRUE
|
|
# #' smartcane_debug("Detailed state info", verbose = TRUE) # Always outputs
|
|
# #'
|
|
# smartcane_debug <- function(message, verbose = FALSE) {
|
|
# if (!verbose && Sys.getenv("SMARTCANE_DEBUG") != "TRUE") {
|
|
# return(invisible(NULL))
|
|
# }
|
|
# safe_log(message, level = "DEBUG")
|
|
# }
|
|
|
|
# #' SmartCane Warning Logging
|
|
# #'
|
|
# #' Logs WARN-level messages. Convenience wrapper around safe_log().
|
|
# #'
|
|
# #' @param message The message to log
|
|
# #' @return NULL (invisible, used for side effects)
|
|
# #'
|
|
# #' @examples
|
|
# #' smartcane_warn("Check data format before proceeding")
|
|
# #'
|
|
# smartcane_warn <- function(message) {
|
|
# safe_log(message, level = "WARN")
|
|
# }
|
|
|
|
# #' Load Field Boundaries from GeoJSON
|
|
# #'
|
|
# #' Loads field polygon geometries from GeoJSON file (pivot.geojson or pivot_2.geojson).
|
|
# #' Handles CRS validation and column standardization.
|
|
# #'
|
|
# #' @param data_dir Directory containing GeoJSON file
|
|
# #' @return List with elements:
|
|
# #' - field_boundaries_sf: sf (Simple Features) object
|
|
# #' - field_boundaries: terra SpatVect object (if conversion successful, else sf fallback)
|
|
# #'
|
|
# #' @details
|
|
# #' Automatically selects pivot_2.geojson for ESA project during CI extraction,
|
|
# #' otherwise uses pivot.geojson. Handles both multi-polygon and simple polygon geometries.
|
|
# #'
|
|
# #' @examples
|
|
# #' boundaries <- load_field_boundaries("laravel_app/storage/app/angata")
|
|
# #' head(boundaries$field_boundaries_sf)
|
|
# #'
|
|
# load_field_boundaries <- function(data_dir) {
|
|
# # Choose field boundaries file based on project and script type
|
|
# # ESA project uses pivot_2.geojson ONLY for scripts 02-03 (CI extraction & growth model)
|
|
# # All other scripts (including 04-mosaic, 09-KPIs, 10-reports) use pivot.geojson
|
|
# use_pivot_2 <- exists("project_dir") && project_dir == "esa" &&
|
|
# exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03
|
|
|
|
# if (use_pivot_2) {
|
|
# field_boundaries_path <- file.path(data_dir, "pivot_2.geojson")
|
|
# } else {
|
|
# field_boundaries_path <- file.path(data_dir, "pivot.geojson")
|
|
# }
|
|
|
|
# if (!file.exists(field_boundaries_path)) {
|
|
# stop(paste("Field boundaries file not found at path:", field_boundaries_path))
|
|
# }
|
|
|
|
# tryCatch({
|
|
# # Read GeoJSON with explicit CRS handling
|
|
# field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE)
|
|
|
|
# # Remove OBJECTID column immediately if it exists
|
|
# if ("OBJECTID" %in% names(field_boundaries_sf)) {
|
|
# field_boundaries_sf <- field_boundaries_sf %>% select(-OBJECTID)
|
|
# }
|
|
|
|
# # **CRITICAL**: Repair invalid geometries (degenerate vertices, self-intersections, etc.)
|
|
# # This must happen BEFORE any spatial operations (CRS transform, intersect, crop, etc.)
|
|
# # to prevent S2 geometry validation errors during downstream processing
|
|
# field_boundaries_sf <- repair_geojson_geometries(field_boundaries_sf)
|
|
|
|
# # Validate and fix CRS if needed
|
|
# tryCatch({
|
|
# # Simply assign WGS84 if not already set (safe approach)
|
|
# if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) {
|
|
# st_crs(field_boundaries_sf) <- 4326
|
|
# warning("CRS was missing, assigned WGS84 (EPSG:4326)")
|
|
# }
|
|
# }, error = function(e) {
|
|
# tryCatch({
|
|
# st_crs(field_boundaries_sf) <<- 4326
|
|
# }, error = function(e2) {
|
|
# warning(paste("Could not set CRS:", e2$message))
|
|
# })
|
|
# })
|
|
|
|
# # Handle column names - accommodate optional sub_area column
|
|
# if ("sub_area" %in% names(field_boundaries_sf)) {
|
|
# field_boundaries_sf <- field_boundaries_sf %>%
|
|
# dplyr::select(field, sub_field, sub_area) %>%
|
|
# sf::st_set_geometry("geometry")
|
|
# } else {
|
|
# field_boundaries_sf <- field_boundaries_sf %>%
|
|
# dplyr::select(field, sub_field) %>%
|
|
# sf::st_set_geometry("geometry")
|
|
# }
|
|
|
|
# # Convert to terra vector if possible, otherwise use sf
|
|
# field_boundaries <- tryCatch({
|
|
# field_boundaries_terra <- terra::vect(field_boundaries_sf)
|
|
# crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL)
|
|
# crs_str <- if (!is.null(crs_value)) as.character(crs_value) else ""
|
|
|
|
# if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) {
|
|
# terra::crs(field_boundaries_terra) <- "EPSG:4326"
|
|
# warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)")
|
|
# }
|
|
# field_boundaries_terra
|
|
|
|
# }, error = function(e) {
|
|
# warning(paste("Terra conversion failed, using sf object instead:", e$message))
|
|
# field_boundaries_sf
|
|
# })
|
|
|
|
# return(list(
|
|
# field_boundaries_sf = field_boundaries_sf,
|
|
# field_boundaries = field_boundaries
|
|
# ))
|
|
# }, error = function(e) {
|
|
# cat("[DEBUG] Error in load_field_boundaries:\n")
|
|
# cat(" Message:", e$message, "\n")
|
|
# cat(" Call:", deparse(e$call), "\n")
|
|
# stop(paste("Error loading field boundaries:", e$message))
|
|
# })
|
|
# }
|
|
|
|
|
|
|
|
# #' Generate a Sequence of Dates for Processing
|
|
# #'
|
|
# #' Creates a date range from start_date to end_date and extracts week/year info.
|
|
# #' Used by Scripts 20, 30, 40 to determine data processing windows.
|
|
# #'
|
|
# #' @param end_date The end date for the sequence (Date object or "YYYY-MM-DD" string)
|
|
# #' @param offset Number of days to look back from end_date (e.g., 7 for one week)
|
|
# #' @return A list containing:
|
|
# #' - week: ISO week number of start_date
|
|
# #' - year: ISO year of start_date
|
|
# #' - days_filter: Vector of dates in "YYYY-MM-DD" format
|
|
# #' - start_date: Start date as Date object
|
|
# #' - end_date: End date as Date object
|
|
# #'
|
|
# #' @details
|
|
# #' IMPORTANT: Uses `lubridate::week()` and `lubridate::year()` which return
|
|
# #' ISO week numbers (week 1 starts on Monday). For ISO week-based calculations,
|
|
# #' use `lubridate::isoweek()` and `lubridate::isoyear()` instead.
|
|
# #'
|
|
# #' @examples
|
|
# #' dates <- date_list(as.Date("2025-01-15"), offset = 7)
|
|
# #' # Returns: week=2, year=2025, days_filter = c("2025-01-09", ..., "2025-01-15")
|
|
# #'
|
|
# #' dates <- date_list("2025-12-31", offset = 14)
|
|
# #' # Handles string input and returns 14 days of data
|
|
# #'
|
|
# date_list <- function(end_date, offset) {
|
|
# # Input validation
|
|
# if (!lubridate::is.Date(end_date)) {
|
|
# end_date <- as.Date(end_date)
|
|
# if (is.na(end_date)) {
|
|
# stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.")
|
|
# }
|
|
# }
|
|
|
|
# offset <- as.numeric(offset)
|
|
# if (is.na(offset) || offset < 1) {
|
|
# stop("Invalid offset provided. Expected a positive number.")
|
|
# }
|
|
|
|
# # Calculate date range
|
|
# offset <- offset - 1 # Adjust offset to include end_date
|
|
# start_date <- end_date - lubridate::days(offset)
|
|
|
|
# # Extract ISO week and year information (from END date for reporting period)
|
|
# week <- lubridate::isoweek(end_date)
|
|
# year <- lubridate::isoyear(end_date)
|
|
|
|
# # Generate sequence of dates
|
|
# days_filter <- seq(from = start_date, to = end_date, by = "day")
|
|
# days_filter <- format(days_filter, "%Y-%m-%d") # Format for consistent filtering
|
|
|
|
# # Log the date range
|
|
# safe_log(paste("Date range generated from", start_date, "to", end_date))
|
|
|
|
# return(list(
|
|
# "week" = week,
|
|
# "year" = year,
|
|
# "days_filter" = days_filter,
|
|
# "start_date" = start_date,
|
|
# "end_date" = end_date
|
|
# ))
|
|
# }
|
|
|
|
# ==============================================================================
|
|
# #' Repair Invalid GeoJSON Geometries
|
|
# #'
|
|
# #' Fixes common geometry issues in GeoJSON/sf objects:
|
|
# #' - Degenerate vertices (duplicate points)
|
|
# #' - Self-intersecting polygons
|
|
# #' - Invalid ring orientation
|
|
# #' - Empty or NULL geometries
|
|
# #'
|
|
# #' Uses sf::st_make_valid() with buffer trick as fallback.
|
|
# #'
|
|
# #' @param sf_object sf object (GeoDataFrame) with potentially invalid geometries
|
|
# #' @return sf object with repaired geometries
|
|
# #'
|
|
# #' @details
|
|
# #' **Why this matters:**
|
|
# #' Pivot GeoJSON files sometimes contain degenerate vertices or self-intersecting
|
|
# #' rings from manual editing or GIS data sources. These cause errors when using
|
|
# #' S2 geometry (strict validation) during cropping operations.
|
|
# #'
|
|
# #' **Repair strategy (priority order):**
|
|
# #' 1. Try st_make_valid() - GEOS-based repair (most reliable)
|
|
# #' 2. Fallback: st_union() + buffer(0) - Forces polygon validity
|
|
# #' 3. Last resort: Silently keep original if repair fails
|
|
# #'
|
|
# #' @examples
|
|
# #' \dontrun{
|
|
# #' fields <- st_read("pivot.geojson")
|
|
# #' fields_fixed <- repair_geojson_geometries(fields)
|
|
# #' cat(paste("Fixed geometries: before=",
|
|
# #' nrow(fields[!st_is_valid(fields), ]),
|
|
# #' ", after=",
|
|
# #' nrow(fields_fixed[!st_is_valid(fields_fixed), ])))
|
|
# #' }
|
|
# #'
|
|
# repair_geojson_geometries <- function(sf_object) {
|
|
# if (!inherits(sf_object, "sf")) {
|
|
# stop("Input must be an sf (Simple Features) object")
|
|
# }
|
|
|
|
# # Count invalid geometries BEFORE repair
|
|
# invalid_before <- sum(!sf::st_is_valid(sf_object), na.rm = TRUE)
|
|
|
|
# if (invalid_before == 0) {
|
|
# safe_log("All geometries already valid - no repair needed", "INFO")
|
|
# return(sf_object)
|
|
# }
|
|
|
|
# safe_log(paste("Found", invalid_before, "invalid geometries - attempting repair"), "WARNING")
|
|
|
|
# # STRATEGY: Apply st_make_valid() to entire sf object (most reliable for GEOS)
|
|
# # This handles degenerate vertices, self-intersections, invalid rings while preserving all features
|
|
# repaired <- tryCatch({
|
|
# # st_make_valid() on entire sf object preserves all features and attributes
|
|
# repaired_geom <- sf::st_make_valid(sf_object)
|
|
|
|
# # Verify we still have the same number of rows
|
|
# if (nrow(repaired_geom) != nrow(sf_object)) {
|
|
# warning("st_make_valid() changed number of features - attempting row-wise repair")
|
|
|
|
# # Fallback: Repair row-by-row to maintain original structure
|
|
# repaired_geom <- sf_object
|
|
# for (i in seq_len(nrow(sf_object))) {
|
|
# tryCatch({
|
|
# if (!sf::st_is_valid(sf_object[i, ])) {
|
|
# repaired_geom[i, ] <- sf::st_make_valid(sf_object[i, ])
|
|
# }
|
|
# }, error = function(e) {
|
|
# safe_log(paste("Could not repair row", i, "-", e$message), "WARNING")
|
|
# })
|
|
# }
|
|
# }
|
|
|
|
# safe_log("✓ st_make_valid() successfully repaired geometries", "INFO")
|
|
# repaired_geom
|
|
# }, error = function(e) {
|
|
# safe_log(paste("st_make_valid() failed:", e$message), "WARNING")
|
|
# NULL
|
|
# })
|
|
|
|
# # If repair failed, keep original
|
|
# if (is.null(repaired)) {
|
|
# safe_log(paste("Could not repair", invalid_before, "invalid geometries - keeping original"),
|
|
# "WARNING")
|
|
# return(sf_object)
|
|
# }
|
|
|
|
# # Count invalid geometries AFTER repair
|
|
# invalid_after <- sum(!sf::st_is_valid(repaired), na.rm = TRUE)
|
|
# safe_log(paste("Repair complete: before =", invalid_before, ", after =", invalid_after), "INFO")
|
|
|
|
# return(repaired)
|
|
# }
|
|
|
|
# ==============================================================================
|
|
# # END 00_COMMON_UTILS.R
|
|
# ==============================================================================
|