# ==============================================================================
|
|
# 00_COMMON_UTILS.R
|
|
# ==============================================================================
|
|
# GENERIC UTILITY FUNCTIONS FOR SMARTCANE PIPELINE
|
|
#
|
|
# PURPOSE:
|
|
# Centralized location for foundational utilities used across multiple scripts.
|
|
# These functions have NO project knowledge, NO client-type dependencies,
|
|
# NO domain-specific logic.
|
|
#
|
|
# USAGE:
|
|
# All scripts (10, 20, 21, 30, 40, 80, 90, 91) should source this file:
|
|
#
|
|
# source(here::here("r_app", "parameters_project.R")) # Config first
|
|
# source(here::here("r_app", "00_common_utils.R")) # Then common utilities
|
|
#
|
|
# FUNCTIONS:
|
|
# 1. safe_log() — Generic logging with [LEVEL] prefix
|
|
# 2. smartcane_debug() — Conditional debug logging
|
|
# 3. smartcane_warn() — Convenience wrapper for WARN-level messages
|
|
# 4. date_list() — Generate date sequences for processing windows
|
|
# 5. get_iso_week() — Extract ISO week number from date
|
|
# 6. get_iso_year() — Extract ISO year from date
|
|
# 7. get_iso_week_year() — Extract both ISO week and year as list
|
|
# 8. format_week_label() — Format date as week/year label (e.g., "week01_2025")
|
|
# 9. load_field_boundaries() — Load field geometries from GeoJSON
|
|
# 10. load_harvesting_data() — Load harvest schedule from Excel
|
|
#
|
|
# ==============================================================================
|
|
|
|
#' Safe Logging Function
|
|
#'
|
|
#' Generic logging with [LEVEL] prefix. Works standalone without any framework.
|
|
#' Consistent with SmartCane logging standard.
|
|
#'
|
|
#' @param message The message to log
|
|
#' @param level The log level (default: "INFO"). Common values: "INFO", "WARN", "WARNING", "ERROR", "DEBUG"
|
|
#' @return NULL (invisible, used for side effects)
|
|
#'
|
|
#' @examples
|
|
#' safe_log("Processing started", "INFO")
|
|
#' safe_log("Check input file", "WARNING")
|
|
#' safe_log("Failed to load data", "ERROR")
|
|
#'
|
|
safe_log <- function(message, level = "INFO") {
  # Emit "[LEVEL] message" on stdout; framework-free by design.
  cat(sprintf("[%s] %s\n", level, message))
}
|
|
|
|
#' SmartCane Debug Logging (Conditional)
|
|
#'
|
|
#' Logs DEBUG-level messages only if verbose=TRUE or SMARTCANE_DEBUG env var is set.
|
|
#' Useful for development/troubleshooting without cluttering normal output.
|
|
#'
|
|
#' @param message The message to log
|
|
#' @param verbose Whether to output regardless of SMARTCANE_DEBUG (default: FALSE)
|
|
#' @return NULL (invisible, used for side effects)
|
|
#'
|
|
#' @examples
|
|
#' smartcane_debug("Processing field 1", verbose = FALSE) # Only if SMARTCANE_DEBUG=TRUE
|
|
#' smartcane_debug("Detailed state info", verbose = TRUE) # Always outputs
|
|
#'
|
|
smartcane_debug <- function(message, verbose = FALSE) {
  # DEBUG output is gated: emit only when the caller forces it via
  # verbose = TRUE, or when the SMARTCANE_DEBUG env var is set to "TRUE".
  debug_enabled <- verbose || identical(Sys.getenv("SMARTCANE_DEBUG"), "TRUE")
  if (debug_enabled) {
    safe_log(message, level = "DEBUG")
  } else {
    invisible(NULL)
  }
}
|
|
|
|
#' SmartCane Warning Logging
|
|
#'
|
|
#' Logs WARN-level messages. Convenience wrapper around safe_log().
|
|
#'
|
|
#' @param message The message to log
|
|
#' @return NULL (invisible, used for side effects)
|
|
#'
|
|
#' @examples
|
|
#' smartcane_warn("Check data format before proceeding")
|
|
#'
|
|
smartcane_warn <- function(message) {
  # Thin convenience wrapper: route the message through safe_log() at
  # the "WARN" level so warning output stays consistent pipeline-wide.
  safe_log(message, level = "WARN")
}
|
|
|
|
#' Extract ISO Week Number from Date
|
|
#'
|
|
#' Extracts ISO week number (1-53) from a date using %V format.
|
|
#' ISO weeks follow the international standard: Week 1 starts on Monday.
|
|
#'
|
|
#' @param date A Date object or string convertible to Date
|
|
#' @return Numeric: ISO week number (1-53)
|
|
#'
|
|
#' @examples
|
|
#' get_iso_week(as.Date("2025-01-15")) # Returns: 3
|
|
#'
|
|
#' Extract ISO Week Number from Date
#'
#' @param date A Date object or a string convertible to Date
#' @return Numeric: ISO week number (1-53)
get_iso_week <- function(date) {
  # BUGFIX: coerce strings to Date first. format() on a character vector
  # ignores the "%V" strftime code and returns the input unchanged, so
  # string inputs previously produced NA despite the documented contract.
  if (!inherits(date, "Date")) {
    date <- as.Date(date)
  }
  as.numeric(format(date, "%V"))
}
|
|
|
|
#' Extract ISO Year from Date
|
|
#'
|
|
#' Extracts ISO year from a date using %G format.
|
|
#' ISO year can differ from calendar year around year boundaries.
|
|
#'
|
|
#' @param date A Date object or string convertible to Date
|
|
#' @return Numeric: ISO year
|
|
#'
|
|
#' @examples
|
|
#' get_iso_year(as.Date("2025-01-01")) # Returns: 2025
|
|
#'
|
|
#' Extract ISO Year from Date
#'
#' ISO year can differ from the calendar year around year boundaries.
#'
#' @param date A Date object or a string convertible to Date
#' @return Numeric: ISO year
get_iso_year <- function(date) {
  # BUGFIX: coerce strings to Date first. format() on a character vector
  # ignores the "%G" strftime code and returns the input unchanged, so
  # string inputs previously produced NA despite the documented contract.
  if (!inherits(date, "Date")) {
    date <- as.Date(date)
  }
  as.numeric(format(date, "%G"))
}
|
|
|
|
#' Extract ISO Week and Year as List
|
|
#'
|
|
#' Combines get_iso_week() and get_iso_year() for convenience.
|
|
#'
|
|
#' @param date A Date object or string convertible to Date
|
|
#' @return List with elements: week (1-53), year
|
|
#'
|
|
#' @examples
|
|
#' wwy <- get_iso_week_year(as.Date("2025-01-15"))
|
|
#' # Returns: list(week = 3, year = 2025)
|
|
#'
|
|
#' Extract ISO Week and Year as List
#'
#' @param date A Date object or a string convertible to Date
#' @return List with elements: week (1-53), year
get_iso_week_year <- function(date) {
  # BUGFIX: coerce strings to Date first — format() on a character vector
  # ignores "%V"/"%G" and would yield NA for documented string inputs.
  if (!inherits(date, "Date")) {
    date <- as.Date(date)
  }
  list(
    week = as.numeric(format(date, "%V")),
    year = as.numeric(format(date, "%G"))
  )
}
|
|
|
|
#' Format Date as Week/Year Label
|
|
#'
|
|
#' Converts a date into a readable week label format.
|
|
#' Useful for filenames, directory names, and output identification.
|
|
#'
|
|
#' @param date A Date object or string convertible to Date
|
|
#' @param separator Separator between week number and year (default: "_")
|
|
#' @return String in format "week##_YYYY" (e.g., "week03_2025")
|
|
#'
|
|
#' @examples
|
|
#' format_week_label(as.Date("2025-01-15")) # "week03_2025"
|
|
#' format_week_label(as.Date("2025-01-15"), "-") # "week03-2025"
|
|
#'
|
|
format_week_label <- function(date, separator = "_") {
  # Build "week##<sep>YYYY" from the ISO week/year strftime codes
  # (the same "%V"/"%G" extraction get_iso_week_year() performs).
  iso_week <- as.numeric(format(date, "%V"))
  iso_year <- as.numeric(format(date, "%G"))
  paste0("week", formatC(iso_week, width = 2, flag = "0"), separator, iso_year)
}
|
|
|
|
#' Load Field Boundaries from GeoJSON
|
|
#'
|
|
#' Loads field polygon geometries from GeoJSON file (pivot.geojson or pivot_2.geojson).
|
|
#' Handles CRS validation and column standardization.
|
|
#'
|
|
#' @param data_dir Directory containing GeoJSON file
|
|
#' @return List with elements:
|
|
#' - field_boundaries_sf: sf (Simple Features) object
|
|
#' - field_boundaries: terra SpatVect object (if conversion successful, else sf fallback)
|
|
#'
|
|
#' @details
|
|
#' Automatically selects pivot_2.geojson for ESA project during CI extraction,
|
|
#' otherwise uses pivot.geojson. Handles both multi-polygon and simple polygon geometries.
|
|
#'
|
|
#' @examples
|
|
#' boundaries <- load_field_boundaries("laravel_app/storage/app/angata")
|
|
#' head(boundaries$field_boundaries_sf)
|
|
#'
|
|
load_field_boundaries <- function(data_dir) {
  # Choose field boundaries file based on project and script type.
  # ESA project uses pivot_2.geojson ONLY for scripts 02-03 (CI extraction
  # & growth model); all other scripts use pivot.geojson.
  # NOTE(review): relies on globals `project_dir` and `ci_extraction_script`
  # being set by the calling script — confirm every caller defines them.
  use_pivot_2 <- exists("project_dir") && project_dir == "esa" &&
    exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03

  if (use_pivot_2) {
    field_boundaries_path <- here(data_dir, "pivot_2.geojson")
  } else {
    # NOTE(review): this branch reads from a "Data" subdirectory while the
    # pivot_2 branch reads directly from data_dir — confirm the asymmetry
    # is intentional.
    field_boundaries_path <- here(data_dir, "Data", "pivot.geojson")
  }

  if (!file.exists(field_boundaries_path)) {
    stop(paste("Field boundaries file not found at path:", field_boundaries_path))
  }

  tryCatch({
    # Read GeoJSON with explicit CRS handling
    field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE)

    # Remove OBJECTID column immediately if it exists (common GIS export artifact)
    if ("OBJECTID" %in% names(field_boundaries_sf)) {
      field_boundaries_sf <- field_boundaries_sf %>% select(-OBJECTID)
    }

    # **CRITICAL**: Repair invalid geometries (degenerate vertices,
    # self-intersections, etc.) BEFORE any spatial operations (CRS transform,
    # intersect, crop, ...) to prevent S2 validation errors downstream.
    field_boundaries_sf <- repair_geojson_geometries(field_boundaries_sf)

    # Validate and fix CRS if needed
    tryCatch({
      # Simply assign WGS84 if not already set (safe approach)
      if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) {
        st_crs(field_boundaries_sf) <- 4326
        warning("CRS was missing, assigned WGS84 (EPSG:4326)")
      }
    }, error = function(e) {
      tryCatch({
        # `<<-` deliberately reaches back into load_field_boundaries()'s
        # frame because this handler runs in its own function environment
        st_crs(field_boundaries_sf) <<- 4326
      }, error = function(e2) {
        warning(paste("Could not set CRS:", e2$message))
      })
    })

    # Handle column names - accommodate optional sub_area column
    if ("sub_area" %in% names(field_boundaries_sf)) {
      field_boundaries_sf <- field_boundaries_sf %>%
        dplyr::select(field, sub_field, sub_area) %>%
        sf::st_set_geometry("geometry")
    } else {
      field_boundaries_sf <- field_boundaries_sf %>%
        dplyr::select(field, sub_field) %>%
        sf::st_set_geometry("geometry")
    }

    # Convert to terra vector if possible, otherwise fall back to the sf object
    field_boundaries <- tryCatch({
      field_boundaries_terra <- terra::vect(field_boundaries_sf)
      crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL)
      crs_str <- if (!is.null(crs_value)) as.character(crs_value) else ""

      # If terra dropped or emptied the CRS during conversion, restore WGS84
      if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) {
        terra::crs(field_boundaries_terra) <- "EPSG:4326"
        warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)")
      }
      field_boundaries_terra

    }, error = function(e) {
      warning(paste("Terra conversion failed, using sf object instead:", e$message))
      field_boundaries_sf
    })

    # Return BOTH representations so callers can pick whichever toolchain
    # (sf or terra) they need; `field_boundaries` may be an sf fallback.
    return(list(
      field_boundaries_sf = field_boundaries_sf,
      field_boundaries = field_boundaries
    ))
  }, error = function(e) {
    cat("[DEBUG] Error in load_field_boundaries:\n")
    cat(" Message:", e$message, "\n")
    cat(" Call:", deparse(e$call), "\n")
    stop(paste("Error loading field boundaries:", e$message))
  })
}
|
|
|
|
#' Load Harvesting Data from Excel
|
|
#'
|
|
#' Loads crop harvest schedule from harvest.xlsx file.
|
|
#' Handles flexible date formats (numeric, YYYY-MM-DD, DD/MM/YYYY, etc.).
|
|
#'
|
|
#' @param data_dir Directory containing harvest.xlsx file
|
|
#' @return Data frame with columns: field, sub_field, year, season_start, season_end,
|
|
#' age (weeks), sub_area, tonnage_ha. Returns NULL if file not found.
|
|
#'
|
|
#' @examples
|
|
#' harvest <- load_harvesting_data("laravel_app/storage/app/angata")
|
|
#' head(harvest)
|
|
#'
|
|
#' Load Harvesting Data from Excel
#'
#' Loads the crop harvest schedule from harvest.xlsx under `data_dir`,
#' normalizing column types and flexible date formats.
#'
#' @param data_dir Directory containing the harvest.xlsx file
#' @return Data frame with columns: field, sub_field, year, season_start,
#'   season_end, age (weeks), sub_area, tonnage_ha; NULL if the file is
#'   missing or cannot be parsed (a warning is emitted in both cases).
load_harvesting_data <- function(data_dir) {
  harvest_file <- here(data_dir, "harvest.xlsx")

  if (!file.exists(harvest_file)) {
    warning(paste("Harvest data file not found at path:", harvest_file))
    return(NULL)
  }

  # Parse one cell that may hold a Date, POSIXct, Excel serial number, or a
  # date string in one of several common formats. Returns a Date or NA_real_;
  # sapply() below collapses results to numeric day counts either way, which
  # are re-converted with as.Date(origin = "1970-01-01").
  parse_flexible_date <- function(x) {
    # BUGFIX: test is.null()/length BEFORE is.na(). is.na(NULL) returns
    # logical(0), and a zero-length operand to `||` is an error (R >= 4.2),
    # so the original `is.na(x) || is.null(x)` order crashed on NULL cells.
    if (is.null(x) || length(x) == 0) return(NA_real_)
    if (is.na(x)) return(NA_real_)
    if (inherits(x, "Date")) return(x)
    if (inherits(x, "POSIXct")) return(as.Date(x))

    # Excel stores dates as serial day numbers with a 1899-12-30 origin
    if (is.numeric(x)) {
      return(as.Date(x, origin = "1899-12-30"))
    }

    # Try character conversion with multiple candidate formats
    x_char <- as.character(x)
    formats <- c("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y-%m-%d %H:%M:%S")

    for (fmt in formats) {
      result <- suppressWarnings(as.Date(x_char, format = fmt))
      if (!is.na(result)) return(result)
    }

    NA_real_ # consistent NA type (was a bare logical NA)
  }

  tryCatch({
    harvesting_data <- read_excel(harvest_file) %>%
      dplyr::select(
        c(
          "field",
          "sub_field",
          "year",
          "season_start",
          "season_end",
          "age",
          "sub_area",
          "tonnage_ha"
        )
      ) %>%
      mutate(
        field = as.character(field),
        sub_field = as.character(sub_field),
        year = as.numeric(year),
        # sapply() strips the Date class, leaving numeric day offsets ...
        season_start = sapply(season_start, parse_flexible_date),
        season_end = sapply(season_end, parse_flexible_date),
        # ... which are restored to Date via the standard epoch origin
        season_start = as.Date(season_start, origin = "1970-01-01"),
        season_end = as.Date(season_end, origin = "1970-01-01"),
        age = as.numeric(age),
        sub_area = as.character(sub_area),
        tonnage_ha = as.numeric(tonnage_ha)
      ) %>%
      mutate(
        # Clamp missing/future season ends to today so age is well-defined
        season_end = case_when(
          season_end > Sys.Date() ~ Sys.Date(),
          is.na(season_end) ~ Sys.Date(),
          TRUE ~ season_end
        ),
        # Recompute crop age in whole weeks from the (clamped) season window
        age = round(as.numeric(season_end - season_start) / 7, 0)
      )

    return(harvesting_data)
  }, error = function(e) {
    warning(paste("Error loading harvesting data:", e$message))
    return(NULL)
  })
}
|
|
|
|
#' Generate a Sequence of Dates for Processing
|
|
#'
|
|
#' Creates a date range from start_date to end_date and extracts week/year info.
|
|
#' Used by Scripts 20, 30, 40 to determine data processing windows.
|
|
#'
|
|
#' @param end_date The end date for the sequence (Date object or "YYYY-MM-DD" string)
|
|
#' @param offset Number of days to look back from end_date (e.g., 7 for one week)
|
|
#' @return A list containing:
|
|
#' - week: ISO week number of start_date
|
|
#' - year: ISO year of start_date
|
|
#' - days_filter: Vector of dates in "YYYY-MM-DD" format
|
|
#' - start_date: Start date as Date object
|
|
#' - end_date: End date as Date object
|
|
#'
|
|
#' @details
#' Uses `lubridate::isoweek()` and `lubridate::isoyear()`, which follow the
#' ISO 8601 convention: week 1 is the first week containing a Thursday, and
#' weeks start on Monday. The ISO year can therefore differ from the
#' calendar year near year boundaries.
|
|
#'
|
|
#' @examples
|
|
#' dates <- date_list(as.Date("2025-01-15"), offset = 7)
#' # Returns: week=3, year=2025, days_filter = c("2025-01-09", ..., "2025-01-15")
|
|
#'
|
|
#' dates <- date_list("2025-12-31", offset = 14)
|
|
#' # Handles string input and returns 14 days of data
|
|
#'
|
|
date_list <- function(end_date, offset) {
  # --- Input validation -------------------------------------------------
  # BUGFIX: wrap as.Date() in tryCatch. On an unparseable string,
  # as.Date() raises its own cryptic error, which made the is.na() guard
  # (and its friendly message) unreachable in the original code.
  if (!lubridate::is.Date(end_date)) {
    end_date <- tryCatch(as.Date(end_date), error = function(e) as.Date(NA))
    if (is.na(end_date)) {
      stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.")
    }
  }

  # Coerce offset quietly; non-numeric or non-scalar values are rejected
  # (length guard avoids a vector condition reaching `if`)
  offset <- suppressWarnings(as.numeric(offset))
  if (length(offset) != 1 || is.na(offset) || offset < 1) {
    stop("Invalid offset provided. Expected a positive number.")
  }

  # --- Calculate date range ---------------------------------------------
  offset <- offset - 1 # Adjust offset so the window INCLUDES end_date
  start_date <- end_date - lubridate::days(offset)

  # Extract ISO week and year from the END date (the reporting period)
  week <- lubridate::isoweek(end_date)
  year <- lubridate::isoyear(end_date)

  # Generate the daily sequence, formatted as strings for consistent filtering
  days_filter <- seq(from = start_date, to = end_date, by = "day")
  days_filter <- format(days_filter, "%Y-%m-%d")

  # Log the date range
  safe_log(paste("Date range generated from", start_date, "to", end_date))

  return(list(
    "week" = week,
    "year" = year,
    "days_filter" = days_filter,
    "start_date" = start_date,
    "end_date" = end_date
  ))
}
|
|
|
|
# ==============================================================================
|
|
#' Repair Invalid GeoJSON Geometries
|
|
#'
|
|
#' Fixes common geometry issues in GeoJSON/sf objects:
|
|
#' - Degenerate vertices (duplicate points)
|
|
#' - Self-intersecting polygons
|
|
#' - Invalid ring orientation
|
|
#' - Empty or NULL geometries
|
|
#'
|
|
#' Uses sf::st_make_valid() with buffer trick as fallback.
|
|
#'
|
|
#' @param sf_object sf object (GeoDataFrame) with potentially invalid geometries
|
|
#' @return sf object with repaired geometries
|
|
#'
|
|
#' @details
|
|
#' **Why this matters:**
|
|
#' Pivot GeoJSON files sometimes contain degenerate vertices or self-intersecting
|
|
#' rings from manual editing or GIS data sources. These cause errors when using
|
|
#' S2 geometry (strict validation) during cropping operations.
|
|
#'
|
|
#' **Repair strategy (priority order):**
|
|
#' 1. Try st_make_valid() - GEOS-based repair (most reliable)
|
|
#' 2. Fallback: st_union() + buffer(0) - Forces polygon validity
|
|
#' 3. Last resort: Silently keep original if repair fails
|
|
#'
|
|
#' @examples
|
|
#' \dontrun{
|
|
#' fields <- st_read("pivot.geojson")
|
|
#' fields_fixed <- repair_geojson_geometries(fields)
|
|
#' cat(paste("Fixed geometries: before=",
|
|
#' nrow(fields[!st_is_valid(fields), ]),
|
|
#' ", after=",
|
|
#' nrow(fields_fixed[!st_is_valid(fields_fixed), ])))
|
|
#' }
|
|
#'
|
|
repair_geojson_geometries <- function(sf_object) {
  if (!inherits(sf_object, "sf")) {
    stop("Input must be an sf (Simple Features) object")
  }

  # Count invalid geometries BEFORE repair (NA validity treated as valid here)
  invalid_before <- sum(!sf::st_is_valid(sf_object), na.rm = TRUE)

  if (invalid_before == 0) {
    safe_log("All geometries already valid - no repair needed", "INFO")
    return(sf_object)
  }

  safe_log(paste("Found", invalid_before, "invalid geometries - attempting repair"), "WARNING")

  # STRATEGY: apply st_make_valid() to the entire sf object (GEOS-based,
  # most reliable). Handles degenerate vertices, self-intersections and
  # invalid rings while preserving all features and attributes.
  repaired <- tryCatch({
    repaired_geom <- sf::st_make_valid(sf_object)

    # Verify the feature count is unchanged — st_make_valid() can in
    # principle split or drop features, which would break row alignment
    if (nrow(repaired_geom) != nrow(sf_object)) {
      warning("st_make_valid() changed number of features - attempting row-wise repair")

      # Fallback: repair row-by-row to maintain the original structure;
      # rows that cannot be repaired are left as-is (logged below)
      repaired_geom <- sf_object
      for (i in seq_len(nrow(sf_object))) {
        tryCatch({
          if (!sf::st_is_valid(sf_object[i, ])) {
            repaired_geom[i, ] <- sf::st_make_valid(sf_object[i, ])
          }
        }, error = function(e) {
          safe_log(paste("Could not repair row", i, "-", e$message), "WARNING")
        })
      }
    }

    safe_log("✓ st_make_valid() successfully repaired geometries", "INFO")
    repaired_geom
  }, error = function(e) {
    # NULL signals total repair failure to the fallback branch below
    safe_log(paste("st_make_valid() failed:", e$message), "WARNING")
    NULL
  })

  # Last resort: if repair failed entirely, keep the original object
  if (is.null(repaired)) {
    safe_log(paste("Could not repair", invalid_before, "invalid geometries - keeping original"),
             "WARNING")
    return(sf_object)
  }

  # Count invalid geometries AFTER repair (informational; may be non-zero)
  invalid_after <- sum(!sf::st_is_valid(repaired), na.rm = TRUE)
  safe_log(paste("Repair complete: before =", invalid_before, ", after =", invalid_after), "INFO")

  return(repaired)
}
|
|
|
|
# ==============================================================================
|
|
# END 00_COMMON_UTILS.R
|
|
# ==============================================================================
|