1241 lines
47 KiB
R
1241 lines
47 KiB
R
# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\parameters_project.R
|
||
#
|
||
# PARAMETERS_PROJECT.R
|
||
# ====================
|
||
# This script defines project parameters, directory structures, and loads field boundaries.
|
||
# It establishes all the necessary paths and creates required directories for the SmartCane project.
|
||
|
||
# 1. Load required libraries
|
||
# -------------------------
|
||
suppressPackageStartupMessages({
|
||
library(here)
|
||
library(readxl)
|
||
library(sf)
|
||
library(dplyr)
|
||
library(tidyr)
|
||
library(jsonlite) # For reading tiling_config.json
|
||
})
|
||
|
||
# 2. Client type mapping (for conditional script execution)
|
||
# ---------------------------------------------------------
|
||
# Maps project names to client types for pipeline control
|
||
# Client types:
|
||
# - "cane_supply": Runs Scripts 20,21,30,31,80,91 (full pipeline with Excel output)
|
||
# - "agronomic_support": Runs Scripts 80,90 only (KPI calculation + Word report)
|
||
# - "extension_service": (Future - not yet implemented)
|
||
#
|
||
# NOTE: This will eventually migrate to Laravel environment variables/database
|
||
# For now, maintain this mapping and update as projects are added
|
||
# Project-name -> client-type lookup consumed by get_client_type().
# Update this table as projects are onboarded (until the mapping moves
# to Laravel environment variables / database).
CLIENT_TYPE_MAP <- list(
  angata   = "cane_supply",
  aura     = "agronomic_support",
  chemba   = "cane_supply",
  xinavane = "cane_supply",
  esa      = "cane_supply"
)
|
||
|
||
# Look up the client type for a project name via CLIENT_TYPE_MAP.
# Unknown projects fall back to "cane_supply" with a warning.
get_client_type <- function(project_name) {
  mapped <- CLIENT_TYPE_MAP[[project_name]]
  if (!is.null(mapped)) {
    return(mapped)
  }
  warning(sprintf("Project '%s' not in CLIENT_TYPE_MAP - defaulting to 'cane_supply'", project_name))
  "cane_supply"
}
|
||
|
||
# 2b. Client-specific KPI configurations
|
||
# ----------------------------------------
|
||
# Defines which KPIs and outputs are required for each client type
|
||
# This enables Script 80 to conditionally calculate only relevant metrics
|
||
#
|
||
# Structure:
|
||
# - kpi_calculations: Vector of KPI types to calculate for this client
|
||
# - outputs: Vector of output formats to generate (determines RDS/Excel naming)
|
||
# - requires_harvest_data: Boolean - whether Script 31 harvest predictions are needed
|
||
# - script_90_compatible: Boolean - whether output should match Script 90 expectations
|
||
# - script_91_compatible: Boolean - whether output should match Script 91 expectations
|
||
#
|
||
# Per-client-type KPI configuration consumed by get_client_kpi_config()
# and Script 80. Each entry records which KPIs to compute, which outputs
# to write, whether Script 31 harvest data is needed, and which report
# scripts (90 / 91) the output format must stay compatible with.
CLIENT_TYPE_CONFIGS <- list(

  # agronomic_support (e.g. aura): farm-level KPI summaries feeding the
  # weekly Word report produced by Script 90.
  "agronomic_support" = list(
    client_type = "agronomic_support",
    description = "Farm-level KPI summaries for agronomic decision support",
    kpi_calculations = c(
      "field_uniformity",
      "area_change",
      "tch_forecasted",
      "growth_decline",
      "weed_presence",
      "gap_filling"
    ),
    outputs = c(
      "kpi_summary_tables", # summary statistics for the report front page
      "field_details"       # detailed field table for the report end section
    ),
    requires_harvest_data = FALSE, # Script 31 predictions are not consumed
    script_90_compatible = TRUE,
    script_91_compatible = FALSE
  ),

  # cane_supply (e.g. angata, chemba): per-field analysis with harvest
  # timing prediction, feeding the Excel output of Script 91.
  "cane_supply" = list(
    client_type = "cane_supply",
    description = "Per-field analysis with harvest prediction and phase assignment",
    kpi_calculations = c(
      "per_field_analysis", # field-level stats via 80_weekly_stats_utils.R
      "phase_assignment",   # Germination / Tillering / Grand Growth / Maturation
      "harvest_prediction", # Script 31 harvest age predictions, if available
      "status_triggers"     # field status: Normal / Monitor / Alert / Urgent
    ),
    outputs = c(
      "field_analysis_excel",  # Excel workbook with per-field metrics
      "field_analysis_summary" # summary RDS consumed by Script 91
    ),
    requires_harvest_data = TRUE, # harvest.xlsx required for phase assignment
    script_90_compatible = FALSE,
    script_91_compatible = TRUE
  )
)
|
||
|
||
#' Get KPI configuration for a specific client type
|
||
#' @param client_type Character string of client type (e.g., "agronomic_support", "cane_supply")
|
||
#' @return List containing configuration for that client type
|
||
get_client_kpi_config <- function(client_type) {
  # Look up the configuration; unknown client types fall back to the
  # full cane_supply pipeline with a warning.
  found <- CLIENT_TYPE_CONFIGS[[client_type]]
  if (!is.null(found)) {
    return(found)
  }

  warning(sprintf("Client type '%s' not in CLIENT_TYPE_CONFIGS - defaulting to 'cane_supply'", client_type))
  CLIENT_TYPE_CONFIGS[["cane_supply"]]
}
|
||
|
||
# 3. Smart detection for tile-based vs single-file mosaic approach
|
||
# ----------------------------------------------------------------
|
||
# Detect whether a project uses tile-based or single-file mosaics.
#
# Detection priority:
#   1. tiling_config.json written by Script 10 (authoritative — Script 10
#      explicitly records its tiling decision there).
#   2. Grid-size subdirectories (e.g. "5x5") inside merged_final_tif/.
#   3. Tile-named files (*_XX.tif, XX = two digits) in merged_final_tif/.
#
# @param merged_final_tif_dir Directory holding merged final GeoTIFFs.
# @param daily_tiles_split_dir Optional directory that may contain a
#   tiling_config.json written by Script 10.
# @return A list with at least: has_tiles (logical), detected_tiles
#   (character), total_files (integer), source (character). The grid
#   branch additionally returns grid_size and grid_path.
detect_tile_structure_from_merged_final <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) {

  # PRIORITY 1: tiling_config.json metadata from Script 10
  if (!is.null(daily_tiles_split_dir) && dir.exists(daily_tiles_split_dir)) {
    # The config may live in any grid-size subfolder
    config_files <- list.files(daily_tiles_split_dir,
                               pattern = "tiling_config\\.json$",
                               recursive = TRUE,
                               full.names = TRUE)

    if (length(config_files) > 0) {
      # Several subfolders may each hold a config — use the most recent one
      config_file <- config_files[which.max(file.info(config_files)$mtime)]

      tryCatch({
        config_json <- jsonlite::read_json(config_file)
        # Explicit is.null() checks instead of `%||%`: that operator only
        # exists in base R >= 4.4 (or via rlang), neither of which this
        # file guarantees is available.
        has_tiles_val <- if (is.null(config_json$has_tiles)) TRUE else config_json$has_tiles
        grid_size_val <- if (is.null(config_json$grid_size)) "unknown" else config_json$grid_size
        return(list(
          has_tiles = has_tiles_val,
          detected_tiles = character(),
          total_files = 0,
          source = "tiling_config.json",
          grid_size = grid_size_val
        ))
      }, error = function(e) {
        warning("Error reading tiling_config.json: ", e$message)
        # Fall through to file-based detection
      })
    }
  }

  # PRIORITY 2: file-based detection (fallback if metadata not found)

  if (!dir.exists(merged_final_tif_dir)) {
    return(list(
      has_tiles = FALSE,
      detected_tiles = character(),
      total_files = 0,
      source = "directory_not_found"
    ))
  }

  # Grid-size subdirectories (5x5, 10x10, ...) indicate the layout
  # merged_final_tif/{grid_size}/{DATE}/{DATE}_XX.tif
  grid_subfolders <- list.dirs(merged_final_tif_dir, full.names = FALSE, recursive = FALSE)
  grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE)

  if (length(grid_patterns) > 0) {
    # Found grid-size subdirectories - tiles exist
    grid_size <- grid_patterns[1]
    grid_dir <- file.path(merged_final_tif_dir, grid_size)

    # head() instead of [1:3]: subscripting past the end would pad the
    # sample with NA entries when fewer than 3 tiles exist.
    all_grid_tifs <- list.files(grid_dir, pattern = "\\.tif$", recursive = TRUE)
    sample_tiles <- head(all_grid_tifs, 3)

    return(list(
      has_tiles = TRUE,
      detected_tiles = sample_tiles,
      total_files = length(all_grid_tifs), # true tile count, not the sample size
      source = "grid_subdirectory_detection",
      grid_size = grid_size,
      grid_path = grid_dir
    ))
  }

  # Finally, look for tile-named files directly inside merged_final_tif
  tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE)

  if (length(tif_files) == 0) {
    return(list(
      has_tiles = FALSE,
      detected_tiles = character(),
      total_files = 0,
      source = "no_files_found"
    ))
  }

  # Tile naming pattern: *_XX.tif where XX is exactly two digits,
  # e.g. 2025-11-27_00.tif, 2025-11-27_01.tif, week_50_2024_00.tif
  tile_pattern <- "_(\\d{2})\\.tif$"
  tile_files <- tif_files[grepl(tile_pattern, tif_files)]

  list(
    has_tiles = length(tile_files) > 0,
    detected_tiles = tile_files,
    total_files = length(tif_files),
    source = "file_pattern_detection"
  )
}
|
||
|
||
# 4. Define project directory structure
|
||
# -----------------------------------
|
||
# ==============================================================================
|
||
# CENTRALIZED PATH MANAGEMENT - setup_project_directories()
|
||
# ==============================================================================
|
||
# This function is the single source of truth for ALL file paths used across the pipeline.
|
||
# All scripts should call this function once at startup and use returned paths.
|
||
# This eliminates ~88 hardcoded file.path() calls scattered across 8 scripts.
|
||
#
|
||
# USAGE:
|
||
# paths <- setup_project_directories(project_dir)
|
||
# merged_tif_dir <- paths$merged_tif_folder
|
||
# daily_ci_dir <- paths$daily_ci_vals_dir
|
||
# kpi_output_dir <- paths$kpi_reports_dir
|
||
#
|
||
# TIERS (8-layer directory structure):
|
||
# Tier 1: Raw data (merged_tif)
|
||
# Tier 2: Per-field TIFFs (field_tiles, field_tiles_CI)
|
||
# Tier 3: CI Extraction (daily_ci_vals, cumulative_ci_vals)
|
||
# Tier 4: Growth Model (growth_model_interpolated)
|
||
# Tier 5: Mosaics (weekly_mosaic, weekly_tile_max)
|
||
# Tier 6: KPI & Reporting (kpi_reports_dir, kpi_field_stats_dir)
|
||
# Tier 7: Support (data, vrt, harvest, logs)
|
||
# Tier 8: Config & Metadata (field_boundaries_path, tiling_config_path)
|
||
#
|
||
# BENEFITS:
|
||
# ✓ Single source of truth (eliminates ~88 hardcoded file.path() calls)
|
||
# ✓ Auto-creates all directories (no scattered dir.create() calls)
|
||
# ✓ Easy to update storage structure globally
|
||
# ✓ Consistent naming across all 8 scripts
|
||
# ==============================================================================
|
||
# Single source of truth for ALL file paths used across the pipeline.
# Scripts call this once at startup and use the returned paths list:
#   paths <- setup_project_directories(project_dir)
#   merged_tif_dir <- paths$merged_tif_folder
#
# @param project_dir  Project name (storage subfolder), e.g. "angata"
# @param data_source  Currently unused; kept for backward compatibility with
#                     existing callers (data-source choice lives in
#                     detect_data_source())
# @return Named list of all pipeline paths (tiers 1-8; see the reference
#         comment block below this function). All directories are created.
setup_project_directories <- function(project_dir, data_source = "merged_tif") {
  # ===========================================================================
  # BASE DIRECTORY — the only place here() is used.
  # Derived paths below use file.path(): feeding an already-absolute path
  # back into here() would prepend the project root a second time and
  # produce invalid doubled paths.
  # ===========================================================================
  laravel_storage_dir <- here("laravel_app", "storage", "app", project_dir)

  # TIER 1: raw 4-band GeoTIFFs from Planet (Script 00 output)
  merged_tif_folder <- file.path(laravel_storage_dir, "merged_tif")

  # TIER 2: per-field TIFFs (Script 10)
  # field_tiles/{FIELD_NAME}/{YYYY-MM-DD}.tif
  field_tiles_dir <- file.path(laravel_storage_dir, "field_tiles")
  # Pre-computed per-field CI TIFFs used by Script 40
  field_tiles_ci_dir <- file.path(laravel_storage_dir, "field_tiles_CI")
  # Legacy grid tiling: daily_tiles_split/{grid}/{DATE}/{DATE}_XX.tif
  daily_tiles_split_dir <- file.path(laravel_storage_dir, "daily_tiles_split")

  # TIER 3: CI extraction (Script 20 / 21)
  extracted_ci_base_dir <- file.path(laravel_storage_dir, "Data", "extracted_ci")
  daily_ci_vals_dir <- file.path(extracted_ci_base_dir, "daily_vals")           # combined_CI_data.rds
  cumulative_ci_vals_dir <- file.path(extracted_ci_base_dir, "cumulative_vals") # cumulative CI RDS
  ci_for_python_dir <- file.path(extracted_ci_base_dir, "ci_data_for_python")   # ci_data_for_python.csv

  # TIER 4: growth model interpolation (Script 30)
  growth_model_interpolated_dir <- file.path(laravel_storage_dir, "growth_model_interpolated")

  # TIER 5: weekly mosaics (Script 40)
  weekly_mosaic_dir <- file.path(laravel_storage_dir, "weekly_mosaic")     # per-field mosaics
  weekly_tile_max_dir <- file.path(laravel_storage_dir, "weekly_tile_max") # legacy tiled mosaics

  # TIER 6: KPI & reporting (Scripts 80 / 90 / 91)
  reports_dir <- file.path(laravel_storage_dir, "reports")
  kpi_reports_dir <- file.path(reports_dir, "kpis")                      # Script 80 KPI CSV/RDS
  kpi_field_stats_dir <- file.path(kpi_reports_dir, "field_stats")       # per-field KPI details
  kpi_field_analysis_dir <- file.path(kpi_reports_dir, "field_analysis") # field analysis for Script 91

  # TIER 7: support (data, VRT, harvest, logs)
  data_dir <- file.path(laravel_storage_dir, "Data")
  vrt_dir <- file.path(data_dir, "vrt")             # VRT files from CI extraction
  harvest_dir <- file.path(data_dir, "HarvestData") # harvest schedule data
  log_dir <- file.path(laravel_storage_dir, "logs") # log files

  # TIER 8: config & metadata (file paths, not directories)
  field_boundaries_path <- file.path(data_dir, "pivot.geojson")
  tiling_config_path <- file.path(daily_tiles_split_dir, "tiling_config.json")

  # Create every directory up front, once per pipeline run, so individual
  # scripts never need scattered dir.create() calls.
  all_dirs <- c(
    # Tier 1
    merged_tif_folder,
    # Tier 2
    field_tiles_dir, field_tiles_ci_dir, daily_tiles_split_dir,
    # Tier 3
    extracted_ci_base_dir, daily_ci_vals_dir, cumulative_ci_vals_dir, ci_for_python_dir,
    # Tier 4
    growth_model_interpolated_dir,
    # Tier 5
    weekly_mosaic_dir, weekly_tile_max_dir,
    # Tier 6
    reports_dir, kpi_reports_dir, kpi_field_stats_dir, kpi_field_analysis_dir,
    # Tier 7
    data_dir, vrt_dir, harvest_dir, log_dir
  )

  for (dir_path in all_dirs) {
    dir.create(dir_path, showWarnings = FALSE, recursive = TRUE)
  }

  # Comprehensive path list — use as paths$merged_tif_folder etc.
  return(list(
    # PROJECT ROOT
    laravel_storage_dir = laravel_storage_dir,

    # TIER 1: Raw data
    merged_tif_folder = merged_tif_folder,

    # TIER 2: Per-field TIFFs
    field_tiles_dir = field_tiles_dir,
    field_tiles_ci_dir = field_tiles_ci_dir,
    daily_tiles_split_dir = daily_tiles_split_dir,

    # TIER 3: CI Extraction
    extracted_ci_base_dir = extracted_ci_base_dir,
    daily_ci_vals_dir = daily_ci_vals_dir,
    cumulative_ci_vals_dir = cumulative_ci_vals_dir,
    ci_for_python_dir = ci_for_python_dir,

    # TIER 4: Growth Model
    growth_model_interpolated_dir = growth_model_interpolated_dir,

    # TIER 5: Mosaics
    weekly_mosaic_dir = weekly_mosaic_dir,
    weekly_tile_max_dir = weekly_tile_max_dir,

    # TIER 6: KPI & Reporting
    reports_dir = reports_dir,
    kpi_reports_dir = kpi_reports_dir,
    kpi_field_stats_dir = kpi_field_stats_dir,
    kpi_field_analysis_dir = kpi_field_analysis_dir,

    # TIER 7: Support
    data_dir = data_dir,
    vrt_dir = vrt_dir,
    harvest_dir = harvest_dir,
    log_dir = log_dir,

    # TIER 8: Config & Metadata
    field_boundaries_path = field_boundaries_path,
    tiling_config_path = tiling_config_path
  ))
}
|
||
|
||
# ==============================================================================
|
||
# TIER-BY-TIER PATH REFERENCE (for setup_project_directories output)
|
||
# ==============================================================================
|
||
#
|
||
# TIER 1: RAW DATA (Script 00 - Python download)
|
||
# paths$merged_tif_folder
|
||
# └─ {YYYY-MM-DD}.tif (4-band uint16 GeoTIFFs from Planet API)
|
||
#
|
||
# TIER 2: PER-FIELD TIFFS (Script 10)
|
||
# paths$field_tiles_dir/{FIELD_NAME}/{YYYY-MM-DD}.tif
|
||
# paths$field_tiles_ci_dir/{FIELD_NAME}/{YYYY-MM-DD}.tif
|
||
# paths$daily_tiles_split_dir/{grid_size}/{YYYY-MM-DD}/{YYYY-MM-DD}_XX.tif (legacy)
|
||
#
|
||
# TIER 3: CI EXTRACTION (Script 20)
|
||
# paths$daily_ci_vals_dir/combined_CI_data.rds
|
||
# paths$cumulative_ci_vals_dir/All_pivots_Cumulative_CI_quadrant_year_v2.rds
|
||
# paths$ci_for_python_dir/ci_data_for_python.csv (Script 21 output)
|
||
#
|
||
# TIER 4: GROWTH MODEL (Script 30)
|
||
# paths$growth_model_interpolated_dir/ (RDS files with interpolated CI)
|
||
#
|
||
# TIER 5: MOSAICS (Script 40)
|
||
# paths$weekly_mosaic_dir/{FIELD_NAME}/week_XX_YYYY.tif
|
||
# paths$weekly_tile_max_dir/{grid_size}/week_XX_YYYY_00.tif (legacy)
|
||
#
|
||
# TIER 6: KPI & REPORTING (Scripts 80, 90, 91)
|
||
# paths$kpi_reports_dir/ (KPI outputs from Script 80)
|
||
# paths$kpi_field_stats_dir/ (Per-field KPI RDS)
|
||
# paths$kpi_field_analysis_dir/ (Analysis RDS for Script 91)
|
||
# paths$reports_dir/ (Word/HTML reports)
|
||
#
|
||
# TIER 7: SUPPORT (Various scripts)
|
||
# paths$data_dir/pivot.geojson (Field boundaries)
|
||
# paths$data_dir/harvest.xlsx (Harvest schedule)
|
||
# paths$vrt_dir/ (Virtual raster files)
|
||
# paths$harvest_dir/ (Harvest predictions from Python)
|
||
# paths$log_dir/ (Pipeline logs)
|
||
#
|
||
# TIER 8: CONFIG & METADATA
|
||
# paths$field_boundaries_path (Full path to pivot.geojson)
|
||
# paths$tiling_config_path (Metadata from Script 10)
|
||
#
|
||
# ==============================================================================
|
||
|
||
# 5. Utility Functions
|
||
# ----------------------
|
||
# NOTE: load_field_boundaries() is defined in 00_common_utils.R (sourced after
# parameters_project.R and used by all scripts); load_harvesting_data() is defined below in this file.
|
||
|
||
# 6. Load harvesting data
|
||
# ---------------------
|
||
# Load the harvest schedule from {data_dir}/harvest.xlsx.
#
# @param data_dir Directory containing harvest.xlsx
# @return Data frame with columns field, sub_field, year, season_start,
#   season_end, age (whole weeks), sub_area, tonnage_ha — or NULL (with a
#   warning) when the file is missing or unreadable. Future / missing
#   season_end values are clamped to today, and age is recomputed from the
#   clamped season span.
load_harvesting_data <- function(data_dir) {
  harvest_file <- here(data_dir, "harvest.xlsx")

  if (!file.exists(harvest_file)) {
    warning(paste("Harvest data file not found at path:", harvest_file))
    return(NULL)
  }

  # Parse one date cell that may arrive as Date, POSIXct, an Excel date
  # serial number, or a character string in one of several common formats.
  parse_flexible_date <- function(x) {
    # NULL/empty must be tested BEFORE is.na(): is.na(NULL) yields
    # logical(0), which is an error inside `if (... || ...)`.
    if (is.null(x) || length(x) == 0) return(NA_real_)
    if (is.na(x)) return(NA_real_)
    if (inherits(x, "Date")) return(x)
    if (inherits(x, "POSIXct")) return(as.Date(x))

    # Excel stores dates as day counts from origin 1899-12-30
    if (is.numeric(x)) {
      return(as.Date(x, origin = "1899-12-30"))
    }

    x_char <- as.character(x)

    # Try common formats: YYYY-MM-DD, DD/MM/YYYY, MM/DD/YYYY, with time
    formats <- c("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y-%m-%d %H:%M:%S")
    for (fmt in formats) {
      result <- suppressWarnings(as.Date(x_char, format = fmt))
      if (!is.na(result)) return(result)
    }

    # Unparseable value
    NA_real_
  }

  tryCatch({
    harvesting_data <- read_excel(harvest_file) %>%
      dplyr::select(
        c(
          "field",
          "sub_field",
          "year",
          "season_start",
          "season_end",
          "age",
          "sub_area",
          "tonnage_ha"
        )
      ) %>%
      mutate(
        field = as.character(field),
        sub_field = as.character(sub_field),
        year = as.numeric(year),
        # vapply is type-stable (always numeric(1) per element), unlike
        # sapply, which can silently change its return type
        season_start = vapply(season_start,
                              function(d) as.numeric(parse_flexible_date(d)),
                              numeric(1)),
        season_end = vapply(season_end,
                            function(d) as.numeric(parse_flexible_date(d)),
                            numeric(1)),
        # Re-class the numeric day counts back to Date
        season_start = as.Date(season_start, origin = "1970-01-01"),
        season_end = as.Date(season_end, origin = "1970-01-01"),
        age = as.numeric(age),
        sub_area = as.character(sub_area),
        tonnage_ha = as.numeric(tonnage_ha)
      ) %>%
      mutate(
        # Clamp open-ended or future seasons to today
        season_end = case_when(
          season_end > Sys.Date() ~ Sys.Date(),
          is.na(season_end) ~ Sys.Date(),
          TRUE ~ season_end
        ),
        # Crop age in whole weeks over the (clamped) season span
        age = round(as.numeric(season_end - season_start) / 7, 0)
      )

    return(harvesting_data)
  }, error = function(e) {
    warning(paste("Error loading harvesting data:", e$message))
    return(NULL)
  })
}
|
||
|
||
# 7. Define logging functions globally first
|
||
# ---------------------------------------
|
||
# Create a simple default log function in case setup_logging hasn't been called yet
|
||
# Console-only fallback logger, used until setup_logging() installs the
# file-backed version. Prints "[LEVEL] timestamp - message".
log_message <- function(message, level = "INFO") {
  stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
  line <- sprintf("[%s] %s - %s", level, stamp, message)
  cat(line, "\n")
}
|
||
|
||
# Log a str() preview of the first few elements of `list` via log_message().
# NOTE(review): the parameter name `list` shadows base::list inside this
# function; kept unchanged for backward compatibility with callers that
# pass it by name.
log_head <- function(list, level = "INFO") {
  preview <- capture.output(str(head(list)))
  log_message(paste(preview, collapse = "\n"), level)
}
|
||
|
||
# 8. Set up full logging system with file output
|
||
# -------------------------------------------
|
||
# Install the full, file-backed logging system.
# Creates one log file per day ({YYYYMMDD}.log inside log_dir), builds
# file-backed log_message()/log_head() closures over it, replaces the
# plain console-only globals with them, and returns both plus the path.
setup_logging <- function(log_dir) {
  log_file <- here(log_dir, paste0(format(Sys.Date(), "%Y%m%d"), ".log"))

  # File-backed logger: every message is appended to the log file; only
  # WARNING/ERROR messages are also echoed to the console for debugging.
  log_message <- function(message, level = "INFO") {
    stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
    line <- sprintf("[%s] %s - %s", level, stamp, message)
    cat(line, "\n", file = log_file, append = TRUE)

    if (level %in% c("ERROR", "WARNING")) {
      cat(line, "\n")
    }
  }

  # str() preview of the first few elements, routed through log_message()
  log_head <- function(list, level = "INFO") {
    preview <- capture.output(str(head(list)))
    log_message(paste(preview, collapse = "\n"), level)
  }

  # Swap the enhanced versions in for the console-only globals so every
  # script picks them up transparently.
  assign("log_message", log_message, envir = .GlobalEnv)
  assign("log_head", log_head, envir = .GlobalEnv)

  list(
    log_file = log_file,
    log_message = log_message,
    log_head = log_head
  )
}
|
||
|
||
# 9. HELPER FUNCTIONS FOR COMMON CALCULATIONS
|
||
# -----------------------------------------------
|
||
# Centralized functions to reduce duplication across scripts
|
||
|
||
# Get ISO week and year from a date
|
||
# ISO-8601 week number (1-53) of `date`, as numeric (vectorized).
get_iso_week <- function(date) {
  week_str <- format(date, "%V")
  as.numeric(week_str)
}
|
||
|
||
# ISO-8601 week-based year of `date` (can differ from the calendar year
# around New Year), as numeric (vectorized).
get_iso_year <- function(date) {
  year_str <- format(date, "%G")
  as.numeric(year_str)
}
|
||
|
||
# Get both ISO week and year as a list
|
||
# ISO week number and ISO week-based year of `date`, returned together
# as list(week = ..., year = ...).
get_iso_week_year <- function(date) {
  wk <- as.numeric(format(date, "%V"))
  yr <- as.numeric(format(date, "%G"))
  list(week = wk, year = yr)
}
|
||
|
||
# Format week/year into a readable label
|
||
# Readable ISO week label for `date`, e.g. "week02_2026".
# `separator` goes between the zero-padded week and the year.
format_week_label <- function(date, separator = "_") {
  wk <- as.numeric(format(date, "%V"))
  yr <- as.numeric(format(date, "%G"))
  sprintf("week%02d%s%d", wk, separator, yr)
}
|
||
|
||
# Auto-detect mosaic mode
|
||
# For per-field architecture, always returns "single-file" (weekly_mosaic/{FIELD}/week_*.tif)
|
||
# Auto-detect the mosaic mode for a project.
# The per-field architecture stores single-file mosaics under
# weekly_mosaic/{FIELD}/week_*.tif, so a present weekly_mosaic directory
# means "single-file"; otherwise the mode is "unknown".
detect_mosaic_mode <- function(project_dir) {
  mosaic_root <- file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic")
  if (!dir.exists(mosaic_root)) {
    return("unknown")
  }
  "single-file"
}
|
||
|
||
# Auto-detect grid size from tile directory structure
|
||
# For per-field architecture, returns "unknown" since grid-based organization is legacy
|
||
# Grid size is a legacy concept from tile-based mosaics; the per-field
# architecture no longer organizes data by grid, so this always reports
# "unknown". `project_dir` is accepted only for interface compatibility.
detect_grid_size <- function(project_dir) {
  "unknown"
}
|
||
|
||
# Build storage paths consistently across all scripts
|
||
# Build a project storage path consistently across all scripts:
# laravel_app/storage/app/{project_dir}[/{subdir}].
get_project_storage_path <- function(project_dir, subdir = NULL) {
  base <- file.path("laravel_app", "storage", "app", project_dir)
  if (is.null(subdir)) {
    return(base)
  }
  file.path(base, subdir)
}
|
||
|
||
# Mosaic directory for a project. `mosaic_mode` is ignored: the per-field
# architecture always stores single-file mosaics under weekly_mosaic/.
get_mosaic_dir <- function(project_dir, mosaic_mode = "auto") {
  file.path("laravel_app", "storage", "app", project_dir, "weekly_mosaic")
}
|
||
|
||
# KPI output directory for a project/client combination.
# agronomic_support clients write farm-level KPIs; every other client
# type writes per-field analysis output.
get_kpi_dir <- function(project_dir, client_type) {
  leaf <- if (client_type == "agronomic_support") "field_level" else "field_analysis"
  file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", leaf)
}
|
||
|
||
# Logging functions moved to 00_common_utils.R
|
||
# - smartcane_log() — Main logging function with level prefix
|
||
# - smartcane_debug() — Conditional debug logging
|
||
# - smartcane_warn() — Warning wrapper
|
||
# Import with: source("r_app/00_common_utils.R")
|
||
|
||
# ============================================================================
|
||
# PHASE 3 & 4: OPTIMIZATION & DOCUMENTATION
|
||
# ============================================================================
|
||
|
||
# System Constants
|
||
# ----------------
|
||
# Define once, use everywhere
|
||
|
||
# Absolute path to the Rscript binary used by run_full_pipeline.R when
# spawning child R processes via system().
# NOTE(review): hard-coded to R 4.4.3 on one Windows machine — this breaks
# on other hosts and after R upgrades; consider Sys.which("Rscript") or an
# environment variable instead.
RSCRIPT_PATH <- "C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe"
# Used in run_full_pipeline.R for calling R scripts via system()
||
|
||
# Data Source Documentation
|
||
# ---------------------------
|
||
# Explains the two satellite data formats and when to use each
|
||
#
|
||
# SmartCane uses PlanetScope imagery from Planet Labs API in two formats:
|
||
#
|
||
# 1. merged_tif (4-band):
|
||
# - Standard format: Red, Green, Blue, Near-Infrared
|
||
# - Size: ~150-200 MB per date
|
||
# - Use case: Agronomic support, general crop health monitoring
|
||
# - Projects: aura, xinavane
|
||
# - Cloud handling: Basic cloud masking from Planet metadata
|
||
#
|
||
# 2. merged_tif_8b (8-band with cloud confidence):
|
||
# - Enhanced format: 4-band imagery + 4-band UDM2 cloud mask
|
||
# - UDM2 bands: Clear, Snow, Shadow, Light Haze
|
||
# - Size: ~250-350 MB per date
|
||
# - Use case: Harvest prediction, supply chain optimization
|
||
# - Projects: angata, chemba, esa (cane_supply clients)
|
||
# - Cloud handling: Per-pixel cloud confidence from Planet UDM2
|
||
# - Why: Cane supply chains need precise confidence to predict harvest dates
|
||
# (don't want to predict based on cloudy data)
|
||
#
|
||
# The system auto-detects which is available via detect_data_source()
|
||
|
||
# Mosaic Mode Documentation
|
||
# --------------------------
|
||
# SmartCane supports two ways to store and process weekly mosaics:
|
||
#
|
||
# 1. Single-file mosaic ("single-file"):
|
||
# - One GeoTIFF per week: weekly_mosaic/week_02_2026.tif
|
||
# - 5 bands per file: R, G, B, NIR, CI (Canopy Index)
|
||
# - Size: ~300-500 MB per week
|
||
# - Pros: Simpler file management, easier full-field visualization
|
||
# - Cons: Slower for field-specific queries, requires loading full raster
|
||
# - Best for: Agronomic support (aura) with <100 fields
|
||
# - Script 04 output: 5-band single-file mosaic
|
||
#
|
||
# 2. Tiled mosaic ("tiled"):
|
||
# - Grid of tiles per week: weekly_tile_max/5x5/week_02_2026_{TT}.tif
|
||
# - Example: 25 files (5×5 grid) × 5 bands = 125 individual tiffs
|
||
# - Size: ~15-20 MB per tile, organized in folders
|
||
# - Pros: Parallel processing, fast field lookups, scales to 1000+ fields
|
||
# - Cons: More file I/O, requires tile-to-field mapping metadata
|
||
# - Best for: Cane supply (angata, chemba) with 500+ fields
|
||
# - Script 04 output: Per-tile tiff files in weekly_tile_max/{grid}/
|
||
# - Tile assignment: Field boundaries mapped to grid coordinates
|
||
#
|
||
# The system auto-detects which is available via detect_mosaic_mode()
|
||
|
||
# Client Type Documentation
|
||
# --------------------------
|
||
# SmartCane runs different analysis pipelines based on client_type:
|
||
#
|
||
# CLIENT_TYPE: cane_supply
|
||
# Purpose: Optimize sugar mill supply chain (harvest scheduling)
|
||
# Scripts run: 20 (CI), 21 (RDS to CSV), 30 (Growth), 31 (Harvest pred), 40 (Mosaic), 80 (KPI), 91 (Excel)
|
||
# Outputs:
|
||
# - Per-field analysis: field status, growth phase, harvest readiness
|
||
# - Excel reports (Script 91): Detailed metrics for logistics planning
|
||
# - KPI directory: reports/kpis/field_analysis/ (one RDS per week)
|
||
# Harvest data: Required (harvest.xlsx - planting dates for phase assignment)
|
||
# Data source: merged_tif_8b (uses UDM2 cloud confidence for per-pixel reliability)
|
||
# Mosaic mode: tiled (scales to 500+ fields)
|
||
# Projects: angata, chemba, xinavane, esa
|
||
#
|
||
# CLIENT_TYPE: agronomic_support
|
||
# Purpose: Provide weekly crop health insights to agronomists
|
||
# Scripts run: 80 (KPI), 90 (Word report)
|
||
# Outputs:
|
||
# - Farm-level KPI summaries (no per-field breakdown)
|
||
# - Word reports (Script 90): Charts and trends for agronomist decision support
|
||
# - KPI directory: reports/kpis/field_level/ (one RDS per week)
|
||
# Harvest data: Not used
|
||
# Data source: merged_tif (simpler, smaller)
|
||
# Mosaic mode: single-file (100-200 fields)
|
||
# Projects: aura
|
||
#
|
||
|
||
# Detect data source (merged_tif vs merged_tif_8b) based on availability
|
||
# NOTE: availability-based detection is not implemented; currently always returns "merged_tif"
|
||
# Data-source selection for a project. Availability-based detection of
# merged_tif vs merged_tif_8b is not implemented; every project is
# currently pinned to "merged_tif" for consistency. `project_dir` is
# accepted only for interface compatibility.
detect_data_source <- function(project_dir) {
  "merged_tif"
}
|
||
|
||
# Check KPI completeness for a reporting period
|
||
# Returns: List with kpis_df (data.frame), missing_count, and all_complete (boolean)
|
||
# This replaces duplicate KPI checking logic in run_full_pipeline.R (lines ~228-270, ~786-810)
|
||
# Check KPI completeness for a reporting period.
#
# Walks back `reporting_weeks_needed` weeks (7-day steps) from `end_date`
# and checks the client's KPI directory for files matching each week's
# "weekXX_YYYY" tag. Replaces duplicate checking logic in
# run_full_pipeline.R.
#
# @param project_dir Project name (storage subfolder)
# @param client_type Client type (selects the KPI subdirectory)
# @param end_date Date of the most recent reporting week
# @param reporting_weeks_needed Number of consecutive weeks that need KPIs
# @return List: kpis_df (one row per week), kpi_dir, missing_count,
#   missing_weeks (rows lacking KPIs), all_complete (logical)
check_kpi_completeness <- function(project_dir, client_type, end_date, reporting_weeks_needed) {
  kpi_dir <- get_kpi_dir(project_dir, client_type)

  # Preallocate one slot per week and bind once at the end: rbind() inside
  # the loop copies the whole frame each iteration (O(n^2)). seq_len() also
  # handles reporting_weeks_needed == 0 safely, where 0:(n - 1) would
  # iterate over c(0, -1).
  week_rows <- vector("list", reporting_weeks_needed)

  for (i in seq_len(reporting_weeks_needed)) {
    weeks_back <- i - 1
    check_date <- end_date - (weeks_back * 7)
    wwy <- get_iso_week_year(check_date)

    # Build week pattern and check whether any KPI file exists for it
    week_pattern <- sprintf("week%02d_%d", wwy$week, wwy$year)
    files_this_week <- list.files(kpi_dir, pattern = week_pattern)
    has_kpis <- length(files_this_week) > 0

    week_rows[[i]] <- data.frame(
      week = wwy$week,
      year = wwy$year,
      date = check_date,
      has_kpis = has_kpis,
      pattern = week_pattern,
      file_count = length(files_this_week)
    )

    # Debug logging (smartcane_debug is provided by 00_common_utils.R)
    smartcane_debug(sprintf(
      "Week %02d/%d (%s): %s (%d files)",
      wwy$week, wwy$year, format(check_date, "%Y-%m-%d"),
      if (has_kpis) "✓ FOUND" else "✗ MISSING",
      length(files_this_week)
    ))
  }

  kpis_needed <- if (length(week_rows) > 0) {
    do.call(rbind, week_rows)
  } else {
    # Empty but correctly-typed frame when no weeks were requested
    data.frame(week = numeric(0), year = numeric(0), date = end_date[0],
               has_kpis = logical(0), pattern = character(0),
               file_count = integer(0))
  }

  # Summary statistics
  missing_count <- sum(!kpis_needed$has_kpis)
  all_complete <- missing_count == 0

  return(list(
    kpis_df = kpis_needed,
    kpi_dir = kpi_dir,
    missing_count = missing_count,
    missing_weeks = kpis_needed[!kpis_needed$has_kpis, ],
    all_complete = all_complete
  ))
}
|
||
|
||
# ==============================================================================
# HELPER FUNCTIONS FOR run_full_pipeline.R PATH VERIFICATION (SC-116)
# ==============================================================================
# These functions replace hardcoded file.path() calls in run_full_pipeline.R
# with centralized, testable helper functions. Each function verifies a specific
# output directory for a pipeline stage.
#' Get verification path for Script 31 harvest output
#'
#' @param project_dir Character. Project name (e.g., "angata", "aura")
#' @param week_num Integer. ISO week number (01-53)
#' @param year_num Integer. Year (e.g., 2026)
#' @return Character. Full path to expected harvest imminent CSV file
#' @details
#' Script 31 generates: {project}_harvest_imminent_week_{WW}_{YYYY}.csv
#' (single project prefix, matching the sprintf format below).
#' Location: laravel_app/storage/app/{project}/reports/kpis/field_stats/
#'
get_harvest_output_path <- function(project_dir, week_num, year_num) {
  file.path(
    "laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats",
    sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, week_num, year_num)
  )
}
#' Check if harvest output file exists for a given week
#'
#' Thin convenience wrapper: resolves the expected Script 31 CSV path via
#' get_harvest_output_path() and tests it on disk.
#'
#' @param project_dir Character. Project name
#' @param week_num Integer. ISO week number
#' @param year_num Integer. Year
#' @return Logical. TRUE if file exists, FALSE otherwise
#'
check_harvest_output_exists <- function(project_dir, week_num, year_num) {
  expected_csv <- get_harvest_output_path(project_dir, week_num, year_num)
  file.exists(expected_csv)
}
#' Get expected output directory for a mosaic verification based on mode
#'
#' @param project_dir Character. Project name
#' @param mosaic_mode Character. Either "tiled" or "single-file"
#' @return Character. Full path to mosaic directory
#'
#' @details
#' Tiled:       laravel_app/storage/app/{project}/weekly_tile_max/
#' Single-file: laravel_app/storage/app/{project}/weekly_mosaic/
#' Any value other than "tiled" falls back to single-file, which is the
#' standard layout for the per-field architecture.
#'
get_mosaic_verification_dir <- function(project_dir, mosaic_mode) {
  subdir <- if (mosaic_mode == "tiled") "weekly_tile_max" else "weekly_mosaic"
  file.path("laravel_app", "storage", "app", project_dir, subdir)
}
#' Check if mosaic files exist for a specific week
#'
#' @param project_dir Character. Project name
#' @param week_num Integer. ISO week number
#' @param year_num Integer. Year
#' @param mosaic_mode Character. "tiled" or "single-file"
#' @return List with created (logical), file_count (int), and sample_files (char vector)
#'
check_mosaic_exists <- function(project_dir, week_num, year_num, mosaic_mode) {
  mosaic_dir <- get_mosaic_verification_dir(project_dir, mosaic_mode)

  matches <- character()
  if (dir.exists(mosaic_dir)) {
    # Recursive search supports the per-field architecture (one subdir per field)
    tif_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year_num)
    matches <- list.files(mosaic_dir, pattern = tif_pattern,
                          recursive = TRUE, full.names = FALSE)
  }

  list(
    created = length(matches) > 0,
    file_count = length(matches),
    sample_files = head(matches, 3) # first 3 files as a sample
  )
}
# 9. Initialize the project
# ----------------------
#' Initialize all project components and return them as one flat list.
#'
#' Sets up the directory structure, logging, field boundaries and harvesting
#' data for a project, in that order (directories must exist before the
#' loaders run).
#'
#' @param project_dir Character. Project name.
#' @param data_source Character. TIF folder selector (default "merged_tif").
#' @return A list combining all directory paths from
#'   setup_project_directories() with logging, field_boundaries,
#'   field_boundaries_sf and harvesting_data entries.
initialize_project <- function(project_dir, data_source = "merged_tif") {
  # Directory structure first; data_source selects which TIF folder is used
  dirs <- setup_project_directories(project_dir, data_source = data_source)

  logging <- setup_logging(dirs$log_dir)
  boundaries <- load_field_boundaries(dirs$data_dir)
  harvesting_data <- load_harvesting_data(dirs$data_dir)

  extras <- list(
    logging = logging,
    field_boundaries = boundaries$field_boundaries,
    field_boundaries_sf = boundaries$field_boundaries_sf,
    harvesting_data = harvesting_data
  )

  # Flatten dirs + extras into a single list for list2env() at the call site
  c(dirs, extras)
}
# When this script is sourced, auto-initialize using the global `project_dir`
# variable if the caller has set it. The resulting config is flattened into
# the global environment so downstream scripts can reference paths directly.
if (exists("project_dir")) {
  # Now we can safely log before initialization
  log_message(paste("Initializing project with directory:", project_dir))

  # Use data_source if it exists (passed from 02_ci_extraction.R), otherwise use default
  data_src <- if (exists("data_source")) data_source else "merged_tif"
  log_message(paste("Using data source directory:", data_src))

  project_config <- initialize_project(project_dir, data_source = data_src)

  # Expose all variables to the global environment.
  # NOTE(review): list2env() into .GlobalEnv silently overwrites any existing
  # globals with the same names as project_config entries.
  list2env(project_config, envir = .GlobalEnv)

  # Log project initialization completion with tile mode info.
  # use_tile_mosaic / tile_detection_info are expected to be set elsewhere
  # (not in this chunk) -- hence the exists() guards.
  log_message(paste("Project initialized with directory:", project_dir))
  if (exists("use_tile_mosaic")) {
    mosaic_mode <- if (use_tile_mosaic) "TILE-BASED" else "SINGLE-FILE"
    log_message(paste("Mosaic mode detected:", mosaic_mode))
    if (exists("tile_detection_info") && !is.null(tile_detection_info)) {
      log_message(paste(" - Detection source:", tile_detection_info$detected_source))
      log_message(paste(" - Grid size:", tile_detection_info$grid_size))
      log_message(paste(" - Detected files in storage:", tile_detection_info$detected_count))
      if (length(tile_detection_info$sample_tiles) > 0) {
        log_message(paste(" - Sample tile files:", paste(tile_detection_info$sample_tiles, collapse = ", ")))
      }
    }
  }
} else {
  warning("project_dir variable not found. Please set project_dir before sourcing parameters_project.R")
}
#' Safe Logging Function
#'
#' Generic logging with a [LEVEL] prefix, written to stdout via cat().
#' Works standalone without any framework; consistent with the SmartCane
#' logging standard.
#'
#' @param message The message to log
#' @param level The log level (default: "INFO"). Options: "INFO", "WARNING", "ERROR", "DEBUG"
#' @return NULL (invisible, used for side effects)
#'
#' @examples
#' safe_log("Processing started", "INFO")
#' safe_log("Check input file", "WARNING")
#' safe_log("Failed to load data", "ERROR")
#'
safe_log <- function(message, level = "INFO") {
  cat(sprintf("[%s] %s\n", level, message))
}
#' SmartCane Debug Logging (Conditional)
#'
#' Emits a DEBUG-level message via safe_log() only when verbose = TRUE or the
#' SMARTCANE_DEBUG environment variable equals "TRUE". Keeps normal output
#' clean while allowing detailed tracing during development.
#'
#' @param message The message to log
#' @param verbose Whether to output regardless of SMARTCANE_DEBUG (default: FALSE)
#' @return NULL (invisible, used for side effects)
#'
#' @examples
#' smartcane_debug("Processing field 1", verbose = FALSE) # Only if SMARTCANE_DEBUG=TRUE
#' smartcane_debug("Detailed state info", verbose = TRUE) # Always outputs
#'
smartcane_debug <- function(message, verbose = FALSE) {
  debug_enabled <- verbose || identical(Sys.getenv("SMARTCANE_DEBUG"), "TRUE")
  if (debug_enabled) {
    safe_log(message, level = "DEBUG")
  }
  invisible(NULL)
}
#' SmartCane Warning Logging
#'
#' Convenience wrapper around safe_log() that fixes the level at "WARN".
#'
#' @param message The message to log
#' @return NULL (invisible, used for side effects)
#'
#' @examples
#' smartcane_warn("Check data format before proceeding")
#'
smartcane_warn <- function(message) {
  safe_log(level = "WARN", message = message)
}
#' Load Field Boundaries from GeoJSON
#'
#' Loads field polygon geometries from a GeoJSON file (pivot.geojson or
#' pivot_2.geojson), repairs invalid geometries, normalizes the CRS to WGS84
#' when missing, and standardizes the attribute columns.
#'
#' @param data_dir Directory containing the GeoJSON file
#' @return List with elements:
#'   - field_boundaries_sf: sf (Simple Features) object
#'   - field_boundaries: terra SpatVect object (if conversion successful, else sf fallback)
#'
#' @details
#' Selects pivot_2.geojson only when the global `project_dir` is "esa" AND the
#' global flag `ci_extraction_script` exists (set by scripts 02-03); otherwise
#' uses pivot.geojson. Requires `field` and `sub_field` columns in the GeoJSON;
#' `sub_area` is optional and preserved when present.
#'
#' @examples
#' boundaries <- load_field_boundaries("laravel_app/storage/app/angata")
#' head(boundaries$field_boundaries_sf)
#'
load_field_boundaries <- function(data_dir) {
  # Choose field boundaries file based on project and script type.
  # ESA project uses pivot_2.geojson ONLY for scripts 02-03 (CI extraction & growth model);
  # all other scripts (including 04-mosaic, 09-KPIs, 10-reports) use pivot.geojson.
  # Relies on globals `project_dir` and `ci_extraction_script` set by the caller.
  use_pivot_2 <- exists("project_dir") && project_dir == "esa" &&
    exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03

  if (use_pivot_2) {
    field_boundaries_path <- file.path(data_dir, "pivot_2.geojson")
  } else {
    field_boundaries_path <- file.path(data_dir, "pivot.geojson")
  }

  # Fail fast with a clear message rather than letting st_read() error later
  if (!file.exists(field_boundaries_path)) {
    stop(paste("Field boundaries file not found at path:", field_boundaries_path))
  }

  tryCatch({
    # Read GeoJSON with explicit CRS handling
    field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE)

    # Remove OBJECTID column immediately if it exists (GIS-export artifact)
    if ("OBJECTID" %in% names(field_boundaries_sf)) {
      field_boundaries_sf <- field_boundaries_sf %>% select(-OBJECTID)
    }

    # **CRITICAL**: Repair invalid geometries (degenerate vertices, self-intersections, etc.)
    # This must happen BEFORE any spatial operations (CRS transform, intersect, crop, etc.)
    # to prevent S2 geometry validation errors during downstream processing
    field_boundaries_sf <- repair_geojson_geometries(field_boundaries_sf)

    # Validate and fix CRS if needed
    tryCatch({
      # Simply assign WGS84 if not already set (safe approach)
      if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) {
        st_crs(field_boundaries_sf) <- 4326
        warning("CRS was missing, assigned WGS84 (EPSG:4326)")
      }
    }, error = function(e) {
      # Fallback: assign into the enclosing frame with <<- if the normal
      # assignment errored. NOTE(review): <<- from this handler resolves
      # field_boundaries_sf in the function's evaluation frame -- confirm
      # this still holds if the surrounding code is refactored.
      tryCatch({
        st_crs(field_boundaries_sf) <<- 4326
      }, error = function(e2) {
        warning(paste("Could not set CRS:", e2$message))
      })
    })

    # Handle column names - accommodate optional sub_area column.
    # Errors here if `field`/`sub_field` are absent, which surfaces via the
    # outer tryCatch as "Error loading field boundaries: ...".
    if ("sub_area" %in% names(field_boundaries_sf)) {
      field_boundaries_sf <- field_boundaries_sf %>%
        dplyr::select(field, sub_field, sub_area) %>%
        sf::st_set_geometry("geometry")
    } else {
      field_boundaries_sf <- field_boundaries_sf %>%
        dplyr::select(field, sub_field) %>%
        sf::st_set_geometry("geometry")
    }

    # Convert to terra vector if possible, otherwise use sf as a fallback
    field_boundaries <- tryCatch({
      field_boundaries_terra <- terra::vect(field_boundaries_sf)
      crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL)
      crs_str <- if (!is.null(crs_value)) as.character(crs_value) else ""

      # terra can return an empty CRS string even when sf had one; backfill WGS84
      if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) {
        terra::crs(field_boundaries_terra) <- "EPSG:4326"
        warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)")
      }
      field_boundaries_terra

    }, error = function(e) {
      warning(paste("Terra conversion failed, using sf object instead:", e$message))
      field_boundaries_sf
    })

    return(list(
      field_boundaries_sf = field_boundaries_sf,
      field_boundaries = field_boundaries
    ))
  }, error = function(e) {
    # Print debug context before rethrowing so the original call site is visible
    cat("[DEBUG] Error in load_field_boundaries:\n")
    cat(" Message:", e$message, "\n")
    cat(" Call:", deparse(e$call), "\n")
    stop(paste("Error loading field boundaries:", e$message))
  })
}
#' Generate a Sequence of Dates for Processing
#'
#' Creates a date range from start_date to end_date and extracts week/year info.
#' Used by Scripts 20, 30, 40 to determine data processing windows.
#'
#' @param end_date The end date for the sequence (Date object or "YYYY-MM-DD" string)
#' @param offset Number of days in the window, inclusive of end_date
#'   (e.g. offset = 7 yields a 7-day window ending on end_date)
#' @return A list containing:
#'   - week: ISO week number of end_date (the reporting period)
#'   - year: ISO year of end_date
#'   - days_filter: Vector of dates in "YYYY-MM-DD" format
#'   - start_date: Start date as Date object
#'   - end_date: End date as Date object
#'
#' @details
#' Uses `lubridate::isoweek()` and `lubridate::isoyear()`, so week 1 is the
#' ISO-8601 week containing the year's first Thursday and weeks start on
#' Monday. The reported week/year are taken from `end_date` (the reporting
#' period), not from `start_date`.
#'
#' @examples
#' dates <- date_list(as.Date("2025-01-15"), offset = 7)
#' # Returns: week=3, year=2025, days_filter = c("2025-01-09", ..., "2025-01-15")
#'
#' dates <- date_list("2025-12-31", offset = 14)
#' # Handles string input and returns 14 days of data
#'
date_list <- function(end_date, offset) {
  # Input validation: coerce strings to Date, reject unparseable input
  if (!lubridate::is.Date(end_date)) {
    end_date <- as.Date(end_date)
    if (is.na(end_date)) {
      stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.")
    }
  }

  offset <- as.numeric(offset)
  if (is.na(offset) || offset < 1) {
    stop("Invalid offset provided. Expected a positive number.")
  }

  # Calculate date range
  offset <- offset - 1 # Adjust offset to include end_date
  start_date <- end_date - lubridate::days(offset)

  # Extract ISO week and year information (from END date for reporting period)
  week <- lubridate::isoweek(end_date)
  year <- lubridate::isoyear(end_date)

  # Generate sequence of dates
  days_filter <- seq(from = start_date, to = end_date, by = "day")
  days_filter <- format(days_filter, "%Y-%m-%d") # Format for consistent filtering

  # Log the date range
  safe_log(paste("Date range generated from", start_date, "to", end_date))

  return(list(
    "week" = week,
    "year" = year,
    "days_filter" = days_filter,
    "start_date" = start_date,
    "end_date" = end_date
  ))
}
# ==============================================================================
#' Repair Invalid GeoJSON Geometries
#'
#' Fixes common geometry issues in GeoJSON/sf objects:
#' - Degenerate vertices (duplicate points)
#' - Self-intersecting polygons
#' - Invalid ring orientation
#' - Empty or NULL geometries
#'
#' Uses sf::st_make_valid(); falls back to row-wise repair if the whole-object
#' repair changes the feature count, and keeps the original if all repair fails.
#'
#' @param sf_object sf object (GeoDataFrame) with potentially invalid geometries
#' @return sf object with repaired geometries (or the original object unchanged
#'   if no geometries were invalid or repair was impossible)
#'
#' @details
#' **Why this matters:**
#' Pivot GeoJSON files sometimes contain degenerate vertices or self-intersecting
#' rings from manual editing or GIS data sources. These cause errors when using
#' S2 geometry (strict validation) during cropping operations.
#'
#' **Repair strategy (priority order):**
#' 1. Try st_make_valid() on the whole object - GEOS-based repair (most reliable)
#' 2. Fallback: row-wise st_make_valid() when the feature count changed
#' 3. Last resort: keep the original object (with a WARNING log) if repair fails
#'
#' @examples
#' \dontrun{
#' fields <- st_read("pivot.geojson")
#' fields_fixed <- repair_geojson_geometries(fields)
#' cat(paste("Fixed geometries: before=",
#'           nrow(fields[!st_is_valid(fields), ]),
#'           ", after=",
#'           nrow(fields_fixed[!st_is_valid(fields_fixed), ])))
#' }
#'
repair_geojson_geometries <- function(sf_object) {
  if (!inherits(sf_object, "sf")) {
    stop("Input must be an sf (Simple Features) object")
  }

  # Count invalid geometries BEFORE repair; na.rm guards against st_is_valid()
  # returning NA for corrupt geometries
  invalid_before <- sum(!sf::st_is_valid(sf_object), na.rm = TRUE)

  # Fast path: nothing to do
  if (invalid_before == 0) {
    safe_log("All geometries already valid - no repair needed", "INFO")
    return(sf_object)
  }

  safe_log(paste("Found", invalid_before, "invalid geometries - attempting repair"), "WARNING")

  # STRATEGY: Apply st_make_valid() to entire sf object (most reliable for GEOS)
  # This handles degenerate vertices, self-intersections, invalid rings while preserving all features
  repaired <- tryCatch({
    # st_make_valid() on entire sf object preserves all features and attributes
    repaired_geom <- sf::st_make_valid(sf_object)

    # Verify we still have the same number of rows
    if (nrow(repaired_geom) != nrow(sf_object)) {
      warning("st_make_valid() changed number of features - attempting row-wise repair")

      # Fallback: Repair row-by-row to maintain original structure.
      # NOTE(review): assigning a repaired row back may warn if st_make_valid()
      # changes the geometry type (e.g. POLYGON -> MULTIPOLYGON) -- confirm
      # against the sf documentation for st_make_valid().
      repaired_geom <- sf_object
      for (i in seq_len(nrow(sf_object))) {
        tryCatch({
          if (!sf::st_is_valid(sf_object[i, ])) {
            repaired_geom[i, ] <- sf::st_make_valid(sf_object[i, ])
          }
        }, error = function(e) {
          # Best-effort: log and leave this row as-is rather than aborting
          safe_log(paste("Could not repair row", i, "-", e$message), "WARNING")
        })
      }
    }

    safe_log("✓ st_make_valid() successfully repaired geometries", "INFO")
    repaired_geom
  }, error = function(e) {
    # Whole-object repair itself errored; NULL signals "use original" below
    safe_log(paste("st_make_valid() failed:", e$message), "WARNING")
    NULL
  })

  # If repair failed, keep original (deliberate best-effort: downstream code
  # may still work, and aborting here would block the whole pipeline)
  if (is.null(repaired)) {
    safe_log(paste("Could not repair", invalid_before, "invalid geometries - keeping original"),
             "WARNING")
    return(sf_object)
  }

  # Count invalid geometries AFTER repair (may be > 0 if row-wise repair skipped rows)
  invalid_after <- sum(!sf::st_is_valid(repaired), na.rm = TRUE)
  safe_log(paste("Repair complete: before =", invalid_before, ", after =", invalid_after), "INFO")

  return(repaired)
}
# ==============================================================================
# END parameters_project.R (shared utilities, formerly 00_common_utils.R)
# ==============================================================================