# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\parameters_project.R
#
# PARAMETERS_PROJECT.R
# ====================
# Defines project parameters, directory structures, and loads field boundaries.
# Establishes all necessary paths and creates required directories for the
# SmartCane project.

# 1. Load required libraries
# --------------------------
suppressPackageStartupMessages({
  library(here)
  library(readxl)
  library(sf)
  library(dplyr)
  library(tidyr)
  library(jsonlite) # For reading tiling_config.json
})

# 2. Client type mapping (for conditional script execution)
# ---------------------------------------------------------
# Maps project names to client types for pipeline control.
# Client types:
#   - "cane_supply":       Runs Scripts 20,21,30,31,80,91 (full pipeline with Excel output)
#   - "agronomic_support": Runs Scripts 80,90 only (KPI calculation + Word report)
#   - "extension_service": (Future - not yet implemented)
#
# NOTE: This will eventually migrate to Laravel environment variables/database.
# For now, maintain this mapping and update as projects are added.
CLIENT_TYPE_MAP <- list(
  "angata"   = "cane_supply",
  "aura"     = "agronomic_support",
  "chemba"   = "cane_supply",
  "xinavane" = "cane_supply",
  "esa"      = "cane_supply"
)

#' Resolve the client type for a project
#'
#' @param project_name Character scalar; key into `CLIENT_TYPE_MAP`.
#' @return Character scalar client type. Unknown projects fall back to
#'   "cane_supply" with a warning so the pipeline still runs.
get_client_type <- function(project_name) {
  mapped <- CLIENT_TYPE_MAP[[project_name]]
  if (!is.null(mapped)) {
    return(mapped)
  }
  warning(sprintf("Project '%s' not in CLIENT_TYPE_MAP - defaulting to 'cane_supply'", project_name))
  "cane_supply"
}
# 2b. Client-specific KPI configurations
# ----------------------------------------
# Defines which KPIs and outputs are required for each client type.
# This enables Script 80 to conditionally calculate only relevant metrics.
#
# Structure:
# - kpi_calculations: Vector of KPI types to calculate for this client
# - outputs: Vector of output formats to generate (determines RDS/Excel naming)
# - requires_harvest_data: Boolean - whether Script 31 harvest predictions are needed
# - script_90_compatible: Boolean - whether output should match Script 90 expectations
# - script_91_compatible: Boolean - whether output should match Script 91 expectations
CLIENT_TYPE_CONFIGS <- list(
  # Aura (agronomic_support): Farm-level KPI summaries for weekly reports to agronomists
  "agronomic_support" = list(
    client_type = "agronomic_support",
    description = "Farm-level KPI summaries for agronomic decision support",
    kpi_calculations = c(
      "field_uniformity",
      "area_change",
      "tch_forecasted",
      "growth_decline",
      "weed_presence",
      "gap_filling"
    ),
    outputs = c(
      "kpi_summary_tables", # Summary statistics for Script 90 report front page
      "field_details"       # Detailed field table for Script 90 report end section
    ),
    requires_harvest_data = FALSE, # Script 31 predictions not used
    script_90_compatible = TRUE,   # Output format matches Script 90 expectations
    script_91_compatible = FALSE
  ),
  # Cane Supply (cane_supply): Per-field analysis with harvest timing prediction
  "cane_supply" = list(
    client_type = "cane_supply",
    description = "Per-field analysis with harvest prediction and phase assignment",
    kpi_calculations = c(
      "per_field_analysis", # Use 80_weekly_stats_utils.R for field-level statistics
      "phase_assignment",   # Assign growth phases (Germination, Tillering, Grand Growth, Maturation)
      "harvest_prediction", # Include Script 31 harvest age predictions if available
      "status_triggers"     # Calculate field status (Normal, Monitor, Alert, Urgent)
    ),
    outputs = c(
      "field_analysis_excel",  # Excel file with per-field metrics
      "field_analysis_summary" # Summary RDS for Script 91 report
    ),
    requires_harvest_data = TRUE, # harvest.xlsx is required for phase assignment
    script_90_compatible = FALSE,
    script_91_compatible = TRUE
  )
)

#' Get KPI configuration for a specific client type
#' @param client_type Character string of client type (e.g., "agronomic_support", "cane_supply")
#' @return List containing configuration for that client type; unknown types
#'   fall back to the "cane_supply" configuration with a warning.
get_client_kpi_config <- function(client_type) {
  config <- CLIENT_TYPE_CONFIGS[[client_type]]
  if (is.null(config)) {
    warning(sprintf("Client type '%s' not in CLIENT_TYPE_CONFIGS - defaulting to 'cane_supply'", client_type))
    return(CLIENT_TYPE_CONFIGS[["cane_supply"]])
  }
  return(config)
}

# 3. Smart detection for tile-based vs single-file mosaic approach
# ----------------------------------------------------------------

#' Detect whether the merged_final_tif output uses tiling
#'
#' Detection priority:
#'   1. tiling_config.json metadata written by Script 10 (authoritative)
#'   2. Grid-size subdirectories (e.g. "5x5") under merged_final_tif/
#'   3. Tile-named files ("*_XX.tif") directly in merged_final_tif/
#'
#' @param merged_final_tif_dir Directory holding merged final GeoTIFFs.
#' @param daily_tiles_split_dir Optional directory searched for tiling_config.json.
#' @return List with at least: has_tiles (logical), detected_tiles (character),
#'   total_files (integer), source (character detection method). Grid-based
#'   detection also returns grid_size and grid_path.
detect_tile_structure_from_merged_final <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) {
  # PRIORITY 1: Check for tiling_config.json metadata file from script 10.
  # This is the most reliable source since script 10 explicitly records its decision.
  if (!is.null(daily_tiles_split_dir) && dir.exists(daily_tiles_split_dir)) {
    config_files <- list.files(daily_tiles_split_dir, pattern = "tiling_config\\.json$",
                               recursive = TRUE, full.names = TRUE)
    if (length(config_files) > 0) {
      # Use the most recently modified config if several grid-size subfolders exist
      config_file <- config_files[which.max(file.info(config_files)$mtime)]
      tryCatch({
        config_json <- jsonlite::read_json(config_file)
        # NOTE: `%||%` is base R >= 4.4 (also exported by rlang via dplyr)
        return(list(
          has_tiles = config_json$has_tiles %||% TRUE,
          detected_tiles = character(),
          total_files = 0,
          source = "tiling_config.json",
          grid_size = config_json$grid_size %||% "unknown"
        ))
      }, error = function(e) {
        warning("Error reading tiling_config.json: ", e$message)
        # Fall through to file-based detection
      })
    }
  }

  # PRIORITY 2: File-based detection (fallback if metadata not found).
  # Check if merged_final_tif/ contains tile-named files OR grid-size subdirectories.
  if (!dir.exists(merged_final_tif_dir)) {
    return(list(
      has_tiles = FALSE,
      detected_tiles = character(),
      total_files = 0,
      source = "directory_not_found"
    ))
  }

  # Grid-size subdirectories (5x5, 10x10, ...) indicate tiles organized as
  # merged_final_tif/{grid_size}/{DATE}/{DATE}_XX.tif
  grid_subfolders <- list.dirs(merged_final_tif_dir, full.names = FALSE, recursive = FALSE)
  grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE)

  if (length(grid_patterns) > 0) {
    # Found grid-size subdirectories - tiles exist! (first match wins if several)
    grid_size <- grid_patterns[1]
    grid_dir <- file.path(merged_final_tif_dir, grid_size)
    # BUG FIX: `[1:3]` padded with NA when fewer than 3 tiles existed, and
    # total_files reported the sample size (always <= 3) instead of the real count.
    grid_tifs <- list.files(grid_dir, pattern = "\\.tif$", recursive = TRUE)
    sample_tiles <- head(grid_tifs, 3)
    return(list(
      has_tiles = TRUE,
      detected_tiles = sample_tiles,
      total_files = length(grid_tifs),
      source = "grid_subdirectory_detection",
      grid_size = grid_size,
      grid_path = grid_dir
    ))
  }

  # Fall back to checking for tile-named files directly in merged_final_tif
  tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE)
  if (length(tif_files) == 0) {
    return(list(
      has_tiles = FALSE,
      detected_tiles = character(),
      total_files = 0,
      source = "no_files_found"
    ))
  }

  # Tile naming pattern: *_XX.tif (where XX is 2 digits),
  # e.g. 2025-11-27_00.tif, week_50_2024_00.tif
  tile_pattern <- "_(\\d{2})\\.tif$"
  tile_files <- tif_files[grepl(tile_pattern, tif_files)]
  has_tiles <- length(tile_files) > 0

  return(list(
    has_tiles = has_tiles,
    detected_tiles = tile_files,
    total_files = length(tif_files),
    source = "file_pattern_detection"
  ))
}
# 4. Define project directory structure
# -------------------------------------
# ==============================================================================
# CENTRALIZED PATH MANAGEMENT - setup_project_directories()
# ==============================================================================
# Single source of truth for ALL file paths used across the pipeline.
# All scripts should call this function once at startup and use the returned
# paths (eliminates ~88 hardcoded file.path() calls scattered across 8 scripts).
#
# USAGE:
#   paths <- setup_project_directories(project_dir)
#   merged_tif_dir <- paths$merged_tif_folder
#   daily_ci_dir   <- paths$daily_ci_vals_dir
#   kpi_output_dir <- paths$kpi_reports_dir
#
# TIERS (8-layer directory structure):
#   Tier 1: Raw data (merged_tif)
#   Tier 2: Per-field TIFFs (field_tiles, field_tiles_CI)
#   Tier 3: CI Extraction (daily_ci_vals, cumulative_ci_vals)
#   Tier 4: Growth Model (growth_model_interpolated)
#   Tier 5: Mosaics (weekly_mosaic, weekly_tile_max)
#   Tier 6: KPI & Reporting (kpi_reports_dir, kpi_field_stats_dir)
#   Tier 7: Support (data, vrt, harvest, logs)
#   Tier 8: Config & Metadata (field_boundaries_path, tiling_config_path)
# ==============================================================================

#' Build (and create) the full project directory catalogue
#'
#' @param project_dir Project storage folder name under laravel_app/storage/app.
#' @param data_source Reserved for selecting the raw TIF folder; currently
#'   unused by this function - TODO confirm intended wiring with callers.
#' @return Named list of every path the pipeline uses (see tier reference below).
#'   All directory entries are created on disk as a side effect.
setup_project_directories <- function(project_dir, data_source = "merged_tif") {
  # Project root under the Laravel storage area
  laravel_storage_dir <- here("laravel_app", "storage", "app", project_dir)

  # Intermediate parents referenced by several child paths below
  extracted_ci_base_dir <- here(laravel_storage_dir, "Data", "extracted_ci")
  daily_tiles_split_dir <- here(laravel_storage_dir, "daily_tiles_split")
  reports_dir           <- here(laravel_storage_dir, "reports")
  kpi_reports_dir       <- here(reports_dir, "kpis")
  data_dir              <- here(laravel_storage_dir, "Data")

  # Full path catalogue, grouped by pipeline tier (names are the public API)
  paths <- list(
    # PROJECT ROOT
    laravel_storage_dir = laravel_storage_dir,
    # TIER 1: Raw data - 4-band raw GeoTIFFs from Planet (Script 00)
    merged_tif_folder = here(laravel_storage_dir, "merged_tif"),
    # TIER 2: Per-field TIFFs (Script 10)
    field_tiles_dir = here(laravel_storage_dir, "field_tiles"),        # field_tiles/{FIELD}/{YYYY-MM-DD}.tif
    field_tiles_ci_dir = here(laravel_storage_dir, "field_tiles_CI"),  # pre-computed CI, used by Script 40
    daily_tiles_split_dir = daily_tiles_split_dir,                     # legacy grid tiling
    # TIER 3: CI Extraction (Script 20)
    extracted_ci_base_dir = extracted_ci_base_dir,
    daily_ci_vals_dir = here(extracted_ci_base_dir, "daily_vals"),            # combined_CI_data.rds
    cumulative_ci_vals_dir = here(extracted_ci_base_dir, "cumulative_vals"),  # cumulative CI RDS
    ci_for_python_dir = here(extracted_ci_base_dir, "ci_data_for_python"),    # Script 21 CSV export
    # TIER 4: Growth Model (Script 30)
    growth_model_interpolated_dir = here(laravel_storage_dir, "growth_model_interpolated"),
    # TIER 5: Mosaics (Script 40)
    weekly_mosaic_dir = here(laravel_storage_dir, "weekly_mosaic"),      # per-field weekly mosaics
    weekly_tile_max_dir = here(laravel_storage_dir, "weekly_tile_max"),  # legacy tile-based weekly max
    # TIER 6: KPI & Reporting (Scripts 80, 90, 91)
    reports_dir = reports_dir,
    kpi_reports_dir = kpi_reports_dir,                                  # Script 80 KPI CSV/RDS output
    kpi_field_stats_dir = here(kpi_reports_dir, "field_stats"),         # per-field KPI details
    kpi_field_analysis_dir = here(kpi_reports_dir, "field_analysis"),   # field-level analysis for Script 91
    # TIER 7: Support
    data_dir = data_dir,
    vrt_dir = here(data_dir, "vrt"),            # Virtual Raster files from CI extraction
    harvest_dir = here(data_dir, "HarvestData"),
    log_dir = here(laravel_storage_dir, "logs"),
    # TIER 8: Config & Metadata (files, not directories)
    field_boundaries_path = here(data_dir, "pivot.geojson"),
    tiling_config_path = here(daily_tiles_split_dir, "tiling_config.json")
  )

  # Create every directory entry once per pipeline run. The root and the two
  # file paths are excluded (the root is created implicitly via recursive = TRUE).
  non_dirs <- c("laravel_storage_dir", "field_boundaries_path", "tiling_config_path")
  dir_targets <- unlist(paths[setdiff(names(paths), non_dirs)], use.names = FALSE)
  invisible(lapply(dir_targets, dir.create, showWarnings = FALSE, recursive = TRUE))

  paths
}

# ==============================================================================
# TIER-BY-TIER PATH REFERENCE (for setup_project_directories output)
# ==============================================================================
# TIER 1: paths$merged_tif_folder/{YYYY-MM-DD}.tif (4-band uint16 GeoTIFFs from Planet API)
# TIER 2: paths$field_tiles_dir/{FIELD_NAME}/{YYYY-MM-DD}.tif
#         paths$field_tiles_ci_dir/{FIELD_NAME}/{YYYY-MM-DD}.tif
#         paths$daily_tiles_split_dir/{grid_size}/{YYYY-MM-DD}/{YYYY-MM-DD}_XX.tif (legacy)
# TIER 3: paths$daily_ci_vals_dir/combined_CI_data.rds
#         paths$cumulative_ci_vals_dir/All_pivots_Cumulative_CI_quadrant_year_v2.rds
#         paths$ci_for_python_dir/ci_data_for_python.csv (Script 21 output)
# TIER 4: paths$growth_model_interpolated_dir/ (RDS files with interpolated CI)
# TIER 5: paths$weekly_mosaic_dir/{FIELD_NAME}/week_XX_YYYY.tif
#         paths$weekly_tile_max_dir/{grid_size}/week_XX_YYYY_00.tif (legacy)
# TIER 6: paths$kpi_reports_dir/, paths$kpi_field_stats_dir/,
#         paths$kpi_field_analysis_dir/, paths$reports_dir/
# TIER 7: paths$data_dir/pivot.geojson, paths$data_dir/harvest.xlsx,
#         paths$vrt_dir/, paths$harvest_dir/, paths$log_dir/
# TIER 8: paths$field_boundaries_path, paths$tiling_config_path
# ==============================================================================
# 5. Load field boundaries
# ------------------------

#' Load field boundary polygons for the current project
#'
#' Reads pivot.geojson (or pivot_2.geojson for the ESA project during CI
#' extraction) and returns both an sf object and, when conversion succeeds,
#' a terra SpatVector. Missing CRS information is patched to WGS84.
#'
#' @param data_dir Directory containing the boundary GeoJSON files.
#' @return List with two elements:
#'   - field_boundaries_sf: sf object (columns: field, sub_field, optional sub_area)
#'   - field_boundaries: terra SpatVector, or the same sf object when terra
#'     conversion fails (callers must handle both types).
#' @note Reads globals `project_dir` and `ci_extraction_script` to select the
#'   boundary file - TODO confirm whether these should become parameters.
load_field_boundaries <- function(data_dir) {
  # Choose field boundaries file based on project and script type:
  # ESA project uses pivot_2.geojson ONLY for scripts 02-03 (CI extraction &
  # growth model); all other scripts (including 04-mosaic, 09-KPIs, 10-reports)
  # use pivot.geojson.
  use_pivot_2 <- exists("project_dir") && project_dir == "esa" && exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03
  if (use_pivot_2) {
    field_boundaries_path <- here(data_dir, "pivot_2.geojson")
  } else {
    field_boundaries_path <- here(data_dir, "pivot.geojson")
  }
  if (!file.exists(field_boundaries_path)) {
    stop(paste("Field boundaries file not found at path:", field_boundaries_path))
  }
  tryCatch({
    # Read GeoJSON with explicit CRS handling
    field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE)
    # Remove OBJECTID column immediately if it exists (ArcGIS export artifact)
    if ("OBJECTID" %in% names(field_boundaries_sf)) {
      field_boundaries_sf <- field_boundaries_sf %>% select(-OBJECTID)
    }
    # Validate and fix CRS if needed - DO NOT call is.na on full CRS objects as
    # it can cause errors; only the $epsg component is checked here.
    tryCatch({
      # Simply assign WGS84 if not already set (safe approach); terra will
      # handle reprojection downstream if needed.
      if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) {
        st_crs(field_boundaries_sf) <- 4326
        warning("CRS was missing, assigned WGS84 (EPSG:4326)")
      }
    }, error = function(e) {
      # If any CRS operation fails, try once more; `<<-` targets the
      # field_boundaries_sf binding in the enclosing function frame.
      tryCatch({
        st_crs(field_boundaries_sf) <<- 4326
      }, error = function(e2) {
        # Silently continue - terra might handle it
        warning(paste("Could not set CRS:", e2$message))
      })
    })
    # Handle column names - accommodate optional sub_area column.
    # IMPORTANT: must preserve geometry column properly when subsetting an sf object.
    if ("sub_area" %in% names(field_boundaries_sf)) {
      # Reorder columns but keep geometry last
      field_boundaries_sf <- field_boundaries_sf %>%
        dplyr::select(field, sub_field, sub_area) %>%
        sf::st_set_geometry("geometry")
    } else {
      # Reorder columns but keep geometry last
      field_boundaries_sf <- field_boundaries_sf %>%
        dplyr::select(field, sub_field) %>%
        sf::st_set_geometry("geometry")
    }
    # Convert to terra vector if possible, otherwise fall back to sf.
    # Some GeoJSON files (like aura with complex MultiPolygons) may have
    # GDAL/terra compatibility issues.
    field_boundaries <- tryCatch({
      field_boundaries_terra <- terra::vect(field_boundaries_sf)
      # Ensure terra object has valid CRS with safer checks (crs() itself may error)
      crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL)
      crs_str <- if (!is.null(crs_value)) as.character(crs_value) else ""
      if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) {
        terra::crs(field_boundaries_terra) <- "EPSG:4326"
        warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)")
      }
      field_boundaries_terra
    }, error = function(e) {
      warning(paste("Terra conversion failed, using sf object instead:", e$message))
      # Return sf object as fallback - downstream functions handle both types
      field_boundaries_sf
    })
    return(list(
      field_boundaries_sf = field_boundaries_sf,
      field_boundaries = field_boundaries
    ))
  }, error = function(e) {
    cat("[DEBUG] Error in load_field_boundaries:\n")
    cat(" Message:", e$message, "\n")
    cat(" Call:", deparse(e$call), "\n")
    stop(paste("Error loading field boundaries:", e$message))
  })
}
# 6. Load harvesting data
# -----------------------

#' Load the harvest schedule from harvest.xlsx
#'
#' Reads the per-field harvest schedule, coerces column types, parses dates in
#' several common formats (including Excel serial numbers), clamps future or
#' missing season_end values to today, and recomputes age in weeks.
#'
#' @param data_dir Directory containing harvest.xlsx.
#' @return Tibble with columns field, sub_field, year, season_start,
#'   season_end, age, sub_area, tonnage_ha - or NULL (with a warning) when the
#'   file is missing or cannot be read.
load_harvesting_data <- function(data_dir) {
  harvest_file <- here(data_dir, "harvest.xlsx")
  if (!file.exists(harvest_file)) {
    warning(paste("Harvest data file not found at path:", harvest_file))
    return(NULL)
  }

  # Helper: parse a single cell into a Date (or numeric days-since-epoch),
  # trying Excel serials and several text formats.
  parse_flexible_date <- function(x) {
    # BUG FIX: is.null()/length checks must come before is.na() - is.na(NULL)
    # yields logical(0), which makes `||` error on NULL or zero-length input.
    if (is.null(x) || length(x) == 0 || is.na(x)) return(NA_real_)
    if (inherits(x, "Date")) return(x)
    if (inherits(x, "POSIXct")) return(as.Date(x))
    # Numeric cells are Excel date serials (Windows epoch 1899-12-30)
    if (is.numeric(x)) {
      return(as.Date(x, origin = "1899-12-30"))
    }
    # Try common text formats: YYYY-MM-DD, DD/MM/YYYY, MM/DD/YYYY, timestamp
    x_char <- as.character(x)
    formats <- c("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y-%m-%d %H:%M:%S")
    for (fmt in formats) {
      result <- suppressWarnings(as.Date(x_char, format = fmt))
      if (!is.na(result)) return(result)
    }
    # Unparseable - numeric NA so sapply() keeps a consistent type
    NA_real_
  }

  tryCatch({
    harvesting_data <- read_excel(harvest_file) %>%
      dplyr::select(
        c(
          "field", "sub_field", "year", "season_start", "season_end",
          "age", "sub_area", "tonnage_ha"
        )
      ) %>%
      mutate(
        field = as.character(field),
        sub_field = as.character(sub_field),
        year = as.numeric(year),
        # Element-wise parse, then rebuild Date columns from the numeric results
        season_start = sapply(season_start, parse_flexible_date),
        season_end = sapply(season_end, parse_flexible_date),
        season_start = as.Date(season_start, origin = "1970-01-01"),
        season_end = as.Date(season_end, origin = "1970-01-01"),
        age = as.numeric(age),
        sub_area = as.character(sub_area),
        tonnage_ha = as.numeric(tonnage_ha)
      ) %>%
      mutate(
        # Clamp open-ended or future seasons to today, then recompute age in weeks
        season_end = case_when(
          season_end > Sys.Date() ~ Sys.Date(),
          is.na(season_end) ~ Sys.Date(),
          TRUE ~ season_end
        ),
        age = round(as.numeric(season_end - season_start) / 7, 0)
      )
    return(harvesting_data)
  }, error = function(e) {
    warning(paste("Error loading harvesting data:", e$message))
    return(NULL)
  })
}
# 7. Define logging functions globally first
# ------------------------------------------
# Console-only fallbacks so scripts can log before setup_logging() runs.

#' Print a timestamped log line to the console
#' @param message Text to log.
#' @param level Severity label embedded in the line (default "INFO").
log_message <- function(message, level = "INFO") {
  stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
  line <- paste0("[", level, "] ", stamp, " - ", message)
  cat(line, "\n")
}

#' Log a structural preview (str of head) of any object
#' @param list Object to preview (name kept for backward compatibility).
#' @param level Severity label forwarded to log_message().
log_head <- function(list, level = "INFO") {
  preview <- paste(capture.output(str(head(list))), collapse = "\n")
  log_message(preview, level)
}

# 8. Set up full logging system with file output
# ----------------------------------------------

#' Install file-backed logging functions
#'
#' Builds log_message()/log_head() variants that append to a dated log file
#' (errors and warnings are echoed to the console) and installs them in the
#' global environment, replacing the console-only fallbacks above.
#'
#' @param log_dir Directory for the daily log file (YYYYMMDD.log).
#' @return List with log_file path and the two enhanced logging closures.
setup_logging <- function(log_dir) {
  log_file <- here(log_dir, paste0(format(Sys.Date(), "%Y%m%d"), ".log"))

  log_message <- function(message, level = "INFO") {
    stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
    line <- paste0("[", level, "] ", stamp, " - ", message)
    cat(line, "\n", file = log_file, append = TRUE)
    # Echo problems to the console so they are visible during runs
    if (level %in% c("ERROR", "WARNING")) {
      cat(line, "\n")
    }
  }

  log_head <- function(list, level = "INFO") {
    log_message(paste(capture.output(str(head(list))), collapse = "\n"), level)
  }

  # Replace the global fallbacks with the file-backed versions
  assign("log_message", log_message, envir = .GlobalEnv)
  assign("log_head", log_head, envir = .GlobalEnv)

  list(
    log_file = log_file,
    log_message = log_message,
    log_head = log_head
  )
}
# 8. HELPER FUNCTIONS FOR COMMON CALCULATIONS
# -------------------------------------------
# Centralized functions to reduce duplication across scripts.

#' ISO-8601 week number for a date (numeric, 1-53)
get_iso_week <- function(date) {
  as.numeric(format(date, "%V"))
}

#' ISO-8601 week-based year for a date (numeric)
get_iso_year <- function(date) {
  as.numeric(format(date, "%G"))
}

#' Both ISO week and year as a list(week=, year=)
get_iso_week_year <- function(date) {
  # CONSISTENCY FIX: delegate to get_iso_week()/get_iso_year() instead of
  # duplicating the "%V"/"%G" format strings in a second place.
  list(
    week = get_iso_week(date),
    year = get_iso_year(date)
  )
}

#' Format a date as a "weekWW<sep>YYYY" label (e.g. "week05_2025")
format_week_label <- function(date, separator = "_") {
  wwy <- get_iso_week_year(date)
  sprintf("week%02d%s%d", wwy$week, separator, wwy$year)
}

#' Auto-detect mosaic mode
#' For the per-field architecture this returns "single-file" when
#' weekly_mosaic/ exists (weekly_mosaic/{FIELD}/week_*.tif), else "unknown".
detect_mosaic_mode <- function(project_dir) {
  # CONSISTENCY FIX: build the path via get_project_storage_path() like the
  # other helpers instead of repeating the hardcoded storage root.
  weekly_mosaic <- get_project_storage_path(project_dir, "weekly_mosaic")
  if (dir.exists(weekly_mosaic)) {
    return("single-file") # Per-field structure
  }
  "unknown"
}

#' Auto-detect grid size from tile directory structure
#' The per-field architecture no longer uses grid-based organization (legacy),
#' so this always returns "unknown".
detect_grid_size <- function(project_dir) {
  "unknown"
}

#' Build storage paths consistently across all scripts
#' @param project_dir Project folder name under laravel_app/storage/app.
#' @param subdir Optional subdirectory appended to the project root.
get_project_storage_path <- function(project_dir, subdir = NULL) {
  base <- file.path("laravel_app", "storage", "app", project_dir)
  if (!is.null(subdir)) file.path(base, subdir) else base
}

#' Mosaic directory for a project
#' @param mosaic_mode Accepted for backward compatibility; the per-field
#'   architecture always uses weekly_mosaic (single-file, per-field).
get_mosaic_dir <- function(project_dir, mosaic_mode = "auto") {
  get_project_storage_path(project_dir, "weekly_mosaic")
}

#' KPI output directory for a project/client-type combination
#' agronomic_support -> reports/kpis/field_level; everything else ->
#' reports/kpis/field_analysis (matches Scripts 80/90/91 expectations).
get_kpi_dir <- function(project_dir, client_type) {
  subdir <- if (client_type == "agronomic_support") "field_level" else "field_analysis"
  get_project_storage_path(project_dir, file.path("reports", "kpis", subdir))
}

# Logging functions moved to 00_common_utils.R
#   - smartcane_log()   - Main logging function with level prefix
#   - smartcane_debug() - Conditional debug logging
#   - smartcane_warn()  - Warning wrapper
# Import with: source("r_app/00_common_utils.R")

# ============================================================================
# PHASE 3 & 4: OPTIMIZATION & DOCUMENTATION
# ============================================================================

# System Constants
# ----------------
# Define once, use everywhere.
# Used in run_full_pipeline.R for calling R scripts via system().
RSCRIPT_PATH <- "C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe"

# Data Source Documentation
# -------------------------
# SmartCane uses PlanetScope imagery from Planet Labs API in two formats:
#
# 1. merged_tif (4-band):
#    - Standard format: Red, Green, Blue, Near-Infrared
#    - Size: ~150-200 MB per date
#    - Use case: Agronomic support, general crop health monitoring
#    - Projects: aura, xinavane
#    - Cloud handling: Basic cloud masking from Planet metadata
#
# 2. merged_tif_8b (8-band with cloud confidence):
#    - Enhanced format: 4-band imagery + 4-band UDM2 cloud mask
#    - UDM2 bands: Clear, Snow, Shadow, Light Haze
#    - Size: ~250-350 MB per date
#    - Use case: Harvest prediction, supply chain optimization
#    - Projects: angata, chemba, esa (cane_supply clients)
#    - Cloud handling: Per-pixel cloud confidence from Planet UDM2
#    - Why: Cane supply chains need precise confidence to predict harvest dates
#      (don't want to predict based on cloudy data)
#
# The system auto-detects which is available via detect_data_source()

# Mosaic Mode Documentation
# -------------------------
# SmartCane supports two ways to store and process weekly mosaics:
#
# 1. Single-file mosaic ("single-file"):
#    - One GeoTIFF per week: weekly_mosaic/week_02_2026.tif
#    - 5 bands per file: R, G, B, NIR, CI (Canopy Index)
#    - Size: ~300-500 MB per week
#    - Pros: Simpler file management, easier full-field visualization
#    - Cons: Slower for field-specific queries, requires loading full raster
#    - Best for: Agronomic support (aura) with <100 fields
#    - Script 04 output: 5-band single-file mosaic
#
# 2. Tiled mosaic ("tiled"):
#    - Grid of tiles per week: weekly_tile_max/5x5/week_02_2026_{TT}.tif
#    - Example: 25 files (5x5 grid) x 5 bands = 125 individual tiffs
#    - Size: ~15-20 MB per tile, organized in folders
#    - Pros: Parallel processing, fast field lookups, scales to 1000+ fields
#    - Cons: More file I/O, requires tile-to-field mapping metadata
#    - Best for: Cane supply (angata, chemba) with 500+ fields
#    - Script 04 output: Per-tile tiff files in weekly_tile_max/{grid}/
#    - Tile assignment: Field boundaries mapped to grid coordinates
#
# The system auto-detects which is available via detect_mosaic_mode()

# Client Type Documentation
# -------------------------
# SmartCane runs different analysis pipelines based on client_type:
#
# CLIENT_TYPE: cane_supply
#   Purpose: Optimize sugar mill supply chain (harvest scheduling)
#   Scripts run: 20 (CI), 21 (RDS to CSV), 30 (Growth), 31 (Harvest pred),
#                40 (Mosaic), 80 (KPI), 91 (Excel)
#   Outputs:
#     - Per-field analysis: field status, growth phase, harvest readiness
#     - Excel reports (Script 91): Detailed metrics for logistics planning
#     - KPI directory: reports/kpis/field_analysis/ (one RDS per week)
#   Harvest data: Required (harvest.xlsx - planting dates for phase assignment)
#   Data source: merged_tif_8b (uses cloud confidence for confidence)
#   Mosaic mode: tiled (scales to 500+ fields)
#   Projects: angata, chemba, xinavane, esa
#
# CLIENT_TYPE: agronomic_support
#   Purpose: Provide weekly crop health insights to agronomists
#   Scripts run: 80 (KPI), 90 (Word report)
#   Outputs:
#     - Farm-level KPI summaries (no per-field breakdown)
#     - Word reports (Script 90): Charts and trends for agronomist decision support
#     - KPI directory: reports/kpis/field_level/ (one RDS per week)
#   Harvest data: Not used
#   Data source: merged_tif (simpler, smaller)
#   Mosaic mode: single-file (100-200 fields)
#   Projects: aura

#' Detect which raw-data folder the pipeline should read
#'
#' NOTE: currently hard-coded to "merged_tif" for consistency across projects;
#' availability-based auto-detection of merged_tif_8b (described in the Data
#' Source Documentation above) is not implemented yet.
#'
#' @param project_dir Project folder name (currently ignored).
#' @return The string "merged_tif".
detect_data_source <- function(project_dir) {
  # Data source is always merged_tif for consistency
  return("merged_tif")
}

#' Check KPI completeness for a reporting period
#'
#' Replaces duplicate KPI checking logic in run_full_pipeline.R
#' (lines ~228-270, ~786-810).
#'
#' @param project_dir Project folder name.
#' @param client_type Client type (selects the KPI directory via get_kpi_dir()).
#' @param end_date Date of the most recent reporting week.
#' @param reporting_weeks_needed Number of consecutive weeks (ending at
#'   end_date) that must have KPI files on disk.
#' @return List with kpis_df (one row per week), kpi_dir, missing_count,
#'   missing_weeks (subset of kpis_df), and all_complete (boolean).
check_kpi_completeness <- function(project_dir, client_type, end_date, reporting_weeks_needed) {
  kpi_dir <- get_kpi_dir(project_dir, client_type)

  # PERF FIX: build one row per week with lapply + do.call(rbind, ...) instead
  # of growing a data.frame with rbind inside the loop (quadratic copying).
  week_rows <- lapply(seq_len(reporting_weeks_needed) - 1L, function(weeks_back) {
    check_date <- end_date - (weeks_back * 7)
    wwy <- get_iso_week_year(check_date)

    # Build week pattern and check if any KPI file for that week exists
    week_pattern <- sprintf("week%02d_%d", wwy$week, wwy$year)
    files_this_week <- list.files(kpi_dir, pattern = week_pattern)
    has_kpis <- length(files_this_week) > 0

    # Debug logging (smartcane_debug() comes from 00_common_utils.R)
    smartcane_debug(sprintf(
      "Week %02d/%d (%s): %s (%d files)",
      wwy$week, wwy$year, format(check_date, "%Y-%m-%d"),
      if (has_kpis) "✓ FOUND" else "✗ MISSING",
      length(files_this_week)
    ))

    data.frame(
      week = wwy$week,
      year = wwy$year,
      date = check_date,
      has_kpis = has_kpis,
      pattern = week_pattern,
      file_count = length(files_this_week)
    )
  })
  kpis_needed <- do.call(rbind, week_rows)

  # Summary statistics
  missing_count <- sum(!kpis_needed$has_kpis)
  all_complete <- missing_count == 0

  return(list(
    kpis_df = kpis_needed,
    kpi_dir = kpi_dir,
    missing_count = missing_count,
    missing_weeks = kpis_needed[!kpis_needed$has_kpis, ],
    all_complete = all_complete
  ))
}

# 9. Initialize the project
# -------------------------

#' Initialize all project components in one call
#'
#' Sets up the directory structure, file logging, field boundaries, and the
#' harvest schedule for a project.
#'
#' @param project_dir Project folder name under laravel_app/storage/app.
#' @param data_source Raw TIF folder selector, forwarded to
#'   setup_project_directories() (default "merged_tif").
#' @return The path list from setup_project_directories() combined with
#'   logging, field_boundaries, field_boundaries_sf, and harvesting_data
#'   (harvesting_data may be NULL when harvest.xlsx is absent).
initialize_project <- function(project_dir, data_source = "merged_tif") {
  # Set up directory structure, passing data_source to select TIF folder
  dirs <- setup_project_directories(project_dir, data_source = data_source)
  # Set up logging
  logging <- setup_logging(dirs$log_dir)
  # Load field boundaries
  boundaries <- load_field_boundaries(dirs$data_dir)
  # Load harvesting data
  harvesting_data <- load_harvesting_data(dirs$data_dir)
  # Return all initialized components
  return(c(
    dirs,
    list(
      logging = logging,
      field_boundaries = boundaries$field_boundaries,
      field_boundaries_sf = boundaries$field_boundaries_sf,
      harvesting_data = harvesting_data
    )
  ))
}

# When this script is sourced, initialize with the global project_dir variable
# if it exists; all paths and loaded data are exported to the global env.
if (exists("project_dir")) {
  # Now we can safely log before initialization
  log_message(paste("Initializing project with directory:", project_dir))
  # Use data_source if it exists (passed from 02_ci_extraction.R), otherwise use default
  data_src <- if (exists("data_source")) data_source else "merged_tif"
  log_message(paste("Using data source directory:", data_src))
  project_config <- initialize_project(project_dir, data_source = data_src)
  # Expose all variables to the global environment
  list2env(project_config, envir = .GlobalEnv)
  # Log project initialization completion with tile mode info
  log_message(paste("Project initialized with directory:", project_dir))
  if (exists("use_tile_mosaic")) {
    mosaic_mode <- if (use_tile_mosaic) "TILE-BASED" else "SINGLE-FILE"
    log_message(paste("Mosaic mode detected:", mosaic_mode))
    if (exists("tile_detection_info") && !is.null(tile_detection_info)) {
      log_message(paste(" - Detection source:", tile_detection_info$detected_source))
      log_message(paste(" - Grid size:", tile_detection_info$grid_size))
      log_message(paste(" - Detected files in storage:", tile_detection_info$detected_count))
      if (length(tile_detection_info$sample_tiles) > 0) {
        log_message(paste(" - Sample tile files:", paste(tile_detection_info$sample_tiles, collapse = ", ")))
      }
    }
  }
} else {
  warning("project_dir variable not found. Please set project_dir before sourcing parameters_project.R")
}