# ============================================================================== # PARAMETERS_PROJECT_2.R (CLEANED VERSION) # ============================================================================== # PURPOSE: # Project configuration, directory structure setup, and helper functions # for centralized path management and project initialization. # # SECTION 1: Libraries & Dependencies # SECTION 2: Client Type Mapping & Configuration # SECTION 3: Directory Structure Setup # SECTION 4: Date/Week Utility Functions (centralized) # SECTION 5: Field Boundary & Harvest Data Loaders # SECTION 6: Project Initialization & Logging # SECTION 7: Mosaic & KPI Verification Helpers # # NOTE: Duplicate functions (safe_log, smartcane_debug, smartcane_warn, # load_field_boundaries, date_list, repair_geojson_geometries) # have been REMOVED - they belong in 00_common_utils.R. # Source 00_common_utils.R after parameters_project.R to get those. # ============================================================================== # ============================================================================== # SECTION 1: LIBRARIES & DEPENDENCIES # ============================================================================== suppressPackageStartupMessages({ library(here) library(readxl) library(sf) library(dplyr) library(tidyr) library(lubridate) library(jsonlite) # For reading tiling_config.json }) # ============================================================================== # SECTION 2: GLOBAL AGRONOMIC THRESHOLDS & CLIENT TYPE MAPPING # ============================================================================== # Maturity threshold for yield prediction: crop age in Days After Harvest (DAH) # Only fields >= DAH_MATURITY_THRESHOLD days old receive yield forecasts # ~240 days ≈ 8 months, typical sugarcane maturity window DAH_MATURITY_THRESHOLD <- 240 # Maps project names to client types for pipeline control # This determines which scripts run and what outputs they produce CLIENT_TYPE_MAP <- list( "angata" = "cane_supply", "chemba" = "agronomic_support", "xinavane" = "agronomic_support", "esa" = "agronomic_support", "simba" = "agronomic_support", "john" = "agronomic_support", "huss" = "agronomic_support", "aura" = "agronomic_support" ) #' Get client type for a project #' @param project_name Character project name #' @return Character client type ("cane_supply" or "agronomic_support") get_client_type <- function(project_name) { client_type <- CLIENT_TYPE_MAP[[project_name]] if (is.null(client_type)) { warning(paste0("Project '", project_name, "' not found in CLIENT_TYPE_MAP. Defaulting to 'agronomic_support'.", sep="")) return("agronomic_support") # Default for all unlisted projects } return(client_type) } # Client-specific KPI configurations # Defines which KPIs and outputs are required for each client type CLIENT_TYPE_CONFIGS <- list( # Aura (agronomic_support): Farm-level KPI summaries for agronomists "agronomic_support" = list( client_type = "agronomic_support", description = "Farm-level KPI summaries for agronomic decision support", kpi_calculations = c( "field_uniformity", "area_change", "tch_forecasted", "growth_decline", "weed_presence", "gap_filling" ), outputs = c("kpi_summary_tables", "field_details"), requires_harvest_data = FALSE, script_90_compatible = TRUE, script_91_compatible = FALSE ), # Cane Supply (cane_supply): Per-field analysis with harvest prediction "cane_supply" = list( client_type = "cane_supply", description = "Per-field analysis with harvest prediction and phase assignment", kpi_calculations = c( "per_field_analysis", "phase_assignment", "harvest_prediction", "status_triggers" ), outputs = c("field_analysis_excel", "field_analysis_summary"), requires_harvest_data = TRUE, script_90_compatible = FALSE, script_91_compatible = TRUE ) ) #' Get KPI configuration for a specific client type #' @param client_type Character (e.g., "agronomic_support", "cane_supply") #' @return List containing configuration for that client type get_client_kpi_config <- function(client_type) { config <- CLIENT_TYPE_CONFIGS[[client_type]] if (is.null(config)) { warning(sprintf("Client type '%s' not found - using cane_supply defaults", client_type)) return(CLIENT_TYPE_CONFIGS[["cane_supply"]]) } return(config) } # ============================================================================== # SECTION 3: DIRECTORY STRUCTURE SETUP # ============================================================================== # CENTRALIZED PATH MANAGEMENT: All file paths in the entire pipeline # are derived from setup_project_directories(). # This is the single source of truth for 8 tiers of directories. #' Setup complete project directory structure #' #' Creates all 8 tiers of directories and returns a comprehensive list #' of paths for use throughout the pipeline. #' #' @param project_dir Character. Project name (e.g., "angata", "aura") #' @param data_source Character. "merged_tif" (default) or "merged_tif_8b" #' @return List containing all 8 tiers of paths #' #' @details #' TIER 1: Raw data (merged_tif) - Python download output #' TIER 2: Per-field TIFFs - Script 10 output #' TIER 3: CI extraction - Script 20 output #' TIER 4: Growth model - Script 30 output #' TIER 5: Mosaics - Script 40 output #' TIER 6: KPI & reports - Scripts 80/90/91 output #' TIER 7: Support files - GeoJSON, Excel, logs #' TIER 8: Metadata - Config, CRS info setup_project_directories <- function(project_dir, data_source = "merged_tif") { # BASE DIRECTORIES laravel_storage_dir <- here("laravel_app", "storage", "app", project_dir) # TIER 1: RAW DATA (Script 00 output - Python download) merged_tif_folder <- here(laravel_storage_dir, "merged_tif") # TIER 2: PER-FIELD TIFFS (Script 10 output) field_tiles_dir <- here(laravel_storage_dir, "field_tiles") field_tiles_ci_dir <- here(laravel_storage_dir, "field_tiles_CI") # SUPPORT TIER: DATA DIRECTORY (define early for use in later tiers) data_dir <- here(laravel_storage_dir, "Data") # TIER 3: CI EXTRACTION (Script 20 output) # Structure: Data/extracted_ci/daily_vals/{FIELD}/{DATE}.rds (per-field daily CI values) extracted_ci_base_dir <- here(data_dir, "extracted_ci") daily_ci_vals_dir <- here(extracted_ci_base_dir, "daily_vals") # Per-field structure cumulative_ci_vals_dir <- here(extracted_ci_base_dir, "cumulative_vals") ci_for_python_dir <- here(extracted_ci_base_dir, "ci_data_for_python") # TIER 4: GROWTH MODEL (Script 30 output) growth_model_interpolated_dir <- here(data_dir, "growth_model_interpolated") # TIER 5: MOSAICS (Script 40 output) weekly_mosaic_dir <- here(laravel_storage_dir, "weekly_mosaic") weekly_tile_max_dir <- here(laravel_storage_dir, "weekly_tile_max") # TIER 6: KPI & REPORTING (Scripts 80/90/91 output) reports_dir <- here(laravel_storage_dir, "reports") kpi_reports_dir <- here(reports_dir, "kpis") # TIER 7: SUPPORT (various scripts) vrt_dir <- here(data_dir, "vrt") # Virtual Raster files created during CI extraction harvest_dir <- here(data_dir, "harvest") # Harvest data directory log_dir <- here(laravel_storage_dir, "logs") # Create all directories all_dirs <- c( merged_tif_folder, field_tiles_dir, field_tiles_ci_dir, extracted_ci_base_dir, daily_ci_vals_dir, cumulative_ci_vals_dir, ci_for_python_dir, growth_model_interpolated_dir, weekly_mosaic_dir, weekly_tile_max_dir, reports_dir, kpi_reports_dir, data_dir, vrt_dir, harvest_dir, log_dir ) for (dir_path in all_dirs) { dir.create(dir_path, showWarnings = FALSE, recursive = TRUE) } # TIER 8: CONFIG & METADATA PATHS field_boundaries_path <- here(data_dir, "pivot.geojson") # Return comprehensive list return(list( # PROJECT ROOT laravel_storage_dir = laravel_storage_dir, # TIER 1: Raw data merged_tif_folder = merged_tif_folder, # TIER 2: Per-field TIFFs field_tiles_dir = field_tiles_dir, field_tiles_ci_dir = field_tiles_ci_dir, # TIER 3: CI extraction extracted_ci_base_dir = extracted_ci_base_dir, daily_ci_vals_dir = daily_ci_vals_dir, cumulative_ci_vals_dir = cumulative_ci_vals_dir, ci_for_python_dir = ci_for_python_dir, # TIER 4: Growth model growth_model_interpolated_dir = growth_model_interpolated_dir, # TIER 5: Mosaics weekly_mosaic_dir = weekly_mosaic_dir, weekly_tile_max_dir = weekly_tile_max_dir, # TIER 6: KPI & reporting reports_dir = reports_dir, kpi_reports_dir = kpi_reports_dir, # TIER 7: Support data_dir = data_dir, vrt_dir = vrt_dir, harvest_dir = harvest_dir, log_dir = log_dir, # TIER 8: Metadata field_boundaries_path = field_boundaries_path )) } # ============================================================================== # SECTION 4: DATE/WEEK UTILITY FUNCTIONS # ============================================================================== # ISO 8601 week/year functions for consistent date handling across scripts #' Extract ISO week number from a date #' @param date Date object or string convertible to Date #' @return Numeric ISO week number (1-53) get_iso_week <- function(date) { as.numeric(format(date, "%V")) } #' Extract ISO year from a date #' @param date Date object or string convertible to Date #' @return Numeric ISO year get_iso_year <- function(date) { as.numeric(format(date, "%G")) } #' Extract both ISO week and year as a list #' @param date Date object or string convertible to Date #' @return List with elements: week (1-53), year get_iso_week_year <- function(date) { list( week = as.numeric(format(date, "%V")), year = as.numeric(format(date, "%G")) ) } #' Format date as a week/year label #' @param date Date object or string convertible to Date #' @param separator Character. Separator between week and year (default "_") #' @return Character in format "week##_YYYY" (e.g., "week03_2025") format_week_label <- function(date, separator = "_") { wwy <- get_iso_week_year(date) sprintf("week%02d%s%d", wwy$week, separator, wwy$year) } # ============================================================================== # SECTION 5: FIELD BOUNDARY & HARVEST DATA LOADERS # ============================================================================== # IMPORTANT: These functions are also defined in 00_common_utils.R # to avoid duplication. Source 00_common_utils.R AFTER parameters_project.R # to override these stub definitions with the full implementations. #' Load Field Boundaries from GeoJSON #' #' Loads field polygon geometries from GeoJSON file (pivot.geojson or pivot_2.geojson). #' Handles CRS validation and column standardization. #' #' @param data_dir Directory containing GeoJSON file #' @return List with elements: #' - field_boundaries_sf: sf (Simple Features) object #' - field_boundaries: terra SpatVect object (if conversion successful, else sf fallback) #' load_field_boundaries <- function(data_dir) { field_boundaries_path <- file.path(data_dir, "pivot.geojson") if (!file.exists(field_boundaries_path)) { stop("Field boundaries file not found at:", field_boundaries_path) } tryCatch({ boundaries_sf <- sf::st_read(field_boundaries_path, quiet = TRUE) # Filter out features with empty geometries boundaries_sf <- boundaries_sf[!st_is_empty(boundaries_sf), ] # Repair geometries if needed if (!all(sf::st_is_valid(boundaries_sf))) { boundaries_sf <- sf::st_make_valid(boundaries_sf) } # Convert to terra SpatVect boundaries_spat <- terra::vect(boundaries_sf) return(list( field_boundaries_sf = boundaries_sf, field_boundaries = boundaries_spat )) }, error = function(e) { stop("Error loading field boundaries:", e$message) }) } #' Create Date List #' #' Creates a sequence of dates from end_date going back offset days #' #' @param end_date End date (Date object) #' @param offset Number of days to go back #' @return Character vector of dates in YYYY-MM-DD format date_list <- function(end_date, offset) { start_date <- end_date - offset date_seq <- seq(start_date, end_date, by = "day") format(date_seq, "%Y-%m-%d") } #' Repair GeoJSON Geometries #' #' Validates and repairs invalid geometries in sf object #' #' @param sf_object sf object with potentially invalid geometries #' @return sf object with repaired geometries repair_geojson_geometries <- function(sf_object) { if (!all(sf::st_is_valid(sf_object))) { sf_object <- sf::st_make_valid(sf_object) } sf_object } #' Load harvesting data from Excel #' #' Loads crop harvest schedule from harvest.xlsx file. #' Handles flexible date formats (numeric, YYYY-MM-DD, DD/MM/YYYY, etc.). #' #' @param data_dir Directory containing harvest.xlsx file #' @return Data frame with columns: field, sub_field, year, season_start, #' season_end, age (weeks), sub_area, tonnage_ha. Returns NULL if not found. load_harvesting_data <- function(data_dir) { harvest_file <- file.path(data_dir, "harvest.xlsx") if (!file.exists(harvest_file)) { warning(paste("Harvest data file not found at path:", harvest_file)) return(NULL) } # Helper function to parse dates with multiple format detection parse_flexible_date <- function(x) { if (is.na(x) || is.null(x)) return(NA_real_) if (inherits(x, "Date")) return(x) if (inherits(x, "POSIXct")) return(as.Date(x)) if (is.numeric(x)) { return(as.Date(x, origin = "1899-12-30")) } x_char <- as.character(x) formats <- c("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y-%m-%d %H:%M:%S") for (fmt in formats) { result <- suppressWarnings(as.Date(x_char, format = fmt)) if (!is.na(result)) return(result) } return(NA) } tryCatch({ harvesting_data <- read_excel(harvest_file) %>% dplyr::select(c( "field", "sub_field", "year", "season_start", "season_end", "age", "sub_area", "tonnage_ha" )) %>% mutate( field = as.character(field), sub_field = as.character(sub_field), year = as.numeric(year), season_start = sapply(season_start, parse_flexible_date), season_end = sapply(season_end, parse_flexible_date), season_start = as.Date(season_start, origin = "1970-01-01"), season_end = as.Date(season_end, origin = "1970-01-01"), age = as.numeric(age), sub_area = as.character(sub_area), tonnage_ha = as.numeric(tonnage_ha) ) %>% mutate( season_end = case_when( season_end > Sys.Date() ~ Sys.Date(), is.na(season_end) ~ Sys.Date(), TRUE ~ season_end ), age = round(as.numeric(season_end - season_start) / 7, 0) ) return(harvesting_data) }, error = function(e) { warning(paste("Error loading harvesting data:", e$message)) return(NULL) }) } # ============================================================================== # SECTION 6: LOGGING SYSTEM SETUP # ============================================================================== # Create default logging functions #' Safe Logging Function #' #' Generic logging with [LEVEL] prefix. Works standalone without any framework. #' Consistent with SmartCane logging standard. #' #' @param message The message to log #' @param level The log level (default: "INFO"). Options: "INFO", "WARNING", "ERROR", "DEBUG" #' @return NULL (invisible, used for side effects) #' safe_log <- function(message, level = "INFO") { prefix <- sprintf("[%s]", level) cat(sprintf("%s %s\n", prefix, message)) } #' SmartCane Debug Logging (Conditional) #' #' Logs DEBUG-level messages only if verbose=TRUE or SMARTCANE_DEBUG env var is set. #' #' @param message The message to log #' @param verbose Whether to output regardless of SMARTCANE_DEBUG (default: FALSE) #' @return NULL (invisible, used for side effects) #' smartcane_debug <- function(message, verbose = FALSE) { if (!verbose && Sys.getenv("SMARTCANE_DEBUG") != "TRUE") { return(invisible(NULL)) } safe_log(message, level = "DEBUG") } #' SmartCane Warning Logging #' #' Logs WARN-level messages. Convenience wrapper around safe_log(). #' #' @param message The message to log #' @return NULL (invisible, used for side effects) #' smartcane_warn <- function(message) { safe_log(message, level = "WARN") } log_message <- function(message, level = "INFO") { prefix <- sprintf("[%s]", level) cat(sprintf("%s %s\n", prefix, message)) } log_head <- function(data, level = "INFO") { log_message(paste(capture.output(str(head(data))), collapse = "\n"), level) } #' Setup full logging system with file output #' #' Creates log directory and returns logging functions that write to both #' console and log file. #' #' @param log_dir Character. Directory for log files #' @return List with log_file path, log_message function, and log_head function setup_logging <- function(log_dir) { dir.create(log_dir, showWarnings = FALSE, recursive = TRUE) log_file <- file.path(log_dir, sprintf( "smartcane_%s.log", format(Sys.time(), "%Y%m%d_%H%M%S") )) log_message <- function(message, level = "INFO") { prefix <- sprintf("[%s] [%s]", level, format(Sys.time(), "%Y-%m-%d %H:%M:%S")) formatted_message <- sprintf("%s %s", prefix, message) cat(formatted_message, "\n") cat(formatted_message, "\n", file = log_file, append = TRUE) } log_head <- function(list, level = "INFO") { log_message(paste(capture.output(str(head(list))), collapse = "\n"), level) } assign("log_message", log_message, envir = .GlobalEnv) assign("log_head", log_head, envir = .GlobalEnv) return(list( log_file = log_file, log_message = log_message, log_head = log_head )) } # ============================================================================== # SECTION 7: MOSAIC & KPI VERIFICATION HELPERS # ============================================================================== # Centralized helper functions for run_full_pipeline.R to avoid hardcoding paths #' Detect mosaic mode from project structure #' #' Determines if project uses "tiled" (legacy) or "single-file" (per-field) mosaics #' #' @param project_dir Character. Project name #' @return Character. "tiled" or "single-file" detect_mosaic_mode <- function(project_dir) { # Per-field architecture is standard - always return "single-file" # unless weekly_tile_max directory exists with content mosaic_tiled_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max") if (dir.exists(mosaic_tiled_dir) && length(list.files(mosaic_tiled_dir)) > 0) { return("tiled") } return("single-file") } #' Detect grid size from tile directory structure #' #' For per-field architecture, returns "unknown" (grid-based tiling is legacy) #' #' @param project_dir Character. Project name #' @return Character. Grid size ("unknown" for per-field) detect_grid_size <- function(project_dir) { # Per-field architecture doesn't use grid-based organization return("unknown") } #' Get project storage path #' #' @param project_dir Character. Project name #' @param subdir Character. Optional subdirectory (default NULL) #' @return Character. Full path get_project_storage_path <- function(project_dir, subdir = NULL) { path <- file.path("laravel_app", "storage", "app", project_dir) if (!is.null(subdir)) { path <- file.path(path, subdir) } return(path) } #' Get mosaic directory #' #' @param project_dir Character. Project name #' @param mosaic_mode Character. "tiled" or "single-file" #' @return Character. Full path to mosaic directory get_mosaic_dir <- function(project_dir, mosaic_mode = "auto") { if (mosaic_mode == "auto") { mosaic_mode <- detect_mosaic_mode(project_dir) } if (mosaic_mode == "tiled") { get_project_storage_path(project_dir, "weekly_tile_max") } else { get_project_storage_path(project_dir, "weekly_mosaic") } } #' Get KPI directory based on client type #' #' @param project_dir Character. Project name #' @param client_type Character. Client type #' @return Character. Full path to KPI directory get_kpi_dir <- function(project_dir, client_type) { base_path <- get_project_storage_path(project_dir, "reports/kpis") if (client_type == "agronomic_support") { return(file.path(base_path, "field_level")) } else { return(file.path(base_path, "field_analysis")) } } #' Get expected output path for harvest imminent file #' #' @param project_dir Character. Project name #' @param week_num Integer. ISO week number #' @param year_num Integer. Year #' @return Character. Full path to expected harvest imminent CSV file get_harvest_output_path <- function(project_dir, week_num, year_num) { file.path( "laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats", sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, week_num, year_num) ) } #' Check if harvest output file exists for a specific week #' #' @param project_dir Character. Project name #' @param week_num Integer. ISO week number #' @param year_num Integer. Year #' @return Logical. TRUE if file exists check_harvest_output_exists <- function(project_dir, week_num, year_num) { path <- get_harvest_output_path(project_dir, week_num, year_num) file.exists(path) } #' Get mosaic verification directory #' #' @param project_dir Character. Project name #' @param mosaic_mode Character. "tiled" or "single-file" #' @return Character. Full path to mosaic directory get_mosaic_verification_dir <- function(project_dir, mosaic_mode) { base <- file.path("laravel_app", "storage", "app", project_dir) if (mosaic_mode == "tiled") { file.path(base, "weekly_tile_max") } else { file.path(base, "weekly_mosaic") } } #' Check if mosaic files exist for a specific week #' #' @param project_dir Character. Project name #' @param week_num Integer. ISO week number #' @param year_num Integer. Year #' @param mosaic_mode Character. "tiled" or "single-file" #' @return List with created (logical), file_count (int), sample_files (char vector) check_mosaic_exists <- function(project_dir, week_num, year_num, mosaic_mode) { mosaic_dir <- get_mosaic_verification_dir(project_dir, mosaic_mode) if (!dir.exists(mosaic_dir)) { return(list(created = FALSE, file_count = 0, sample_files = character())) } week_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year_num) mosaic_files <- list.files(mosaic_dir, pattern = week_pattern, recursive = TRUE, full.names = FALSE) list( created = length(mosaic_files) > 0, file_count = length(mosaic_files), sample_files = head(mosaic_files, 3) ) } #' Check KPI completeness for reporting window #' #' @param project_dir Character. Project name #' @param client_type Character. Client type #' @param end_date Date. End date of reporting window #' @param reporting_weeks Integer. Number of weeks to check #' @return List with kpi_dir, kpis_df, missing_count check_kpi_completeness <- function(project_dir, client_type, end_date, reporting_weeks = 1) { kpi_dir <- get_kpi_dir(project_dir, client_type) if (!dir.exists(kpi_dir)) { dir.create(kpi_dir, showWarnings = FALSE, recursive = TRUE) } kpis_df <- data.frame() missing_count <- 0 for (weeks_back in 0:(reporting_weeks - 1)) { target_week <- end_date - lubridate::weeks(weeks_back) wwy <- get_iso_week_year(target_week) # Check for KPI file for this week kpi_pattern <- sprintf("week_%02d_%d", wwy$week, wwy$year) kpi_files <- list.files(kpi_dir, pattern = kpi_pattern) has_kpis <- length(kpi_files) > 0 file_count <- length(kpi_files) if (!has_kpis) missing_count <- missing_count + 1 kpis_df <- rbind(kpis_df, data.frame( week = wwy$week, year = wwy$year, date = target_week, has_kpis = has_kpis, file_count = file_count )) } return(list( kpi_dir = kpi_dir, kpis_df = kpis_df, missing_count = missing_count )) } # ============================================================================== # SECTION 8: PROJECT INITIALIZATION # ============================================================================== #' Initialize the project #' #' Sets up directory structure, logging, and loads configuration #' #' @param project_dir Character. Project name #' @param data_source Character. "merged_tif" or "merged_tif_8b" #' @return List with all project directories and settings initialize_project <- function(project_dir, data_source = "merged_tif") { dirs <- setup_project_directories(project_dir, data_source = data_source) logging <- setup_logging(dirs$log_dir) return(list( dirs = dirs, logging = logging, project_dir = project_dir, client_type = get_client_type(project_dir) )) } # ============================================================================== # AUTO-INITIALIZATION: Set project_dir global if not already set # ============================================================================== if (exists("project_dir")) { assign("project_dir", project_dir, envir = .GlobalEnv) } else { warning("project_dir variable not found. Please set project_dir before sourcing parameters_project.R") } # ============================================================================== # END PARAMETERS_PROJECT_2.R # ==============================================================================