# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\parameters_project.R
#
# PARAMETERS_PROJECT.R
# ====================
# Defines project parameters, directory structures, and loads field boundaries.
# Establishes all necessary paths and creates required directories for the
# SmartCane project.

# 1. Load required libraries
# --------------------------
suppressPackageStartupMessages({
  library(here)
  library(readxl)
  library(sf)
  library(dplyr)
  library(tidyr)
  library(jsonlite) # For reading tiling_config.json
})

# 2. Client type mapping (for conditional script execution)
# ---------------------------------------------------------
# Maps project names to client types for pipeline control.
# Client types:
#   - "cane_supply":       Runs Scripts 20,21,30,31,80,91 (full pipeline with Excel output)
#   - "agronomic_support": Runs Scripts 80,90 only (KPI calculation + Word report)
#   - "extension_service": (Future - not yet implemented)
#
# NOTE: This will eventually migrate to Laravel environment variables/database.
# For now, maintain this mapping and update as projects are added.
CLIENT_TYPE_MAP <- list(
  "angata"   = "cane_supply",
  "aura"     = "agronomic_support",
  "chemba"   = "cane_supply",
  "xinavane" = "cane_supply",
  "esa"      = "cane_supply"
)

#' Resolve the client type for a project
#'
#' @param project_name Character scalar; key into `CLIENT_TYPE_MAP`.
#' @return Character scalar client type. Unknown projects fall back to
#'   "cane_supply" with a warning so the pipeline still runs.
get_client_type <- function(project_name) {
  mapped <- CLIENT_TYPE_MAP[[project_name]]
  if (!is.null(mapped)) {
    return(mapped)
  }
  warning(sprintf("Project '%s' not in CLIENT_TYPE_MAP - defaulting to 'cane_supply'", project_name))
  "cane_supply"
}
# 2b. Client-specific KPI configurations
# ----------------------------------------
# Defines which KPIs and outputs are required for each client type.
# This enables Script 80 to conditionally calculate only relevant metrics.
#
# Structure:
# - kpi_calculations: Vector of KPI types to calculate for this client
# - outputs: Vector of output formats to generate (determines RDS/Excel naming)
# - requires_harvest_data: Boolean - whether Script 31 harvest predictions are needed
# - script_90_compatible: Boolean - whether output should match Script 90 expectations
# - script_91_compatible: Boolean - whether output should match Script 91 expectations
CLIENT_TYPE_CONFIGS <- list(
  # Aura (agronomic_support): Farm-level KPI summaries for weekly reports to agronomists
  "agronomic_support" = list(
    client_type = "agronomic_support",
    description = "Farm-level KPI summaries for agronomic decision support",
    kpi_calculations = c(
      "field_uniformity",
      "area_change",
      "tch_forecasted",
      "growth_decline",
      "weed_presence",
      "gap_filling"
    ),
    outputs = c(
      "kpi_summary_tables", # Summary statistics for Script 90 report front page
      "field_details"       # Detailed field table for Script 90 report end section
    ),
    requires_harvest_data = FALSE, # Script 31 predictions not used
    script_90_compatible = TRUE,   # Output format matches Script 90 expectations
    script_91_compatible = FALSE
  ),
  # Cane Supply (cane_supply): Per-field analysis with harvest timing prediction
  "cane_supply" = list(
    client_type = "cane_supply",
    description = "Per-field analysis with harvest prediction and phase assignment",
    kpi_calculations = c(
      "per_field_analysis", # Use 80_weekly_stats_utils.R for field-level statistics
      "phase_assignment",   # Assign growth phases (Germination, Tillering, Grand Growth, Maturation)
      "harvest_prediction", # Include Script 31 harvest age predictions if available
      "status_triggers"     # Calculate field status (Normal, Monitor, Alert, Urgent)
    ),
    outputs = c(
      "field_analysis_excel",  # Excel file with per-field metrics
      "field_analysis_summary" # Summary RDS for Script 91 report
    ),
    requires_harvest_data = TRUE, # harvest.xlsx is required for phase assignment
    script_90_compatible = FALSE,
    script_91_compatible = TRUE
  )
)

#' Get KPI configuration for a specific client type
#' @param client_type Character string of client type (e.g., "agronomic_support", "cane_supply")
#' @return List containing configuration for that client type; unknown types
#'   fall back to the "cane_supply" configuration with a warning.
get_client_kpi_config <- function(client_type) {
  config <- CLIENT_TYPE_CONFIGS[[client_type]]
  if (is.null(config)) {
    warning(sprintf("Client type '%s' not in CLIENT_TYPE_CONFIGS - defaulting to 'cane_supply'", client_type))
    return(CLIENT_TYPE_CONFIGS[["cane_supply"]])
  }
  return(config)
}

# 3. Smart detection for tile-based vs single-file mosaic approach
# ----------------------------------------------------------------

#' Detect whether the merged_final_tif output uses tiling
#'
#' Detection priority:
#'   1. tiling_config.json metadata written by Script 10 (authoritative)
#'   2. Grid-size subdirectories (e.g. "5x5") under merged_final_tif/
#'   3. Tile-named files ("*_XX.tif") directly in merged_final_tif/
#'
#' @param merged_final_tif_dir Directory holding merged final GeoTIFFs.
#' @param daily_tiles_split_dir Optional directory searched for tiling_config.json.
#' @return List with at least: has_tiles (logical), detected_tiles (character),
#'   total_files (integer), source (character detection method). Grid-based
#'   detection also returns grid_size and grid_path.
detect_tile_structure_from_merged_final <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) {
  # PRIORITY 1: Check for tiling_config.json metadata file from script 10.
  # This is the most reliable source since script 10 explicitly records its decision.
  if (!is.null(daily_tiles_split_dir) && dir.exists(daily_tiles_split_dir)) {
    config_files <- list.files(daily_tiles_split_dir, pattern = "tiling_config\\.json$",
                               recursive = TRUE, full.names = TRUE)
    if (length(config_files) > 0) {
      # Use the most recently modified config if several grid-size subfolders exist
      config_file <- config_files[which.max(file.info(config_files)$mtime)]
      tryCatch({
        config_json <- jsonlite::read_json(config_file)
        # NOTE: `%||%` is base R >= 4.4 (also exported by rlang via dplyr)
        return(list(
          has_tiles = config_json$has_tiles %||% TRUE,
          detected_tiles = character(),
          total_files = 0,
          source = "tiling_config.json",
          grid_size = config_json$grid_size %||% "unknown"
        ))
      }, error = function(e) {
        warning("Error reading tiling_config.json: ", e$message)
        # Fall through to file-based detection
      })
    }
  }

  # PRIORITY 2: File-based detection (fallback if metadata not found).
  # Check if merged_final_tif/ contains tile-named files OR grid-size subdirectories.
  if (!dir.exists(merged_final_tif_dir)) {
    return(list(
      has_tiles = FALSE,
      detected_tiles = character(),
      total_files = 0,
      source = "directory_not_found"
    ))
  }

  # Grid-size subdirectories (5x5, 10x10, ...) indicate tiles organized as
  # merged_final_tif/{grid_size}/{DATE}/{DATE}_XX.tif
  grid_subfolders <- list.dirs(merged_final_tif_dir, full.names = FALSE, recursive = FALSE)
  grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE)

  if (length(grid_patterns) > 0) {
    # Found grid-size subdirectories - tiles exist! (first match wins if several)
    grid_size <- grid_patterns[1]
    grid_dir <- file.path(merged_final_tif_dir, grid_size)
    # BUG FIX: `[1:3]` padded with NA when fewer than 3 tiles existed, and
    # total_files reported the sample size (always <= 3) instead of the real count.
    grid_tifs <- list.files(grid_dir, pattern = "\\.tif$", recursive = TRUE)
    sample_tiles <- head(grid_tifs, 3)
    return(list(
      has_tiles = TRUE,
      detected_tiles = sample_tiles,
      total_files = length(grid_tifs),
      source = "grid_subdirectory_detection",
      grid_size = grid_size,
      grid_path = grid_dir
    ))
  }

  # Fall back to checking for tile-named files directly in merged_final_tif
  tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE)
  if (length(tif_files) == 0) {
    return(list(
      has_tiles = FALSE,
      detected_tiles = character(),
      total_files = 0,
      source = "no_files_found"
    ))
  }

  # Tile naming pattern: *_XX.tif (where XX is 2 digits),
  # e.g. 2025-11-27_00.tif, week_50_2024_00.tif
  tile_pattern <- "_(\\d{2})\\.tif$"
  tile_files <- tif_files[grepl(tile_pattern, tif_files)]
  has_tiles <- length(tile_files) > 0

  return(list(
    has_tiles = has_tiles,
    detected_tiles = tile_files,
    total_files = length(tif_files),
    source = "file_pattern_detection"
  ))
}
# 4. Define project directory structure
# -------------------------------------
# ==============================================================================
# CENTRALIZED PATH MANAGEMENT - setup_project_directories()
# ==============================================================================
# Single source of truth for ALL file paths used across the pipeline.
# All scripts should call this function once at startup and use the returned
# paths (eliminates ~88 hardcoded file.path() calls scattered across 8 scripts).
#
# USAGE:
#   paths <- setup_project_directories(project_dir)
#   merged_tif_dir <- paths$merged_tif_folder
#   daily_ci_dir   <- paths$daily_ci_vals_dir
#   kpi_output_dir <- paths$kpi_reports_dir
#
# TIERS (8-layer directory structure):
#   Tier 1: Raw data (merged_tif)
#   Tier 2: Per-field TIFFs (field_tiles, field_tiles_CI)
#   Tier 3: CI Extraction (daily_ci_vals, cumulative_ci_vals)
#   Tier 4: Growth Model (growth_model_interpolated)
#   Tier 5: Mosaics (weekly_mosaic, weekly_tile_max)
#   Tier 6: KPI & Reporting (kpi_reports_dir, kpi_field_stats_dir)
#   Tier 7: Support (data, vrt, harvest, logs)
#   Tier 8: Config & Metadata (field_boundaries_path, tiling_config_path)
# ==============================================================================

#' Build (and create) the full project directory catalogue
#'
#' @param project_dir Project storage folder name under laravel_app/storage/app.
#' @param data_source Reserved for selecting the raw TIF folder; currently
#'   unused by this function - TODO confirm intended wiring with callers.
#' @return Named list of every path the pipeline uses (see tier reference below).
#'   All directory entries are created on disk as a side effect.
setup_project_directories <- function(project_dir, data_source = "merged_tif") {
  # Project root under the Laravel storage area
  laravel_storage_dir <- here("laravel_app", "storage", "app", project_dir)

  # Intermediate parents referenced by several child paths below
  extracted_ci_base_dir <- here(laravel_storage_dir, "Data", "extracted_ci")
  daily_tiles_split_dir <- here(laravel_storage_dir, "daily_tiles_split")
  reports_dir           <- here(laravel_storage_dir, "reports")
  kpi_reports_dir       <- here(reports_dir, "kpis")
  data_dir              <- here(laravel_storage_dir, "Data")

  # Full path catalogue, grouped by pipeline tier (names are the public API)
  paths <- list(
    # PROJECT ROOT
    laravel_storage_dir = laravel_storage_dir,
    # TIER 1: Raw data - 4-band raw GeoTIFFs from Planet (Script 00)
    merged_tif_folder = here(laravel_storage_dir, "merged_tif"),
    # TIER 2: Per-field TIFFs (Script 10)
    field_tiles_dir = here(laravel_storage_dir, "field_tiles"),        # field_tiles/{FIELD}/{YYYY-MM-DD}.tif
    field_tiles_ci_dir = here(laravel_storage_dir, "field_tiles_CI"),  # pre-computed CI, used by Script 40
    daily_tiles_split_dir = daily_tiles_split_dir,                     # legacy grid tiling
    # TIER 3: CI Extraction (Script 20)
    extracted_ci_base_dir = extracted_ci_base_dir,
    daily_ci_vals_dir = here(extracted_ci_base_dir, "daily_vals"),            # combined_CI_data.rds
    cumulative_ci_vals_dir = here(extracted_ci_base_dir, "cumulative_vals"),  # cumulative CI RDS
    ci_for_python_dir = here(extracted_ci_base_dir, "ci_data_for_python"),    # Script 21 CSV export
    # TIER 4: Growth Model (Script 30)
    growth_model_interpolated_dir = here(laravel_storage_dir, "growth_model_interpolated"),
    # TIER 5: Mosaics (Script 40)
    weekly_mosaic_dir = here(laravel_storage_dir, "weekly_mosaic"),      # per-field weekly mosaics
    weekly_tile_max_dir = here(laravel_storage_dir, "weekly_tile_max"),  # legacy tile-based weekly max
    # TIER 6: KPI & Reporting (Scripts 80, 90, 91)
    reports_dir = reports_dir,
    kpi_reports_dir = kpi_reports_dir,                                  # Script 80 KPI CSV/RDS output
    kpi_field_stats_dir = here(kpi_reports_dir, "field_stats"),         # per-field KPI details
    kpi_field_analysis_dir = here(kpi_reports_dir, "field_analysis"),   # field-level analysis for Script 91
    # TIER 7: Support
    data_dir = data_dir,
    vrt_dir = here(data_dir, "vrt"),            # Virtual Raster files from CI extraction
    harvest_dir = here(data_dir, "HarvestData"),
    log_dir = here(laravel_storage_dir, "logs"),
    # TIER 8: Config & Metadata (files, not directories)
    field_boundaries_path = here(data_dir, "pivot.geojson"),
    tiling_config_path = here(daily_tiles_split_dir, "tiling_config.json")
  )

  # Create every directory entry once per pipeline run. The root and the two
  # file paths are excluded (the root is created implicitly via recursive = TRUE).
  non_dirs <- c("laravel_storage_dir", "field_boundaries_path", "tiling_config_path")
  dir_targets <- unlist(paths[setdiff(names(paths), non_dirs)], use.names = FALSE)
  invisible(lapply(dir_targets, dir.create, showWarnings = FALSE, recursive = TRUE))

  paths
}

# ==============================================================================
# TIER-BY-TIER PATH REFERENCE (for setup_project_directories output)
# ==============================================================================
# TIER 1: paths$merged_tif_folder/{YYYY-MM-DD}.tif (4-band uint16 GeoTIFFs from Planet API)
# TIER 2: paths$field_tiles_dir/{FIELD_NAME}/{YYYY-MM-DD}.tif
#         paths$field_tiles_ci_dir/{FIELD_NAME}/{YYYY-MM-DD}.tif
#         paths$daily_tiles_split_dir/{grid_size}/{YYYY-MM-DD}/{YYYY-MM-DD}_XX.tif (legacy)
# TIER 3: paths$daily_ci_vals_dir/combined_CI_data.rds
#         paths$cumulative_ci_vals_dir/All_pivots_Cumulative_CI_quadrant_year_v2.rds
#         paths$ci_for_python_dir/ci_data_for_python.csv (Script 21 output)
# TIER 4: paths$growth_model_interpolated_dir/ (RDS files with interpolated CI)
# TIER 5: paths$weekly_mosaic_dir/{FIELD_NAME}/week_XX_YYYY.tif
#         paths$weekly_tile_max_dir/{grid_size}/week_XX_YYYY_00.tif (legacy)
# TIER 6: paths$kpi_reports_dir/, paths$kpi_field_stats_dir/,
#         paths$kpi_field_analysis_dir/, paths$reports_dir/
# TIER 7: paths$data_dir/pivot.geojson, paths$data_dir/harvest.xlsx,
#         paths$vrt_dir/, paths$harvest_dir/, paths$log_dir/
# TIER 8: paths$field_boundaries_path, paths$tiling_config_path
# ==============================================================================
# 5. Load field boundaries
# ------------------------

#' Load field boundary polygons for the current project
#'
#' Reads pivot.geojson (or pivot_2.geojson for the ESA project during CI
#' extraction) and returns both an sf object and, when conversion succeeds,
#' a terra SpatVector. Missing CRS information is patched to WGS84.
#'
#' @param data_dir Directory containing the boundary GeoJSON files.
#' @return List with two elements:
#'   - field_boundaries_sf: sf object (columns: field, sub_field, optional sub_area)
#'   - field_boundaries: terra SpatVector, or the same sf object when terra
#'     conversion fails (callers must handle both types).
#' @note Reads globals `project_dir` and `ci_extraction_script` to select the
#'   boundary file - TODO confirm whether these should become parameters.
load_field_boundaries <- function(data_dir) {
  # Choose field boundaries file based on project and script type:
  # ESA project uses pivot_2.geojson ONLY for scripts 02-03 (CI extraction &
  # growth model); all other scripts (including 04-mosaic, 09-KPIs, 10-reports)
  # use pivot.geojson.
  use_pivot_2 <- exists("project_dir") && project_dir == "esa" && exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03
  if (use_pivot_2) {
    field_boundaries_path <- here(data_dir, "pivot_2.geojson")
  } else {
    field_boundaries_path <- here(data_dir, "pivot.geojson")
  }
  if (!file.exists(field_boundaries_path)) {
    stop(paste("Field boundaries file not found at path:", field_boundaries_path))
  }
  tryCatch({
    # Read GeoJSON with explicit CRS handling
    field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE)
    # Remove OBJECTID column immediately if it exists (ArcGIS export artifact)
    if ("OBJECTID" %in% names(field_boundaries_sf)) {
      field_boundaries_sf <- field_boundaries_sf %>% select(-OBJECTID)
    }
    # Validate and fix CRS if needed - DO NOT call is.na on full CRS objects as
    # it can cause errors; only the $epsg component is checked here.
    tryCatch({
      # Simply assign WGS84 if not already set (safe approach); terra will
      # handle reprojection downstream if needed.
      if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) {
        st_crs(field_boundaries_sf) <- 4326
        warning("CRS was missing, assigned WGS84 (EPSG:4326)")
      }
    }, error = function(e) {
      # If any CRS operation fails, try once more; `<<-` targets the
      # field_boundaries_sf binding in the enclosing function frame.
      tryCatch({
        st_crs(field_boundaries_sf) <<- 4326
      }, error = function(e2) {
        # Silently continue - terra might handle it
        warning(paste("Could not set CRS:", e2$message))
      })
    })
    # Handle column names - accommodate optional sub_area column.
    # IMPORTANT: must preserve geometry column properly when subsetting an sf object.
    if ("sub_area" %in% names(field_boundaries_sf)) {
      # Reorder columns but keep geometry last
      field_boundaries_sf <- field_boundaries_sf %>%
        dplyr::select(field, sub_field, sub_area) %>%
        sf::st_set_geometry("geometry")
    } else {
      # Reorder columns but keep geometry last
      field_boundaries_sf <- field_boundaries_sf %>%
        dplyr::select(field, sub_field) %>%
        sf::st_set_geometry("geometry")
    }
    # Convert to terra vector if possible, otherwise fall back to sf.
    # Some GeoJSON files (like aura with complex MultiPolygons) may have
    # GDAL/terra compatibility issues.
    field_boundaries <- tryCatch({
      field_boundaries_terra <- terra::vect(field_boundaries_sf)
      # Ensure terra object has valid CRS with safer checks (crs() itself may error)
      crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL)
      crs_str <- if (!is.null(crs_value)) as.character(crs_value) else ""
      if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) {
        terra::crs(field_boundaries_terra) <- "EPSG:4326"
        warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)")
      }
      field_boundaries_terra
    }, error = function(e) {
      warning(paste("Terra conversion failed, using sf object instead:", e$message))
      # Return sf object as fallback - downstream functions handle both types
      field_boundaries_sf
    })
    return(list(
      field_boundaries_sf = field_boundaries_sf,
      field_boundaries = field_boundaries
    ))
  }, error = function(e) {
    cat("[DEBUG] Error in load_field_boundaries:\n")
    cat(" Message:", e$message, "\n")
    cat(" Call:", deparse(e$call), "\n")
    stop(paste("Error loading field boundaries:", e$message))
  })
}
# 6. Load harvesting data
# -----------------------

#' Load the harvest schedule from harvest.xlsx
#'
#' Reads the per-field harvest schedule, coerces column types, parses dates in
#' several common formats (including Excel serial numbers), clamps future or
#' missing season_end values to today, and recomputes age in weeks.
#'
#' @param data_dir Directory containing harvest.xlsx.
#' @return Tibble with columns field, sub_field, year, season_start,
#'   season_end, age, sub_area, tonnage_ha - or NULL (with a warning) when the
#'   file is missing or cannot be read.
load_harvesting_data <- function(data_dir) {
  harvest_file <- here(data_dir, "harvest.xlsx")
  if (!file.exists(harvest_file)) {
    warning(paste("Harvest data file not found at path:", harvest_file))
    return(NULL)
  }

  # Helper: parse a single cell into a Date (or numeric days-since-epoch),
  # trying Excel serials and several text formats.
  parse_flexible_date <- function(x) {
    # BUG FIX: is.null()/length checks must come before is.na() - is.na(NULL)
    # yields logical(0), which makes `||` error on NULL or zero-length input.
    if (is.null(x) || length(x) == 0 || is.na(x)) return(NA_real_)
    if (inherits(x, "Date")) return(x)
    if (inherits(x, "POSIXct")) return(as.Date(x))
    # Numeric cells are Excel date serials (Windows epoch 1899-12-30)
    if (is.numeric(x)) {
      return(as.Date(x, origin = "1899-12-30"))
    }
    # Try common text formats: YYYY-MM-DD, DD/MM/YYYY, MM/DD/YYYY, timestamp
    x_char <- as.character(x)
    formats <- c("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y-%m-%d %H:%M:%S")
    for (fmt in formats) {
      result <- suppressWarnings(as.Date(x_char, format = fmt))
      if (!is.na(result)) return(result)
    }
    # Unparseable - numeric NA so sapply() keeps a consistent type
    NA_real_
  }

  tryCatch({
    harvesting_data <- read_excel(harvest_file) %>%
      dplyr::select(
        c(
          "field", "sub_field", "year", "season_start", "season_end",
          "age", "sub_area", "tonnage_ha"
        )
      ) %>%
      mutate(
        field = as.character(field),
        sub_field = as.character(sub_field),
        year = as.numeric(year),
        # Element-wise parse, then rebuild Date columns from the numeric results
        season_start = sapply(season_start, parse_flexible_date),
        season_end = sapply(season_end, parse_flexible_date),
        season_start = as.Date(season_start, origin = "1970-01-01"),
        season_end = as.Date(season_end, origin = "1970-01-01"),
        age = as.numeric(age),
        sub_area = as.character(sub_area),
        tonnage_ha = as.numeric(tonnage_ha)
      ) %>%
      mutate(
        # Clamp open-ended or future seasons to today, then recompute age in weeks
        season_end = case_when(
          season_end > Sys.Date() ~ Sys.Date(),
          is.na(season_end) ~ Sys.Date(),
          TRUE ~ season_end
        ),
        age = round(as.numeric(season_end - season_start) / 7, 0)
      )
    return(harvesting_data)
  }, error = function(e) {
    warning(paste("Error loading harvesting data:", e$message))
    return(NULL)
  })
}
# 7. Define logging functions globally first
# ------------------------------------------
# Console-only fallbacks so scripts can log before setup_logging() runs.

#' Print a timestamped log line to the console
#' @param message Text to log.
#' @param level Severity label embedded in the line (default "INFO").
log_message <- function(message, level = "INFO") {
  stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
  line <- paste0("[", level, "] ", stamp, " - ", message)
  cat(line, "\n")
}

#' Log a structural preview (str of head) of any object
#' @param list Object to preview (name kept for backward compatibility).
#' @param level Severity label forwarded to log_message().
log_head <- function(list, level = "INFO") {
  preview <- paste(capture.output(str(head(list))), collapse = "\n")
  log_message(preview, level)
}

# 8. Set up full logging system with file output
# ----------------------------------------------

#' Install file-backed logging functions
#'
#' Builds log_message()/log_head() variants that append to a dated log file
#' (errors and warnings are echoed to the console) and installs them in the
#' global environment, replacing the console-only fallbacks above.
#'
#' @param log_dir Directory for the daily log file (YYYYMMDD.log).
#' @return List with log_file path and the two enhanced logging closures.
setup_logging <- function(log_dir) {
  log_file <- here(log_dir, paste0(format(Sys.Date(), "%Y%m%d"), ".log"))

  log_message <- function(message, level = "INFO") {
    stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
    line <- paste0("[", level, "] ", stamp, " - ", message)
    cat(line, "\n", file = log_file, append = TRUE)
    # Echo problems to the console so they are visible during runs
    if (level %in% c("ERROR", "WARNING")) {
      cat(line, "\n")
    }
  }

  log_head <- function(list, level = "INFO") {
    log_message(paste(capture.output(str(head(list))), collapse = "\n"), level)
  }

  # Replace the global fallbacks with the file-backed versions
  assign("log_message", log_message, envir = .GlobalEnv)
  assign("log_head", log_head, envir = .GlobalEnv)

  list(
    log_file = log_file,
    log_message = log_message,
    log_head = log_head
  )
}
# 8. HELPER FUNCTIONS FOR COMMON CALCULATIONS
# -------------------------------------------
# Centralized functions to reduce duplication across scripts.

#' ISO-8601 week number for a date (numeric, 1-53)
get_iso_week <- function(date) {
  as.numeric(format(date, "%V"))
}

#' ISO-8601 week-based year for a date (numeric)
get_iso_year <- function(date) {
  as.numeric(format(date, "%G"))
}

#' Both ISO week and year as a list(week=, year=)
get_iso_week_year <- function(date) {
  # CONSISTENCY FIX: delegate to get_iso_week()/get_iso_year() instead of
  # duplicating the "%V"/"%G" format strings in a second place.
  list(
    week = get_iso_week(date),
    year = get_iso_year(date)
  )
}

#' Format a date as a "weekWW<sep>YYYY" label (e.g. "week05_2025")
format_week_label <- function(date, separator = "_") {
  wwy <- get_iso_week_year(date)
  sprintf("week%02d%s%d", wwy$week, separator, wwy$year)
}

#' Auto-detect mosaic mode
#' For the per-field architecture this returns "single-file" when
#' weekly_mosaic/ exists (weekly_mosaic/{FIELD}/week_*.tif), else "unknown".
detect_mosaic_mode <- function(project_dir) {
  # CONSISTENCY FIX: build the path via get_project_storage_path() like the
  # other helpers instead of repeating the hardcoded storage root.
  weekly_mosaic <- get_project_storage_path(project_dir, "weekly_mosaic")
  if (dir.exists(weekly_mosaic)) {
    return("single-file") # Per-field structure
  }
  "unknown"
}

#' Auto-detect grid size from tile directory structure
#' The per-field architecture no longer uses grid-based organization (legacy),
#' so this always returns "unknown".
detect_grid_size <- function(project_dir) {
  "unknown"
}

#' Build storage paths consistently across all scripts
#' @param project_dir Project folder name under laravel_app/storage/app.
#' @param subdir Optional subdirectory appended to the project root.
get_project_storage_path <- function(project_dir, subdir = NULL) {
  base <- file.path("laravel_app", "storage", "app", project_dir)
  if (!is.null(subdir)) file.path(base, subdir) else base
}

#' Mosaic directory for a project
#' @param mosaic_mode Accepted for backward compatibility; the per-field
#'   architecture always uses weekly_mosaic (single-file, per-field).
get_mosaic_dir <- function(project_dir, mosaic_mode = "auto") {
  get_project_storage_path(project_dir, "weekly_mosaic")
}

#' KPI output directory for a project/client-type combination
#' agronomic_support -> reports/kpis/field_level; everything else ->
#' reports/kpis/field_analysis (matches Scripts 80/90/91 expectations).
get_kpi_dir <- function(project_dir, client_type) {
  subdir <- if (client_type == "agronomic_support") "field_level" else "field_analysis"
  get_project_storage_path(project_dir, file.path("reports", "kpis", subdir))
}

# Logging functions moved to 00_common_utils.R
#   - smartcane_log()   - Main logging function with level prefix
#   - smartcane_debug() - Conditional debug logging
#   - smartcane_warn()  - Warning wrapper
# Import with: source("r_app/00_common_utils.R")

# ============================================================================
# PHASE 3 & 4: OPTIMIZATION & DOCUMENTATION
# ============================================================================

# System Constants
# ----------------
# Define once, use everywhere.
# Used in run_full_pipeline.R for calling R scripts via system().
RSCRIPT_PATH <- "C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe"

# Data Source Documentation
# -------------------------
# SmartCane uses PlanetScope imagery from Planet Labs API in two formats:
#
# 1. merged_tif (4-band):
#    - Standard format: Red, Green, Blue, Near-Infrared
#    - Size: ~150-200 MB per date
#    - Use case: Agronomic support, general crop health monitoring
#    - Projects: aura, xinavane
#    - Cloud handling: Basic cloud masking from Planet metadata
#
# 2. merged_tif_8b (8-band with cloud confidence):
#    - Enhanced format: 4-band imagery + 4-band UDM2 cloud mask
#    - UDM2 bands: Clear, Snow, Shadow, Light Haze
#    - Size: ~250-350 MB per date
#    - Use case: Harvest prediction, supply chain optimization
#    - Projects: angata, chemba, esa (cane_supply clients)
#    - Cloud handling: Per-pixel cloud confidence from Planet UDM2
#    - Why: Cane supply chains need precise confidence to predict harvest dates
#      (don't want to predict based on cloudy data)
#
# The system auto-detects which is available via detect_data_source()

# Mosaic Mode Documentation
# -------------------------
# SmartCane supports two ways to store and process weekly mosaics:
#
# 1. Single-file mosaic ("single-file"):
#    - One GeoTIFF per week: weekly_mosaic/week_02_2026.tif
#    - 5 bands per file: R, G, B, NIR, CI (Canopy Index)
#    - Size: ~300-500 MB per week
#    - Pros: Simpler file management, easier full-field visualization
#    - Cons: Slower for field-specific queries, requires loading full raster
#    - Best for: Agronomic support (aura) with <100 fields
#    - Script 04 output: 5-band single-file mosaic
#
# 2. Tiled mosaic ("tiled"):
#    - Grid of tiles per week: weekly_tile_max/5x5/week_02_2026_{TT}.tif
#    - Example: 25 files (5x5 grid) x 5 bands = 125 individual tiffs
#    - Size: ~15-20 MB per tile, organized in folders
#    - Pros: Parallel processing, fast field lookups, scales to 1000+ fields
#    - Cons: More file I/O, requires tile-to-field mapping metadata
#    - Best for: Cane supply (angata, chemba) with 500+ fields
#    - Script 04 output: Per-tile tiff files in weekly_tile_max/{grid}/
#    - Tile assignment: Field boundaries mapped to grid coordinates
#
# The system auto-detects which is available via detect_mosaic_mode()

# Client Type Documentation
# -------------------------
# SmartCane runs different analysis pipelines based on client_type:
#
# CLIENT_TYPE: cane_supply
#   Purpose: Optimize sugar mill supply chain (harvest scheduling)
#   Scripts run: 20 (CI), 21 (RDS to CSV), 30 (Growth), 31 (Harvest pred),
#                40 (Mosaic), 80 (KPI), 91 (Excel)
#   Outputs:
#     - Per-field analysis: field status, growth phase, harvest readiness
#     - Excel reports (Script 91): Detailed metrics for logistics planning
#     - KPI directory: reports/kpis/field_analysis/ (one RDS per week)
#   Harvest data: Required (harvest.xlsx - planting dates for phase assignment)
#   Data source: merged_tif_8b (uses cloud confidence for confidence)
#   Mosaic mode: tiled (scales to 500+ fields)
#   Projects: angata, chemba, xinavane, esa
#
# CLIENT_TYPE: agronomic_support
#   Purpose: Provide weekly crop health insights to agronomists
#   Scripts run: 80 (KPI), 90 (Word report)
#   Outputs:
#     - Farm-level KPI summaries (no per-field breakdown)
#     - Word reports (Script 90): Charts and trends for agronomist decision support
#     - KPI directory: reports/kpis/field_level/ (one RDS per week)
#   Harvest data: Not used
#   Data source: merged_tif (simpler, smaller)
#   Mosaic mode: single-file (100-200 fields)
#   Projects: aura

#' Detect which raw-data folder the pipeline should read
#'
#' NOTE: currently hard-coded to "merged_tif" for consistency across projects;
#' availability-based auto-detection of merged_tif_8b (described in the Data
#' Source Documentation above) is not implemented yet.
#'
#' @param project_dir Project folder name (currently ignored).
#' @return The string "merged_tif".
detect_data_source <- function(project_dir) {
  # Data source is always merged_tif for consistency
  return("merged_tif")
}

#' Check KPI completeness for a reporting period
#'
#' Replaces duplicate KPI checking logic in run_full_pipeline.R
#' (lines ~228-270, ~786-810).
#'
#' @param project_dir Project folder name.
#' @param client_type Client type (selects the KPI directory via get_kpi_dir()).
#' @param end_date Date of the most recent reporting week.
#' @param reporting_weeks_needed Number of consecutive weeks (ending at
#'   end_date) that must have KPI files on disk.
#' @return List with kpis_df (one row per week), kpi_dir, missing_count,
#'   missing_weeks (subset of kpis_df), and all_complete (boolean).
check_kpi_completeness <- function(project_dir, client_type, end_date, reporting_weeks_needed) {
  kpi_dir <- get_kpi_dir(project_dir, client_type)

  # PERF FIX: build one row per week with lapply + do.call(rbind, ...) instead
  # of growing a data.frame with rbind inside the loop (quadratic copying).
  week_rows <- lapply(seq_len(reporting_weeks_needed) - 1L, function(weeks_back) {
    check_date <- end_date - (weeks_back * 7)
    wwy <- get_iso_week_year(check_date)

    # Build week pattern and check if any KPI file for that week exists
    week_pattern <- sprintf("week%02d_%d", wwy$week, wwy$year)
    files_this_week <- list.files(kpi_dir, pattern = week_pattern)
    has_kpis <- length(files_this_week) > 0

    # Debug logging (smartcane_debug() comes from 00_common_utils.R)
    smartcane_debug(sprintf(
      "Week %02d/%d (%s): %s (%d files)",
      wwy$week, wwy$year, format(check_date, "%Y-%m-%d"),
      if (has_kpis) "✓ FOUND" else "✗ MISSING",
      length(files_this_week)
    ))

    data.frame(
      week = wwy$week,
      year = wwy$year,
      date = check_date,
      has_kpis = has_kpis,
      pattern = week_pattern,
      file_count = length(files_this_week)
    )
  })
  kpis_needed <- do.call(rbind, week_rows)

  # Summary statistics
  missing_count <- sum(!kpis_needed$has_kpis)
  all_complete <- missing_count == 0

  return(list(
    kpis_df = kpis_needed,
    kpi_dir = kpi_dir,
    missing_count = missing_count,
    missing_weeks = kpis_needed[!kpis_needed$has_kpis, ],
    all_complete = all_complete
  ))
}

# 9. Initialize the project
# -------------------------

#' Initialize all project components in one call
#'
#' Sets up the directory structure, file logging, field boundaries, and the
#' harvest schedule for a project.
#'
#' @param project_dir Project folder name under laravel_app/storage/app.
#' @param data_source Raw TIF folder selector, forwarded to
#'   setup_project_directories() (default "merged_tif").
#' @return The path list from setup_project_directories() combined with
#'   logging, field_boundaries, field_boundaries_sf, and harvesting_data
#'   (harvesting_data may be NULL when harvest.xlsx is absent).
initialize_project <- function(project_dir, data_source = "merged_tif") {
  # Set up directory structure, passing data_source to select TIF folder
  dirs <- setup_project_directories(project_dir, data_source = data_source)
  # Set up logging
  logging <- setup_logging(dirs$log_dir)
  # Load field boundaries
  boundaries <- load_field_boundaries(dirs$data_dir)
  # Load harvesting data
  harvesting_data <- load_harvesting_data(dirs$data_dir)
  # Return all initialized components
  return(c(
    dirs,
    list(
      logging = logging,
      field_boundaries = boundaries$field_boundaries,
      field_boundaries_sf = boundaries$field_boundaries_sf,
      harvesting_data = harvesting_data
    )
  ))
}

# When this script is sourced, initialize with the global project_dir variable
# if it exists; all paths and loaded data are exported to the global env.
if (exists("project_dir")) {
  # Now we can safely log before initialization
  log_message(paste("Initializing project with directory:", project_dir))
  # Use data_source if it exists (passed from 02_ci_extraction.R), otherwise use default
  data_src <- if (exists("data_source")) data_source else "merged_tif"
  log_message(paste("Using data source directory:", data_src))
  project_config <- initialize_project(project_dir, data_source = data_src)
  # Expose all variables to the global environment
  list2env(project_config, envir = .GlobalEnv)
  # Log project initialization completion with tile mode info
  log_message(paste("Project initialized with directory:", project_dir))
  if (exists("use_tile_mosaic")) {
    mosaic_mode <- if (use_tile_mosaic) "TILE-BASED" else "SINGLE-FILE"
    log_message(paste("Mosaic mode detected:", mosaic_mode))
    if (exists("tile_detection_info") && !is.null(tile_detection_info)) {
      log_message(paste(" - Detection source:", tile_detection_info$detected_source))
      log_message(paste(" - Grid size:", tile_detection_info$grid_size))
      log_message(paste(" - Detected files in storage:", tile_detection_info$detected_count))
      if (length(tile_detection_info$sample_tiles) > 0) {
        log_message(paste(" - Sample tile files:", paste(tile_detection_info$sample_tiles, collapse = ", ")))
      }
    }
  }
} else {
  warning("project_dir variable not found. Please set project_dir before sourcing parameters_project.R")
}