# File: SmartCane/r_app/parameters_project.R
# (841 lines, 29 KiB, R source)

# ==============================================================================
# PARAMETERS_PROJECT_2.R (CLEANED VERSION)
# ==============================================================================
# PURPOSE:
# Project configuration, directory structure setup, and helper functions
# for centralized path management and project initialization.
#
# SECTION 1: Libraries & Dependencies
# SECTION 2: Client Type Mapping & Configuration
# SECTION 3: Directory Structure Setup
# SECTION 4: Date/Week Utility Functions (centralized)
# SECTION 5: Field Boundary & Harvest Data Loaders
# SECTION 6: Project Initialization & Logging
# SECTION 7: Mosaic & KPI Verification Helpers
#
# NOTE: Duplicate functions (safe_log, smartcane_debug, smartcane_warn,
# load_field_boundaries, date_list, repair_geojson_geometries)
# have been REMOVED - they belong in 00_common_utils.R.
# Source 00_common_utils.R after parameters_project.R to get those.
# ==============================================================================
# ==============================================================================
# SECTION 1: LIBRARIES & DEPENDENCIES
# ==============================================================================
suppressPackageStartupMessages({
library(here)
library(readxl)
library(sf)
library(dplyr)
library(tidyr)
library(lubridate)
library(jsonlite) # For reading tiling_config.json
})
# ==============================================================================
# SECTION 2: GLOBAL AGRONOMIC THRESHOLDS & CLIENT TYPE MAPPING
# ==============================================================================
# Maturity threshold for yield prediction: crop age in Days After Harvest (DAH).
# Only fields >= DAH_MATURITY_THRESHOLD days old receive yield forecasts.
# ~240 days ≈ 8 months, a typical sugarcane maturity window.
DAH_MATURITY_THRESHOLD <- 240
# Maps project names to client types for pipeline control.
# The client type determines which scripts run and what outputs they produce
# (see CLIENT_TYPE_CONFIGS for the per-type configuration).
# Projects not listed here default to "agronomic_support" via get_client_type().
CLIENT_TYPE_MAP <- list(
"angata" = "cane_supply",
"chemba" = "agronomic_support",
"xinavane" = "agronomic_support",
"esa" = "agronomic_support",
"simba" = "agronomic_support",
"john" = "agronomic_support",
"huss" = "agronomic_support",
"aura" = "agronomic_support"
)
#' Get client type for a project
#'
#' Looks up `project_name` in the global `CLIENT_TYPE_MAP`. Unknown projects
#' fall back to "agronomic_support" with a warning.
#'
#' @param project_name Character project name (e.g., "angata")
#' @return Character client type ("cane_supply" or "agronomic_support")
get_client_type <- function(project_name) {
  client_type <- CLIENT_TYPE_MAP[[project_name]]
  if (is.null(client_type)) {
    # BUG FIX: the original passed `sep = ""` to paste0(), which has no `sep`
    # argument -- the "" was silently concatenated onto the message. sprintf()
    # makes the message construction explicit.
    warning(sprintf(
      "Project '%s' not found in CLIENT_TYPE_MAP. Defaulting to 'agronomic_support'.",
      project_name
    ), call. = FALSE)
    return("agronomic_support") # Default for all unlisted projects
  }
  client_type
}
# Client-specific KPI configurations.
# Defines which KPI calculations and outputs are required for each client
# type, plus which reporting scripts (90/91) the type is compatible with.
# Retrieved via get_client_kpi_config().
CLIENT_TYPE_CONFIGS <- list(
# agronomic_support: farm-level KPI summaries for agronomists.
# No harvest data required; consumed by script 90 only.
"agronomic_support" = list(
client_type = "agronomic_support",
description = "Farm-level KPI summaries for agronomic decision support",
kpi_calculations = c(
"field_uniformity",
"area_change",
"tch_forecasted",
"growth_decline",
"weed_presence",
"gap_filling"
),
outputs = c("kpi_summary_tables", "field_details"),
requires_harvest_data = FALSE,
script_90_compatible = TRUE,
script_91_compatible = FALSE
),
# cane_supply: per-field analysis with harvest prediction.
# Requires harvest data (harvest.xlsx); consumed by script 91 only.
"cane_supply" = list(
client_type = "cane_supply",
description = "Per-field analysis with harvest prediction and phase assignment",
kpi_calculations = c(
"per_field_analysis",
"phase_assignment",
"harvest_prediction",
"status_triggers"
),
outputs = c("field_analysis_excel", "field_analysis_summary"),
requires_harvest_data = TRUE,
script_90_compatible = FALSE,
script_91_compatible = TRUE
)
)
#' Get KPI configuration for a specific client type
#'
#' Looks up `client_type` in the global `CLIENT_TYPE_CONFIGS` table.
#'
#' @param client_type Character (e.g., "agronomic_support", "cane_supply")
#' @return List containing the configuration for that client type; unknown
#'   types fall back to the "cane_supply" configuration with a warning.
get_client_kpi_config <- function(client_type) {
  if (is.null(CLIENT_TYPE_CONFIGS[[client_type]])) {
    warning(sprintf("Client type '%s' not found - using cane_supply defaults", client_type))
    CLIENT_TYPE_CONFIGS[["cane_supply"]]
  } else {
    CLIENT_TYPE_CONFIGS[[client_type]]
  }
}
# ==============================================================================
# SECTION 3: DIRECTORY STRUCTURE SETUP
# ==============================================================================
# CENTRALIZED PATH MANAGEMENT: All file paths in the entire pipeline
# are derived from setup_project_directories().
# This is the single source of truth for 8 tiers of directories.
#' Setup complete project directory structure
#'
#' Creates all 8 tiers of directories under laravel_app/storage/app/<project>
#' and returns a comprehensive named list of paths for use throughout the
#' pipeline. This is the single source of truth for pipeline paths.
#'
#' @param project_dir Character. Project name (e.g., "angata", "aura")
#' @param data_source Character. "merged_tif" (default) or "merged_tif_8b".
#'   NOTE(review): currently unused in the body -- every path is built the
#'   same way regardless; confirm whether "merged_tif_8b" support is pending.
#' @return List containing all 8 tiers of paths
#'
#' @details
#' TIER 1: Raw data (merged_tif) - Python download output
#' TIER 2: Per-field TIFFs - Script 10 output
#' TIER 3: CI extraction - Script 20 output
#' TIER 4: Growth model - Script 30 output
#' TIER 5: Mosaics - Script 40 output
#' TIER 6: KPI & reports - Scripts 80/90/91 output
#' TIER 7: Support files - GeoJSON, Excel, logs
#' TIER 8: Metadata - Config, CRS info
setup_project_directories <- function(project_dir, data_source = "merged_tif") {
  # BASE DIRECTORY: everything lives under laravel_app/storage/app/<project>
  laravel_storage_dir <- here("laravel_app", "storage", "app", project_dir)
  # NOTE(review): subsequent here() calls receive this already-absolute path
  # as their first argument. rprojroot >= 2.0 returns absolute first path
  # components unchanged (so here() acts like file.path() below) -- confirm
  # the installed here/rprojroot version guarantees this behavior.
  # TIER 1: RAW DATA (Script 00 output - Python download)
  merged_tif_folder <- here(laravel_storage_dir, "merged_tif")
  # TIER 2: PER-FIELD TIFFS (Script 10 output)
  field_tiles_dir <- here(laravel_storage_dir, "field_tiles")
  field_tiles_ci_dir <- here(laravel_storage_dir, "field_tiles_CI")
  daily_tiles_split_dir <- here(laravel_storage_dir, "daily_tiles_split")
  # SUPPORT TIER: DATA DIRECTORY (defined early because tiers 3/4/7 nest in it)
  data_dir <- here(laravel_storage_dir, "Data")
  # TIER 3: CI EXTRACTION (Script 20 output)
  # Structure: Data/extracted_ci/daily_vals/{FIELD}/{DATE}.rds (per-field daily CI values)
  extracted_ci_base_dir <- here(data_dir, "extracted_ci")
  daily_ci_vals_dir <- here(extracted_ci_base_dir, "daily_vals") # Per-field structure
  cumulative_ci_vals_dir <- here(extracted_ci_base_dir, "cumulative_vals")
  ci_for_python_dir <- here(extracted_ci_base_dir, "ci_data_for_python")
  # TIER 4: GROWTH MODEL (Script 30 output)
  growth_model_interpolated_dir <- here(data_dir, "growth_model_interpolated")
  # TIER 5: MOSAICS (Script 40 output)
  weekly_mosaic_dir <- here(laravel_storage_dir, "weekly_mosaic")
  # TIER 6: KPI & REPORTING (Scripts 80/90/91 output)
  reports_dir <- here(laravel_storage_dir, "reports")
  kpi_reports_dir <- here(reports_dir, "kpis", "field_level")
  kpi_field_stats_dir <- here(reports_dir, "kpis", "field_stats")
  kpi_field_analysis_dir <- here(reports_dir, "kpis", "field_analysis")
  # TIER 7: SUPPORT (various scripts)
  vrt_dir <- here(data_dir, "vrt") # Virtual Raster files created during CI extraction
  harvest_dir <- here(data_dir, "harvest") # Harvest data directory
  log_dir <- here(laravel_storage_dir, "logs")
  # Create every directory up front so downstream scripts can write blindly.
  # dir.create() with showWarnings = FALSE is a no-op for existing dirs.
  all_dirs <- c(
    merged_tif_folder, field_tiles_dir, field_tiles_ci_dir, daily_tiles_split_dir,
    extracted_ci_base_dir, daily_ci_vals_dir, cumulative_ci_vals_dir, ci_for_python_dir,
    growth_model_interpolated_dir,
    weekly_mosaic_dir,
    reports_dir, kpi_reports_dir, kpi_field_stats_dir, kpi_field_analysis_dir,
    data_dir, vrt_dir, harvest_dir, log_dir
  )
  for (dir_path in all_dirs) {
    dir.create(dir_path, showWarnings = FALSE, recursive = TRUE)
  }
  # TIER 8: CONFIG & METADATA PATHS (file path only -- never created here)
  field_boundaries_path <- here(data_dir, "pivot.geojson")
  # Return comprehensive list of all paths, grouped by tier
  return(list(
    # PROJECT ROOT
    laravel_storage_dir = laravel_storage_dir,
    # TIER 1: Raw data
    merged_tif_folder = merged_tif_folder,
    # TIER 2: Per-field TIFFs
    field_tiles_dir = field_tiles_dir,
    field_tiles_ci_dir = field_tiles_ci_dir,
    daily_tiles_split_dir = daily_tiles_split_dir,
    # TIER 3: CI extraction
    extracted_ci_base_dir = extracted_ci_base_dir,
    daily_ci_vals_dir = daily_ci_vals_dir,
    cumulative_ci_vals_dir = cumulative_ci_vals_dir,
    ci_for_python_dir = ci_for_python_dir,
    # TIER 4: Growth model
    growth_model_interpolated_dir = growth_model_interpolated_dir,
    # TIER 5: Mosaics
    weekly_mosaic_dir = weekly_mosaic_dir,
    # TIER 6: KPI & reporting
    reports_dir = reports_dir,
    kpi_reports_dir = kpi_reports_dir,
    kpi_field_stats_dir = kpi_field_stats_dir,
    kpi_field_analysis_dir = kpi_field_analysis_dir,
    # TIER 7: Support
    data_dir = data_dir,
    vrt_dir = vrt_dir,
    harvest_dir = harvest_dir,
    log_dir = log_dir,
    # TIER 8: Metadata
    field_boundaries_path = field_boundaries_path
  ))
}
# ==============================================================================
# SECTION 4: DATE/WEEK UTILITY FUNCTIONS
# ==============================================================================
# ISO 8601 week/year functions for consistent date handling across scripts
#' Extract ISO 8601 week number from a date
#'
#' @param date Date object or string convertible to Date (e.g., "2025-01-15")
#' @return Numeric ISO week number (1-53)
get_iso_week <- function(date) {
  # Coerce first so the documented "string convertible to Date" contract
  # holds: format() on a character vector ignores "%V" and returns the
  # string unchanged, which yields NA after as.numeric().
  as.numeric(format(as.Date(date), "%V"))
}
#' Extract ISO 8601 week-based year from a date
#'
#' Note: the ISO year differs from the calendar year near year boundaries
#' (e.g., 2021-01-01 belongs to ISO year 2020, week 53).
#'
#' @param date Date object or string convertible to Date
#' @return Numeric ISO year
get_iso_year <- function(date) {
  # Coerce first so string input works (format() on character ignores "%G").
  as.numeric(format(as.Date(date), "%G"))
}
#' Extract both ISO week and year as a list
#'
#' @param date Date object or string convertible to Date
#' @return List with elements: week (numeric, 1-53), year (numeric ISO year)
get_iso_week_year <- function(date) {
  # Coerce once so string input works; format() on a character vector would
  # ignore the "%V"/"%G" specifiers and produce NA after as.numeric().
  d <- as.Date(date)
  list(
    week = as.numeric(format(d, "%V")),
    year = as.numeric(format(d, "%G"))
  )
}
#' Format date as a week/year label
#'
#' @param date Date object or string convertible to Date
#' @param separator Character. Separator between week and year (default "_")
#' @return Character in format "week##_YYYY" (e.g., "week03_2025")
format_week_label <- function(date, separator = "_") {
  # as.Date() here lets string input work even though get_iso_week_year()
  # historically only accepted Date objects (as.Date on a Date is a no-op).
  wwy <- get_iso_week_year(as.Date(date))
  sprintf("week%02d%s%d", wwy$week, separator, wwy$year)
}
# ==============================================================================
# SECTION 5: FIELD BOUNDARY & HARVEST DATA LOADERS
# ==============================================================================
# IMPORTANT: These functions are also defined in 00_common_utils.R
# to avoid duplication. Source 00_common_utils.R AFTER parameters_project.R
# to override these stub definitions with the full implementations.
#' Load Field Boundaries from GeoJSON
#'
#' Loads field polygon geometries from Data/pivot.geojson, drops features
#' with empty geometries, repairs invalid geometries, and converts the
#' result to a terra SpatVector.
#'
#' @param data_dir Directory containing the GeoJSON file
#' @return List with elements:
#'   - field_boundaries_sf: sf (Simple Features) object
#'   - field_boundaries: terra SpatVector built from the sf object
#'
load_field_boundaries <- function(data_dir) {
  field_boundaries_path <- file.path(data_dir, "pivot.geojson")
  if (!file.exists(field_boundaries_path)) {
    # BUG FIX: stop() concatenates its arguments with no separator, so the
    # original message rendered as "...at:/path" -- add the explicit space.
    stop("Field boundaries file not found at: ", field_boundaries_path)
  }
  tryCatch({
    boundaries_sf <- sf::st_read(field_boundaries_path, quiet = TRUE)
    # Drop features with empty geometries (sf:: qualified for consistency
    # with the other sf calls in this function; the original used a bare
    # st_is_empty)
    boundaries_sf <- boundaries_sf[!sf::st_is_empty(boundaries_sf), ]
    # Repair geometries only when at least one is invalid
    if (!all(sf::st_is_valid(boundaries_sf))) {
      boundaries_sf <- sf::st_make_valid(boundaries_sf)
    }
    # Convert to terra SpatVector for raster extraction workflows
    boundaries_spat <- terra::vect(boundaries_sf)
    return(list(
      field_boundaries_sf = boundaries_sf,
      field_boundaries = boundaries_spat
    ))
  }, error = function(e) {
    stop("Error loading field boundaries: ", e$message)
  })
}
#' Create Date List
#'
#' Builds the sequence of calendar days from (end_date - offset) through
#' end_date, inclusive, as "YYYY-MM-DD" strings.
#'
#' @param end_date End date (Date object)
#' @param offset Number of days to go back
#' @return Character vector of dates in YYYY-MM-DD format
date_list <- function(end_date, offset) {
  format(seq(end_date - offset, end_date, by = "day"), "%Y-%m-%d")
}
#' Repair GeoJSON Geometries
#'
#' Runs sf::st_make_valid() on the object only when at least one geometry
#' fails validation; otherwise returns the input untouched.
#'
#' @param sf_object sf object with potentially invalid geometries
#' @return sf object with repaired geometries
repair_geojson_geometries <- function(sf_object) {
  all_valid <- all(sf::st_is_valid(sf_object))
  if (all_valid) {
    sf_object
  } else {
    sf::st_make_valid(sf_object)
  }
}
#' Load harvesting data from Excel
#'
#' Loads the crop harvest schedule from harvest.xlsx. Date columns may arrive
#' as Excel serial numbers, Date/POSIXct values, or strings in several common
#' formats; all are normalized to Date. Future or missing season_end values
#' are capped at today, and `age` is recomputed in whole weeks.
#'
#' @param data_dir Directory containing harvest.xlsx
#' @return Data frame with columns: field, sub_field, year, season_start,
#'   season_end, age (weeks), sub_area, tonnage_ha. Returns NULL (with a
#'   warning) if the file is missing or unreadable.
load_harvesting_data <- function(data_dir) {
  harvest_file <- file.path(data_dir, "harvest.xlsx")
  if (!file.exists(harvest_file)) {
    warning(paste("Harvest data file not found at path:", harvest_file))
    return(NULL)
  }
  # Parse a single scalar date value of unknown type/format into a Date.
  parse_flexible_date <- function(x) {
    # BUG FIX: is.null() must be tested before is.na() -- is.na(NULL) yields
    # logical(0), which makes `if (is.na(x) || ...)` error on NULL input.
    if (is.null(x) || length(x) == 0 || is.na(x)) return(as.Date(NA))
    if (inherits(x, "Date")) return(x)
    if (inherits(x, "POSIXct")) return(as.Date(x))
    if (is.numeric(x)) {
      # Excel serial day number (Windows "1900 date system" origin)
      return(as.Date(x, origin = "1899-12-30"))
    }
    x_char <- as.character(x)
    formats <- c("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y-%m-%d %H:%M:%S")
    for (fmt in formats) {
      result <- suppressWarnings(as.Date(x_char, format = fmt))
      if (!is.na(result)) return(result)
    }
    as.Date(NA)
  }
  tryCatch({
    harvesting_data <- read_excel(harvest_file) %>%
      dplyr::select(c(
        "field", "sub_field", "year",
        "season_start", "season_end", "age", "sub_area", "tonnage_ha"
      )) %>%
      mutate(
        field = as.character(field),
        sub_field = as.character(sub_field),
        year = as.numeric(year),
        # sapply() unlists Dates to numeric day counts; restore the Date
        # class explicitly with the epoch origin afterwards.
        season_start = sapply(season_start, parse_flexible_date),
        season_end = sapply(season_end, parse_flexible_date),
        season_start = as.Date(season_start, origin = "1970-01-01"),
        season_end = as.Date(season_end, origin = "1970-01-01"),
        age = as.numeric(age),
        sub_area = as.character(sub_area),
        tonnage_ha = as.numeric(tonnage_ha)
      ) %>%
      mutate(
        # Future or missing season_end means the season is still running:
        # cap at today so the recomputed age reflects elapsed time only.
        season_end = case_when(
          season_end > Sys.Date() ~ Sys.Date(),
          is.na(season_end) ~ Sys.Date(),
          TRUE ~ season_end
        ),
        # Age in whole weeks between season start and (capped) season end
        age = round(as.numeric(season_end - season_start) / 7, 0)
      )
    return(harvesting_data)
  }, error = function(e) {
    warning(paste("Error loading harvesting data:", e$message))
    return(NULL)
  })
}
# ==============================================================================
# SECTION 6: LOGGING SYSTEM SETUP
# ==============================================================================
# Create default logging functions
#' Safe Logging Function
#'
#' Writes "[LEVEL] message" to stdout. Works standalone without any logging
#' framework; consistent with the SmartCane logging standard.
#'
#' @param message The message to log
#' @param level The log level (default: "INFO"). Options: "INFO", "WARNING", "ERROR", "DEBUG"
#' @return NULL (used for its printing side effect)
#'
safe_log <- function(message, level = "INFO") {
  cat(sprintf("[%s] %s\n", level, message))
}
#' SmartCane Debug Logging (Conditional)
#'
#' Emits a DEBUG-level message via safe_log(), but only when `verbose` is
#' TRUE or the SMARTCANE_DEBUG environment variable equals "TRUE".
#'
#' @param message The message to log
#' @param verbose Whether to output regardless of SMARTCANE_DEBUG (default: FALSE)
#' @return NULL (invisible when suppressed; used for side effects)
#'
smartcane_debug <- function(message, verbose = FALSE) {
  debug_enabled <- verbose || Sys.getenv("SMARTCANE_DEBUG") == "TRUE"
  if (debug_enabled) {
    safe_log(message, level = "DEBUG")
  } else {
    invisible(NULL)
  }
}
#' SmartCane Warning Logging
#'
#' Emits a WARN-level message. Thin convenience wrapper around safe_log().
#'
#' @param message The message to log
#' @return NULL (used for side effects)
#'
smartcane_warn <- function(message) {
  safe_log(level = "WARN", message = message)
}
# Default console-only logger: prints "[LEVEL] message" to stdout.
# Overwritten in the global environment by setup_logging(), which installs a
# version that also writes to a timestamped log file.
log_message <- function(message, level = "INFO") {
  cat(sprintf("[%s] %s\n", level, message))
}
# Default structure preview logger: logs str() of head(data) via log_message().
# Overwritten by setup_logging() with a file-writing version.
log_head <- function(data, level = "INFO") {
  preview <- capture.output(str(head(data)))
  log_message(paste(preview, collapse = "\n"), level)
}
#' Setup full logging system with file output
#'
#' Creates the log directory, opens a timestamped log file, and builds
#' logging closures that write to both console and file. The closures are
#' also installed into the global environment so downstream scripts can call
#' log_message()/log_head() directly.
#'
#' @param log_dir Character. Directory for log files
#' @return List with log_file path, log_message function, and log_head function
setup_logging <- function(log_dir) {
  dir.create(log_dir, showWarnings = FALSE, recursive = TRUE)
  run_stamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
  log_file <- file.path(log_dir, sprintf("smartcane_%s.log", run_stamp))
  # Console + file logger; timestamps each line at write time
  log_message <- function(message, level = "INFO") {
    now <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
    formatted_message <- sprintf("[%s] [%s] %s", level, now, message)
    cat(formatted_message, "\n")
    cat(formatted_message, "\n", file = log_file, append = TRUE)
  }
  # Logs a str() preview of the head of any object
  log_head <- function(list, level = "INFO") {
    log_message(paste(capture.output(str(head(list))), collapse = "\n"), level)
  }
  # Deliberate global side effect: export the closures so scripts sourced
  # later can log without threading the returned list through.
  assign("log_message", log_message, envir = .GlobalEnv)
  assign("log_head", log_head, envir = .GlobalEnv)
  list(
    log_file = log_file,
    log_message = log_message,
    log_head = log_head
  )
}
# ==============================================================================
# SECTION 6B: DATA SOURCE DETECTION
# ==============================================================================
#' Detect data source for project
#'
#' The pipeline standardizes on the "merged_tif" layout, so this always
#' returns "merged_tif" regardless of project.
#'
#' @param project_dir Character. Project name (unused)
#' @return Character. "merged_tif"
detect_data_source <- function(project_dir) {
  "merged_tif"
}
#' Detect tile structure from merged TIF directory
#'
#' Checks if tiles exist by looking for, in order of reliability:
#' 1. tiling_config.json metadata file written by script 10
#' 2. Grid subdirectories (5x5, 10x10, etc.)
#' 3. Tile-named files (*_XX.tif pattern)
#'
#' @param merged_final_tif_dir Character. Path to merged TIF directory
#' @param daily_tiles_split_dir Character. Optional path to tiles directory
#' @return List with has_tiles (logical), detected_tiles, total_files, source,
#'   and (for config/grid detection) grid_size
detect_tile_structure_from_merged_final <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) {
  # PRIORITY 1: tiling_config.json metadata written by script 10
  if (!is.null(daily_tiles_split_dir) && dir.exists(daily_tiles_split_dir)) {
    config_files <- list.files(daily_tiles_split_dir,
                               pattern = "tiling_config\\.json$",
                               recursive = TRUE,
                               full.names = TRUE)
    if (length(config_files) > 0) {
      # Several configs may exist; trust the most recently modified one
      config_file <- config_files[which.max(file.info(config_files)$mtime)]
      tryCatch({
        config_json <- jsonlite::read_json(config_file)
        # ROBUSTNESS FIX: avoid `%||%`, which only exists in base R >= 4.4
        # (or via rlang) -- neither is guaranteed by this file's imports.
        has_tiles <- if (is.null(config_json$has_tiles)) TRUE else config_json$has_tiles
        grid_size <- if (is.null(config_json$grid_size)) "unknown" else config_json$grid_size
        return(list(
          has_tiles = has_tiles,
          detected_tiles = character(),
          total_files = 0,
          source = "tiling_config.json",
          grid_size = grid_size
        ))
      }, error = function(e) {
        # A corrupt/unreadable config falls through to file-based detection
        warning("Error reading tiling_config.json: ", e$message)
      })
    }
  }
  # PRIORITY 2: file-based detection (fallback)
  if (!dir.exists(merged_final_tif_dir)) {
    return(list(
      has_tiles = FALSE,
      detected_tiles = character(),
      total_files = 0,
      source = "directory_not_found"
    ))
  }
  # Grid-size subdirectories (5x5, 10x10, etc.)
  grid_subfolders <- list.dirs(merged_final_tif_dir, full.names = FALSE, recursive = FALSE)
  grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE)
  if (length(grid_patterns) > 0) {
    grid_size <- grid_patterns[1]
    grid_dir <- file.path(merged_final_tif_dir, grid_size)
    grid_tifs <- list.files(grid_dir, pattern = "\\.tif$", recursive = TRUE)
    return(list(
      has_tiles = TRUE,
      # BUG FIX: the original used `[1:3]`, which pads with NA when fewer
      # than three files exist, and then reported length(sample) (always 3)
      # as total_files instead of the true count.
      detected_tiles = head(grid_tifs, 3),
      total_files = length(grid_tifs),
      source = "grid_subdirectory_detection",
      grid_size = grid_size,
      grid_path = grid_dir
    ))
  }
  # Tile-named files (*_XX.tif pattern)
  tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE)
  if (length(tif_files) == 0) {
    return(list(
      has_tiles = FALSE,
      detected_tiles = character(),
      total_files = 0,
      source = "no_files_found"
    ))
  }
  tile_pattern <- "_(\\d{2})\\.tif$"
  tile_files <- tif_files[grepl(tile_pattern, tif_files)]
  list(
    has_tiles = length(tile_files) > 0,
    detected_tiles = tile_files,
    total_files = length(tif_files),
    source = "file_pattern_detection"
  )
}
# ==============================================================================
# SECTION 7: MOSAIC & KPI VERIFICATION HELPERS
# ==============================================================================
# Centralized helper functions for run_full_pipeline.R to avoid hardcoding paths
#' Detect mosaic mode from project structure
#'
#' Per-field architecture is standard, so this returns "single-file" unless
#' a non-empty legacy weekly_tile_max directory exists for the project.
#'
#' @param project_dir Character. Project name
#' @return Character. "tiled" or "single-file"
detect_mosaic_mode <- function(project_dir) {
  legacy_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max")
  legacy_present <- dir.exists(legacy_dir) && length(list.files(legacy_dir)) > 0
  if (legacy_present) "tiled" else "single-file"
}
#' Detect grid size from tile directory structure
#'
#' Grid-based tiling is legacy; the per-field architecture has no grid
#' dimension, so this always reports "unknown".
#'
#' @param project_dir Character. Project name (unused)
#' @return Character. Grid size ("unknown" for per-field)
detect_grid_size <- function(project_dir) {
  "unknown"
}
#' Get project storage path
#'
#' Builds the relative Laravel storage path for a project, optionally with a
#' subdirectory appended.
#'
#' @param project_dir Character. Project name
#' @param subdir Character. Optional subdirectory (default NULL)
#' @return Character. Full path
get_project_storage_path <- function(project_dir, subdir = NULL) {
  base <- file.path("laravel_app", "storage", "app", project_dir)
  if (is.null(subdir)) base else file.path(base, subdir)
}
#' Get mosaic directory
#'
#' Resolves "auto" via detect_mosaic_mode(), then maps the mode to its
#' storage subdirectory ("weekly_tile_max" for tiled, "weekly_mosaic"
#' otherwise).
#'
#' @param project_dir Character. Project name
#' @param mosaic_mode Character. "tiled", "single-file", or "auto" (default)
#' @return Character. Full path to mosaic directory
get_mosaic_dir <- function(project_dir, mosaic_mode = "auto") {
  resolved_mode <- if (mosaic_mode == "auto") detect_mosaic_mode(project_dir) else mosaic_mode
  subdir <- if (resolved_mode == "tiled") "weekly_tile_max" else "weekly_mosaic"
  get_project_storage_path(project_dir, subdir)
}
#' Get KPI directory based on client type
#'
#' agronomic_support clients use reports/kpis/field_level; every other
#' client type uses reports/kpis/field_analysis.
#'
#' @param project_dir Character. Project name
#' @param client_type Character. Client type
#' @return Character. Full path to KPI directory
get_kpi_dir <- function(project_dir, client_type) {
  base_path <- get_project_storage_path(project_dir, "reports/kpis")
  leaf <- if (client_type == "agronomic_support") "field_level" else "field_analysis"
  file.path(base_path, leaf)
}
#' Get expected output path for harvest imminent file
#'
#' Builds the path to the weekly
#' "<project>_harvest_imminent_week_WW_YYYY.csv" file under
#' reports/kpis/field_stats.
#'
#' @param project_dir Character. Project name
#' @param week_num Integer. ISO week number
#' @param year_num Integer. Year
#' @return Character. Full path to expected harvest imminent CSV file
get_harvest_output_path <- function(project_dir, week_num, year_num) {
  file_name <- sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, week_num, year_num)
  stats_dir <- file.path(
    "laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats"
  )
  file.path(stats_dir, file_name)
}
#' Check if harvest output file exists for a specific week
#'
#' @param project_dir Character. Project name
#' @param week_num Integer. ISO week number
#' @param year_num Integer. Year
#' @return Logical. TRUE if the expected harvest imminent CSV exists
check_harvest_output_exists <- function(project_dir, week_num, year_num) {
  file.exists(get_harvest_output_path(project_dir, week_num, year_num))
}
#' Get mosaic verification directory
#'
#' Maps the mosaic mode to its storage subdirectory under the project root:
#' "weekly_tile_max" for tiled, "weekly_mosaic" otherwise.
#'
#' @param project_dir Character. Project name
#' @param mosaic_mode Character. "tiled" or "single-file"
#' @return Character. Full path to mosaic directory
get_mosaic_verification_dir <- function(project_dir, mosaic_mode) {
  subdir <- if (mosaic_mode == "tiled") "weekly_tile_max" else "weekly_mosaic"
  file.path("laravel_app", "storage", "app", project_dir, subdir)
}
#' Check if mosaic files exist for a specific week
#'
#' Searches the mosaic directory (recursively) for files matching
#' "week_WW_YYYY.tif".
#'
#' @param project_dir Character. Project name
#' @param week_num Integer. ISO week number
#' @param year_num Integer. Year
#' @param mosaic_mode Character. "tiled" or "single-file"
#' @return List with created (logical), file_count (int), sample_files (char vector)
check_mosaic_exists <- function(project_dir, week_num, year_num, mosaic_mode) {
  mosaic_dir <- get_mosaic_verification_dir(project_dir, mosaic_mode)
  if (!dir.exists(mosaic_dir)) {
    return(list(created = FALSE, file_count = 0, sample_files = character()))
  }
  pattern <- sprintf("week_%02d_%d\\.tif", week_num, year_num)
  matches <- list.files(mosaic_dir, pattern = pattern, recursive = TRUE, full.names = FALSE)
  list(
    created = length(matches) > 0,
    file_count = length(matches),
    sample_files = head(matches, 3)
  )
}
#' Check KPI completeness for reporting window
#'
#' For each of the last `reporting_weeks` weeks (stepping back from
#' `end_date`), checks whether any KPI file named "week_WW_YYYY*" exists in
#' the client-type-specific KPI directory. Creates the KPI directory if it
#' does not exist yet.
#'
#' @param project_dir Character. Project name
#' @param client_type Character. Client type
#' @param end_date Date. End date of reporting window
#' @param reporting_weeks Integer. Number of weeks to check (>= 1)
#' @return List with kpi_dir, kpis_df (one row per week: week, year, date,
#'   has_kpis, file_count), missing_count
check_kpi_completeness <- function(project_dir, client_type, end_date, reporting_weeks = 1) {
  kpi_dir <- get_kpi_dir(project_dir, client_type)
  if (!dir.exists(kpi_dir)) {
    dir.create(kpi_dir, showWarnings = FALSE, recursive = TRUE)
  }
  # IDIOM FIX: build one row per week, then bind once -- the original grew a
  # data.frame with rbind() inside the loop.
  weekly_rows <- lapply(0:(reporting_weeks - 1), function(weeks_back) {
    target_week <- end_date - lubridate::weeks(weeks_back)
    wwy <- get_iso_week_year(target_week)
    # Any file whose name contains "week_WW_YYYY" counts as a KPI for the week
    kpi_pattern <- sprintf("week_%02d_%d", wwy$week, wwy$year)
    kpi_files <- list.files(kpi_dir, pattern = kpi_pattern)
    data.frame(
      week = wwy$week,
      year = wwy$year,
      date = target_week,
      has_kpis = length(kpi_files) > 0,
      file_count = length(kpi_files)
    )
  })
  kpis_df <- do.call(rbind, weekly_rows)
  list(
    kpi_dir = kpi_dir,
    kpis_df = kpis_df,
    missing_count = sum(!kpis_df$has_kpis)
  )
}
# ==============================================================================
# SECTION 8: PROJECT INITIALIZATION
# ==============================================================================
#' Initialize the project
#'
#' Creates the full directory structure, wires up file logging, and resolves
#' the project's client type.
#'
#' @param project_dir Character. Project name
#' @param data_source Character. "merged_tif" or "merged_tif_8b"
#' @return List with dirs (all paths), logging (log file + functions),
#'   project_dir, and client_type
initialize_project <- function(project_dir, data_source = "merged_tif") {
  dirs <- setup_project_directories(project_dir, data_source = data_source)
  list(
    dirs = dirs,
    logging = setup_logging(dirs$log_dir),
    project_dir = project_dir,
    client_type = get_client_type(project_dir)
  )
}
# ==============================================================================
# AUTO-INITIALIZATION: Set project_dir global if not already set
# ==============================================================================
# NOTE(review): despite the header, this does NOT provide a default. It
# promotes an existing `project_dir` (e.g., defined by the sourcing script)
# into the global environment; when the variable is absent it only warns.
# Confirm this "promote or warn" contract is intended.
if (exists("project_dir")) {
assign("project_dir", project_dir, envir = .GlobalEnv)
} else {
warning("project_dir variable not found. Please set project_dir before sourcing parameters_project.R")
}
# ==============================================================================
# END PARAMETERS_PROJECT_2.R
# ==============================================================================