Merge remote-tracking branch 'origin/code-improvements' into translation
This commit is contained in:
commit
24fd5bb8b3
20
.gitignore
vendored
20
.gitignore
vendored
|
|
@ -9,6 +9,14 @@
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.pyc
|
*.pyc
|
||||||
*.pyo
|
*.pyo
|
||||||
|
*.py[cod]
|
||||||
|
*.py[cod]
|
||||||
|
myenv/myenv/
|
||||||
|
pip-wheel-metadata/
|
||||||
|
dist/
|
||||||
|
*.egg-info/
|
||||||
|
*.egg/*.egg/
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
# R Output Files
|
# R Output Files
|
||||||
*.Rout
|
*.Rout
|
||||||
|
|
@ -16,6 +24,7 @@ __pycache__/
|
||||||
*.RData
|
*.RData
|
||||||
*.Rdata
|
*.Rdata
|
||||||
.Rproj.user
|
.Rproj.user
|
||||||
|
.Rprofile
|
||||||
Rplots.pdf
|
Rplots.pdf
|
||||||
*.pdf
|
*.pdf
|
||||||
|
|
||||||
|
|
@ -48,15 +57,24 @@ reports/
|
||||||
# Experiment Outputs (temporary plots, analysis artifacts)
|
# Experiment Outputs (temporary plots, analysis artifacts)
|
||||||
python_app/harvest_detection_experiments/*/plots/
|
python_app/harvest_detection_experiments/*/plots/
|
||||||
python_app/harvest_detection_experiments/*/*.ipynb_checkpoints/
|
python_app/harvest_detection_experiments/*/*.ipynb_checkpoints/
|
||||||
|
CI_report_dashboard_planet_files/
|
||||||
|
|
||||||
# Cache Files
|
# Cache Files
|
||||||
rosm.cache/
|
rosm.cache/
|
||||||
*.cache
|
renv/cache/
|
||||||
|
|
||||||
# Logs
|
# Logs
|
||||||
*.log
|
*.log
|
||||||
package_manager.log
|
package_manager.log
|
||||||
|
|
||||||
|
# Temporary Files
|
||||||
|
*.tmp
|
||||||
|
*.save
|
||||||
|
*.bak
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
package_manager.log
|
||||||
|
|
||||||
# Laravel Storage (contains user data and outputs)
|
# Laravel Storage (contains user data and outputs)
|
||||||
laravel_app/storage/app/*/Data/
|
laravel_app/storage/app/*/Data/
|
||||||
laravel_app/storage/app/*/reports/
|
laravel_app/storage/app/*/reports/
|
||||||
|
|
|
||||||
|
|
@ -5,9 +5,12 @@ laravel_app/
|
||||||
data_validation_tool/
|
data_validation_tool/
|
||||||
python_app/harvest_detection_experiments/
|
python_app/harvest_detection_experiments/
|
||||||
python_app/experiments/
|
python_app/experiments/
|
||||||
|
r_app/old_scripts/
|
||||||
|
r_app/experiments/
|
||||||
phase2_refinement/
|
phase2_refinement/
|
||||||
webapps/
|
webapps/
|
||||||
tools/
|
tools/
|
||||||
|
old_sh/
|
||||||
output/
|
output/
|
||||||
renv/
|
renv/
|
||||||
*.py
|
*.py
|
||||||
|
|
|
||||||
43
python_app/.gitignore
vendored
43
python_app/.gitignore
vendored
|
|
@ -1,43 +0,0 @@
|
||||||
# .gitignore
|
|
||||||
|
|
||||||
# Ignore the virtual environment directory
|
|
||||||
myenv/
|
|
||||||
|
|
||||||
# Byte-compiled / optimized files
|
|
||||||
__pycache__/
|
|
||||||
*.py[cod]
|
|
||||||
*$py.class
|
|
||||||
|
|
||||||
# Ignore all .pyc files in any directory
|
|
||||||
**/*.pyc
|
|
||||||
|
|
||||||
# Ignore pip wheel metadata
|
|
||||||
pip-wheel-metadata/
|
|
||||||
|
|
||||||
# Ignore the `dist/` directory used for package distributions
|
|
||||||
dist/
|
|
||||||
|
|
||||||
# Ignore Python egg metadata, regenerated from source files by setuptools
|
|
||||||
*.egg-info/
|
|
||||||
*.egg-info/*
|
|
||||||
*.egg/
|
|
||||||
|
|
||||||
# Ignore Jupyter Notebook
|
|
||||||
.ipynb_checkpoints
|
|
||||||
|
|
||||||
# Ignore VSCode directory
|
|
||||||
.vscode/
|
|
||||||
|
|
||||||
# Ignore JetBrains IDEs directory
|
|
||||||
.idea/
|
|
||||||
|
|
||||||
# Ignore MacOS directory
|
|
||||||
.DS_Store
|
|
||||||
|
|
||||||
# Ignore other unwanted file
|
|
||||||
*.log
|
|
||||||
*.bak
|
|
||||||
*.swp
|
|
||||||
*.swo
|
|
||||||
*.swp
|
|
||||||
|
|
||||||
|
|
@ -111,7 +111,7 @@ def main():
|
||||||
|
|
||||||
# [3/4] Run model predictions with two-step detection
|
# [3/4] Run model predictions with two-step detection
|
||||||
print("\n[3/4] Running two-step harvest detection...")
|
print("\n[3/4] Running two-step harvest detection...")
|
||||||
print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DOY reset)")
|
print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DAH reset)")
|
||||||
refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device,
|
refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device,
|
||||||
phase1_threshold=0.3, phase1_consecutive=2)
|
phase1_threshold=0.3, phase1_consecutive=2)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -144,7 +144,7 @@ def create_model(model_type: str, input_size: int, hidden_size: int = 128,
|
||||||
# FEATURE ENGINEERING (from src/feature_engineering.py, simplified for inline)
|
# FEATURE ENGINEERING (from src/feature_engineering.py, simplified for inline)
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> pd.DataFrame:
|
def compute_ci_features(ci_series: pd.Series, dah_series: pd.Series = None) -> pd.DataFrame:
|
||||||
"""Compute all CI-based features (state, velocity, acceleration, min/max/range/std/CV)."""
|
"""Compute all CI-based features (state, velocity, acceleration, min/max/range/std/CV)."""
|
||||||
features = pd.DataFrame(index=ci_series.index)
|
features = pd.DataFrame(index=ci_series.index)
|
||||||
|
|
||||||
|
|
@ -177,9 +177,9 @@ def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> p
|
||||||
ma = ci_series.rolling(window=window, min_periods=1).mean()
|
ma = ci_series.rolling(window=window, min_periods=1).mean()
|
||||||
features[f'{window}d_CV'] = features[f'{window}d_std'] / (ma + 1e-6)
|
features[f'{window}d_CV'] = features[f'{window}d_std'] / (ma + 1e-6)
|
||||||
|
|
||||||
# DOY normalized
|
# DAH normalized (Days After Harvest)
|
||||||
if doy_series is not None:
|
if dah_series is not None:
|
||||||
features['DOY_normalized'] = doy_series / 450.0
|
features['DAH_normalized'] = dah_series / 450.0
|
||||||
|
|
||||||
return features.fillna(0)
|
return features.fillna(0)
|
||||||
|
|
||||||
|
|
@ -193,8 +193,8 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column:
|
||||||
data_df: DataFrame with Date and CI data (may be a window after a harvest)
|
data_df: DataFrame with Date and CI data (may be a window after a harvest)
|
||||||
feature_names: List of feature names to extract
|
feature_names: List of feature names to extract
|
||||||
ci_column: Name of CI column
|
ci_column: Name of CI column
|
||||||
season_anchor_day: Day in FULL sequence where this season started (for DOY reset)
|
season_anchor_day: Day in FULL sequence where this season started (for DAH reset)
|
||||||
DOY will be recalculated as: 1, 2, 3, ... from this point
|
DAH will be recalculated as: 1, 2, 3, ... from this point
|
||||||
lookback_start: Starting index in original full data (for season reset calculation)
|
lookback_start: Starting index in original full data (for season reset calculation)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
|
|
@ -203,23 +203,23 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column:
|
||||||
# Compute all CI features
|
# Compute all CI features
|
||||||
ci_series = data_df[ci_column].astype(float)
|
ci_series = data_df[ci_column].astype(float)
|
||||||
|
|
||||||
# Compute DOY (age/days since season start) - NOT day-of-year!
|
# Compute DAH (age/days since season start) - NOT day-of-year!
|
||||||
# DOY is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365)
|
# DAH is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365)
|
||||||
# It only resets to 1 after a harvest is detected (new season)
|
# It only resets to 1 after a harvest is detected (new season)
|
||||||
doy_series = None
|
dah_series = None
|
||||||
if 'DOY_normalized' in feature_names:
|
if 'DAH_normalized' in feature_names:
|
||||||
if season_anchor_day is not None and lookback_start >= season_anchor_day:
|
if season_anchor_day is not None and lookback_start >= season_anchor_day:
|
||||||
# Season was reset after harvest. Recalculate DOY as simple counter from 1
|
# Season was reset after harvest. Recalculate DAH as simple counter from 1
|
||||||
# This is a window starting at or after harvest, so DOY should be: 1, 2, 3, ...
|
# This is a window starting at or after harvest, so DAH should be: 1, 2, 3, ...
|
||||||
doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
|
dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
|
||||||
elif 'DOY' in data_df.columns:
|
elif 'DAH' in data_df.columns:
|
||||||
# Use DOY directly from CSV - already calculated as continuous age counter
|
# Use DAH directly from CSV - already calculated as continuous age counter
|
||||||
doy_series = pd.Series(data_df['DOY'].astype(float).values, index=data_df.index)
|
dah_series = pd.Series(data_df['DAH'].astype(float).values, index=data_df.index)
|
||||||
else:
|
else:
|
||||||
# Fallback: create continuous age counter (1, 2, 3, ...)
|
# Fallback: create continuous age counter (1, 2, 3, ...)
|
||||||
doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
|
dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
|
||||||
|
|
||||||
all_features = compute_ci_features(ci_series, doy_series)
|
all_features = compute_ci_features(ci_series, dah_series)
|
||||||
|
|
||||||
# Select requested features
|
# Select requested features
|
||||||
requested = [f for f in feature_names if f in all_features.columns]
|
requested = [f for f in feature_names if f in all_features.columns]
|
||||||
|
|
@ -303,9 +303,9 @@ def load_harvest_data(data_file: Path) -> pd.DataFrame:
|
||||||
def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device,
|
def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device,
|
||||||
threshold=0.45, consecutive_days=2):
|
threshold=0.45, consecutive_days=2):
|
||||||
"""
|
"""
|
||||||
Phase 1: Growing window detection with DOY season reset.
|
Phase 1: Growing window detection with DAH season reset.
|
||||||
|
|
||||||
For each detected harvest, reset DOY counter for the next season.
|
For each detected harvest, reset DAH counter for the next season.
|
||||||
This allows the model to detect multiple consecutive harvests in multi-year data.
|
This allows the model to detect multiple consecutive harvests in multi-year data.
|
||||||
"""
|
"""
|
||||||
harvest_dates = []
|
harvest_dates = []
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@
|
||||||
"7d_std",
|
"7d_std",
|
||||||
"14d_std",
|
"14d_std",
|
||||||
"21d_std",
|
"21d_std",
|
||||||
"DOY_normalized"
|
"DAH_normalized"
|
||||||
],
|
],
|
||||||
"model": {
|
"model": {
|
||||||
"type": "LSTM",
|
"type": "LSTM",
|
||||||
|
|
|
||||||
20
r_app/.gitignore
vendored
20
r_app/.gitignore
vendored
|
|
@ -1,20 +0,0 @@
|
||||||
# .gitignore
|
|
||||||
|
|
||||||
# Ignore renv binary cache
|
|
||||||
renv/cache/
|
|
||||||
renv
|
|
||||||
|
|
||||||
# Ignore temporary files
|
|
||||||
*.tmp
|
|
||||||
*.swp
|
|
||||||
*.save
|
|
||||||
|
|
||||||
# Ignore files related to Rproj
|
|
||||||
.Rproj.user/
|
|
||||||
.Rhistory
|
|
||||||
.RData
|
|
||||||
.Rprofile
|
|
||||||
# Ignore OSX files
|
|
||||||
.DS_Store
|
|
||||||
CI_report_dashboard_planet_files/
|
|
||||||
|
|
||||||
|
|
@ -48,10 +48,30 @@
|
||||||
# #' safe_log("Check input file", "WARNING")
|
# #' safe_log("Check input file", "WARNING")
|
||||||
# #' safe_log("Failed to load data", "ERROR")
|
# #' safe_log("Failed to load data", "ERROR")
|
||||||
# #'
|
# #'
|
||||||
# safe_log <- function(message, level = "INFO") {
|
safe_log <- function(message, level = "INFO") {
|
||||||
# prefix <- sprintf("[%s]", level)
|
# Build the full log message with timestamp
|
||||||
# cat(sprintf("%s %s\n", prefix, message))
|
timestamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
|
||||||
# }
|
log_msg <- sprintf("[%s] [%s] %s", timestamp, level, message)
|
||||||
|
|
||||||
|
# Only output to console if NOT rendering with knitr
|
||||||
|
if (!isTRUE(getOption('knitr.in.progress'))) {
|
||||||
|
cat(log_msg, "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Always write to log file if available
|
||||||
|
if (exists("LOG_FILE", envir = .GlobalEnv)) {
|
||||||
|
log_file <- get("LOG_FILE", envir = .GlobalEnv)
|
||||||
|
if (!is.null(log_file) && nzchar(log_file)) {
|
||||||
|
tryCatch({
|
||||||
|
cat(log_msg, "\n", file = log_file, append = TRUE)
|
||||||
|
}, error = function(e) {
|
||||||
|
# Silently fail if log file can't be written
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
invisible(log_msg)
|
||||||
|
}
|
||||||
|
|
||||||
# #' SmartCane Debug Logging (Conditional)
|
# #' SmartCane Debug Logging (Conditional)
|
||||||
# #'
|
# #'
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@
|
||||||
# OUTPUT DATA:
|
# OUTPUT DATA:
|
||||||
# - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/
|
# - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/
|
||||||
# - Format: CSV (long format)
|
# - Format: CSV (long format)
|
||||||
# - Columns: field, sub_field, Date, FitData, DOY, value
|
# - Columns: field, sub_field, Date, FitData, DAH, value
|
||||||
#
|
#
|
||||||
# USAGE:
|
# USAGE:
|
||||||
# Rscript 21_convert_ci_rds_to_csv.R [project]
|
# Rscript 21_convert_ci_rds_to_csv.R [project]
|
||||||
|
|
@ -38,7 +38,7 @@
|
||||||
# NOTES:
|
# NOTES:
|
||||||
# - Data source: Uses interpolated CI data from Script 30 (growth model output)
|
# - Data source: Uses interpolated CI data from Script 30 (growth model output)
|
||||||
# - Handles both wide format and long format inputs from growth model
|
# - Handles both wide format and long format inputs from growth model
|
||||||
# - DOY (Day of Year): Calculated from date for seasonal analysis
|
# - DAH (Days After Harvest): Calculated from date; represents crop age in days
|
||||||
# - Python integration: CSV format compatible with pandas/scikit-learn workflows
|
# - Python integration: CSV format compatible with pandas/scikit-learn workflows
|
||||||
# - Used by: Python harvest detection models (harvest_date_prediction.py)
|
# - Used by: Python harvest detection models (harvest_date_prediction.py)
|
||||||
# - Exports complete growth curves with interpolated values for ML training
|
# - Exports complete growth curves with interpolated values for ML training
|
||||||
|
|
@ -82,13 +82,13 @@ wide_to_long_ci_data <- function(ci_data_wide) {
|
||||||
filter(!is.na(FitData))
|
filter(!is.na(FitData))
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Create daily interpolated sequences with DOY for each field
|
#' Create daily interpolated sequences with DAH for each field
|
||||||
#'
|
#'
|
||||||
#' For each field/sub_field combination, creates complete daily sequences from first to last date,
|
#' For each field/sub_field combination, creates complete daily sequences from first to last date,
|
||||||
#' fills in measurements, and interpolates missing dates.
|
#' fills in measurements, and interpolates missing dates.
|
||||||
#'
|
#'
|
||||||
#' @param ci_data_long Long format tibble: field, sub_field, Date, FitData
|
#' @param ci_data_long Long format tibble: field, sub_field, Date, FitData
|
||||||
#' @return Tibble with: field, sub_field, Date, FitData, DOY, value
|
#' @return Tibble with: field, sub_field, Date, FitData, DAH, value
|
||||||
create_interpolated_daily_sequences <- function(ci_data_long) {
|
create_interpolated_daily_sequences <- function(ci_data_long) {
|
||||||
ci_data_long %>%
|
ci_data_long %>%
|
||||||
group_by(field, sub_field) %>%
|
group_by(field, sub_field) %>%
|
||||||
|
|
@ -106,7 +106,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) {
|
||||||
Date = date_seq,
|
Date = date_seq,
|
||||||
value = NA_real_,
|
value = NA_real_,
|
||||||
FitData = NA_real_,
|
FitData = NA_real_,
|
||||||
DOY = seq_along(date_seq) # Continuous day counter: 1, 2, 3, ...
|
DAH = seq_along(date_seq) # Continuous day counter: 1, 2, 3, ...
|
||||||
)
|
)
|
||||||
|
|
||||||
# Fill in actual measurement values
|
# Fill in actual measurement values
|
||||||
|
|
@ -124,7 +124,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) {
|
||||||
})
|
})
|
||||||
) %>%
|
) %>%
|
||||||
unnest(data) %>%
|
unnest(data) %>%
|
||||||
select(field, sub_field, Date, FitData, DOY, value) %>%
|
select(field, sub_field, Date, FitData, DAH, value) %>%
|
||||||
arrange(field, Date)
|
arrange(field, Date)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -208,7 +208,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season,
|
||||||
# Add additional columns
|
# Add additional columns
|
||||||
CI <- CI %>%
|
CI <- CI %>%
|
||||||
dplyr::mutate(
|
dplyr::mutate(
|
||||||
DOY = seq(1, n(), 1),
|
DAH = seq(1, n(), 1),
|
||||||
model = paste0("Data", season, " : ", field_name),
|
model = paste0("Data", season, " : ", field_name),
|
||||||
season = season,
|
season = season,
|
||||||
subField = field_name
|
subField = field_name
|
||||||
|
|
|
||||||
|
|
@ -141,12 +141,15 @@ suppressPackageStartupMessages({
|
||||||
library(writexl) # For writing Excel outputs (KPI summary tables)
|
library(writexl) # For writing Excel outputs (KPI summary tables)
|
||||||
library(progress) # For progress bars during field processing
|
library(progress) # For progress bars during field processing
|
||||||
|
|
||||||
# ML/Analysis (optional - only for harvest model inference)
|
# ML models (for yield prediction KPI)
|
||||||
tryCatch({
|
library(caret) # For training Random Forest with cross-validation
|
||||||
library(torch) # For PyTorch model inference (harvest readiness prediction)
|
library(CAST) # For Forward Feature Selection in caret models
|
||||||
}, error = function(e) {
|
|
||||||
message("Note: torch package not available - harvest model inference will be skipped")
|
# ML models (for yield prediction KPI)
|
||||||
})
|
library(caret) # For training Random Forest with cross-validation
|
||||||
|
library(CAST) # For Forward Feature Selection in caret models
|
||||||
|
|
||||||
|
|
||||||
})
|
})
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
@ -316,6 +319,9 @@ main <- function() {
|
||||||
# Use centralized paths from setup object (no need for file.path calls)
|
# Use centralized paths from setup object (no need for file.path calls)
|
||||||
weekly_mosaic <- setup$weekly_mosaic_dir
|
weekly_mosaic <- setup$weekly_mosaic_dir
|
||||||
daily_vals_dir <- setup$daily_ci_vals_dir
|
daily_vals_dir <- setup$daily_ci_vals_dir
|
||||||
|
reports_dir <- setup$kpi_reports_dir
|
||||||
|
data_dir <- setup$data_dir
|
||||||
|
|
||||||
|
|
||||||
tryCatch({
|
tryCatch({
|
||||||
source(here("r_app", "30_growth_model_utils.R"))
|
source(here("r_app", "30_growth_model_utils.R"))
|
||||||
|
|
@ -332,8 +338,8 @@ main <- function() {
|
||||||
message("WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)")
|
message("WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)")
|
||||||
message(strrep("=", 70))
|
message(strrep("=", 70))
|
||||||
|
|
||||||
# Prepare inputs for KPI calculation (already created by setup_project_directories)
|
# Prepare outputs and inputs for KPI calculation (already created by setup_project_directories)
|
||||||
reports_dir_kpi <- setup$kpi_reports_dir
|
reports_dir_kpi <- file.path(setup$reports_dir, "kpis")
|
||||||
cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir
|
cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir
|
||||||
|
|
||||||
# Load field boundaries for workflow (use data_dir from setup)
|
# Load field boundaries for workflow (use data_dir from setup)
|
||||||
|
|
@ -356,18 +362,15 @@ main <- function() {
|
||||||
stop("ERROR loading field boundaries: ", e$message)
|
stop("ERROR loading field boundaries: ", e$message)
|
||||||
})
|
})
|
||||||
|
|
||||||
# Load harvesting data
|
# Load harvesting data for yield prediction (using common helper function)
|
||||||
if (!exists("harvesting_data")) {
|
harvesting_data <- load_harvest_data(setup$data_dir)
|
||||||
warning("harvesting_data not loaded. TCH KPI will use placeholder values.")
|
|
||||||
harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric())
|
|
||||||
}
|
|
||||||
|
|
||||||
# Extract current week/year from end_date
|
# Extract current week/year from end_date
|
||||||
current_week <- as.numeric(format(end_date, "%V"))
|
current_week <- as.numeric(format(end_date, "%V"))
|
||||||
current_year <- as.numeric(format(end_date, "%G"))
|
current_year <- as.numeric(format(end_date, "%G"))
|
||||||
|
|
||||||
# Call with correct signature
|
# Call with correct signature
|
||||||
kpi_results <- calculate_all_kpis(
|
kpi_results <- calculate_all_field_analysis_agronomic_support(
|
||||||
field_boundaries_sf = field_boundaries_sf,
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
current_week = current_week,
|
current_week = current_week,
|
||||||
current_year = current_year,
|
current_year = current_year,
|
||||||
|
|
@ -376,8 +379,13 @@ main <- function() {
|
||||||
ci_rds_path = NULL,
|
ci_rds_path = NULL,
|
||||||
harvesting_data = harvesting_data,
|
harvesting_data = harvesting_data,
|
||||||
output_dir = reports_dir_kpi,
|
output_dir = reports_dir_kpi,
|
||||||
|
data_dir = setup$data_dir,
|
||||||
project_dir = project_dir
|
project_dir = project_dir
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Extract results
|
||||||
|
field_analysis_df <- kpi_results$field_analysis_df
|
||||||
|
export_paths <- kpi_results$export_paths
|
||||||
|
|
||||||
cat("\n=== KPI CALCULATION COMPLETE ===\n")
|
cat("\n=== KPI CALCULATION COMPLETE ===\n")
|
||||||
cat("Summary tables saved for Script 90 integration\n")
|
cat("Summary tables saved for Script 90 integration\n")
|
||||||
|
|
@ -389,599 +397,57 @@ main <- function() {
|
||||||
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
|
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
|
||||||
message(strrep("=", 70))
|
message(strrep("=", 70))
|
||||||
|
|
||||||
# Set reports_dir for CANE_SUPPLY workflow (used by export functions)
|
# Define variables needed for workflow functions
|
||||||
reports_dir <- setup$kpi_reports_dir
|
data_dir <- setup$data_dir
|
||||||
data_dir <- setup$data_dir
|
|
||||||
|
|
||||||
# Continue with existing per-field analysis code below
|
|
||||||
|
|
||||||
message("\n", strrep("-", 70))
|
|
||||||
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ")
|
|
||||||
message(strrep("-", 70))
|
|
||||||
|
|
||||||
weeks <- calculate_week_numbers(end_date)
|
|
||||||
current_week <- weeks$current_week
|
|
||||||
current_year <- weeks$current_year
|
|
||||||
previous_week <- weeks$previous_week
|
|
||||||
previous_year <- weeks$previous_year
|
|
||||||
|
|
||||||
message(paste("Week:", current_week, "/ Year (ISO 8601):", current_year))
|
|
||||||
# Find per-field weekly mosaics
|
|
||||||
message("Finding per-field weekly mosaics...")
|
|
||||||
|
|
||||||
if (!dir.exists(weekly_mosaic)) {
|
# Load field boundaries for workflow (use data_dir from setup)
|
||||||
stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic,
|
message("\nLoading field boundaries for KPI calculation...")
|
||||||
"\nScript 40 (mosaic creation) must be run first."))
|
|
||||||
}
|
|
||||||
|
|
||||||
field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE)
|
|
||||||
field_dirs <- field_dirs[field_dirs != ""]
|
|
||||||
|
|
||||||
if (length(field_dirs) == 0) {
|
|
||||||
stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic,
|
|
||||||
"\nScript 40 must create weekly_mosaic/{FIELD}/ structure."))
|
|
||||||
}
|
|
||||||
|
|
||||||
# Verify we have mosaics for this week
|
|
||||||
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year)
|
|
||||||
per_field_files <- c()
|
|
||||||
for (field in field_dirs) {
|
|
||||||
field_mosaic_dir <- file.path(weekly_mosaic, field)
|
|
||||||
files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
|
|
||||||
if (length(files) > 0) {
|
|
||||||
per_field_files <- c(per_field_files, files)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (length(per_field_files) == 0) {
|
|
||||||
stop(paste("ERROR: No mosaics found for week", current_week, "year", current_year,
|
|
||||||
"\nExpected pattern:", single_file_pattern,
|
|
||||||
"\nChecked:", weekly_mosaic))
|
|
||||||
}
|
|
||||||
|
|
||||||
message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics"))
|
|
||||||
|
|
||||||
mosaic_mode <- "per-field"
|
|
||||||
mosaic_dir <- weekly_mosaic
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Load field boundaries
|
|
||||||
tryCatch({
|
tryCatch({
|
||||||
boundaries_result <- load_field_boundaries(data_dir)
|
boundaries_result <- load_field_boundaries(setup$data_dir)
|
||||||
|
|
||||||
if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
|
if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
|
||||||
field_boundaries_sf <- boundaries_result$field_boundaries_sf
|
field_boundaries_sf <- boundaries_result$field_boundaries_sf
|
||||||
} else {
|
} else {
|
||||||
field_boundaries_sf <- boundaries_result
|
field_boundaries_sf <- boundaries_result
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nrow(field_boundaries_sf) == 0) {
|
if (nrow(field_boundaries_sf) == 0) {
|
||||||
stop("No fields loaded from boundaries")
|
stop("No fields loaded from boundaries")
|
||||||
}
|
}
|
||||||
|
|
||||||
message(paste(" Loaded", nrow(field_boundaries_sf), "fields"))
|
message(paste(" ✓ Loaded", nrow(field_boundaries_sf), "fields"))
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
stop("ERROR loading field boundaries: ", e$message)
|
stop("ERROR loading field boundaries: ", e$message)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Load harvesting data for yield prediction (using common helper function)
|
||||||
|
harvesting_data <- load_harvest_data(setup$data_dir)
|
||||||
|
|
||||||
|
# Extract current week/year from end_date
|
||||||
|
current_week <- as.numeric(format(end_date, "%V"))
|
||||||
|
current_year <- as.numeric(format(end_date, "%G"))
|
||||||
|
|
||||||
|
|
||||||
message("Loading historical field data for trend calculations...")
|
# Call the main orchestrator function from kpi_calculation_utils.R
|
||||||
# Load up to 8 weeks (max of 4-week and 8-week trend requirements)
|
workflow_results <- calculate_field_analysis_cane_supply(
|
||||||
# Function gracefully handles missing weeks and loads whatever exists
|
setup = setup,
|
||||||
num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG) # Always 8
|
client_config = client_config,
|
||||||
message(paste(" Attempting to load up to", num_weeks_to_load, "weeks of historical data..."))
|
end_date = end_date,
|
||||||
|
|
||||||
# Only auto-generate on first call (not in recursive calls from within load_historical_field_data)
|
|
||||||
allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv)
|
|
||||||
|
|
||||||
historical_data <- load_historical_field_data(project_dir, current_week, current_year, reports_dir,
|
|
||||||
num_weeks = num_weeks_to_load,
|
|
||||||
auto_generate = allow_auto_gen,
|
|
||||||
field_boundaries_sf = field_boundaries_sf,
|
|
||||||
daily_vals_dir = daily_vals_dir)
|
|
||||||
|
|
||||||
# Load harvest.xlsx for planting dates (season_start)
|
|
||||||
message("\nLoading harvest data from harvest.xlsx for planting dates...")
|
|
||||||
harvest_file_path <- file.path(data_dir, "harvest.xlsx")
|
|
||||||
|
|
||||||
harvesting_data <- tryCatch({
|
|
||||||
if (file.exists(harvest_file_path)) {
|
|
||||||
harvest_raw <- readxl::read_excel(harvest_file_path)
|
|
||||||
harvest_raw$season_start <- as.Date(harvest_raw$season_start)
|
|
||||||
harvest_raw$season_end <- as.Date(harvest_raw$season_end)
|
|
||||||
message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows"))
|
|
||||||
harvest_raw
|
|
||||||
} else {
|
|
||||||
message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path))
|
|
||||||
NULL
|
|
||||||
}
|
|
||||||
}, error = function(e) {
|
|
||||||
message(paste(" ERROR loading harvest.xlsx:", e$message))
|
|
||||||
NULL
|
|
||||||
})
|
|
||||||
|
|
||||||
planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
|
|
||||||
|
|
||||||
# Validate planting_dates
|
|
||||||
if (is.null(planting_dates) || nrow(planting_dates) == 0) {
|
|
||||||
message("WARNING: No planting dates available. Using NA for all fields.")
|
|
||||||
planting_dates <- data.frame(
|
|
||||||
field_id = field_boundaries_sf$field,
|
|
||||||
planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)),
|
|
||||||
stringsAsFactors = FALSE
|
|
||||||
)
|
|
||||||
}
|
|
||||||
# Build per-field configuration
|
|
||||||
message("\nPreparing mosaic configuration for statistics calculation...")
|
|
||||||
message(" ✓ Using per-field mosaic architecture (1 TIFF per field)")
|
|
||||||
|
|
||||||
# Per-field mode: each field has its own TIFF in weekly_mosaic/{FIELD}/week_*.tif
|
|
||||||
field_grid <- list(
|
|
||||||
mosaic_dir = mosaic_dir,
|
|
||||||
mode = "per-field"
|
|
||||||
)
|
|
||||||
|
|
||||||
message("\nUsing modular RDS-based approach for weekly statistics...")
|
|
||||||
|
|
||||||
# Load/calculate CURRENT week stats (from tiles or RDS cache)
|
|
||||||
message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...")
|
|
||||||
current_stats <- load_or_calculate_weekly_stats(
|
|
||||||
week_num = current_week,
|
|
||||||
year = current_year,
|
|
||||||
project_dir = project_dir,
|
project_dir = project_dir,
|
||||||
|
weekly_mosaic = weekly_mosaic,
|
||||||
|
daily_vals_dir = daily_vals_dir,
|
||||||
field_boundaries_sf = field_boundaries_sf,
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
mosaic_dir = field_grid$mosaic_dir,
|
data_dir = data_dir
|
||||||
reports_dir = reports_dir,
|
|
||||||
report_date = end_date
|
|
||||||
)
|
)
|
||||||
|
|
||||||
message(paste(" ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week"))
|
|
||||||
|
|
||||||
# Load/calculate PREVIOUS week stats (from RDS cache or tiles)
|
|
||||||
message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...")
|
|
||||||
|
|
||||||
# Calculate report date for previous week (7 days before current)
|
|
||||||
prev_report_date <- end_date - 7
|
|
||||||
|
|
||||||
prev_stats <- load_or_calculate_weekly_stats(
|
|
||||||
week_num = previous_week,
|
|
||||||
year = previous_year,
|
|
||||||
project_dir = project_dir,
|
|
||||||
field_boundaries_sf = field_boundaries_sf,
|
|
||||||
mosaic_dir = field_grid$mosaic_dir,
|
|
||||||
reports_dir = reports_dir,
|
|
||||||
report_date = prev_report_date
|
|
||||||
)
|
|
||||||
|
|
||||||
message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week"))
|
|
||||||
message(paste(" Columns in prev_stats:", paste(names(prev_stats), collapse = ", ")))
|
|
||||||
message(paste(" CV column non-NA values in prev_stats:", sum(!is.na(prev_stats$CV))))
|
|
||||||
|
|
||||||
# Apply trend calculations (requires both weeks)
|
|
||||||
message("\n3. Calculating trend columns...")
|
|
||||||
current_stats <- calculate_kpi_trends(current_stats, prev_stats,
|
|
||||||
project_dir = project_dir,
|
|
||||||
reports_dir = reports_dir,
|
|
||||||
current_week = current_week,
|
|
||||||
year = current_year)
|
|
||||||
|
|
||||||
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
|
|
||||||
|
|
||||||
# Load weekly harvest probabilities from script 31 (if available)
|
|
||||||
# Note: Script 31 saves to reports/kpis/field_stats/ (not field_level)
|
|
||||||
message("\n4. Loading harvest probabilities from script 31...")
|
|
||||||
harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats")
|
|
||||||
harvest_prob_file <- file.path(harvest_prob_dir,
|
|
||||||
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year))
|
|
||||||
message(paste(" Looking for:", harvest_prob_file))
|
|
||||||
|
|
||||||
imminent_prob_data <- tryCatch({
|
|
||||||
if (file.exists(harvest_prob_file)) {
|
|
||||||
prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE,
|
|
||||||
col_types = readr::cols(.default = readr::col_character()))
|
|
||||||
message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields"))
|
|
||||||
prob_df %>%
|
|
||||||
select(field, imminent_prob, detected_prob) %>%
|
|
||||||
rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob)
|
|
||||||
} else {
|
|
||||||
message(paste(" INFO: Harvest probabilities not available (script 31 not run)"))
|
|
||||||
NULL
|
|
||||||
}
|
|
||||||
}, error = function(e) {
|
|
||||||
message(paste(" WARNING: Could not load harvest probabilities:", e$message))
|
|
||||||
NULL
|
|
||||||
})
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# CALCULATE GAP FILLING KPI (2σ method from kpi_utils.R)
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
message("\nCalculating gap filling scores (2σ method)...")
|
|
||||||
|
|
||||||
# Process per-field mosaics
|
|
||||||
message(paste(" Using per-field mosaics for", length(per_field_files), "fields"))
|
|
||||||
|
|
||||||
field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field)
|
|
||||||
|
|
||||||
process_gap_for_field <- function(field_file) {
|
|
||||||
field_id <- basename(dirname(field_file))
|
|
||||||
field_bounds <- field_boundaries_by_id[[field_id]]
|
|
||||||
|
|
||||||
if (is.null(field_bounds) || nrow(field_bounds) == 0) {
|
|
||||||
return(data.frame(Field_id = field_id, gap_score = NA_real_))
|
|
||||||
}
|
|
||||||
|
|
||||||
tryCatch({
|
|
||||||
field_raster <- terra::rast(field_file)
|
|
||||||
ci_band_name <- "CI"
|
|
||||||
if (!(ci_band_name %in% names(field_raster))) {
|
|
||||||
return(data.frame(Field_id = field_id, gap_score = NA_real_))
|
|
||||||
}
|
|
||||||
field_ci_band <- field_raster[[ci_band_name]]
|
|
||||||
names(field_ci_band) <- "CI"
|
|
||||||
|
|
||||||
gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds)
|
|
||||||
|
|
||||||
if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) {
|
|
||||||
return(data.frame(Field_id = field_id, gap_score = NA_real_))
|
|
||||||
}
|
|
||||||
|
|
||||||
gap_scores <- gap_result$field_results
|
|
||||||
gap_scores$Field_id <- gap_scores$field
|
|
||||||
gap_scores <- gap_scores[, c("Field_id", "gap_score")]
|
|
||||||
|
|
||||||
stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE))
|
|
||||||
}, error = function(e) {
|
|
||||||
message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message))
|
|
||||||
data.frame(Field_id = field_id, gap_score = NA_real_)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
# Process fields sequentially with progress bar
|
|
||||||
message(" Processing gap scores for ", length(per_field_files), " fields...")
|
|
||||||
pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50)
|
|
||||||
|
|
||||||
results_list <- lapply(seq_along(per_field_files), function(idx) {
|
# Extract results
|
||||||
result <- process_gap_for_field(per_field_files[[idx]])
|
field_analysis_df <- workflow_results$field_analysis_df
|
||||||
utils::setTxtProgressBar(pb, idx)
|
farm_kpi_results <- workflow_results$farm_kpi_results
|
||||||
result
|
export_paths <- workflow_results$export_paths
|
||||||
})
|
|
||||||
close(pb)
|
|
||||||
|
|
||||||
gap_scores_df <- dplyr::bind_rows(results_list)
|
|
||||||
|
|
||||||
if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
|
|
||||||
gap_scores_df <- gap_scores_df %>%
|
|
||||||
dplyr::group_by(Field_id) %>%
|
|
||||||
dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop")
|
|
||||||
|
|
||||||
message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields"))
|
|
||||||
message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%"))
|
|
||||||
} else {
|
|
||||||
message(" WARNING: No gap scores calculated from per-field mosaics")
|
|
||||||
gap_scores_df <- NULL
|
|
||||||
}
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# Build final output dataframe with all 22 columns (including Gap_score)
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
message("\nBuilding final field analysis output...")
|
|
||||||
|
|
||||||
# Pre-calculate acreages with geometry validation
|
|
||||||
# This avoids geometry errors during field_analysis construction
|
|
||||||
acreage_lookup <- tryCatch({
|
|
||||||
lookup_df <- field_boundaries_sf %>%
|
|
||||||
sf::st_drop_geometry() %>%
|
|
||||||
as.data.frame() %>%
|
|
||||||
mutate(
|
|
||||||
geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) {
|
|
||||||
tryCatch({
|
|
||||||
sf::st_is_valid(field_boundaries_sf[idx, ])
|
|
||||||
}, error = function(e) FALSE)
|
|
||||||
}),
|
|
||||||
area_ha = 0
|
|
||||||
)
|
|
||||||
|
|
||||||
# Calculate area for valid geometries
|
} else {
|
||||||
for (idx in which(lookup_df$geometry_valid)) {
|
# Unknown client type - log warning and exit
|
||||||
tryCatch({
|
warning(sprintf("Unknown client type: %s - no workflow matched", client_type))
|
||||||
area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ]))
|
|
||||||
lookup_df$area_ha[idx] <- area_m2 / 10000
|
|
||||||
}, error = function(e) {
|
|
||||||
lookup_df$area_ha[idx] <<- NA_real_
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
# Convert hectares to acres
|
|
||||||
lookup_df %>%
|
|
||||||
mutate(acreage = area_ha / 0.404686) %>%
|
|
||||||
select(field, acreage)
|
|
||||||
}, error = function(e) {
|
|
||||||
message(paste("Warning: Could not calculate acreages from geometries -", e$message))
|
|
||||||
data.frame(field = character(0), acreage = numeric(0))
|
|
||||||
})
|
|
||||||
|
|
||||||
field_analysis_df <- current_stats %>%
|
|
||||||
mutate(
|
|
||||||
# Column 2: Farm_Section (user fills)
|
|
||||||
Farm_Section = NA_character_,
|
|
||||||
# Column 3: Field_name (from GeoJSON - already have Field_id, can look up)
|
|
||||||
Field_name = Field_id,
|
|
||||||
# Column 4: Acreage (calculate from geometry)
|
|
||||||
Acreage = {
|
|
||||||
acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)]
|
|
||||||
if_else(is.na(acreages_vec), 0, acreages_vec)
|
|
||||||
},
|
|
||||||
# Columns 5-6: Already in current_stats (Mean_CI, Weekly_ci_change)
|
|
||||||
# Column 7: Four_week_trend (from current_stats)
|
|
||||||
# Column 8: Last_harvest_or_planting_date (from harvest.xlsx - season_start)
|
|
||||||
Last_harvest_or_planting_date = {
|
|
||||||
planting_dates$planting_date[match(Field_id, planting_dates$field_id)]
|
|
||||||
},
|
|
||||||
# Column 9: Age_week (calculated from report date and planting date)
|
|
||||||
Age_week = {
|
|
||||||
sapply(seq_len(nrow(current_stats)), function(idx) {
|
|
||||||
planting_dt <- Last_harvest_or_planting_date[idx]
|
|
||||||
if (is.na(planting_dt)) {
|
|
||||||
return(NA_real_)
|
|
||||||
}
|
|
||||||
round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 0)
|
|
||||||
})
|
|
||||||
},
|
|
||||||
# Column 10: Phase (recalculate based on updated Age_week)
|
|
||||||
Phase = {
|
|
||||||
sapply(Age_week, function(age) {
|
|
||||||
if (is.na(age)) return(NA_character_)
|
|
||||||
if (age >= 0 & age < 4) return("Germination")
|
|
||||||
if (age >= 4 & age < 17) return("Tillering")
|
|
||||||
if (age >= 17 & age < 39) return("Grand Growth")
|
|
||||||
if (age >= 39) return("Maturation")
|
|
||||||
NA_character_
|
|
||||||
})
|
|
||||||
},
|
|
||||||
# Column 11: nmr_of_weeks_analysed (already in current_stats from calculate_kpi_trends)
|
|
||||||
# Column 12: Germination_progress (calculated here from CI values)
|
|
||||||
# Bin Pct_pixels_CI_gte_2 into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100%
|
|
||||||
Germination_progress = sapply(Pct_pixels_CI_gte_2, function(pct) {
|
|
||||||
if (is.na(pct)) return(NA_character_)
|
|
||||||
if (pct >= 95) return("95-100%")
|
|
||||||
else if (pct >= 90) return("90-95%")
|
|
||||||
else if (pct >= 80) return("80-90%")
|
|
||||||
else if (pct >= 70) return("70-80%")
|
|
||||||
else if (pct >= 60) return("60-70%")
|
|
||||||
else if (pct >= 50) return("50-60%")
|
|
||||||
else if (pct >= 40) return("40-50%")
|
|
||||||
else if (pct >= 30) return("30-40%")
|
|
||||||
else if (pct >= 20) return("20-30%")
|
|
||||||
else if (pct >= 10) return("10-20%")
|
|
||||||
else return("0-10%")
|
|
||||||
}),
|
|
||||||
# Column 13: Imminent_prob (from script 31 or NA if not available)
|
|
||||||
Imminent_prob = {
|
|
||||||
if (!is.null(imminent_prob_data)) {
|
|
||||||
imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)]
|
|
||||||
} else {
|
|
||||||
rep(NA_real_, nrow(current_stats))
|
|
||||||
}
|
|
||||||
},
|
|
||||||
# Column 14: Status_Alert (based on harvest probability + crop health status)
|
|
||||||
# Priority order: Ready for harvest-check → Strong decline → Harvested/bare → NA
|
|
||||||
Status_Alert = {
|
|
||||||
sapply(seq_len(nrow(current_stats)), function(idx) {
|
|
||||||
imminent_prob <- Imminent_prob[idx]
|
|
||||||
age_w <- Age_week[idx]
|
|
||||||
weekly_ci_chg <- Weekly_ci_change[idx]
|
|
||||||
mean_ci_val <- Mean_CI[idx]
|
|
||||||
|
|
||||||
# Priority 1: Ready for harvest-check (imminent + mature cane ≥12 months)
|
|
||||||
if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_w) && age_w >= 52) {
|
|
||||||
return("Ready for harvest-check")
|
|
||||||
}
|
|
||||||
|
|
||||||
# Priority 2: Strong decline in crop health (drop ≥2 points but still >1.5)
|
|
||||||
if (!is.na(weekly_ci_chg) && weekly_ci_chg <= -2.0 && !is.na(mean_ci_val) && mean_ci_val > 1.5) {
|
|
||||||
return("Strong decline in crop health")
|
|
||||||
}
|
|
||||||
|
|
||||||
# Priority 3: Harvested/bare (Mean CI < 1.5)
|
|
||||||
if (!is.na(mean_ci_val) && mean_ci_val < 1.5) {
|
|
||||||
return("Harvested/bare")
|
|
||||||
}
|
|
||||||
|
|
||||||
# Fallback: no alert
|
|
||||||
NA_character_
|
|
||||||
})
|
|
||||||
},
|
|
||||||
# Columns 15-16: CI-based columns already in current_stats (CI_range, CI_Percentiles)
|
|
||||||
# Column 17: Already in current_stats (CV)
|
|
||||||
# Column 18: Already in current_stats (CV_Trend_Short_Term)
|
|
||||||
# Column 19: CV_Trend_Long_Term (from current_stats - raw slope value)
|
|
||||||
# Column 19b: CV_Trend_Long_Term_Category (categorical interpretation of slope)
|
|
||||||
# 3 classes: More uniform (slope < -0.01), Stable uniformity (-0.01 to 0.01), Less uniform (slope > 0.01)
|
|
||||||
CV_Trend_Long_Term_Category = {
|
|
||||||
sapply(current_stats$CV_Trend_Long_Term, function(slope) {
|
|
||||||
if (is.na(slope)) {
|
|
||||||
return(NA_character_)
|
|
||||||
} else if (slope < -0.01) {
|
|
||||||
return("More uniform")
|
|
||||||
} else if (slope > 0.01) {
|
|
||||||
return("Less uniform")
|
|
||||||
} else {
|
|
||||||
return("Stable uniformity")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
},
|
|
||||||
# Columns 20-21: Already in current_stats (Cloud_pct_clear, Cloud_category)
|
|
||||||
# Bin Cloud_pct_clear into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100%
|
|
||||||
Cloud_pct_clear = sapply(Cloud_pct_clear, function(pct) {
|
|
||||||
if (is.na(pct)) return(NA_character_)
|
|
||||||
if (pct >= 95) return("95-100%")
|
|
||||||
else if (pct >= 90) return("90-95%")
|
|
||||||
else if (pct >= 80) return("80-90%")
|
|
||||||
else if (pct >= 70) return("70-80%")
|
|
||||||
else if (pct >= 60) return("60-70%")
|
|
||||||
else if (pct >= 50) return("50-60%")
|
|
||||||
else if (pct >= 40) return("40-50%")
|
|
||||||
else if (pct >= 30) return("30-40%")
|
|
||||||
else if (pct >= 20) return("20-30%")
|
|
||||||
else if (pct >= 10) return("10-20%")
|
|
||||||
else return("0-10%")
|
|
||||||
}),
|
|
||||||
# Column 22: Gap_score (2σ below median - from kpi_utils.R)
|
|
||||||
Gap_score = {
|
|
||||||
if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
|
|
||||||
# Debug: Print first few gap scores
|
|
||||||
message(sprintf(" Joining %d gap scores to field_analysis (first 3: %s)",
|
|
||||||
nrow(gap_scores_df),
|
|
||||||
paste(head(gap_scores_df$gap_score, 3), collapse=", ")))
|
|
||||||
message(sprintf(" First 3 Field_ids in gap_scores_df: %s",
|
|
||||||
paste(head(gap_scores_df$Field_id, 3), collapse=", ")))
|
|
||||||
message(sprintf(" First 3 Field_ids in current_stats: %s",
|
|
||||||
paste(head(current_stats$Field_id, 3), collapse=", ")))
|
|
||||||
|
|
||||||
gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)]
|
|
||||||
} else {
|
|
||||||
rep(NA_real_, nrow(current_stats))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
) %>%
|
|
||||||
select(
|
|
||||||
all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert",
|
|
||||||
"Last_harvest_or_planting_date", "Age_week", "Phase",
|
|
||||||
"Germination_progress",
|
|
||||||
"Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles",
|
|
||||||
"CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
|
|
||||||
"Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score"))
|
|
||||||
)
|
|
||||||
|
|
||||||
message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns (including Gap_score)"))
|
|
||||||
|
|
||||||
export_paths <- export_field_analysis_excel(
|
|
||||||
field_analysis_df,
|
|
||||||
NULL,
|
|
||||||
project_dir,
|
|
||||||
current_week,
|
|
||||||
current_year,
|
|
||||||
reports_dir
|
|
||||||
)
|
|
||||||
|
|
||||||
cat("\n--- Per-field Results (first 10) ---\n")
|
|
||||||
available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category")
|
|
||||||
available_cols <- available_cols[available_cols %in% names(field_analysis_df)]
|
|
||||||
if (length(available_cols) > 0) {
|
|
||||||
print(head(field_analysis_df[, available_cols], 10))
|
|
||||||
}
|
|
||||||
|
|
||||||
# ========== FARM-LEVEL KPI AGGREGATION ==========
|
|
||||||
# Aggregate the per-field analysis into farm-level summary statistics
|
|
||||||
|
|
||||||
cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n")
|
|
||||||
|
|
||||||
# Filter to only fields that have actual data (non-NA CI and valid acreage)
|
|
||||||
field_data <- field_analysis_df %>%
|
|
||||||
filter(!is.na(Mean_CI) & !is.na(Acreage)) %>%
|
|
||||||
filter(Acreage > 0)
|
|
||||||
|
|
||||||
if (nrow(field_data) > 0) {
|
|
||||||
|
|
||||||
if (nrow(field_data) > 0) {
|
|
||||||
# Create summary statistics
|
|
||||||
farm_summary <- list()
|
|
||||||
|
|
||||||
# 1. PHASE DISTRIBUTION
|
|
||||||
phase_dist <- field_data %>%
|
|
||||||
group_by(Phase) %>%
|
|
||||||
summarise(
|
|
||||||
num_fields = n(),
|
|
||||||
acreage = sum(Acreage, na.rm = TRUE),
|
|
||||||
.groups = 'drop'
|
|
||||||
) %>%
|
|
||||||
rename(Category = Phase)
|
|
||||||
|
|
||||||
farm_summary$phase_distribution <- phase_dist
|
|
||||||
|
|
||||||
# 2. STATUS ALERT DISTRIBUTION
|
|
||||||
status_dist <- field_data %>%
|
|
||||||
group_by(Status_Alert) %>%
|
|
||||||
summarise(
|
|
||||||
num_fields = n(),
|
|
||||||
acreage = sum(Acreage, na.rm = TRUE),
|
|
||||||
.groups = 'drop'
|
|
||||||
) %>%
|
|
||||||
rename(Category = Status_Alert)
|
|
||||||
|
|
||||||
farm_summary$status_distribution <- status_dist
|
|
||||||
|
|
||||||
# 3. CLOUD COVERAGE DISTRIBUTION
|
|
||||||
cloud_dist <- field_data %>%
|
|
||||||
group_by(Cloud_category) %>%
|
|
||||||
summarise(
|
|
||||||
num_fields = n(),
|
|
||||||
acreage = sum(Acreage, na.rm = TRUE),
|
|
||||||
.groups = 'drop'
|
|
||||||
) %>%
|
|
||||||
rename(Category = Cloud_category)
|
|
||||||
|
|
||||||
farm_summary$cloud_distribution <- cloud_dist
|
|
||||||
|
|
||||||
# 4. OVERALL STATISTICS
|
|
||||||
farm_summary$overall_stats <- data.frame(
|
|
||||||
total_fields = nrow(field_data),
|
|
||||||
total_acreage = sum(field_data$Acreage, na.rm = TRUE),
|
|
||||||
mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2),
|
|
||||||
median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2),
|
|
||||||
mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4),
|
|
||||||
week = current_week,
|
|
||||||
year = current_year,
|
|
||||||
date = as.character(end_date)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Print summaries
|
|
||||||
cat("\n--- PHASE DISTRIBUTION ---\n")
|
|
||||||
print(phase_dist)
|
|
||||||
|
|
||||||
cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n")
|
|
||||||
print(status_dist)
|
|
||||||
|
|
||||||
cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n")
|
|
||||||
print(cloud_dist)
|
|
||||||
|
|
||||||
cat("\n--- OVERALL FARM STATISTICS ---\n")
|
|
||||||
print(farm_summary$overall_stats)
|
|
||||||
|
|
||||||
farm_kpi_results <- farm_summary
|
|
||||||
} else {
|
|
||||||
farm_kpi_results <- NULL
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
farm_kpi_results <- NULL
|
|
||||||
}
|
|
||||||
|
|
||||||
# ========== FINAL SUMMARY ==========
|
|
||||||
|
|
||||||
cat("\n", strrep("=", 70), "\n")
|
|
||||||
cat("80_CALCULATE_KPIs.R - COMPLETION SUMMARY\n")
|
|
||||||
cat(strrep("=", 70), "\n")
|
|
||||||
cat("Per-field analysis fields analyzed:", nrow(field_analysis_df), "\n")
|
|
||||||
cat("Excel export:", export_paths$excel, "\n")
|
|
||||||
cat("RDS export:", export_paths$rds, "\n")
|
|
||||||
cat("CSV export:", export_paths$csv, "\n")
|
|
||||||
|
|
||||||
if (!is.null(farm_kpi_results)) {
|
|
||||||
cat("\nFarm-level KPIs: CALCULATED\n")
|
|
||||||
} else {
|
|
||||||
cat("\nFarm-level KPIs: SKIPPED (no valid tile data extracted)\n")
|
|
||||||
}
|
|
||||||
|
|
||||||
cat("\n✓ Consolidated KPI calculation complete!\n")
|
|
||||||
cat(" - Per-field data exported\n")
|
|
||||||
cat(" - Farm-level KPIs calculated\n")
|
|
||||||
cat(" - All outputs in:", reports_dir, "\n\n")
|
|
||||||
|
|
||||||
} else {
|
|
||||||
# Unknown client type - log warning and exit
|
|
||||||
warning(sprintf("Unknown client type: %s - no workflow matched", client_type))
|
|
||||||
cat("\n⚠️ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "")
|
cat("\n⚠️ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "")
|
||||||
cat("Expected: 'agronomic_support' (aura) or 'cane_supply' (angata, etc.)\n")
|
cat("Expected: 'agronomic_support' (aura) or 'cane_supply' (angata, etc.)\n")
|
||||||
cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n")
|
cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n")
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -27,173 +27,685 @@ library(tidyr)
|
||||||
library(readxl)
|
library(readxl)
|
||||||
library(writexl)
|
library(writexl)
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# ALERT THRESHOLDS & CONFIGURATION CONSTANTS
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# CI change thresholds for alert categorization and status determination
|
||||||
|
# These values are project-standard and should be consistent across all workflows
|
||||||
|
CI_CHANGE_RAPID_GROWTH_THRESHOLD <- 0.5 # Weekly CI change for positive growth alert
|
||||||
|
CI_CHANGE_POSITIVE_GROWTH_THRESHOLD <- 0.2 # Weekly CI change for acceptable growth
|
||||||
|
CI_CHANGE_STABLE_THRESHOLD <- -0.2 # Weekly CI change for stable status (between -0.2 and +0.2)
|
||||||
|
CI_CHANGE_STRESS_TREND_THRESHOLD <- -0.3 # 4-week trend threshold for stress detection
|
||||||
|
CI_CHANGE_RAPID_DECLINE_THRESHOLD <- -0.5 # Weekly CI change for rapid decline alert
|
||||||
|
# Deprecated aliases (for backward compatibility if needed):
|
||||||
|
CI_CHANGE_DECLINE_THRESHOLD <- CI_CHANGE_RAPID_DECLINE_THRESHOLD # Weekly CI change threshold for decline alerts
|
||||||
|
CI_CHANGE_INCREASE_THRESHOLD <- CI_CHANGE_RAPID_GROWTH_THRESHOLD # Weekly CI change threshold for increase alerts
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# ANGATA-SPECIFIC HELPER FUNCTIONS (Placeholder Section)
|
# ANGATA-SPECIFIC HELPER FUNCTIONS (Placeholder Section)
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
#' Placeholder: ANGATA harvest readiness assessment
|
#' Calculate acreage for each field from geometry
|
||||||
|
#' @param field_boundaries_sf sf object with field geometries
|
||||||
|
#' @return data.frame with field and acreage columns
|
||||||
|
calculate_field_acreages <- function(field_boundaries_sf) {
|
||||||
|
tryCatch({
|
||||||
|
# Project to equal-area CRS (EPSG:6933) for accurate area calculations
|
||||||
|
field_boundaries_proj <- sf::st_transform(field_boundaries_sf, "EPSG:6933")
|
||||||
|
|
||||||
|
lookup_df <- field_boundaries_proj %>%
|
||||||
|
sf::st_drop_geometry() %>%
|
||||||
|
as.data.frame() %>%
|
||||||
|
mutate(
|
||||||
|
geometry_valid = sapply(seq_len(nrow(field_boundaries_proj)), function(idx) {
|
||||||
|
tryCatch({
|
||||||
|
sf::st_is_valid(field_boundaries_proj[idx, ])
|
||||||
|
}, error = function(e) FALSE)
|
||||||
|
}),
|
||||||
|
area_ha = 0
|
||||||
|
)
|
||||||
|
|
||||||
|
# Calculate area for valid geometries
|
||||||
|
valid_indices <- which(lookup_df$geometry_valid)
|
||||||
|
areas_ha <- vapply(valid_indices, function(idx) {
|
||||||
|
tryCatch({
|
||||||
|
area_m2 <- as.numeric(sf::st_area(field_boundaries_proj[idx, ]))
|
||||||
|
area_m2 / 10000
|
||||||
|
}, error = function(e) NA_real_)
|
||||||
|
}, numeric(1))
|
||||||
|
lookup_df$area_ha[valid_indices] <- areas_ha
|
||||||
|
|
||||||
|
# Convert hectares to acres
|
||||||
|
lookup_df %>%
|
||||||
|
mutate(acreage = area_ha / 0.404686) %>%
|
||||||
|
# Aggregate by field to handle multi-row fields (e.g., sub_fields)
|
||||||
|
group_by(field) %>%
|
||||||
|
summarise(acreage = sum(acreage, na.rm = TRUE), .groups = "drop") %>%
|
||||||
|
select(field, acreage)
|
||||||
|
}, error = function(e) {
|
||||||
|
message(paste("Warning: Could not calculate acreages from geometries -", e$message))
|
||||||
|
data.frame(field = character(0), acreage = numeric(0))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Calculate age in weeks from planting date
|
||||||
#'
|
#'
|
||||||
#' Future implementation will integrate ANGATA-specific harvest readiness criteria:
|
#' @param planting_date Date of planting
|
||||||
#' - Maturation phase detection (CI threshold-based)
|
#' @param reference_date Date to calculate age relative to (typically end_date)
|
||||||
#' - Field age tracking (days since planting)
|
#' @return Numeric age in weeks (rounded to nearest week)
|
||||||
#' - Weather-based ripeness indicators (if available)
|
calculate_age_week <- function(planting_date, reference_date) {
|
||||||
#' - Historical yield correlations
|
if (is.na(planting_date)) {
|
||||||
#'
|
return(NA_real_)
|
||||||
#' @param field_ci CI values for the field
|
|
||||||
#' @param field_age_days Days since planting
|
|
||||||
#'
|
|
||||||
#' @return Character string with harvest readiness assessment
|
|
||||||
assess_harvest_readiness <- function(field_ci, field_age_days = NULL) {
|
|
||||||
# Placeholder implementation
|
|
||||||
# Real version would check:
|
|
||||||
# - Mean CI > 3.5 (maturation threshold)
|
|
||||||
# - Age > 350 days
|
|
||||||
# - Weekly growth rate < threshold (mature plateau)
|
|
||||||
|
|
||||||
if (is.null(field_ci) || all(is.na(field_ci))) {
|
|
||||||
return("No data available")
|
|
||||||
}
|
}
|
||||||
|
round(as.numeric(difftime(reference_date, planting_date, units = "weeks")), 0)
|
||||||
mean_ci <- mean(field_ci, na.rm = TRUE)
|
}
|
||||||
|
|
||||||
if (mean_ci > 3.5) {
|
#' Assign crop phase based on age in weeks
|
||||||
return("Ready for harvest")
|
#'
|
||||||
} else if (mean_ci > 2.5) {
|
#' Determines crop phase from age in weeks using canonical PHASE_DEFINITIONS
|
||||||
return("Approaching harvest readiness")
|
#' from 80_utils_common.R for consistency across all workflows.
|
||||||
|
#'
|
||||||
|
#' @param age_week Numeric age in weeks
|
||||||
|
#' @return Character phase name (from PHASE_DEFINITIONS or "Unknown")
|
||||||
|
#'
|
||||||
|
#' @details
|
||||||
|
#' Uses the shared PHASE_DEFINITIONS to ensure identical phase boundaries
|
||||||
|
#' across all scripts. This wrapper delegates to get_phase_by_age() which
|
||||||
|
#' is the authoritative phase lookup function.
|
||||||
|
#'
|
||||||
|
#' Phase boundaries (from PHASE_DEFINITIONS):
|
||||||
|
#' - Germination: 0-6 weeks
|
||||||
|
#' - Tillering: 4-16 weeks
|
||||||
|
#' - Grand Growth: 17-39 weeks
|
||||||
|
#' - Maturation: 39+ weeks
|
||||||
|
calculate_phase <- function(age_week) {
|
||||||
|
# Delegate to canonical get_phase_by_age() from 80_utils_common.R
|
||||||
|
# This ensures all phase boundaries are consistent across workflows
|
||||||
|
get_phase_by_age(age_week)
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Bin percentage into 10% intervals with special handling for 90-100%
|
||||||
|
#'
|
||||||
|
#' @param pct Numeric percentage value (0-100)
|
||||||
|
#' @return Character bin label
|
||||||
|
bin_percentage <- function(pct) {
|
||||||
|
if (is.na(pct)) return(NA_character_)
|
||||||
|
if (pct >= 95) return("95-100%")
|
||||||
|
else if (pct >= 90) return("90-95%")
|
||||||
|
else if (pct >= 80) return("80-90%")
|
||||||
|
else if (pct >= 70) return("70-80%")
|
||||||
|
else if (pct >= 60) return("60-70%")
|
||||||
|
else if (pct >= 50) return("50-60%")
|
||||||
|
else if (pct >= 40) return("40-50%")
|
||||||
|
else if (pct >= 30) return("30-40%")
|
||||||
|
else if (pct >= 20) return("20-30%")
|
||||||
|
else if (pct >= 10) return("10-20%")
|
||||||
|
else return("0-10%")
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Calculate germination progress from CI threshold percentage
|
||||||
|
#'
|
||||||
|
#' @param pct_pixels_ci_gte_2 Percentage of pixels with CI >= 2
|
||||||
|
#' @return Character bin label
|
||||||
|
calculate_germination_progress <- function(pct_pixels_ci_gte_2) {
|
||||||
|
bin_percentage(pct_pixels_ci_gte_2)
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Categorize CV trend (long-term slope) into qualitative labels
|
||||||
|
#'
|
||||||
|
#' @param cv_slope Numeric slope from CV trend analysis
|
||||||
|
#' @return Character category: "More uniform", "Stable uniformity", or "Less uniform"
|
||||||
|
categorize_cv_trend_long_term <- function(cv_slope) {
|
||||||
|
if (is.na(cv_slope)) {
|
||||||
|
return(NA_character_)
|
||||||
|
} else if (cv_slope < -0.01) {
|
||||||
|
return("More uniform")
|
||||||
|
} else if (cv_slope > 0.01) {
|
||||||
|
return("Less uniform")
|
||||||
} else {
|
} else {
|
||||||
return("Not ready - continue monitoring")
|
return("Stable uniformity")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Placeholder: ANGATA supply chain status flags
|
#' Determine status alert for CANE_SUPPLY client (harvest/milling workflow)
|
||||||
|
#'
|
||||||
|
#' Alerts focus on: harvest readiness, crop health monitoring, exception detection
|
||||||
|
#' Uses WEEKLY trends (Four_week_trend) not daily noise for consistency
|
||||||
|
#' Designed for harvest/milling clients who manage expectation, not daily operations
|
||||||
#'
|
#'
|
||||||
#' Future implementation will add supply chain-specific status indicators:
|
#' Priority order:
|
||||||
#' - Harvest scheduling readiness
|
#' 1. harvest_ready → Schedule harvest operations
|
||||||
#' - Equipment availability impact
|
#' 2. harvested_bare → Record completion / detect unexpected harvest
|
||||||
#' - Transportation/logistics flags
|
#' 3. stress_detected → Monitor crop health (consistent decline)
|
||||||
#' - Quality parameter flags
|
#' 4. germination_delayed → Early warning for young fields
|
||||||
|
#' 5. growth_on_track → Positive signal (no action needed)
|
||||||
|
#' 6. NA → Normal growth (no alert)
|
||||||
#'
|
#'
|
||||||
#' @param field_analysis Data frame with field analysis results
|
#' @param imminent_prob Numeric harvest probability (0-1)
|
||||||
#'
|
#' @param age_week Numeric age in weeks since planting/harvest
|
||||||
#' @return Data frame with supply chain status columns
|
#' @param mean_ci Numeric mean Chlorophyll Index
|
||||||
assess_supply_chain_status <- function(field_analysis) {
|
#' @param four_week_trend Numeric 4-week trend in CI (slope of growth)
|
||||||
# Placeholder: return field analysis as-is
|
#' @param weekly_ci_change Numeric week-over-week CI change
|
||||||
# Real version would add columns for:
|
#' @param cv Numeric coefficient of variation (field uniformity)
|
||||||
# - schedule_ready (bool)
|
#' @return Character status alert code or NA
|
||||||
# - harvest_window_days (numeric)
|
calculate_status_alert <- function(imminent_prob, age_week, mean_ci,
|
||||||
# - transportation_priority (char)
|
four_week_trend, weekly_ci_change, cv) {
|
||||||
# - quality_flags (char)
|
|
||||||
|
|
||||||
return(field_analysis)
|
# Priority 1: HARVEST READY - highest business priority
|
||||||
|
# Field is mature (≥12 months) AND harvest model predicts imminent harvest
|
||||||
|
if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_week) && age_week >= 52) {
|
||||||
|
return("harvest_ready")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Priority 2: HARVESTED/BARE - indicator of completion (or unexpected harvest)
|
||||||
|
# Mean CI dropped below vegetative threshold
|
||||||
|
if (!is.na(mean_ci) && mean_ci < 1.5) {
|
||||||
|
return("harvested_bare")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Priority 3: STRESS DETECTED - consistent health decline (weekly trend)
|
||||||
|
# Uses Four_week_trend (smooth trend) NOT daily fluctuation to avoid noise
|
||||||
|
# Crop declining but not yet bare → opportunity to investigate
|
||||||
|
# References: CI_CHANGE_STRESS_TREND_THRESHOLD for 4-week trend detection
|
||||||
|
if (!is.na(four_week_trend) && four_week_trend < CI_CHANGE_STRESS_TREND_THRESHOLD &&
|
||||||
|
!is.na(mean_ci) && mean_ci > 1.5) {
|
||||||
|
return("stress_detected")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Priority 4: GERMINATION DELAYED - early warning for young fields
|
||||||
|
# Age 4-8 weeks is typical germination window; low CI = slow start
|
||||||
|
if (!is.na(age_week) && age_week >= 4 && age_week <= 8 &&
|
||||||
|
!is.na(mean_ci) && mean_ci < 1.5) {
|
||||||
|
return("germination_delayed")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Priority 5: GROWTH ON TRACK - positive operational status
|
||||||
|
# Field is healthy, uniform, and growing steadily (no action needed)
|
||||||
|
# Conditions: good uniformity (CV < 0.15) AND stable/positive weekly trend
|
||||||
|
# References: CI_CHANGE_STABLE_THRESHOLD (±0.2 = stable, no change)
|
||||||
|
if (!is.na(cv) && cv < 0.15 &&
|
||||||
|
!is.na(four_week_trend) && four_week_trend >= CI_CHANGE_STABLE_THRESHOLD &&
|
||||||
|
!is.na(weekly_ci_change) && weekly_ci_change >= CI_CHANGE_STABLE_THRESHOLD) {
|
||||||
|
return("growth_on_track")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Default: NORMAL GROWTH (no specific alert)
|
||||||
|
# Field is growing but may have minor variability; continues normal monitoring
|
||||||
|
NA_character_
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#' Calculate yield prediction for CANE_SUPPLY workflows (Wrapper)
#'
#' Thin wrapper around the shared yield prediction model in 80_utils_common.R,
#' exposing ML-based yield forecasting (Random Forest with Forward Feature
#' Selection) to CANE_SUPPLY clients (e.g. ANGATA).
#'
#' Model inputs: cumulative Canopy Index (CI) from the growth model, Days
#' After Harvest (DAH) / crop age, and CI-per-day (growth velocity). Mature
#' fields (DAH >= DAH_MATURITY_THRESHOLD, ~8 months) are binned into
#' quartiles: Top 25% (high-yield), Average (mid-range), Lowest 25%.
#'
#' @param field_boundaries_sf SF object with field geometries
#' @param harvesting_data Data frame with harvest history (must have tonnage_ha column)
#' @param cumulative_CI_vals_dir Directory with combined CI RDS files
#'
#' @return List with:
#'   - summary: Data frame with field_groups, count, and yield quartile predictions
#'   - field_results: Data frame with field-level forecasts (yield_forecast_t_ha in t/ha)
#'
#' @details
#' **Data Requirements:** harvesting_data must include a tonnage_ha column
#' (yield in t/ha) for training, and cumulative_CI_vals_dir must contain
#' "All_pivots_Cumulative_CI_quadrant_year_v2.rds". If either is missing, the
#' shared implementation returns a graceful fallback with NA values rather
#' than fabricated numbers.
#'
#' **Integration:** can be called from calculate_all_field_kpis() in the
#' cane_supply workflow to add a "Yield_Forecast" column to the 22-column
#' KPI output.
#'
#' **Example:**
#' ```r
#' yield_result <- calculate_yield_prediction_kpi_cane_supply(
#'   field_boundaries_sf,
#'   harvesting_data,
#'   file.path(data_dir, "combined_CI")
#' )
#' # yield_result$summary has quartiles
#' # yield_result$field_results has per-field forecasts
#' ```
calculate_yield_prediction_kpi_cane_supply <- function(field_boundaries_sf,
                                                       harvesting_data,
                                                       cumulative_CI_vals_dir) {
  # Delegate directly to the shared implementation in 80_utils_common.R;
  # no client-specific post-processing is applied here yet.
  calculate_yield_prediction_kpi(field_boundaries_sf,
                                 harvesting_data,
                                 cumulative_CI_vals_dir)
}
|
||||||
|
|
||||||
|
|
||||||
|
#' Build complete per-field KPI dataframe with all 22 columns
#' @param current_stats data.frame with current week statistics from load_or_calculate_weekly_stats
#' @param planting_dates data.frame with field_id and planting_date columns
#' @param imminent_prob_data data.frame with Field_id and Imminent_prob_actual columns (or NULL)
#' @param gap_scores_df data.frame with Field_id and gap_score columns (or NULL)
#' @param field_boundaries_sf sf object with field geometries
#' @param end_date Date object for current report date
#' @return data.frame with all 22 KPI columns
calculate_all_field_kpis <- function(current_stats,
                                     planting_dates,
                                     imminent_prob_data,
                                     gap_scores_df,
                                     field_boundaries_sf,
                                     end_date) {

  message("\nBuilding final field analysis output...")

  # Acreage lookup table computed once up front (field -> acreage).
  acreage_lookup <- calculate_field_acreages(field_boundaries_sf)
  n_fields <- nrow(current_stats)

  field_analysis_df <- current_stats %>%
    mutate(
      # Column 2: Farm_Section -- left blank, filled in manually by the user.
      Farm_Section = NA_character_,

      # Column 3: Field_name mirrors the GeoJSON field id.
      Field_name = Field_id,

      # Column 4: Acreage from geometry; unmatched fields default to 0.
      Acreage = {
        matched_acres <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)]
        if_else(is.na(matched_acres), 0, matched_acres)
      },

      # Column 8: last harvest/planting date looked up from harvest.xlsx data.
      Last_harvest_or_planting_date =
        planting_dates$planting_date[match(Field_id, planting_dates$field_id)],

      # Column 9: field age in weeks, derived per row from the planting date.
      Age_week = sapply(seq_len(n_fields), function(i) {
        calculate_age_week(Last_harvest_or_planting_date[i], end_date)
      }),

      # Column 10: growth phase derived from Age_week.
      Phase = sapply(Age_week, calculate_phase),

      # Column 12: germination progress binned from Pct_pixels_CI_gte_2.
      Germination_progress = sapply(Pct_pixels_CI_gte_2, calculate_germination_progress),

      # Column 13: harvest probability from script 31, NA when unavailable.
      Imminent_prob = {
        if (!is.null(imminent_prob_data)) {
          as.numeric(imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)])
        } else {
          rep(NA_real_, n_fields)
        }
      },

      # Column 14: multi-priority status alert for the harvest/milling workflow.
      Status_Alert = sapply(seq_len(n_fields), function(i) {
        calculate_status_alert(
          imminent_prob = Imminent_prob[i],
          age_week = Age_week[i],
          mean_ci = Mean_CI[i],
          four_week_trend = Four_week_trend[i],
          weekly_ci_change = Weekly_ci_change[i],
          cv = CV[i]
        )
      }),

      # Column 19b: categorical long-term CV slope.
      CV_Trend_Long_Term_Category = sapply(current_stats$CV_Trend_Long_Term, categorize_cv_trend_long_term),

      # Column 21: cloud-free percentage binned into intervals.
      Cloud_pct_clear = sapply(Cloud_pct_clear, bin_percentage),

      # Column 22: gap score (2-sigma method), NA when no gap data supplied.
      Gap_score = {
        if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
          gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)]
        } else {
          rep(NA_real_, n_fields)
        }
      }
    ) %>%
    select(
      all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert",
               "Last_harvest_or_planting_date", "Age_week", "Phase",
               "Germination_progress",
               "Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles",
               "CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
               "Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score"))
    )

  message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns"))

  return(field_analysis_df)
}
|
||||||
|
|
||||||
|
#' Aggregate per-field data into farm-level KPI summary
#'
#' Filters out fields without usable data, then builds three categorical
#' distributions (phase, status alert, cloud coverage) plus overall farm
#' statistics, printing each summary to the console as it goes.
#'
#' @param field_analysis_df data.frame with per-field KPI data
#' @param current_week Numeric current week number
#' @param current_year Numeric current year
#' @param end_date Date object for current report date
#' @return List with phase_distribution, status_distribution, cloud_distribution, overall_stats
#'   (or NULL when no field has valid Mean_CI and positive Acreage)
calculate_farm_level_kpis <- function(field_analysis_df, current_week, current_year, end_date) {

  cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n")

  # Filter to only fields with actual data (valid CI and positive acreage)
  field_data <- field_analysis_df %>%
    filter(!is.na(Mean_CI) & !is.na(Acreage)) %>%
    filter(Acreage > 0)

  if (nrow(field_data) == 0) {
    message("No valid field data for farm-level aggregation")
    return(NULL)
  }

  # Helper: field count and total acreage per level of one grouping column.
  # across(all_of(...)) preserves the original column name so the rename to
  # "Category" behaves exactly like the previous hand-written group_by calls.
  summarise_distribution <- function(df, group_col) {
    df %>%
      group_by(across(all_of(group_col))) %>%
      summarise(
        num_fields = n(),
        acreage = sum(Acreage, na.rm = TRUE),
        .groups = 'drop'
      ) %>%
      rename(Category = all_of(group_col))
  }

  farm_summary <- list()

  # 1. PHASE DISTRIBUTION
  phase_dist <- summarise_distribution(field_data, "Phase")
  farm_summary$phase_distribution <- phase_dist

  # 2. STATUS ALERT DISTRIBUTION
  status_dist <- summarise_distribution(field_data, "Status_Alert")
  farm_summary$status_distribution <- status_dist

  # 3. CLOUD COVERAGE DISTRIBUTION
  cloud_dist <- summarise_distribution(field_data, "Cloud_category")
  farm_summary$cloud_distribution <- cloud_dist

  # 4. OVERALL STATISTICS
  farm_summary$overall_stats <- data.frame(
    total_fields = nrow(field_data),
    total_acreage = sum(field_data$Acreage, na.rm = TRUE),
    mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2),
    median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2),
    mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4),
    week = current_week,
    year = current_year,
    date = as.character(end_date)
  )

  # Print summaries
  cat("\n--- PHASE DISTRIBUTION ---\n")
  print(phase_dist)

  cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n")
  print(status_dist)

  cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n")
  print(cloud_dist)

  cat("\n--- OVERALL FARM STATISTICS ---\n")
  print(farm_summary$overall_stats)

  return(farm_summary)
}
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# ORCHESTRATOR FOR CANE_SUPPLY WORKFLOWS
|
# ORCHESTRATOR FOR CANE_SUPPLY WORKFLOWS
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
#' Orchestrate ANGATA weekly field analysis and reporting
|
#' Main orchestrator for CANE_SUPPLY per-field KPI workflow
|
||||||
#'
|
#'
|
||||||
#' Main entry point for CANE_SUPPLY (ANGATA, etc.) workflows.
|
#' This function coordinates all KPI calculations for the per-field analysis workflow.
|
||||||
#' Currently uses common utilities; future versions will add client-specific logic.
|
#' It loads historical data, calculates current/previous week statistics, computes
|
||||||
|
#' all 22 KPI columns, and aggregates farm-level summaries.
|
||||||
#'
|
#'
|
||||||
#' @param field_boundaries_sf SF object with field geometries
|
#' @param setup List with directory paths (kpi_reports_dir, data_dir, etc.)
|
||||||
#' @param current_week ISO week number (1-53)
|
#' @param client_config List with workflow configuration (script_91_compatible, outputs)
|
||||||
#' @param current_year ISO week year
|
#' @param end_date Date object for current report date
|
||||||
#' @param mosaic_dir Directory containing weekly mosaics
|
#' @param project_dir Character project identifier
|
||||||
#' @param field_boundaries_path Path to field GeoJSON
|
#' @param weekly_mosaic Character path to weekly mosaic directory
|
||||||
#' @param harvesting_data Data frame with harvest data (optional)
|
#' @param daily_vals_dir Character path to daily values directory
|
||||||
#' @param output_dir Directory for exports
|
#' @param field_boundaries_sf sf object with field geometries
|
||||||
#' @param data_dir Base data directory
|
#' @param data_dir Character path to data directory
|
||||||
#'
|
#' @return List with field_analysis_df, farm_kpi_results, export_paths
|
||||||
#' @return List with field analysis results
|
calculate_field_analysis_cane_supply <- function(setup,
|
||||||
#'
|
client_config,
|
||||||
#' @details
|
end_date,
|
||||||
#' This function:
|
project_dir,
|
||||||
#' 1. Loads weekly mosaic and extracts field statistics
|
weekly_mosaic,
|
||||||
#' 2. Calculates field statistics (using common utilities)
|
daily_vals_dir,
|
||||||
#' 3. Prepares field analysis summary
|
field_boundaries_sf,
|
||||||
#' 4. Exports to Excel/CSV/RDS
|
data_dir) {
|
||||||
#' 5. (Future) Applies ANGATA-specific assessments
|
|
||||||
#'
|
|
||||||
calculate_field_analysis_cane_supply <- function(
|
|
||||||
field_boundaries_sf,
|
|
||||||
current_week,
|
|
||||||
current_year,
|
|
||||||
mosaic_dir,
|
|
||||||
field_boundaries_path = NULL,
|
|
||||||
harvesting_data = NULL,
|
|
||||||
output_dir = file.path(PROJECT_DIR, "output"),
|
|
||||||
data_dir = NULL
|
|
||||||
) {
|
|
||||||
|
|
||||||
message("\n============ CANE SUPPLY FIELD ANALYSIS (ANGATA, etc.) ============")
|
message("\n", strrep("=", 70))
|
||||||
|
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
|
||||||
|
message(strrep("=", 70))
|
||||||
|
|
||||||
# Load current week mosaic
|
reports_dir <- file.path(setup$reports_dir, "kpis")
|
||||||
message("Loading current week mosaic...")
|
|
||||||
current_mosaic <- load_weekly_ci_mosaic(mosaic_dir, current_week, current_year)
|
|
||||||
|
|
||||||
if (is.null(current_mosaic)) {
|
# ========== PHASE 1: WEEKLY ANALYSIS SETUP ==========
|
||||||
warning(paste("Could not load current week mosaic for week", current_week, current_year))
|
message("\n", strrep("-", 70))
|
||||||
return(NULL)
|
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ")
|
||||||
|
message(strrep("-", 70))
|
||||||
|
|
||||||
|
weeks <- calculate_week_numbers(end_date)
|
||||||
|
current_week <- weeks$current_week
|
||||||
|
current_year <- weeks$current_year
|
||||||
|
previous_week <- weeks$previous_week
|
||||||
|
previous_year <- weeks$previous_year
|
||||||
|
|
||||||
|
message(paste("Week:", current_week, "/ Year (ISO 8601):", current_year))
|
||||||
|
|
||||||
|
# Find per-field weekly mosaics
|
||||||
|
message("Finding per-field weekly mosaics...")
|
||||||
|
|
||||||
|
if (!dir.exists(weekly_mosaic)) {
|
||||||
|
stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic,
|
||||||
|
"\nScript 40 (mosaic creation) must be run first."))
|
||||||
}
|
}
|
||||||
|
|
||||||
# Extract field statistics
|
field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE)
|
||||||
message("Extracting field statistics from current mosaic...")
|
field_dirs <- field_dirs[field_dirs != ""]
|
||||||
field_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf)
|
|
||||||
|
|
||||||
# Load previous week stats for comparison
|
if (length(field_dirs) == 0) {
|
||||||
message("Loading historical data for trends...")
|
stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic,
|
||||||
target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1)
|
"\nScript 40 must create weekly_mosaic/{FIELD}/ structure."))
|
||||||
previous_stats <- NULL
|
|
||||||
|
|
||||||
previous_mosaic <- load_weekly_ci_mosaic(mosaic_dir, target_prev$week, target_prev$year)
|
|
||||||
if (!is.null(previous_mosaic)) {
|
|
||||||
previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Calculate 4-week historical trend
|
# Verify we have mosaics for this week
|
||||||
message("Calculating field trends...")
|
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year)
|
||||||
ci_rds_path <- file.path(data_dir, "combined_CI", "combined_CI_data.rds")
|
per_field_files <- c()
|
||||||
|
for (field in field_dirs) {
|
||||||
|
field_mosaic_dir <- file.path(weekly_mosaic, field)
|
||||||
|
files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
|
||||||
|
if (length(files) > 0) {
|
||||||
|
per_field_files <- c(per_field_files, files)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
field_analysis <- calculate_field_statistics(
|
if (length(per_field_files) == 0) {
|
||||||
field_stats = field_stats,
|
stop(paste("ERROR: No mosaics found for week", current_week, "year", current_year,
|
||||||
previous_stats = previous_stats,
|
"\nExpected pattern:", single_file_pattern,
|
||||||
|
"\nChecked:", weekly_mosaic))
|
||||||
|
}
|
||||||
|
|
||||||
|
message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics"))
|
||||||
|
|
||||||
|
# ========== PHASE 2: LOAD HISTORICAL DATA ==========
|
||||||
|
message("\nLoading historical field data for trend calculations...")
|
||||||
|
num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG)
|
||||||
|
message(paste(" Attempting to load up to", num_weeks_to_load, "weeks of historical data..."))
|
||||||
|
|
||||||
|
allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv)
|
||||||
|
|
||||||
|
historical_data <- load_historical_field_data(
|
||||||
|
project_dir, current_week, current_year, reports_dir,
|
||||||
|
num_weeks = num_weeks_to_load,
|
||||||
|
auto_generate = allow_auto_gen,
|
||||||
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
|
daily_vals_dir = daily_vals_dir
|
||||||
|
)
|
||||||
|
|
||||||
|
# ========== PHASE 3: LOAD PLANTING DATES ==========
|
||||||
|
message("\nLoading harvest data from harvest.xlsx for planting dates...")
|
||||||
|
# Use load_harvest_data() from 80_utils_common.R for consistency with 80_calculate_kpis.R
|
||||||
|
harvesting_data <- load_harvest_data(data_dir)
|
||||||
|
|
||||||
|
planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
|
||||||
|
|
||||||
|
if (is.null(planting_dates) || nrow(planting_dates) == 0) {
|
||||||
|
message("WARNING: No planting dates available. Using NA for all fields.")
|
||||||
|
planting_dates <- data.frame(
|
||||||
|
field_id = field_boundaries_sf$field,
|
||||||
|
planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== PHASE 4: CALCULATE WEEKLY STATISTICS ==========
|
||||||
|
message("\nUsing modular RDS-based approach for weekly statistics...")
|
||||||
|
|
||||||
|
# Current week
|
||||||
|
message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...")
|
||||||
|
current_stats <- load_or_calculate_weekly_stats(
|
||||||
week_num = current_week,
|
week_num = current_week,
|
||||||
year = current_year,
|
year = current_year,
|
||||||
ci_rds_path = ci_rds_path,
|
project_dir = project_dir,
|
||||||
field_boundaries_sf = field_boundaries_sf,
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
harvesting_data = harvesting_data
|
mosaic_dir = weekly_mosaic,
|
||||||
|
reports_dir = reports_dir,
|
||||||
|
report_date = end_date
|
||||||
|
)
|
||||||
|
message(paste(" ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week"))
|
||||||
|
|
||||||
|
# Previous week
|
||||||
|
message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...")
|
||||||
|
prev_report_date <- end_date - 7
|
||||||
|
|
||||||
|
prev_stats <- load_or_calculate_weekly_stats(
|
||||||
|
week_num = previous_week,
|
||||||
|
year = previous_year,
|
||||||
|
project_dir = project_dir,
|
||||||
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
|
mosaic_dir = weekly_mosaic,
|
||||||
|
reports_dir = reports_dir,
|
||||||
|
report_date = prev_report_date
|
||||||
|
)
|
||||||
|
message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week"))
|
||||||
|
|
||||||
|
# ========== PHASE 5: CALCULATE TRENDS ==========
|
||||||
|
message("\n3. Calculating trend columns...")
|
||||||
|
current_stats <- calculate_kpi_trends(
|
||||||
|
current_stats, prev_stats,
|
||||||
|
project_dir = project_dir,
|
||||||
|
reports_dir = reports_dir,
|
||||||
|
current_week = current_week,
|
||||||
|
year = current_year
|
||||||
|
)
|
||||||
|
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
|
||||||
|
|
||||||
|
# ========== PHASE 6: LOAD HARVEST PROBABILITIES ==========
|
||||||
|
message("\n4. Loading harvest probabilities from script 31...")
|
||||||
|
# Use get_harvest_output_path() to safely build path (stored in kpi_reports_dir)
|
||||||
|
harvest_prob_file <- get_harvest_output_path(project_dir, current_week, current_year)
|
||||||
|
message(paste(" Looking for:", harvest_prob_file))
|
||||||
|
|
||||||
|
imminent_prob_data <- tryCatch({
|
||||||
|
if (file.exists(harvest_prob_file)) {
|
||||||
|
prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE,
|
||||||
|
col_types = readr::cols(.default = readr::col_character()))
|
||||||
|
message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields"))
|
||||||
|
prob_df %>%
|
||||||
|
select(field, imminent_prob, detected_prob) %>%
|
||||||
|
rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob)
|
||||||
|
} else {
|
||||||
|
message(paste(" INFO: Harvest probabilities not available (script 31 not run)"))
|
||||||
|
NULL
|
||||||
|
}
|
||||||
|
}, error = function(e) {
|
||||||
|
message(paste(" WARNING: Could not load harvest probabilities:", e$message))
|
||||||
|
NULL
|
||||||
|
})
|
||||||
|
|
||||||
|
# ========== PHASE 7: CALCULATE GAP SCORES ==========
|
||||||
|
gap_scores_df <- calculate_gap_scores(per_field_files, field_boundaries_sf)
|
||||||
|
|
||||||
|
# ========== PHASE 8: BUILD FINAL PER-FIELD DATAFRAME ==========
|
||||||
|
field_analysis_df <- calculate_all_field_kpis(
|
||||||
|
current_stats = current_stats,
|
||||||
|
planting_dates = planting_dates,
|
||||||
|
imminent_prob_data = imminent_prob_data,
|
||||||
|
gap_scores_df = gap_scores_df,
|
||||||
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
|
end_date = end_date
|
||||||
)
|
)
|
||||||
|
|
||||||
if (is.null(field_analysis)) {
|
# ========== PHASE 9: EXPORT PER-FIELD RESULTS ==========
|
||||||
message("Could not generate field analysis")
|
|
||||||
return(NULL)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Generate summary
|
|
||||||
message("Generating summary statistics...")
|
|
||||||
summary_df <- generate_field_analysis_summary(field_analysis)
|
|
||||||
|
|
||||||
# Export
|
|
||||||
message("Exporting field analysis...")
|
|
||||||
export_paths <- export_field_analysis_excel(
|
export_paths <- export_field_analysis_excel(
|
||||||
field_analysis,
|
field_analysis_df,
|
||||||
summary_df,
|
NULL,
|
||||||
PROJECT_DIR,
|
project_dir,
|
||||||
current_week,
|
current_week,
|
||||||
current_year,
|
current_year,
|
||||||
output_dir
|
reports_dir
|
||||||
)
|
)
|
||||||
|
|
||||||
message(paste("\n✓ CANE_SUPPLY field analysis complete. Week", current_week, current_year, "\n"))
|
# cat("\n--- Per-field Results (first 10) ---\n")
|
||||||
|
# available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category")
|
||||||
|
# available_cols <- available_cols[available_cols %in% names(field_analysis_df)]
|
||||||
|
# if (length(available_cols) > 0) {
|
||||||
|
# print(head(field_analysis_df[, available_cols], 10))
|
||||||
|
# }
|
||||||
|
|
||||||
result <- list(
|
# ========== PHASE 10: CALCULATE FARM-LEVEL KPIS ==========
|
||||||
field_analysis = field_analysis,
|
# farm_kpi_results <- calculate_farm_level_kpis(
|
||||||
summary = summary_df,
|
# field_analysis_df,
|
||||||
exports = export_paths
|
# current_week,
|
||||||
)
|
# current_year,
|
||||||
|
# end_date
|
||||||
|
# )
|
||||||
|
|
||||||
return(result)
|
# For now, farm-level KPIs are not implemented in CANE_SUPPLY workflow
|
||||||
|
farm_kpi_results <- NULL
|
||||||
|
|
||||||
|
# ========== RETURN RESULTS ==========
|
||||||
|
return(list(
|
||||||
|
field_analysis_df = field_analysis_df,
|
||||||
|
farm_kpi_results = farm_kpi_results,
|
||||||
|
export_paths = export_paths
|
||||||
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
|
||||||
|
|
@ -8,10 +8,34 @@
|
||||||
# - Field statistics extraction
|
# - Field statistics extraction
|
||||||
# - Week/year calculations for consistent date handling
|
# - Week/year calculations for consistent date handling
|
||||||
# - Excel/CSV/RDS export utilities
|
# - Excel/CSV/RDS export utilities
|
||||||
|
# - Yield prediction using ML models (Random Forest with Feature Selection)
|
||||||
#
|
#
|
||||||
# Used by: 80_calculate_kpis.R, all client-specific utils files
|
# Used by: 80_calculate_kpis.R, all client-specific utils files
|
||||||
|
#
|
||||||
|
# NOTE: Libraries required by yield prediction (caret, CAST, here) are loaded
|
||||||
|
# in the main script 80_calculate_kpis.R, not here. This keeps dependencies
|
||||||
|
# centralized in the orchestrator script.
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# LOAD PROJECT CONFIGURATION (Guard against re-sourcing)
|
||||||
|
# ============================================================================
|
||||||
|
# Ensure parameters_project.R has been sourced to provide global configuration
|
||||||
|
# (PROJECT, data_dir, field_boundaries_path, etc.). Use a sentinel to avoid double-sourcing.
|
||||||
|
if (!exists("PROJECT", envir = .GlobalEnv)) {
|
||||||
|
tryCatch({
|
||||||
|
source(here::here("r_app", "parameters_project.R"))
|
||||||
|
}, error = function(e) {
|
||||||
|
# Fallback: try relative path if here() doesn't work
|
||||||
|
tryCatch({
|
||||||
|
source("parameters_project.R")
|
||||||
|
}, error = function(e2) {
|
||||||
|
warning(paste("Could not source parameters_project.R:", e2$message,
|
||||||
|
"- using defaults or expecting caller to set PROJECT/data_dir"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# CONSTANTS (from 80_calculate_kpis.R)
|
# CONSTANTS (from 80_calculate_kpis.R)
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
@ -355,12 +379,11 @@ calculate_cv_trend_long_term <- function(cv_values) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Calculate Gap Filling Score KPI (2σ method)
|
#' Calculate Gap Filling Score KPI (2σ method)
|
||||||
#' @param ci_raster Current week CI raster
|
#' @param ci_raster Current week CI raster (single band)
|
||||||
#' @param field_boundaries Field boundaries
|
#' @param field_boundaries Field boundaries (sf or SpatVector)
|
||||||
#' @return Data frame with field-level gap filling scores
|
#' @return List with summary data frame and field-level results data frame
|
||||||
calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
||||||
safe_log("Calculating Gap Filling Score KPI (placeholder)")
|
|
||||||
|
|
||||||
# Handle both sf and SpatVector inputs
|
# Handle both sf and SpatVector inputs
|
||||||
if (!inherits(field_boundaries, "SpatVector")) {
|
if (!inherits(field_boundaries, "SpatVector")) {
|
||||||
field_boundaries_vect <- terra::vect(field_boundaries)
|
field_boundaries_vect <- terra::vect(field_boundaries)
|
||||||
|
|
@ -368,19 +391,11 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
||||||
field_boundaries_vect <- field_boundaries
|
field_boundaries_vect <- field_boundaries
|
||||||
}
|
}
|
||||||
|
|
||||||
# Ensure field_boundaries_vect is valid and matches field_boundaries dimensions
|
|
||||||
n_fields_vect <- length(field_boundaries_vect)
|
|
||||||
n_fields_sf <- nrow(field_boundaries)
|
|
||||||
|
|
||||||
if (n_fields_sf != n_fields_vect) {
|
|
||||||
warning(paste("Field boundary mismatch: nrow(field_boundaries)=", n_fields_sf, "vs length(field_boundaries_vect)=", n_fields_vect, ". Using actual SpatVector length."))
|
|
||||||
}
|
|
||||||
|
|
||||||
field_results <- data.frame()
|
field_results <- data.frame()
|
||||||
|
|
||||||
for (i in seq_len(nrow(field_boundaries))) {
|
for (i in seq_len(nrow(field_boundaries))) {
|
||||||
field_name <- field_boundaries$field[i]
|
field_name <- if ("field" %in% names(field_boundaries)) field_boundaries$field[i] else NA_character_
|
||||||
sub_field_name <- field_boundaries$sub_field[i]
|
sub_field_name <- if ("sub_field" %in% names(field_boundaries)) field_boundaries$sub_field[i] else NA_character_
|
||||||
field_vect <- field_boundaries_vect[i]
|
field_vect <- field_boundaries_vect[i]
|
||||||
|
|
||||||
# Extract CI values using helper function
|
# Extract CI values using helper function
|
||||||
|
|
@ -391,16 +406,17 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
||||||
# Gap score using 2σ below median to detect outliers
|
# Gap score using 2σ below median to detect outliers
|
||||||
median_ci <- median(valid_values)
|
median_ci <- median(valid_values)
|
||||||
sd_ci <- sd(valid_values)
|
sd_ci <- sd(valid_values)
|
||||||
outlier_threshold <- median_ci - (2 * sd_ci)
|
outlier_threshold <- median_ci - (1 * sd_ci)
|
||||||
low_ci_pixels <- sum(valid_values < outlier_threshold)
|
low_ci_pixels <- sum(valid_values < outlier_threshold)
|
||||||
total_pixels <- length(valid_values)
|
total_pixels <- length(valid_values)
|
||||||
gap_score <- (low_ci_pixels / total_pixels) * 100
|
gap_score <- round((low_ci_pixels / total_pixels) * 100, 2)
|
||||||
|
|
||||||
# Classify gap severity
|
# Classify gap severity
|
||||||
gap_level <- dplyr::case_when(
|
gap_level <- dplyr::case_when(
|
||||||
gap_score < 10 ~ "Minimal",
|
gap_score < 10 ~ "Minimal",
|
||||||
gap_score < 25 ~ "Moderate",
|
gap_score < 25 ~ "Moderate",
|
||||||
TRUE ~ "Significant"
|
gap_score >= 25 ~ "Significant",
|
||||||
|
TRUE ~ NA_character_
|
||||||
)
|
)
|
||||||
|
|
||||||
field_results <- rbind(field_results, data.frame(
|
field_results <- rbind(field_results, data.frame(
|
||||||
|
|
@ -412,7 +428,6 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
||||||
outlier_threshold = outlier_threshold
|
outlier_threshold = outlier_threshold
|
||||||
))
|
))
|
||||||
} else {
|
} else {
|
||||||
# Not enough valid data, fill with NA row
|
|
||||||
field_results <- rbind(field_results, data.frame(
|
field_results <- rbind(field_results, data.frame(
|
||||||
field = field_name,
|
field = field_name,
|
||||||
sub_field = sub_field_name,
|
sub_field = sub_field_name,
|
||||||
|
|
@ -423,9 +438,99 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Summarize results
|
||||||
|
gap_summary <- field_results %>%
|
||||||
|
dplyr::group_by(gap_level) %>%
|
||||||
|
dplyr::summarise(field_count = n(), .groups = 'drop') %>%
|
||||||
|
dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1))
|
||||||
|
|
||||||
|
return(list(summary = gap_summary, field_results = field_results))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#' Calculate gap filling scores for all per-field mosaics (wrapper)
|
||||||
|
#'
|
||||||
|
#' This wrapper handles per-field mosaic structure by iterating over
|
||||||
|
#' individual field files and calling the basic KPI function
|
||||||
|
#'
|
||||||
|
#' @param per_field_files Character vector of paths to per-field mosaic TIFFs
|
||||||
|
#' @param field_boundaries_sf sf object with field geometries
|
||||||
|
#' @return data.frame with Field_id and gap_score columns
|
||||||
|
calculate_gap_scores <- function(per_field_files, field_boundaries_sf) {
|
||||||
|
message("\nCalculating gap filling scores (2σ method)...")
|
||||||
|
message(paste(" Using per-field mosaics for", length(per_field_files), "fields"))
|
||||||
|
|
||||||
|
field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field)
|
||||||
|
|
||||||
|
process_gap_for_field <- function(field_file) {
|
||||||
|
field_id <- basename(dirname(field_file))
|
||||||
|
field_bounds <- field_boundaries_by_id[[field_id]]
|
||||||
|
|
||||||
|
if (is.null(field_bounds) || nrow(field_bounds) == 0) {
|
||||||
|
return(data.frame(Field_id = field_id, gap_score = NA_real_))
|
||||||
|
}
|
||||||
|
|
||||||
|
tryCatch({
|
||||||
|
field_raster <- terra::rast(field_file)
|
||||||
|
ci_band_name <- "CI"
|
||||||
|
if (!(ci_band_name %in% names(field_raster))) {
|
||||||
|
return(data.frame(Field_id = field_id, gap_score = NA_real_))
|
||||||
|
}
|
||||||
|
field_ci_band <- field_raster[[ci_band_name]]
|
||||||
|
names(field_ci_band) <- "CI"
|
||||||
|
|
||||||
|
gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds)
|
||||||
|
|
||||||
|
if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) {
|
||||||
|
return(data.frame(Field_id = field_id, gap_score = NA_real_))
|
||||||
|
}
|
||||||
|
|
||||||
|
gap_scores <- gap_result$field_results
|
||||||
|
gap_scores$Field_id <- gap_scores$field
|
||||||
|
gap_scores <- gap_scores[, c("Field_id", "gap_score")]
|
||||||
|
|
||||||
|
stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE))
|
||||||
|
}, error = function(e) {
|
||||||
|
message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message))
|
||||||
|
data.frame(Field_id = field_id, gap_score = NA_real_)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
# Process fields sequentially with progress bar
|
||||||
|
message(" Processing gap scores for ", length(per_field_files), " fields...")
|
||||||
|
pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50)
|
||||||
|
|
||||||
|
results_list <- lapply(seq_along(per_field_files), function(idx) {
|
||||||
|
result <- process_gap_for_field(per_field_files[[idx]])
|
||||||
|
utils::setTxtProgressBar(pb, idx)
|
||||||
|
result
|
||||||
|
})
|
||||||
|
close(pb)
|
||||||
|
|
||||||
|
gap_scores_df <- dplyr::bind_rows(results_list)
|
||||||
|
|
||||||
|
if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
|
||||||
|
gap_scores_df <- gap_scores_df %>%
|
||||||
|
dplyr::group_by(Field_id) %>%
|
||||||
|
dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop")
|
||||||
|
|
||||||
|
message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields"))
|
||||||
|
|
||||||
|
# Guard against all-NA values which would produce Inf/-Inf warnings
|
||||||
|
if (any(is.finite(gap_scores_df$gap_score))) {
|
||||||
|
min_score <- round(min(gap_scores_df$gap_score, na.rm = TRUE), 2)
|
||||||
|
max_score <- round(max(gap_scores_df$gap_score, na.rm = TRUE), 2)
|
||||||
|
message(paste(" Gap score range:", min_score, "-", max_score, "%"))
|
||||||
|
} else {
|
||||||
|
message(" Gap score range: All values are NA (no valid gap scores)")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
message(" WARNING: No gap scores calculated from per-field mosaics")
|
||||||
|
gap_scores_df <- NULL
|
||||||
|
}
|
||||||
|
|
||||||
|
return(gap_scores_df)
|
||||||
|
}
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# HELPER FUNCTIONS
|
# HELPER FUNCTIONS
|
||||||
|
|
@ -517,6 +622,88 @@ extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# DATA LOADING HELPERS
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
#' Load and validate harvest data from harvest.xlsx
|
||||||
|
#'
|
||||||
|
#' Encapsulates harvest data loading with validation, type coercion, and error handling.
|
||||||
|
#' Returns a data frame with required columns (field, year, tonnage_ha) or an empty
|
||||||
|
#' data frame with proper structure if loading fails.
|
||||||
|
#'
|
||||||
|
#' @param data_dir Path to data directory containing harvest.xlsx
|
||||||
|
#'
|
||||||
|
#' @return data.frame with columns: field (character), year (numeric), tonnage_ha (numeric)
|
||||||
|
#' - On success: data frame with N rows of harvest records
|
||||||
|
#' - On failure: empty data frame with correct structure (0 rows, 3 columns)
|
||||||
|
#'
|
||||||
|
#' @details
|
||||||
|
#' **File Location**: Expected at `file.path(data_dir, "harvest.xlsx")`
|
||||||
|
#'
|
||||||
|
#' **Validation**:
|
||||||
|
#' - Checks file existence before reading
|
||||||
|
#' - Validates required columns: field, year, tonnage_ha
|
||||||
|
#' - Coerces year and tonnage_ha to numeric
|
||||||
|
#' - Logs status messages for debugging
|
||||||
|
#'
|
||||||
|
#' **Error Handling**:
|
||||||
|
#' - Missing file: Returns empty DF (logs NOTE)
|
||||||
|
#' - Missing columns: Returns empty DF (logs WARNING)
|
||||||
|
#' - Read errors: Returns empty DF (logs WARNING)
|
||||||
|
#' - Always returns valid data frame structure (won't return NULL)
|
||||||
|
#'
|
||||||
|
#' **Usage**:
|
||||||
|
#' ```r
|
||||||
|
#' harvesting_data <- load_harvest_data(setup$data_dir)
|
||||||
|
#' # harvesting_data is guaranteed to be a data.frame with 3 columns
|
||||||
|
#' # even if harvest.xlsx is unavailable or invalid
|
||||||
|
#' ```
|
||||||
|
load_harvest_data <- function(data_dir) {
|
||||||
|
harvesting_data <- NULL
|
||||||
|
harvest_file <- file.path(data_dir, "harvest.xlsx")
|
||||||
|
|
||||||
|
if (file.exists(harvest_file)) {
|
||||||
|
tryCatch({
|
||||||
|
harvesting_data <- readxl::read_excel(harvest_file)
|
||||||
|
|
||||||
|
# Ensure required columns are present
|
||||||
|
required_cols <- c("field", "year", "tonnage_ha")
|
||||||
|
if (all(required_cols %in% names(harvesting_data))) {
|
||||||
|
# Convert to data frame and ensure column types
|
||||||
|
harvesting_data <- as.data.frame(harvesting_data)
|
||||||
|
# CRITICAL: Coerce field to character to preserve leading zeros (e.g., "01", "02")
|
||||||
|
harvesting_data$field <- as.character(harvesting_data$field)
|
||||||
|
harvesting_data$year <- as.numeric(harvesting_data$year)
|
||||||
|
harvesting_data$tonnage_ha <- as.numeric(harvesting_data$tonnage_ha)
|
||||||
|
|
||||||
|
message(paste(" ✓ Loaded harvest data:", nrow(harvesting_data), "records from harvest.xlsx"))
|
||||||
|
return(harvesting_data)
|
||||||
|
} else {
|
||||||
|
message(paste(" WARNING: harvest.xlsx missing required columns. Expected: field, year, tonnage_ha"))
|
||||||
|
harvesting_data <- NULL
|
||||||
|
}
|
||||||
|
}, error = function(e) {
|
||||||
|
message(paste(" WARNING: Could not read harvest.xlsx:", e$message))
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
message(paste(" NOTE: harvest.xlsx not found at", harvest_file))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Fallback: create empty data frame if loading failed
|
||||||
|
if (is.null(harvesting_data)) {
|
||||||
|
message(" WARNING: No harvest data available. TCH yield prediction will use graceful fallback (NA values)")
|
||||||
|
harvesting_data <- data.frame(
|
||||||
|
field = character(), # Explicitly character to preserve leading zeros when data is added
|
||||||
|
year = numeric(),
|
||||||
|
tonnage_ha = numeric(),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
return(harvesting_data)
|
||||||
|
}
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# FIELD STATISTICS EXTRACTION
|
# FIELD STATISTICS EXTRACTION
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
@ -678,13 +865,13 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
|
||||||
NULL
|
NULL
|
||||||
}
|
}
|
||||||
|
|
||||||
output_subdir <- file.path(reports_dir, "field_analysis")
|
output_dir <- file.path(reports_dir)
|
||||||
if (!dir.exists(output_subdir)) {
|
if (!dir.exists(output_dir)) {
|
||||||
dir.create(output_subdir, recursive = TRUE)
|
dir.create(output_dir, recursive = TRUE)
|
||||||
}
|
}
|
||||||
|
|
||||||
excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".xlsx")
|
excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".xlsx")
|
||||||
excel_path <- file.path(output_subdir, excel_filename)
|
excel_path <- file.path(output_dir, excel_filename)
|
||||||
excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
|
excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
|
||||||
|
|
||||||
# Build sheets list dynamically
|
# Build sheets list dynamically
|
||||||
|
|
@ -709,14 +896,14 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds")
|
rds_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".rds")
|
||||||
rds_path <- file.path(reports_dir, rds_filename)
|
rds_path <- file.path(output_dir, rds_filename)
|
||||||
|
|
||||||
saveRDS(kpi_data, rds_path)
|
saveRDS(kpi_data, rds_path)
|
||||||
message(paste("✓ Field analysis RDS exported to:", rds_path))
|
message(paste("✓ Field analysis RDS exported to:", rds_path))
|
||||||
|
|
||||||
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".csv")
|
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".csv")
|
||||||
csv_path <- file.path(output_subdir, csv_filename)
|
csv_path <- file.path(output_dir, csv_filename)
|
||||||
write_csv(field_df_rounded, csv_path)
|
write_csv(field_df_rounded, csv_path)
|
||||||
message(paste("✓ Field analysis CSV exported to:", csv_path))
|
message(paste("✓ Field analysis CSV exported to:", csv_path))
|
||||||
|
|
||||||
|
|
@ -1304,7 +1491,7 @@ prepare_predictions <- function(predictions, newdata) {
|
||||||
dplyr::mutate(
|
dplyr::mutate(
|
||||||
sub_field = newdata$sub_field,
|
sub_field = newdata$sub_field,
|
||||||
field = newdata$field,
|
field = newdata$field,
|
||||||
Age_days = newdata$DOY,
|
Age_days = newdata$DAH,
|
||||||
total_CI = round(newdata$cumulative_CI, 0),
|
total_CI = round(newdata$cumulative_CI, 0),
|
||||||
predicted_Tcha = round(predicted_Tcha, 0),
|
predicted_Tcha = round(predicted_Tcha, 0),
|
||||||
season = newdata$season
|
season = newdata$season
|
||||||
|
|
@ -1313,3 +1500,258 @@ prepare_predictions <- function(predictions, newdata) {
|
||||||
dplyr::left_join(., newdata, by = c("field", "sub_field", "season"))
|
dplyr::left_join(., newdata, by = c("field", "sub_field", "season"))
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# YIELD PREDICTION KPI (SHARED ML-BASED MODEL FOR ALL CLIENT TYPES)
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
#' Helper function for graceful fallback when training data unavailable
|
||||||
|
#'
|
||||||
|
#' @param field_boundaries Field boundaries (sf or SpatVector)
|
||||||
|
#' @return List with summary and field_results (both with NA values)
|
||||||
|
create_fallback_result <- function(field_boundaries) {
|
||||||
|
# Convert to SpatVector if needed (for terra::project)
|
||||||
|
if (!inherits(field_boundaries, "SpatVector")) {
|
||||||
|
field_boundaries <- terra::vect(field_boundaries)
|
||||||
|
}
|
||||||
|
field_boundaries_projected <- terra::project(field_boundaries, "EPSG:6933") # Equal Earth projection
|
||||||
|
field_areas <- terra::expanse(field_boundaries_projected) / 10000 # Convert m² to hectares
|
||||||
|
total_area <- sum(field_areas)
|
||||||
|
|
||||||
|
summary_result <- data.frame(
|
||||||
|
field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"),
|
||||||
|
count = c(0, 0, 0, nrow(field_boundaries)),
|
||||||
|
value = c(NA_real_, NA_real_, NA_real_, round(total_area, 1)),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
field_results <- data.frame(
|
||||||
|
field = character(0),
|
||||||
|
sub_field = character(0),
|
||||||
|
Age_days = numeric(0),
|
||||||
|
yield_forecast_t_ha = numeric(0),
|
||||||
|
season = numeric(0),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
return(list(summary = summary_result, field_results = field_results))
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Calculate yield prediction KPI using Random Forest with Feature Selection
|
||||||
|
#'
|
||||||
|
#' Trains a Random Forest model on historical harvest data with cumulative CI,
|
||||||
|
#' days after harvest (DAH), and CI-per-day as predictors. Uses CAST::ffs() for
|
||||||
|
#' Forward Feature Selection. Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD).
|
||||||
|
#'
|
||||||
|
#' @param field_boundaries Field boundaries (sf or SpatVector)
|
||||||
|
#' @param harvesting_data Data frame with harvest data including tonnage_ha column
|
||||||
|
#' @param cumulative_CI_vals_dir Directory containing "All_pivots_Cumulative_CI_quadrant_year_v2.rds"
|
||||||
|
#'
|
||||||
|
#' @return List with:
|
||||||
|
#' - summary: Data frame with field_groups, count, value (quartiles and total area)
|
||||||
|
#' - field_results: Data frame with field-level yield forecasts (yield_forecast_t_ha)
|
||||||
|
#'
|
||||||
|
#' @details
|
||||||
|
#' **Training Data Requirements:**
|
||||||
|
#' - cumulative_CI_vals_dir must contain "All_pivots_Cumulative_CI_quadrant_year_v2.rds"
|
||||||
|
#' - harvesting_data must have tonnage_ha column with numeric yield values
|
||||||
|
#' - Training stops gracefully if either is missing (returns NA values, not fake numbers)
|
||||||
|
#'
|
||||||
|
#' **Model Specifications:**
|
||||||
|
#' - Algorithm: Random Forest (caret + CAST)
|
||||||
|
#' - Feature Selection: Forward Feature Selection (CAST::ffs)
|
||||||
|
#' - Cross-validation: 5-fold CV
|
||||||
|
#' - Predictors: cumulative_CI, DAH, CI_per_day
|
||||||
|
#' - Mature field threshold: DAH >= DAH_MATURITY_THRESHOLD (8 months, ~240 days)
|
||||||
|
#' - Output: Field-level yield forecasts grouped by quartile
|
||||||
|
#'
|
||||||
|
#' **Error Handling:**
|
||||||
|
#' - Missing tonnage_ha: Returns graceful fallback with NA (not zero) values
|
||||||
|
#' - No training data: Logs WARNING, returns empty field_results
|
||||||
|
#' - RDS file missing: Returns graceful fallback
|
||||||
|
#' - Prediction errors: Wrapped in tryCatch, returns fallback on failure
|
||||||
|
calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cumulative_CI_vals_dir) {
|
||||||
|
safe_log("Calculating yield prediction KPI using Random Forest with Feature Selection")
|
||||||
|
|
||||||
|
tryCatch({
|
||||||
|
# Check if tonnage_ha column is present and has valid data
|
||||||
|
if (is.null(harvesting_data) ||
|
||||||
|
!("tonnage_ha" %in% names(harvesting_data)) ||
|
||||||
|
all(is.na(harvesting_data$tonnage_ha))) {
|
||||||
|
safe_log("No harvest data available: lacking tonnage_ha column or all values are NA", "WARNING")
|
||||||
|
return(create_fallback_result(field_boundaries))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check if CI quadrant RDS file exists
|
||||||
|
ci_quadrant_path <- file.path(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")
|
||||||
|
if (!file.exists(ci_quadrant_path)) {
|
||||||
|
safe_log(paste("CI quadrant file not found at:", ci_quadrant_path), "WARNING")
|
||||||
|
return(create_fallback_result(field_boundaries))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Load CI quadrant data and fill missing field/sub_field values
|
||||||
|
CI_quadrant <- readRDS(ci_quadrant_path) %>%
|
||||||
|
dplyr::group_by(model) %>%
|
||||||
|
tidyr::fill(field, sub_field, .direction = "downup") %>%
|
||||||
|
dplyr::ungroup()
|
||||||
|
|
||||||
|
# Rename year column to season for consistency
|
||||||
|
harvesting_data_renamed <- harvesting_data %>% dplyr::rename(season = year)
|
||||||
|
|
||||||
|
# Join CI and yield data
|
||||||
|
CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed,
|
||||||
|
by = c("field", "sub_field", "season")) %>%
|
||||||
|
dplyr::group_by(sub_field, season) %>%
|
||||||
|
dplyr::slice(which.max(DAH)) %>%
|
||||||
|
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DAH, season, sub_area) %>%
|
||||||
|
dplyr::mutate(CI_per_day = ifelse(DAH > 0, cumulative_CI / DAH, NA_real_))
|
||||||
|
|
||||||
|
|
||||||
|
# Define predictors and response variables
|
||||||
|
predictors <- c("cumulative_CI", "DAH", "CI_per_day")
|
||||||
|
response <- "tonnage_ha"
|
||||||
|
|
||||||
|
# Prepare training dataset (fields with harvest data)
|
||||||
|
CI_and_yield_test <- CI_and_yield %>%
|
||||||
|
as.data.frame() %>%
|
||||||
|
dplyr::filter(!is.na(tonnage_ha))
|
||||||
|
|
||||||
|
# Check if we have minimum training data
|
||||||
|
if (nrow(CI_and_yield_test) == 0) {
|
||||||
|
safe_log("No training data available: no fields with tonnage_ha observations", "WARNING")
|
||||||
|
return(create_fallback_result(field_boundaries))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Pre-clean training data: remove rows with any NAs in predictors or response
|
||||||
|
# This is required because CAST::ffs doesn't support na.rm parameter
|
||||||
|
CI_and_yield_test <- CI_and_yield_test %>%
|
||||||
|
dplyr::filter(!dplyr::if_any(dplyr::all_of(c(predictors, response)), is.na))
|
||||||
|
|
||||||
|
if (nrow(CI_and_yield_test) == 0) {
|
||||||
|
safe_log("No complete training data after removing NAs in predictors/response", "WARNING")
|
||||||
|
return(create_fallback_result(field_boundaries))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Prepare prediction dataset (fields without harvest data, mature fields only)
|
||||||
|
prediction_yields <- CI_and_yield %>%
|
||||||
|
as.data.frame() %>%
|
||||||
|
dplyr::filter(is.na(tonnage_ha) & DAH >= DAH_MATURITY_THRESHOLD) # Mature fields only
|
||||||
|
|
||||||
|
# Configure model training parameters
|
||||||
|
ctrl <- caret::trainControl(
|
||||||
|
method = "cv",
|
||||||
|
savePredictions = TRUE,
|
||||||
|
allowParallel = TRUE,
|
||||||
|
number = 5,
|
||||||
|
verboseIter = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
# Train the model with forward feature selection
|
||||||
|
set.seed(202) # For reproducibility
|
||||||
|
safe_log("Training Random Forest model with Forward Feature Selection...")
|
||||||
|
model_ffs_rf <- CAST::ffs(
|
||||||
|
CI_and_yield_test[, predictors],
|
||||||
|
CI_and_yield_test[, response],
|
||||||
|
method = "rf",
|
||||||
|
trControl = ctrl,
|
||||||
|
importance = TRUE,
|
||||||
|
withinSE = TRUE,
|
||||||
|
tuneLength = 5
|
||||||
|
)
|
||||||
|
|
||||||
|
# Predict yields on validation data (same as training data for RMSE calculation)
|
||||||
|
pred_ffs_rf <- prepare_predictions(
|
||||||
|
stats::predict(model_ffs_rf, newdata = CI_and_yield_test),
|
||||||
|
CI_and_yield_test
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract cross-validated RMSE from the model object (more honest than in-sample RMSE)
|
||||||
|
# The CAST::ffs model stores CV results in $results data frame
|
||||||
|
# We extract the RMSE from the best model (lowest RMSE across folds)
|
||||||
|
if (!is.null(model_ffs_rf$results) && "RMSE" %in% names(model_ffs_rf$results)) {
|
||||||
|
# Get minimum RMSE from cross-validation results (best model from feature selection)
|
||||||
|
rmse_value <- min(model_ffs_rf$results$RMSE, na.rm = TRUE)
|
||||||
|
safe_log(paste("Yield prediction RMSE (cross-validated):", round(rmse_value, 2), "t/ha"))
|
||||||
|
} else {
|
||||||
|
# Fallback: compute in-sample RMSE if CV results unavailable, but label it clearly
|
||||||
|
rmse_value <- sqrt(mean((pred_ffs_rf$predicted_Tcha - CI_and_yield_test$tonnage_ha)^2, na.rm = TRUE))
|
||||||
|
safe_log(paste("Yield prediction RMSE (in-sample/training):", round(rmse_value, 2), "t/ha"))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Predict yields for current season (mature fields >= DAH_MATURITY_THRESHOLD days)
|
||||||
|
if (nrow(prediction_yields) > 0) {
|
||||||
|
pred_rf_current_season <- prepare_predictions(
|
||||||
|
stats::predict(model_ffs_rf, newdata = prediction_yields),
|
||||||
|
prediction_yields
|
||||||
|
) %>%
|
||||||
|
dplyr::filter(Age_days >= DAH_MATURITY_THRESHOLD) %>%
|
||||||
|
dplyr::select(c("field", "Age_days", "predicted_Tcha", "season"))
|
||||||
|
} else {
|
||||||
|
pred_rf_current_season <- data.frame()
|
||||||
|
}
|
||||||
|
|
||||||
|
# Calculate summary statistics for KPI
|
||||||
|
if (nrow(pred_rf_current_season) > 0) {
|
||||||
|
safe_log(paste("Number of fields with yield predictions:", nrow(pred_rf_current_season)))
|
||||||
|
safe_log(paste("Predicted yield range:",
|
||||||
|
round(min(pred_rf_current_season$predicted_Tcha, na.rm = TRUE), 1),
|
||||||
|
"-",
|
||||||
|
round(max(pred_rf_current_season$predicted_Tcha, na.rm = TRUE), 1),
|
||||||
|
"t/ha"))
|
||||||
|
|
||||||
|
# Calculate quartiles for grouping
|
||||||
|
yield_quartiles <- quantile(pred_rf_current_season$predicted_Tcha,
|
||||||
|
probs = c(0.25, 0.5, 0.75), na.rm = TRUE)
|
||||||
|
|
||||||
|
# Count fields in each quartile
|
||||||
|
top_25_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[3], na.rm = TRUE)
|
||||||
|
average_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[1] &
|
||||||
|
pred_rf_current_season$predicted_Tcha < yield_quartiles[3], na.rm = TRUE)
|
||||||
|
lowest_25_count <- sum(pred_rf_current_season$predicted_Tcha < yield_quartiles[1], na.rm = TRUE)
|
||||||
|
|
||||||
|
# Calculate total area
|
||||||
|
if (!inherits(field_boundaries, "SpatVector")) {
|
||||||
|
field_boundaries_vect <- terra::vect(field_boundaries)
|
||||||
|
} else {
|
||||||
|
field_boundaries_vect <- field_boundaries
|
||||||
|
}
|
||||||
|
|
||||||
|
# Handle both sf and SpatVector inputs for area calculation
|
||||||
|
if (inherits(field_boundaries, "sf")) {
|
||||||
|
field_boundaries_projected <- sf::st_transform(field_boundaries, "EPSG:6933")
|
||||||
|
field_areas <- sf::st_area(field_boundaries_projected) / 10000 # m² to hectares
|
||||||
|
} else {
|
||||||
|
field_boundaries_projected <- terra::project(field_boundaries_vect, "EPSG:6933")
|
||||||
|
field_areas <- terra::expanse(field_boundaries_projected) / 10000
|
||||||
|
}
|
||||||
|
total_area <- sum(as.numeric(field_areas))
|
||||||
|
|
||||||
|
safe_log(paste("Total area:", round(total_area, 1), "hectares"))
|
||||||
|
|
||||||
|
# Build summary result
|
||||||
|
result <- data.frame(
|
||||||
|
field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"),
|
||||||
|
count = c(top_25_count, average_count, lowest_25_count, nrow(field_boundaries)),
|
||||||
|
value = c(round(yield_quartiles[3], 1), round(yield_quartiles[2], 1),
|
||||||
|
round(yield_quartiles[1], 1), round(total_area, 1)),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
# Prepare field-level results
|
||||||
|
field_level_results <- pred_rf_current_season %>%
|
||||||
|
dplyr::select(field, Age_days, predicted_Tcha, season) %>%
|
||||||
|
dplyr::rename(yield_forecast_t_ha = predicted_Tcha)
|
||||||
|
|
||||||
|
safe_log("✓ Yield prediction complete")
|
||||||
|
return(list(summary = result, field_results = field_level_results))
|
||||||
|
} else {
|
||||||
|
safe_log(paste("No fields meet maturity threshold (DAH >=", DAH_MATURITY_THRESHOLD, ") for prediction"), "WARNING")
|
||||||
|
return(list(summary = create_fallback_result(field_boundaries)$summary,
|
||||||
|
field_results = data.frame()))
|
||||||
|
}
|
||||||
|
|
||||||
|
}, error = function(e) {
|
||||||
|
safe_log(paste("Error in yield prediction:", e$message), "ERROR")
|
||||||
|
return(create_fallback_result(field_boundaries))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -2,7 +2,7 @@
|
||||||
params:
|
params:
|
||||||
ref: "word-styles-reference-var1.docx"
|
ref: "word-styles-reference-var1.docx"
|
||||||
output_file: CI_report.docx
|
output_file: CI_report.docx
|
||||||
report_date: "2025-09-30"
|
report_date: "2026-02-04"
|
||||||
data_dir: "angata"
|
data_dir: "angata"
|
||||||
mail_day: "Wednesday"
|
mail_day: "Wednesday"
|
||||||
borders: FALSE
|
borders: FALSE
|
||||||
|
|
@ -61,6 +61,10 @@ suppressPackageStartupMessages({
|
||||||
library(flextable) # For formatted tables in Word output (professional table styling)
|
library(flextable) # For formatted tables in Word output (professional table styling)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Configure tmap for static plotting (required for legend.outside to work)
|
||||||
|
tmap_mode("plot") # CRITICAL: Must be "plot" mode for legends outside to render properly
|
||||||
|
tmap_options(component.autoscale = FALSE)
|
||||||
|
|
||||||
# Load custom utility functions
|
# Load custom utility functions
|
||||||
tryCatch({
|
tryCatch({
|
||||||
source("r_app/report_utils.R")
|
source("r_app/report_utils.R")
|
||||||
|
|
@ -140,12 +144,12 @@ week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year)
|
||||||
|
|
||||||
# Candidate filenames we expect (exact and common variants)
|
# Candidate filenames we expect (exact and common variants)
|
||||||
expected_summary_names <- c(
|
expected_summary_names <- c(
|
||||||
paste0(project_dir, "_kpi_summary_tables_", week_suffix, ".rds"),
|
paste0(project_dir, "_field_analysis_", week_suffix, ".rds"),
|
||||||
paste0(project_dir, "_kpi_summary_tables_", date_suffix, ".rds"),
|
paste0(project_dir, "_field_analysis_", date_suffix, ".rds"),
|
||||||
paste0(project_dir, "_kpi_summary_tables.rds"),
|
paste0(project_dir, "_field_analysis.rds"),
|
||||||
"kpi_summary_tables.rds",
|
"field_analysis.rds",
|
||||||
paste0("kpi_summary_tables_", week_suffix, ".rds"),
|
paste0("field_analysis_", week_suffix, ".rds"),
|
||||||
paste0("kpi_summary_tables_", date_suffix, ".rds")
|
paste0("field_analysis_", date_suffix, ".rds")
|
||||||
)
|
)
|
||||||
|
|
||||||
expected_field_details_names <- c(
|
expected_field_details_names <- c(
|
||||||
|
|
@ -165,13 +169,26 @@ try_load_from_dir <- function(dir, candidates) {
|
||||||
return(NULL)
|
return(NULL)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Try primary directory first
|
# Try primary directory first (field_level/)
|
||||||
summary_file <- try_load_from_dir(kpi_data_dir, expected_summary_names)
|
summary_file <- try_load_from_dir(kpi_data_dir, expected_summary_names)
|
||||||
field_details_file <- try_load_from_dir(kpi_data_dir, expected_field_details_names)
|
field_details_file <- try_load_from_dir(kpi_data_dir, expected_field_details_names)
|
||||||
|
|
||||||
# If not found, perform a workspace-wide search (slower) limited to laravel_app storage
|
# If not found, try parent directory (kpis/) where RDS is often saved by Script 80
|
||||||
if (is.null(summary_file) || is.null(field_details_file)) {
|
if (is.null(summary_file) || is.null(field_details_file)) {
|
||||||
safe_log(paste("KPI files not found in", kpi_data_dir, "—searching workspace for RDS files"))
|
parent_dir <- dirname(kpi_data_dir) # One level up: reports/kpis/
|
||||||
|
safe_log(paste("KPI files not found in", kpi_data_dir, "—trying parent directory:", parent_dir))
|
||||||
|
|
||||||
|
if (is.null(summary_file)) {
|
||||||
|
summary_file <- try_load_from_dir(parent_dir, expected_summary_names)
|
||||||
|
}
|
||||||
|
if (is.null(field_details_file)) {
|
||||||
|
field_details_file <- try_load_from_dir(parent_dir, expected_field_details_names)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# If still not found, perform a workspace-wide search (slower) limited to laravel_app storage
|
||||||
|
if (is.null(summary_file) || is.null(field_details_file)) {
|
||||||
|
safe_log(paste("KPI files not found in", kpi_data_dir, "or parent directory—searching workspace for RDS files"))
|
||||||
# List rds files under laravel_app/storage/app recursively
|
# List rds files under laravel_app/storage/app recursively
|
||||||
files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "\\.rds$", recursive = TRUE, full.names = TRUE)
|
files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "\\.rds$", recursive = TRUE, full.names = TRUE)
|
||||||
# Try to match by expected names
|
# Try to match by expected names
|
||||||
|
|
@ -483,7 +500,7 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
|
|
||||||
## 1.1 Overview of cane supply area, showing zones with number of acres being harvest ready
|
## 1.1 Overview of cane supply area, showing zones with number of acres being harvest ready
|
||||||
|
|
||||||
```{r overview_map, fig.width=9, fig.height=7, fig.align="center", echo=FALSE, message=FALSE, warning=FALSE}
|
```{r overview_map, fig.width=7, fig.height=6, fig.align="center", echo=FALSE, message=FALSE, warning=FALSE}
|
||||||
# Create a hexbin overview map with ggplot
|
# Create a hexbin overview map with ggplot
|
||||||
tryCatch({
|
tryCatch({
|
||||||
# Use per-field field_analysis data from RDS (already loaded in load_kpi_data chunk)
|
# Use per-field field_analysis data from RDS (already loaded in load_kpi_data chunk)
|
||||||
|
|
@ -510,6 +527,8 @@ tryCatch({
|
||||||
TARGET_CRS <- 4326 # WGS84 for web basemap compatibility (was 32736 UTM)
|
TARGET_CRS <- 4326 # WGS84 for web basemap compatibility (was 32736 UTM)
|
||||||
|
|
||||||
# Process polygons into points
|
# Process polygons into points
|
||||||
|
# IMPORTANT: Calculate centroids in projected CRS (UTM 36S for southern Africa) to avoid
|
||||||
|
# st_centroid warnings about longitude/latitude data, then transform back to WGS84
|
||||||
points_processed <- field_boundaries_sf %>%
|
points_processed <- field_boundaries_sf %>%
|
||||||
st_make_valid() %>%
|
st_make_valid() %>%
|
||||||
mutate(
|
mutate(
|
||||||
|
|
@ -525,8 +544,9 @@ tryCatch({
|
||||||
analysis_data %>% select(Field_id, Status_trigger),
|
analysis_data %>% select(Field_id, Status_trigger),
|
||||||
by = c("field" = "Field_id")
|
by = c("field" = "Field_id")
|
||||||
) %>%
|
) %>%
|
||||||
st_transform(crs = TARGET_CRS) %>%
|
st_transform(crs = 32736) %>% # UTM zone 36S (southern Africa)
|
||||||
st_centroid() %>%
|
st_centroid() %>%
|
||||||
|
st_transform(crs = TARGET_CRS) %>%
|
||||||
bind_cols(st_coordinates(.))
|
bind_cols(st_coordinates(.))
|
||||||
|
|
||||||
# Validate coordinates - check for NaN, Inf, or missing values
|
# Validate coordinates - check for NaN, Inf, or missing values
|
||||||
|
|
@ -553,30 +573,8 @@ tryCatch({
|
||||||
labels_vec[length(labels_vec)] <- ">30"
|
labels_vec[length(labels_vec)] <- ">30"
|
||||||
labels_vec[1] <- "0.1"
|
labels_vec[1] <- "0.1"
|
||||||
|
|
||||||
# Create dummy point to anchor hexbin grids for consistency
|
|
||||||
dummy_point <- data.frame(
|
|
||||||
field = NA,
|
|
||||||
sub_field = NA,
|
|
||||||
area_ac = 0,
|
|
||||||
Status_trigger = NA,
|
|
||||||
X = min(points_processed$X, na.rm = TRUE),
|
|
||||||
Y = min(points_processed$Y, na.rm = TRUE),
|
|
||||||
geometry = NA
|
|
||||||
)
|
|
||||||
|
|
||||||
# Convert dummy point to sf and add xy coordinates
|
|
||||||
dummy_point <- st_as_sf(dummy_point, coords = c("X", "Y"), crs = st_crs(points_ready))
|
|
||||||
dummy_point <- cbind(dummy_point, st_coordinates(dummy_point))
|
|
||||||
|
|
||||||
# Mark dummy point with anchor flag before binding
|
|
||||||
# Referenced: dummy_point, st_as_sf, st_coordinates, area_ac
|
|
||||||
dummy_point$anchor_dummy <- TRUE
|
|
||||||
|
|
||||||
# Add dummy point to ensure consistent hexbin grid anchoring
|
|
||||||
points_ready <- rbind(points_ready, dummy_point)
|
|
||||||
points_not_ready <- rbind(points_not_ready, dummy_point)
|
|
||||||
|
|
||||||
# Calculate data bounds for coordinate limits (prevents basemap scale conflicts)
|
# Calculate data bounds for coordinate limits (prevents basemap scale conflicts)
|
||||||
|
# Use actual data bounds without dummy points to avoid column mismatch
|
||||||
x_limits <- c(
|
x_limits <- c(
|
||||||
floor(min(points_processed$X, na.rm = TRUE) * 20) / 20, # Round down to avoid edge clipping
|
floor(min(points_processed$X, na.rm = TRUE) * 20) / 20, # Round down to avoid edge clipping
|
||||||
ceiling(max(points_processed$X, na.rm = TRUE) * 20) / 20 # Round up for padding
|
ceiling(max(points_processed$X, na.rm = TRUE) * 20) / 20 # Round up for padding
|
||||||
|
|
@ -653,22 +651,17 @@ tryCatch({
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Remove dummy point rows after grid anchoring to prevent dummy cells in plot
|
|
||||||
# Referenced: points_ready, points_not_ready, anchor_dummy flag filtering
|
|
||||||
points_ready <- points_ready %>% filter(!anchor_dummy, na.rm = TRUE)
|
|
||||||
points_not_ready <- points_not_ready %>% filter(!anchor_dummy, na.rm = TRUE)
|
|
||||||
|
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
warning("Error creating hexbin map:", e$message)
|
warning("Error creating hexbin map:", e$message)
|
||||||
})
|
})
|
||||||
```
|
```
|
||||||
|
|
||||||
\newpage
|
|
||||||
## 1.2 Key Performance Indicators
|
## 1.2 Key Performance Indicators
|
||||||
|
|
||||||
```{r combined_kpi_table, echo=FALSE, results='asis'}
|
```{r combined_kpi_table, echo=FALSE, results='asis'}
|
||||||
# Create summary KPI table from field_analysis_summary data
|
# Create consolidated KPI table from field_analysis data
|
||||||
# This shows: Phases, Triggers, Area Change, and Total Farm acreage
|
# Shows: Phases, Triggers, Area Change, Cloud Influence, and Total Farm
|
||||||
|
|
||||||
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
|
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
|
||||||
# Load field analysis data
|
# Load field analysis data
|
||||||
|
|
@ -679,24 +672,46 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
!is.data.frame(summary_data$field_analysis_summary)) {
|
!is.data.frame(summary_data$field_analysis_summary)) {
|
||||||
|
|
||||||
# Create summary by aggregating by Status_Alert and Phase categories
|
# Create summary by aggregating by Status_Alert and Phase categories
|
||||||
# This groups fields by their phase and status to show distribution
|
|
||||||
phase_summary <- field_analysis_df %>%
|
phase_summary <- field_analysis_df %>%
|
||||||
filter(!is.na(Phase)) %>%
|
filter(!is.na(Phase)) %>%
|
||||||
group_by(Phase) %>%
|
group_by(Phase) %>%
|
||||||
summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>%
|
summarise(
|
||||||
|
Acreage = sum(Acreage, na.rm = TRUE),
|
||||||
|
Field_count = n_distinct(Field_id),
|
||||||
|
.groups = "drop"
|
||||||
|
) %>%
|
||||||
mutate(Category = Phase) %>%
|
mutate(Category = Phase) %>%
|
||||||
select(Category, Acreage)
|
select(Category, Acreage, Field_count)
|
||||||
|
|
||||||
# Try to create Status trigger summary - use Status_Alert if available, otherwise use empty
|
# Create Status trigger summary - includes both active alerts and "No active triggers"
|
||||||
trigger_summary <- tryCatch({
|
trigger_summary <- tryCatch({
|
||||||
field_analysis_df %>%
|
# Active alerts (fields with non-NA Status_Alert)
|
||||||
|
active_alerts <- field_analysis_df %>%
|
||||||
filter(!is.na(Status_Alert), Status_Alert != "") %>%
|
filter(!is.na(Status_Alert), Status_Alert != "") %>%
|
||||||
group_by(Status_Alert) %>%
|
group_by(Status_Alert) %>%
|
||||||
summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>%
|
summarise(
|
||||||
|
Acreage = sum(Acreage, na.rm = TRUE),
|
||||||
|
Field_count = n_distinct(Field_id),
|
||||||
|
.groups = "drop"
|
||||||
|
) %>%
|
||||||
mutate(Category = Status_Alert) %>%
|
mutate(Category = Status_Alert) %>%
|
||||||
select(Category, Acreage)
|
select(Category, Acreage, Field_count)
|
||||||
|
|
||||||
|
# No active triggers (fields with NA Status_Alert)
|
||||||
|
no_alerts <- field_analysis_df %>%
|
||||||
|
filter(is.na(Status_Alert) | Status_Alert == "") %>%
|
||||||
|
summarise(
|
||||||
|
Acreage = sum(Acreage, na.rm = TRUE),
|
||||||
|
Field_count = n_distinct(Field_id),
|
||||||
|
.groups = "drop"
|
||||||
|
) %>%
|
||||||
|
mutate(Category = "No active triggers") %>%
|
||||||
|
select(Category, Acreage, Field_count)
|
||||||
|
|
||||||
|
# Combine active alerts and no-alert fields
|
||||||
|
bind_rows(active_alerts, no_alerts)
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
data.frame(Category = character(), Acreage = numeric())
|
data.frame(Category = character(), Acreage = numeric(), Field_count = numeric())
|
||||||
})
|
})
|
||||||
|
|
||||||
# Combine into summary
|
# Combine into summary
|
||||||
|
|
@ -709,25 +724,38 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
|
|
||||||
# Phase names and trigger names to extract from summary
|
# Phase names and trigger names to extract from summary
|
||||||
phase_names <- c("Germination", "Tillering", "Grand Growth", "Maturation", "Unknown Phase")
|
phase_names <- c("Germination", "Tillering", "Grand Growth", "Maturation", "Unknown Phase")
|
||||||
trigger_names <- c("Harvest Ready", "Strong Recovery", "Growth On Track", "Stress Detected",
|
# Trigger names now include both active alerts AND "No active triggers" (calculated dynamically above)
|
||||||
"Germination Complete", "Germination Started", "No Active Trigger",
|
trigger_names <- c("harvest_ready", "harvested_bare", "stress_detected",
|
||||||
"Ready for harvest-check", "Strong decline in crop health", "Harvested/bare")
|
"germination_delayed", "growth_on_track", "No active triggers")
|
||||||
|
|
||||||
# Extract phase distribution - match on category names directly
|
# Extract phase distribution - match on category names directly
|
||||||
if (!is.null(field_analysis_summary) && nrow(field_analysis_summary) > 0) {
|
if (!is.null(field_analysis_summary) && nrow(field_analysis_summary) > 0) {
|
||||||
|
# Phase rows with field count
|
||||||
phase_rows <- field_analysis_summary %>%
|
phase_rows <- field_analysis_summary %>%
|
||||||
filter(Category %in% phase_names) %>%
|
filter(Category %in% phase_names) %>%
|
||||||
select(Category, Acreage) %>%
|
select(Category, Acreage, Field_count) %>%
|
||||||
mutate(KPI_Group = "PHASE DISTRIBUTION", .before = 1)
|
mutate(KPI_Group = "PHASE DISTRIBUTION", .before = 1)
|
||||||
|
|
||||||
# Extract status triggers - match on category names directly
|
# Trigger rows with field count
|
||||||
trigger_rows <- field_analysis_summary %>%
|
trigger_rows <- field_analysis_summary %>%
|
||||||
filter(Category %in% trigger_names) %>%
|
filter(Category %in% trigger_names) %>%
|
||||||
select(Category, Acreage) %>%
|
select(Category, Acreage, Field_count) %>%
|
||||||
mutate(KPI_Group = "STATUS TRIGGERS", .before = 1)
|
mutate(KPI_Group = "OPERATIONAL ALERTS", .before = 1)
|
||||||
|
|
||||||
|
# If no triggers found, add a placeholder row
|
||||||
|
if (nrow(trigger_rows) == 0) {
|
||||||
|
trigger_rows <- data.frame(
|
||||||
|
KPI_Group = "OPERATIONAL ALERTS",
|
||||||
|
Category = "No active triggers",
|
||||||
|
Acreage = 0,
|
||||||
|
Field_count = 0,
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
# Calculate area change from field_analysis data
|
# Calculate area change from field_analysis data
|
||||||
total_acreage <- sum(field_analysis_df$Acreage, na.rm = TRUE)
|
total_acreage <- sum(field_analysis_df$Acreage, na.rm = TRUE)
|
||||||
|
total_fields <- n_distinct(field_analysis_df$Field_id)
|
||||||
|
|
||||||
# Parse Weekly_ci_change to determine improvement/decline
|
# Parse Weekly_ci_change to determine improvement/decline
|
||||||
parse_ci_change <- function(change_str) {
|
parse_ci_change <- function(change_str) {
|
||||||
|
|
@ -741,10 +769,20 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
|
|
||||||
field_analysis_df$ci_change_numeric <- sapply(field_analysis_df$Weekly_ci_change, parse_ci_change)
|
field_analysis_df$ci_change_numeric <- sapply(field_analysis_df$Weekly_ci_change, parse_ci_change)
|
||||||
|
|
||||||
improving_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric > 0.2], na.rm = TRUE)
|
# Area change rows with field count
|
||||||
declining_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric < -0.2], na.rm = TRUE)
|
improving_df <- field_analysis_df %>%
|
||||||
stable_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric >= -0.2 &
|
filter(ci_change_numeric > 0.2)
|
||||||
field_analysis_df$ci_change_numeric <= 0.2], na.rm = TRUE)
|
stable_df <- field_analysis_df %>%
|
||||||
|
filter(ci_change_numeric >= -0.2 & ci_change_numeric <= 0.2)
|
||||||
|
declining_df <- field_analysis_df %>%
|
||||||
|
filter(ci_change_numeric < -0.2)
|
||||||
|
|
||||||
|
improving_acreage <- sum(improving_df$Acreage, na.rm = TRUE)
|
||||||
|
improving_field_count <- n_distinct(improving_df$Field_id)
|
||||||
|
stable_acreage <- sum(stable_df$Acreage, na.rm = TRUE)
|
||||||
|
stable_field_count <- n_distinct(stable_df$Field_id)
|
||||||
|
declining_acreage <- sum(declining_df$Acreage, na.rm = TRUE)
|
||||||
|
declining_field_count <- n_distinct(declining_df$Field_id)
|
||||||
|
|
||||||
improving_pct <- ifelse(total_acreage > 0, round(improving_acreage / total_acreage * 100, 1), 0)
|
improving_pct <- ifelse(total_acreage > 0, round(improving_acreage / total_acreage * 100, 1), 0)
|
||||||
declining_pct <- ifelse(total_acreage > 0, round(declining_acreage / total_acreage * 100, 1), 0)
|
declining_pct <- ifelse(total_acreage > 0, round(declining_acreage / total_acreage * 100, 1), 0)
|
||||||
|
|
@ -761,24 +799,54 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
KPI_Group = "AREA CHANGE",
|
KPI_Group = "AREA CHANGE",
|
||||||
Category = c("Improving", "Stable", "Declining"),
|
Category = c("Improving", "Stable", "Declining"),
|
||||||
Acreage = c(round(improving_acreage, 2), round(stable_acreage, 2), round(declining_acreage, 2)),
|
Acreage = c(round(improving_acreage, 2), round(stable_acreage, 2), round(declining_acreage, 2)),
|
||||||
|
Field_count = c(improving_field_count, stable_field_count, declining_field_count),
|
||||||
Percent = c(paste0(improving_pct, "%"), paste0(stable_pct, "%"), paste0(declining_pct, "%")),
|
Percent = c(paste0(improving_pct, "%"), paste0(stable_pct, "%"), paste0(declining_pct, "%")),
|
||||||
stringsAsFactors = FALSE
|
stringsAsFactors = FALSE
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Cloud influence rows with field count - aggregate by Cloud_category
|
||||||
|
cloud_rows <- tryCatch({
|
||||||
|
field_analysis_df %>%
|
||||||
|
filter(!is.na(Cloud_category)) %>%
|
||||||
|
group_by(Cloud_category) %>%
|
||||||
|
summarise(
|
||||||
|
Acreage = sum(Acreage, na.rm = TRUE),
|
||||||
|
Field_count = n_distinct(Field_id),
|
||||||
|
.groups = "drop"
|
||||||
|
) %>%
|
||||||
|
mutate(
|
||||||
|
KPI_Group = "CLOUD INFLUENCE",
|
||||||
|
Category = Cloud_category,
|
||||||
|
Percent = paste0(round(Acreage / total_acreage * 100, 1), "%"),
|
||||||
|
Acreage = round(Acreage, 2)
|
||||||
|
) %>%
|
||||||
|
select(KPI_Group, Category, Acreage, Field_count, Percent)
|
||||||
|
}, error = function(e) {
|
||||||
|
data.frame(
|
||||||
|
KPI_Group = character(),
|
||||||
|
Category = character(),
|
||||||
|
Acreage = numeric(),
|
||||||
|
Field_count = numeric(),
|
||||||
|
Percent = character()
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
# Total farm row
|
# Total farm row
|
||||||
total_row <- data.frame(
|
total_row <- data.frame(
|
||||||
KPI_Group = "TOTAL FARM",
|
KPI_Group = "TOTAL FARM",
|
||||||
Category = "Total Acreage",
|
Category = "Total Acreage",
|
||||||
Acreage = round(total_acreage, 2),
|
Acreage = round(total_acreage, 2),
|
||||||
|
Field_count = total_fields,
|
||||||
Percent = "100%",
|
Percent = "100%",
|
||||||
stringsAsFactors = FALSE
|
stringsAsFactors = FALSE
|
||||||
)
|
)
|
||||||
|
|
||||||
# Combine all rows with percentages for all
|
# Combine all rows
|
||||||
combined_df <- bind_rows(
|
combined_df <- bind_rows(
|
||||||
phase_pcts,
|
phase_pcts,
|
||||||
trigger_pcts,
|
trigger_pcts,
|
||||||
area_change_rows,
|
area_change_rows,
|
||||||
|
cloud_rows,
|
||||||
total_row
|
total_row
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -789,7 +857,7 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
KPI_display = if_else(row_number() == 1, KPI_Group, "")
|
KPI_display = if_else(row_number() == 1, KPI_Group, "")
|
||||||
) %>%
|
) %>%
|
||||||
ungroup() %>%
|
ungroup() %>%
|
||||||
select(KPI_display, Category, Acreage, Percent)
|
select(KPI_display, Category, Acreage, Percent, Field_count)
|
||||||
|
|
||||||
# Render as flextable with merged cells
|
# Render as flextable with merged cells
|
||||||
ft <- flextable(combined_df) %>%
|
ft <- flextable(combined_df) %>%
|
||||||
|
|
@ -797,7 +865,8 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
KPI_display = "KPI Category",
|
KPI_display = "KPI Category",
|
||||||
Category = "Item",
|
Category = "Item",
|
||||||
Acreage = "Acreage",
|
Acreage = "Acreage",
|
||||||
Percent = "Percent"
|
Percent = "Percentage of total fields",
|
||||||
|
Field_count = "# Fields"
|
||||||
) %>%
|
) %>%
|
||||||
merge_v(j = "KPI_display") %>%
|
merge_v(j = "KPI_display") %>%
|
||||||
autofit()
|
autofit()
|
||||||
|
|
@ -807,8 +876,9 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
phase_count <- nrow(phase_rows)
|
phase_count <- nrow(phase_rows)
|
||||||
trigger_count <- nrow(trigger_rows)
|
trigger_count <- nrow(trigger_rows)
|
||||||
area_count <- nrow(area_change_rows)
|
area_count <- nrow(area_change_rows)
|
||||||
|
cloud_count <- nrow(cloud_rows)
|
||||||
|
|
||||||
# Add lines after phases, triggers, and area change groups (before totals)
|
# Add lines after phases, triggers, area change, and cloud groups (before totals)
|
||||||
if (phase_count > 0) {
|
if (phase_count > 0) {
|
||||||
ft <- ft %>% hline(i = phase_count, border = officer::fp_border(width = 1))
|
ft <- ft %>% hline(i = phase_count, border = officer::fp_border(width = 1))
|
||||||
}
|
}
|
||||||
|
|
@ -818,6 +888,9 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
if (area_count > 0) {
|
if (area_count > 0) {
|
||||||
ft <- ft %>% hline(i = phase_count + trigger_count + area_count, border = officer::fp_border(width = 1))
|
ft <- ft %>% hline(i = phase_count + trigger_count + area_count, border = officer::fp_border(width = 1))
|
||||||
}
|
}
|
||||||
|
if (cloud_count > 0) {
|
||||||
|
ft <- ft %>% hline(i = phase_count + trigger_count + area_count + cloud_count, border = officer::fp_border(width = 1))
|
||||||
|
}
|
||||||
|
|
||||||
ft
|
ft
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -828,40 +901,6 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
## Cloud Coverage Summary
|
|
||||||
|
|
||||||
```{r cloud_coverage_summary, echo=FALSE}
|
|
||||||
# Display cloud coverage summary aggregated by category
|
|
||||||
# Cloud coverage data is included in the field_analysis RDS from Script 80
|
|
||||||
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
|
|
||||||
field_analysis_df <- summary_data$field_analysis
|
|
||||||
|
|
||||||
# Aggregate cloud coverage by category
|
|
||||||
cloud_summary <- field_analysis_df %>%
|
|
||||||
filter(!is.na(Cloud_category)) %>%
|
|
||||||
group_by(Cloud_category) %>%
|
|
||||||
summarise(
|
|
||||||
"Number of Fields" = n(),
|
|
||||||
"Total Acreage" = round(sum(Acreage, na.rm = TRUE), 1),
|
|
||||||
.groups = "drop"
|
|
||||||
) %>%
|
|
||||||
rename("Cloud Category" = Cloud_category) %>%
|
|
||||||
arrange(`Cloud Category`)
|
|
||||||
if (nrow(cloud_summary) > 0) {
|
|
||||||
# Create flextable
|
|
||||||
ft <- flextable(cloud_summary) %>%
|
|
||||||
autofit() %>%
|
|
||||||
theme_vanilla()
|
|
||||||
|
|
||||||
ft
|
|
||||||
} else {
|
|
||||||
cat("Cloud coverage data not available for summary.\n")
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
cat("Field analysis data not available for cloud coverage summary.\n")
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
```{r data, message=TRUE, warning=TRUE, include=FALSE}
|
```{r data, message=TRUE, warning=TRUE, include=FALSE}
|
||||||
# All data comes from the field analysis performed in 09_field_analysis_weekly.R
|
# All data comes from the field analysis performed in 09_field_analysis_weekly.R
|
||||||
# The report renders KPI tables and field summaries from that data
|
# The report renders KPI tables and field summaries from that data
|
||||||
|
|
@ -951,9 +990,9 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
```{r ci_fig, echo=FALSE, fig.align='right', out.width='40%', fig.cap="Chlorophyll Index Example"}
|
<div align="center">
|
||||||
knitr::include_graphics("CI_graph_example.png")
|

|
||||||
```
|
</div>
|
||||||
|
|
||||||
|
|
||||||
### Data File Structure and Columns
|
### Data File Structure and Columns
|
||||||
|
|
@ -1025,15 +1064,42 @@ Both algorithms are not always in sync, and can have contradictory results. Wide
|
||||||
## Report Metadata
|
## Report Metadata
|
||||||
|
|
||||||
```{r report_metadata, echo=FALSE}
|
```{r report_metadata, echo=FALSE}
|
||||||
|
# Calculate total area from field analysis data
|
||||||
|
total_area_acres <- 0
|
||||||
|
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
|
||||||
|
total_area_acres <- sum(summary_data$field_analysis$Acreage, na.rm = TRUE)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Calculate total fields
|
||||||
|
total_fields_count <- 0
|
||||||
|
if (exists("AllPivots0")) {
|
||||||
|
total_fields_count <- nrow(AllPivots0 %>% filter(!is.na(field)) %>% group_by(field) %>% summarise())
|
||||||
|
} else if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
|
||||||
|
total_fields_count <- n_distinct(summary_data$field_analysis$Field_id)
|
||||||
|
}
|
||||||
|
|
||||||
metadata_info <- data.frame(
|
metadata_info <- data.frame(
|
||||||
Metric = c("Report Generated", "Data Source", "Analysis Period", "Total Fields", "Next Update"),
|
Metric = c(
|
||||||
|
"Report Generated",
|
||||||
|
"Data Source",
|
||||||
|
"Analysis Period",
|
||||||
|
"Total Fields [number]",
|
||||||
|
"Total Area [acres]",
|
||||||
|
"Next Update",
|
||||||
|
"Service provided",
|
||||||
|
"Starting date service"
|
||||||
|
),
|
||||||
Value = c(
|
Value = c(
|
||||||
format(Sys.time(), "%Y-%m-%d %H:%M:%S"),
|
format(Sys.time(), "%Y-%m-%d %H:%M:%S"),
|
||||||
paste("Project", toupper(project_dir)),
|
paste("Project", toupper(project_dir)),
|
||||||
paste("Week", current_week, "of", year),
|
paste("Week", current_week, "of", year),
|
||||||
ifelse(exists("AllPivots0"), nrow(AllPivots0 %>% filter(!is.na(field)) %>% group_by(field) %>% summarise()), "Unknown"),
|
ifelse(total_fields_count > 0, total_fields_count, "Unknown"),
|
||||||
"Next Wednesday"
|
ifelse(total_area_acres > 0, round(total_area_acres, 0), "Unknown"),
|
||||||
)
|
"Next Wednesday",
|
||||||
|
"Cane Supply Office - Weekly",
|
||||||
|
"23 dec 2025"
|
||||||
|
),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
)
|
)
|
||||||
|
|
||||||
ft <- flextable(metadata_info) %>%
|
ft <- flextable(metadata_info) %>%
|
||||||
|
|
@ -1043,4 +1109,4 @@ ft <- flextable(metadata_info) %>%
|
||||||
ft
|
ft
|
||||||
```
|
```
|
||||||
|
|
||||||
*This report was automatically generated by the SmartCane monitoring system. For questions or additional analysis, please contact the technical team.*
|
*This report was automatically generated by the SmartCane monitoring system. For questions or additional analysis, please contact the technical team at info@smartcane.ag.*
|
||||||
|
|
@ -239,7 +239,7 @@
|
||||||
#
|
#
|
||||||
# OUTPUT:
|
# OUTPUT:
|
||||||
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
|
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
|
||||||
# - Columns: field, sub_field, Date, FitData, DOY, value
|
# - Columns: field, sub_field, Date, FitData, DAH, value
|
||||||
#
|
#
|
||||||
# PARAMETERS:
|
# PARAMETERS:
|
||||||
# PROJECT: angata, chemba, xinavane, esa, simba
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
|
@ -438,8 +438,8 @@
|
||||||
# rmarkdown::render(
|
# rmarkdown::render(
|
||||||
rmarkdown::render(
|
rmarkdown::render(
|
||||||
"r_app/90_CI_report_with_kpis_agronomic_support.Rmd",
|
"r_app/90_CI_report_with_kpis_agronomic_support.Rmd",
|
||||||
params = list(data_dir = "aura", report_date = as.Date("2022-12-08")),
|
params = list(data_dir = "aura", report_date = as.Date("2026-02-04")),
|
||||||
output_file = "SmartCane_Report_agronomic_support_aura_2022-12-08.docx",
|
output_file = "SmartCane_Report_agronomic_support_aura_2026-02-04.docx",
|
||||||
output_dir = "laravel_app/storage/app/aura/reports"
|
output_dir = "laravel_app/storage/app/aura/reports"
|
||||||
)
|
)
|
||||||
#
|
#
|
||||||
|
|
@ -450,7 +450,7 @@ rmarkdown::render(
|
||||||
rmarkdown::render(
|
rmarkdown::render(
|
||||||
"r_app/91_CI_report_with_kpis_cane_supply.Rmd",
|
"r_app/91_CI_report_with_kpis_cane_supply.Rmd",
|
||||||
params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
|
params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
|
||||||
output_file = "SmartCane_Report_basemap_test.docx",
|
output_file = "SmartCane_Report_cane_supply_angata_2026-02-04.docx",
|
||||||
output_dir = "laravel_app/storage/app/angata/reports"
|
output_dir = "laravel_app/storage/app/angata/reports"
|
||||||
)
|
)
|
||||||
#
|
#
|
||||||
|
|
|
||||||
|
|
@ -33,8 +33,14 @@ suppressPackageStartupMessages({
|
||||||
})
|
})
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# SECTION 2: CLIENT TYPE MAPPING & CONFIGURATION
|
# SECTION 2: GLOBAL AGRONOMIC THRESHOLDS & CLIENT TYPE MAPPING
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
|
# Maturity threshold for yield prediction: crop age in Days After Harvest (DAH)
|
||||||
|
# Only fields >= DAH_MATURITY_THRESHOLD days old receive yield forecasts
|
||||||
|
# ~240 days ≈ 8 months, typical sugarcane maturity window
|
||||||
|
DAH_MATURITY_THRESHOLD <- 240
|
||||||
|
|
||||||
# Maps project names to client types for pipeline control
|
# Maps project names to client types for pipeline control
|
||||||
# This determines which scripts run and what outputs they produce
|
# This determines which scripts run and what outputs they produce
|
||||||
|
|
||||||
|
|
@ -45,7 +51,8 @@ CLIENT_TYPE_MAP <- list(
|
||||||
"esa" = "agronomic_support",
|
"esa" = "agronomic_support",
|
||||||
"simba" = "agronomic_support",
|
"simba" = "agronomic_support",
|
||||||
"john" = "agronomic_support",
|
"john" = "agronomic_support",
|
||||||
"huss" = "agronomic_support"
|
"huss" = "agronomic_support",
|
||||||
|
"aura" = "agronomic_support"
|
||||||
)
|
)
|
||||||
|
|
||||||
#' Get client type for a project
|
#' Get client type for a project
|
||||||
|
|
@ -162,13 +169,11 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") {
|
||||||
|
|
||||||
# TIER 5: MOSAICS (Script 40 output)
|
# TIER 5: MOSAICS (Script 40 output)
|
||||||
weekly_mosaic_dir <- here(laravel_storage_dir, "weekly_mosaic")
|
weekly_mosaic_dir <- here(laravel_storage_dir, "weekly_mosaic")
|
||||||
field_tiles_ci_dir <- here(weekly_mosaic_dir, "field_tiles_CI")
|
weekly_tile_max_dir <- here(laravel_storage_dir, "weekly_tile_max")
|
||||||
|
|
||||||
# TIER 6: KPI & REPORTING (Scripts 80/90/91 output)
|
# TIER 6: KPI & REPORTING (Scripts 80/90/91 output)
|
||||||
reports_dir <- here(laravel_storage_dir, "reports")
|
reports_dir <- here(laravel_storage_dir, "reports")
|
||||||
kpi_reports_dir <- here(reports_dir, "kpis", "field_level")
|
kpi_reports_dir <- here(reports_dir, "kpis")
|
||||||
kpi_field_stats_dir <- here(reports_dir, "kpis", "field_stats")
|
|
||||||
kpi_field_analysis_dir <- here(reports_dir, "kpis", "field_analysis")
|
|
||||||
|
|
||||||
# TIER 7: SUPPORT (various scripts)
|
# TIER 7: SUPPORT (various scripts)
|
||||||
vrt_dir <- here(data_dir, "vrt") # Virtual Raster files created during CI extraction
|
vrt_dir <- here(data_dir, "vrt") # Virtual Raster files created during CI extraction
|
||||||
|
|
@ -180,8 +185,8 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") {
|
||||||
merged_tif_folder, field_tiles_dir, field_tiles_ci_dir,
|
merged_tif_folder, field_tiles_dir, field_tiles_ci_dir,
|
||||||
extracted_ci_base_dir, daily_ci_vals_dir, cumulative_ci_vals_dir, ci_for_python_dir,
|
extracted_ci_base_dir, daily_ci_vals_dir, cumulative_ci_vals_dir, ci_for_python_dir,
|
||||||
growth_model_interpolated_dir,
|
growth_model_interpolated_dir,
|
||||||
weekly_mosaic_dir, field_tiles_ci_dir,
|
weekly_mosaic_dir, weekly_tile_max_dir,
|
||||||
reports_dir, kpi_reports_dir, kpi_field_stats_dir, kpi_field_analysis_dir,
|
reports_dir, kpi_reports_dir,
|
||||||
data_dir, vrt_dir, harvest_dir, log_dir
|
data_dir, vrt_dir, harvest_dir, log_dir
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -215,13 +220,11 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") {
|
||||||
|
|
||||||
# TIER 5: Mosaics
|
# TIER 5: Mosaics
|
||||||
weekly_mosaic_dir = weekly_mosaic_dir,
|
weekly_mosaic_dir = weekly_mosaic_dir,
|
||||||
field_tiles_ci_dir = field_tiles_ci_dir,
|
weekly_tile_max_dir = weekly_tile_max_dir,
|
||||||
|
|
||||||
# TIER 6: KPI & reporting
|
# TIER 6: KPI & reporting
|
||||||
reports_dir = reports_dir,
|
reports_dir = reports_dir,
|
||||||
kpi_reports_dir = kpi_reports_dir,
|
kpi_reports_dir = kpi_reports_dir,
|
||||||
kpi_field_stats_dir = kpi_field_stats_dir,
|
|
||||||
kpi_field_analysis_dir = kpi_field_analysis_dir,
|
|
||||||
|
|
||||||
# TIER 7: Support
|
# TIER 7: Support
|
||||||
data_dir = data_dir,
|
data_dir = data_dir,
|
||||||
|
|
@ -511,7 +514,63 @@ setup_logging <- function(log_dir) {
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# Centralized helper functions for run_full_pipeline.R to avoid hardcoding paths
|
# Centralized helper functions for run_full_pipeline.R to avoid hardcoding paths
|
||||||
|
|
||||||
|
#' Detect mosaic mode from project structure
|
||||||
|
#'
|
||||||
|
#' Determines if project uses "tiled" (legacy) or "single-file" (per-field) mosaics
|
||||||
|
#'
|
||||||
|
#' @param project_dir Character. Project name
|
||||||
|
#' @return Character. "tiled" or "single-file"
|
||||||
|
detect_mosaic_mode <- function(project_dir) {
|
||||||
|
# Per-field architecture is standard - always return "single-file"
|
||||||
|
# unless weekly_tile_max directory exists with content
|
||||||
|
mosaic_tiled_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max")
|
||||||
|
|
||||||
|
if (dir.exists(mosaic_tiled_dir) && length(list.files(mosaic_tiled_dir)) > 0) {
|
||||||
|
return("tiled")
|
||||||
|
}
|
||||||
|
return("single-file")
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Detect grid size from tile directory structure
|
||||||
|
#'
|
||||||
|
#' For per-field architecture, returns "unknown" (grid-based tiling is legacy)
|
||||||
|
#'
|
||||||
|
#' @param project_dir Character. Project name
|
||||||
|
#' @return Character. Grid size ("unknown" for per-field)
|
||||||
|
detect_grid_size <- function(project_dir) {
|
||||||
|
# Per-field architecture doesn't use grid-based organization
|
||||||
|
return("unknown")
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Get project storage path
|
||||||
|
#'
|
||||||
|
#' @param project_dir Character. Project name
|
||||||
|
#' @param subdir Character. Optional subdirectory (default NULL)
|
||||||
|
#' @return Character. Full path
|
||||||
|
get_project_storage_path <- function(project_dir, subdir = NULL) {
|
||||||
|
path <- file.path("laravel_app", "storage", "app", project_dir)
|
||||||
|
if (!is.null(subdir)) {
|
||||||
|
path <- file.path(path, subdir)
|
||||||
|
}
|
||||||
|
return(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Get mosaic directory
|
||||||
|
#'
|
||||||
|
#' @param project_dir Character. Project name
|
||||||
|
#' @param mosaic_mode Character. "tiled" or "single-file"
|
||||||
|
#' @return Character. Full path to mosaic directory
|
||||||
|
get_mosaic_dir <- function(project_dir, mosaic_mode = "auto") {
|
||||||
|
if (mosaic_mode == "auto") {
|
||||||
|
mosaic_mode <- detect_mosaic_mode(project_dir)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mosaic_mode == "tiled") {
|
||||||
|
get_project_storage_path(project_dir, "weekly_tile_max")
|
||||||
|
} else {
|
||||||
|
get_project_storage_path(project_dir, "weekly_mosaic")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#' Get KPI directory based on client type
|
#' Get KPI directory based on client type
|
||||||
#'
|
#'
|
||||||
|
|
@ -552,6 +611,20 @@ check_harvest_output_exists <- function(project_dir, week_num, year_num) {
|
||||||
file.exists(path)
|
file.exists(path)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#' Get mosaic verification directory
|
||||||
|
#'
|
||||||
|
#' @param project_dir Character. Project name
|
||||||
|
#' @param mosaic_mode Character. "tiled" or "single-file"
|
||||||
|
#' @return Character. Full path to mosaic directory
|
||||||
|
get_mosaic_verification_dir <- function(project_dir, mosaic_mode) {
|
||||||
|
base <- file.path("laravel_app", "storage", "app", project_dir)
|
||||||
|
|
||||||
|
if (mosaic_mode == "tiled") {
|
||||||
|
file.path(base, "weekly_tile_max")
|
||||||
|
} else {
|
||||||
|
file.path(base, "weekly_mosaic")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#' Check if mosaic files exist for a specific week
|
#' Check if mosaic files exist for a specific week
|
||||||
#'
|
#'
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ subchunkify <- function(g, fig_height=7, fig_width=5) {
|
||||||
"\n`","``
|
"\n`","``
|
||||||
")
|
")
|
||||||
|
|
||||||
cat(knitr::knit(text = knitr::knit_expand(text = sub_chunk), quiet = TRUE))
|
cat(knitr::knit(text = knitr::knit_expand(text = sub_chunk), quiet = TRUE))
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Creates a Chlorophyll Index map for a pivot
|
#' Creates a Chlorophyll Index map for a pivot
|
||||||
|
|
@ -34,12 +34,13 @@ subchunkify <- function(g, fig_height=7, fig_width=5) {
|
||||||
#' @param pivot_spans Additional boundary data for the field
|
#' @param pivot_spans Additional boundary data for the field
|
||||||
#' @param show_legend Whether to show the legend (default: FALSE)
|
#' @param show_legend Whether to show the legend (default: FALSE)
|
||||||
#' @param legend_is_portrait Whether to show the legend in portrait orientation (default: FALSE)
|
#' @param legend_is_portrait Whether to show the legend in portrait orientation (default: FALSE)
|
||||||
|
#' @param legend_position Position for the legend when shown: "left", "right", "top", "bottom" (default: "bottom")
|
||||||
#' @param week Week number to display in the title
|
#' @param week Week number to display in the title
|
||||||
#' @param age Age of the crop in weeks
|
#' @param age Age of the crop in weeks
|
||||||
#' @param borders Whether to display field borders (default: FALSE)
|
#' @param borders Whether to display field borders (default: FALSE)
|
||||||
#' @return A tmap object with the CI map
|
#' @return A tmap object with the CI map
|
||||||
#'
|
#'
|
||||||
create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week, age, borders = FALSE, colorblind = FALSE){
|
create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, legend_position = "bottom", week, age, borders = FALSE, colorblind = FALSE){
|
||||||
# Input validation
|
# Input validation
|
||||||
if (missing(pivot_raster) || is.null(pivot_raster)) {
|
if (missing(pivot_raster) || is.null(pivot_raster)) {
|
||||||
stop("pivot_raster is required")
|
stop("pivot_raster is required")
|
||||||
|
|
@ -64,25 +65,29 @@ create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend =
|
||||||
map <- tm_shape(pivot_raster, unit = "m")
|
map <- tm_shape(pivot_raster, unit = "m")
|
||||||
|
|
||||||
# Add raster with continuous spectrum (fixed scale 8-1 for consistent comparison, reversed)
|
# Add raster with continuous spectrum (fixed scale 8-1 for consistent comparison, reversed)
|
||||||
map <- map + tm_raster(col.scale = tm_scale_continuous(values = palette,
|
map <- map + tm_raster(
|
||||||
limits = c(1,8)),
|
"CI",
|
||||||
col.legend = tm_legend(title = "CI",
|
col.scale = tm_scale_continuous(
|
||||||
orientation = if(legend_is_portrait) "portrait" else "landscape",
|
values = palette,
|
||||||
show = show_legend,
|
limits = c(1, 8),
|
||||||
position = if(show_legend) tm_pos_out("left", "center") else c("left", "bottom"),
|
ticks = seq(1, 8, by = 1),
|
||||||
reverse = TRUE
|
outliers.trunc = c(TRUE, TRUE)
|
||||||
))
|
),
|
||||||
|
col.legend = tm_legend(
|
||||||
|
title = "CI",
|
||||||
|
orientation = if (legend_is_portrait) "portrait" else "landscape",
|
||||||
|
show = show_legend,
|
||||||
|
position = if (show_legend) tm_pos_out(legend_position, "center") else c("left", "bottom"),
|
||||||
|
reverse = TRUE
|
||||||
|
)
|
||||||
|
)
|
||||||
# Add layout elements
|
# Add layout elements
|
||||||
map <- map + tm_title(text = paste0("Max CI week ", week,"\n", age, " weeks (", age * 7, " days) old"),
|
map <- map + tm_layout(
|
||||||
size = 0.7)
|
main.title = paste0("Max CI week ", week,"\n", age, " weeks (", age * 7, " days) old"),
|
||||||
# Add layout configuration to prevent legend rescaling
|
main.title.size = 0.7,
|
||||||
map <- map + tm_layout(legend.position = c("left", "bottom"),
|
#legend.height = 0.85, # Constrain vertical legend height to not exceed map
|
||||||
legend.outside = FALSE,
|
asp = 1 # Fixed aspect ratio
|
||||||
inner.margins = 0.05,
|
)
|
||||||
asp = 1) # Force 1:1 aspect ratio for consistent sizing
|
|
||||||
|
|
||||||
# Add bounds/view settings for fixed aspect ratio
|
|
||||||
map <- map + tm_view(asp = 1)
|
|
||||||
|
|
||||||
# Add borders if requested
|
# Add borders if requested
|
||||||
if (borders) {
|
if (borders) {
|
||||||
|
|
@ -104,13 +109,14 @@ create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend =
|
||||||
#' @param pivot_spans Additional boundary data for the field
|
#' @param pivot_spans Additional boundary data for the field
|
||||||
#' @param show_legend Whether to show the legend (default: FALSE)
|
#' @param show_legend Whether to show the legend (default: FALSE)
|
||||||
#' @param legend_is_portrait Whether to show the legend in portrait orientation (default: FALSE)
|
#' @param legend_is_portrait Whether to show the legend in portrait orientation (default: FALSE)
|
||||||
|
#' @param legend_position Position for the legend when shown: "left", "right", "top", "bottom" (default: "bottom")
|
||||||
#' @param week_1 First week number for comparison
|
#' @param week_1 First week number for comparison
|
||||||
#' @param week_2 Second week number for comparison
|
#' @param week_2 Second week number for comparison
|
||||||
#' @param age Age of the crop in weeks
|
#' @param age Age of the crop in weeks
|
||||||
#' @param borders Whether to display field borders (default: TRUE)
|
#' @param borders Whether to display field borders (default: TRUE)
|
||||||
#' @return A tmap object with the CI difference map
|
#' @return A tmap object with the CI difference map
|
||||||
#'
|
#'
|
||||||
create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week_1, week_2, age, borders = TRUE, colorblind = FALSE){
|
create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, legend_position = "bottom", week_1, week_2, age, borders = TRUE, colorblind = FALSE){
|
||||||
# Input validation
|
# Input validation
|
||||||
if (missing(pivot_raster) || is.null(pivot_raster)) {
|
if (missing(pivot_raster) || is.null(pivot_raster)) {
|
||||||
stop("pivot_raster is required")
|
stop("pivot_raster is required")
|
||||||
|
|
@ -135,26 +141,30 @@ create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_lege
|
||||||
map <- tm_shape(pivot_raster, unit = "m")
|
map <- tm_shape(pivot_raster, unit = "m")
|
||||||
|
|
||||||
# Add raster with continuous spectrum (centered at 0 for difference maps, fixed scale, reversed)
|
# Add raster with continuous spectrum (centered at 0 for difference maps, fixed scale, reversed)
|
||||||
map <- map + tm_raster(col.scale = tm_scale_continuous(values = palette,
|
map <- map + tm_raster(
|
||||||
midpoint = 0,
|
"CI",
|
||||||
limits = c(-3, 3)),
|
col.scale = tm_scale_continuous(
|
||||||
col.legend = tm_legend(title = "CI diff.",
|
values = palette,
|
||||||
orientation = if(legend_is_portrait) "portrait" else "landscape",
|
limits = c(-3, 3),
|
||||||
show = show_legend,
|
ticks = seq(-3, 3, by = 1),
|
||||||
position = if(show_legend) tm_pos_out("right", "center") else c("left", "bottom"),
|
midpoint = 0,
|
||||||
reverse = TRUE
|
outliers.trunc = c(TRUE, TRUE)
|
||||||
))
|
),
|
||||||
|
col.legend = tm_legend(
|
||||||
|
title = "CI diff.",
|
||||||
|
orientation = if (legend_is_portrait) "portrait" else "landscape",
|
||||||
|
show = show_legend,
|
||||||
|
position = if (show_legend) tm_pos_out(legend_position, "center") else c("left", "bottom"),
|
||||||
|
reverse = TRUE
|
||||||
|
)
|
||||||
|
)
|
||||||
# Add layout elements
|
# Add layout elements
|
||||||
map <- map + tm_title(text = paste0("CI change week ", week_1, " - week ", week_2, "\n", age, " weeks (", age * 7, " days) old"),
|
map <- map + tm_layout(
|
||||||
size = 0.7)
|
main.title = paste0("CI change week ", week_1, " - week ", week_2, "\n", age, " weeks (", age * 7, " days) old"),
|
||||||
# Add layout configuration to prevent legend rescaling
|
main.title.size = 0.7,
|
||||||
map <- map + tm_layout(legend.position = c("right", "bottom"),
|
#legend.height = 0.85, # Constrain vertical legend height to not exceed map
|
||||||
legend.outside = FALSE,
|
asp = 1 # Fixed aspect ratio
|
||||||
inner.margins = 0.05,
|
)
|
||||||
asp = 1) # Force 1:1 aspect ratio for consistent sizing
|
|
||||||
|
|
||||||
# Add bounds/view settings for fixed aspect ratio
|
|
||||||
map <- map + tm_view(asp = 1)
|
|
||||||
|
|
||||||
# Add borders if requested
|
# Add borders if requested
|
||||||
if (borders) {
|
if (borders) {
|
||||||
|
|
@ -269,18 +279,16 @@ ci_plot <- function(pivotName,
|
||||||
|
|
||||||
# Create historical maps only if data is available
|
# Create historical maps only if data is available
|
||||||
# Build list with all available maps - order matches original: [m2, m1, current, diff_1w, diff_3w]
|
# Build list with all available maps - order matches original: [m2, m1, current, diff_1w, diff_3w]
|
||||||
# Widths match original hardcoded: c(0.23, 0.18, 0.18, 0.18, 0.23)
|
|
||||||
maps_to_arrange <- list()
|
maps_to_arrange <- list()
|
||||||
widths_to_use <- c()
|
|
||||||
field_heading_note <- ""
|
field_heading_note <- ""
|
||||||
|
|
||||||
# Try to create 2-week ago map (legend on left)
|
# Try to create 2-week ago map (legend on left)
|
||||||
if (!is.null(singlePivot_m2)) {
|
if (!is.null(singlePivot_m2)) {
|
||||||
CImap_m2 <- create_CI_map(singlePivot_m2, AllPivots2, joined_spans2,
|
CImap_m2 <- create_CI_map(singlePivot_m2, AllPivots2, joined_spans2,
|
||||||
show_legend = TRUE, legend_is_portrait = TRUE,
|
show_legend = TRUE, legend_is_portrait = TRUE,
|
||||||
|
legend_position = "left",
|
||||||
week = week_minus_2, age = age - 2, borders = borders, colorblind = colorblind_friendly)
|
week = week_minus_2, age = age - 2, borders = borders, colorblind = colorblind_friendly)
|
||||||
maps_to_arrange <- c(maps_to_arrange, list(CImap_m2))
|
maps_to_arrange <- c(maps_to_arrange, list(CImap_m2))
|
||||||
widths_to_use <- c(widths_to_use, 0.24)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Try to create 1-week ago map
|
# Try to create 1-week ago map
|
||||||
|
|
@ -289,12 +297,10 @@ ci_plot <- function(pivotName,
|
||||||
show_legend = FALSE, legend_is_portrait = FALSE,
|
show_legend = FALSE, legend_is_portrait = FALSE,
|
||||||
week = week_minus_1, age = age - 1, borders = borders, colorblind = colorblind_friendly)
|
week = week_minus_1, age = age - 1, borders = borders, colorblind = colorblind_friendly)
|
||||||
maps_to_arrange <- c(maps_to_arrange, list(CImap_m1))
|
maps_to_arrange <- c(maps_to_arrange, list(CImap_m1))
|
||||||
widths_to_use <- c(widths_to_use, 0.17)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Always add current week map (center position)
|
# Always add current week map (center position)
|
||||||
maps_to_arrange <- c(maps_to_arrange, list(CImap))
|
maps_to_arrange <- c(maps_to_arrange, list(CImap))
|
||||||
widths_to_use <- c(widths_to_use, 0.17)
|
|
||||||
|
|
||||||
# Try to create 1-week difference map
|
# Try to create 1-week difference map
|
||||||
if (!is.null(abs_CI_last_week)) {
|
if (!is.null(abs_CI_last_week)) {
|
||||||
|
|
@ -302,21 +308,17 @@ ci_plot <- function(pivotName,
|
||||||
show_legend = FALSE, legend_is_portrait = FALSE,
|
show_legend = FALSE, legend_is_portrait = FALSE,
|
||||||
week_1 = week, week_2 = week_minus_1, age = age, borders = borders, colorblind = colorblind_friendly)
|
week_1 = week, week_2 = week_minus_1, age = age, borders = borders, colorblind = colorblind_friendly)
|
||||||
maps_to_arrange <- c(maps_to_arrange, list(CI_max_abs_last_week))
|
maps_to_arrange <- c(maps_to_arrange, list(CI_max_abs_last_week))
|
||||||
widths_to_use <- c(widths_to_use, 0.17)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Try to create 3-week difference map (legend on right)
|
# Try to create 3-week difference map (legend on right)
|
||||||
if (!is.null(abs_CI_three_week)) {
|
if (!is.null(abs_CI_three_week)) {
|
||||||
CI_max_abs_three_week <- create_CI_diff_map(abs_CI_three_week, AllPivots2, joined_spans2,
|
CI_max_abs_three_week <- create_CI_diff_map(abs_CI_three_week, AllPivots2, joined_spans2,
|
||||||
show_legend = TRUE, legend_is_portrait = TRUE,
|
show_legend = TRUE, legend_is_portrait = TRUE,
|
||||||
|
legend_position = "right",
|
||||||
week_1 = week, week_2 = week_minus_3, age = age, borders = borders, colorblind = colorblind_friendly)
|
week_1 = week, week_2 = week_minus_3, age = age, borders = borders, colorblind = colorblind_friendly)
|
||||||
maps_to_arrange <- c(maps_to_arrange, list(CI_max_abs_three_week))
|
maps_to_arrange <- c(maps_to_arrange, list(CI_max_abs_three_week))
|
||||||
widths_to_use <- c(widths_to_use, 0.24)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Normalize widths to sum to 1
|
|
||||||
widths_to_use <- widths_to_use / sum(widths_to_use)
|
|
||||||
|
|
||||||
# Add note if historical data is limited
|
# Add note if historical data is limited
|
||||||
if (length(maps_to_arrange) == 1) {
|
if (length(maps_to_arrange) == 1) {
|
||||||
field_heading_note <- " (Current week only - historical data not yet available)"
|
field_heading_note <- " (Current week only - historical data not yet available)"
|
||||||
|
|
@ -324,8 +326,21 @@ ci_plot <- function(pivotName,
|
||||||
field_heading_note <- " (Limited historical data)"
|
field_heading_note <- " (Limited historical data)"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Arrange the maps with normalized widths
|
# Arrange the maps in a row with more width for first and last (for legends)
|
||||||
tst <- do.call(tmap_arrange, c(maps_to_arrange, list(nrow = 1, widths = widths_to_use)))
|
# Give maps with legends (1st and 5th) more space: 23%, middle maps get 18% each
|
||||||
|
widths <- if (length(maps_to_arrange) == 5) {
|
||||||
|
c(0.23, 0.18, 0.18, 0.18, 0.23)
|
||||||
|
} else if (length(maps_to_arrange) == 4) {
|
||||||
|
c(0.25, 0.25, 0.25, 0.25) # Equal if only 4 maps
|
||||||
|
} else if (length(maps_to_arrange) == 3) {
|
||||||
|
c(0.33, 0.33, 0.34) # Equal if only 3 maps
|
||||||
|
} else if (length(maps_to_arrange) == 2) {
|
||||||
|
c(0.5, 0.5) # Equal if only 2 maps
|
||||||
|
} else {
|
||||||
|
NULL # Single map or other cases
|
||||||
|
}
|
||||||
|
|
||||||
|
tst <- do.call(tmap_arrange, c(maps_to_arrange, list(nrow = 1, widths = widths)))
|
||||||
|
|
||||||
# Output heading and map to R Markdown
|
# Output heading and map to R Markdown
|
||||||
age_months <- round(age / 4.348, 1)
|
age_months <- round(age / 4.348, 1)
|
||||||
|
|
@ -342,10 +357,10 @@ ci_plot <- function(pivotName,
|
||||||
#' Creates a plot showing Chlorophyll Index data over time for a pivot field
|
#' Creates a plot showing Chlorophyll Index data over time for a pivot field
|
||||||
#'
|
#'
|
||||||
#' @param pivotName The name or ID of the pivot field to visualize
|
#' @param pivotName The name or ID of the pivot field to visualize
|
||||||
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DOY, cumulative_CI, value and season columns
|
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DAH, cumulative_CI, value and season columns
|
||||||
#' @param plot_type Type of plot to generate: "absolute", "cumulative", or "both"
|
#' @param plot_type Type of plot to generate: "absolute", "cumulative", or "both"
|
||||||
#' @param facet_on Whether to facet the plot by season (TRUE) or overlay all seasons (FALSE)
|
#' @param facet_on Whether to facet the plot by season (TRUE) or overlay all seasons (FALSE)
|
||||||
#' @param x_unit Unit for x-axis: "days" for DOY or "weeks" for week number (default: "days")
|
#' @param x_unit Unit for x-axis: "days" for DAH or "weeks" for week number (default: "days")
|
||||||
#' @param colorblind_friendly Whether to use colorblind-friendly color schemes (default: FALSE)
|
#' @param colorblind_friendly Whether to use colorblind-friendly color schemes (default: FALSE)
|
||||||
#' @param show_benchmarks Whether to show historical benchmark lines (default: FALSE)
|
#' @param show_benchmarks Whether to show historical benchmark lines (default: FALSE)
|
||||||
#' @param estate_name Name of the estate for benchmark calculation (required if show_benchmarks = TRUE)
|
#' @param estate_name Name of the estate for benchmark calculation (required if show_benchmarks = TRUE)
|
||||||
|
|
@ -378,7 +393,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
|
|
||||||
# Process data
|
# Process data
|
||||||
data_ci2 <- data_ci %>%
|
data_ci2 <- data_ci %>%
|
||||||
dplyr::mutate(CI_rate = cumulative_CI / DOY,
|
dplyr::mutate(CI_rate = cumulative_CI / DAH,
|
||||||
week = lubridate::week(Date)) %>%
|
week = lubridate::week(Date)) %>%
|
||||||
dplyr::group_by(field) %>%
|
dplyr::group_by(field) %>%
|
||||||
dplyr::mutate(mean_CIrate_rolling_10_days = zoo::rollapplyr(CI_rate, width = 10, FUN = mean, partial = TRUE),
|
dplyr::mutate(mean_CIrate_rolling_10_days = zoo::rollapplyr(CI_rate, width = 10, FUN = mean, partial = TRUE),
|
||||||
|
|
@ -433,7 +448,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
|
|
||||||
# Determine x-axis variable based on x_unit parameter
|
# Determine x-axis variable based on x_unit parameter
|
||||||
x_var <- if (x_unit == "days") {
|
x_var <- if (x_unit == "days") {
|
||||||
if (facet_on) "Date" else "DOY"
|
if (facet_on) "Date" else "DAH"
|
||||||
} else {
|
} else {
|
||||||
"week"
|
"week"
|
||||||
}
|
}
|
||||||
|
|
@ -443,14 +458,21 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
"weeks" = "Week Number")
|
"weeks" = "Week Number")
|
||||||
|
|
||||||
# Calculate dynamic max values for breaks
|
# Calculate dynamic max values for breaks
|
||||||
max_doy <- max(plot_data$DOY, na.rm = TRUE) + 20
|
max_dah <- max(plot_data$DAH, na.rm = TRUE) + 20
|
||||||
max_week <- max(as.numeric(plot_data$week), na.rm = TRUE) + ceiling(20 / 7)
|
max_week <- max(as.numeric(plot_data$week), na.rm = TRUE) + ceiling(20 / 7)
|
||||||
|
|
||||||
# Create plot with either facets by season or overlay by DOY/week
|
# Create plot with either facets by season or overlay by DAH/week
|
||||||
if (facet_on) {
|
if (facet_on) {
|
||||||
g <- ggplot2::ggplot(data = plot_data) +
|
g <- ggplot2::ggplot(data = plot_data) +
|
||||||
ggplot2::facet_wrap(~season, scales = "free_x") +
|
ggplot2::facet_wrap(~season, scales = "free_x") +
|
||||||
ggplot2::geom_line(ggplot2::aes_string(x = x_var, y = "ci_value", col = "sub_field", group = "sub_field")) +
|
ggplot2::geom_line(
|
||||||
|
ggplot2::aes(
|
||||||
|
x = .data[[x_var]],
|
||||||
|
y = .data[["ci_value"]],
|
||||||
|
col = .data[["sub_field"]],
|
||||||
|
group = .data[["sub_field"]]
|
||||||
|
)
|
||||||
|
) +
|
||||||
ggplot2::labs(title = paste("Plot of", y_label),
|
ggplot2::labs(title = paste("Plot of", y_label),
|
||||||
color = "Field Name",
|
color = "Field Name",
|
||||||
y = y_label,
|
y = y_label,
|
||||||
|
|
@ -460,10 +482,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
breaks = scales::breaks_pretty(),
|
breaks = scales::breaks_pretty(),
|
||||||
labels = function(x) round(as.numeric(x - min(x)) / 30.44, 1))) +
|
labels = function(x) round(as.numeric(x - min(x)) / 30.44, 1))) +
|
||||||
ggplot2::theme_minimal() +
|
ggplot2::theme_minimal() +
|
||||||
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
|
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
|
||||||
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
|
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
|
||||||
axis.title.x.top = ggplot2::element_text(size = 8),
|
axis.title.x.top = ggplot2::element_text(size = 8),
|
||||||
legend.justification = c(1, 0), legend.position = c(1, 0),
|
legend.justification = c(1, 0),
|
||||||
|
legend.position = "inside",
|
||||||
|
legend.position.inside = c(1, 0),
|
||||||
legend.title = ggplot2::element_text(size = 8),
|
legend.title = ggplot2::element_text(size = 8),
|
||||||
legend.text = ggplot2::element_text(size = 8)) +
|
legend.text = ggplot2::element_text(size = 8)) +
|
||||||
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
|
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
|
||||||
|
|
@ -482,32 +506,46 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
benchmark_subset <- benchmark_data %>%
|
benchmark_subset <- benchmark_data %>%
|
||||||
dplyr::filter(ci_type == ci_type_filter) %>%
|
dplyr::filter(ci_type == ci_type_filter) %>%
|
||||||
dplyr::mutate(
|
dplyr::mutate(
|
||||||
benchmark_x = if (x_var == "DOY") {
|
benchmark_x = if (x_var == "DAH") {
|
||||||
DOY
|
DAH
|
||||||
} else if (x_var == "week") {
|
} else if (x_var == "week") {
|
||||||
DOY / 7 # Approximate conversion
|
DAH / 7 # Approximate conversion
|
||||||
} else {
|
} else {
|
||||||
DOY # For Date, use DOY as is (may not align perfectly)
|
DAH # For Date, use DAH as is (may not align perfectly)
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
ggplot2::geom_smooth(
|
ggplot2::geom_smooth(
|
||||||
data = benchmark_subset,
|
data = benchmark_subset,
|
||||||
ggplot2::aes_string(x = "benchmark_x", y = "benchmark_value", group = "factor(percentile)"),
|
ggplot2::aes(
|
||||||
color = "gray70", size = 0.5, se = FALSE, inherit.aes = FALSE
|
x = .data[["benchmark_x"]],
|
||||||
|
y = .data[["benchmark_value"]],
|
||||||
|
group = factor(.data[["percentile"]])
|
||||||
|
),
|
||||||
|
color = "gray70", linewidth = 0.5, se = FALSE, inherit.aes = FALSE
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
} +
|
} +
|
||||||
# Plot older seasons with lighter lines
|
# Plot older seasons with lighter lines
|
||||||
ggplot2::geom_line(
|
ggplot2::geom_line(
|
||||||
data = plot_data %>% dplyr::filter(!is_latest),
|
data = plot_data %>% dplyr::filter(!is_latest),
|
||||||
ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"),
|
ggplot2::aes(
|
||||||
size = 0.7, alpha = 0.4
|
x = .data[[x_var]],
|
||||||
|
y = .data[["ci_value"]],
|
||||||
|
col = .data[["season"]],
|
||||||
|
group = .data[["season"]]
|
||||||
|
),
|
||||||
|
linewidth = 0.7, alpha = 0.4
|
||||||
) +
|
) +
|
||||||
# Plot latest season with thicker, more prominent line
|
# Plot latest season with thicker, more prominent line
|
||||||
ggplot2::geom_line(
|
ggplot2::geom_line(
|
||||||
data = plot_data %>% dplyr::filter(is_latest),
|
data = plot_data %>% dplyr::filter(is_latest),
|
||||||
ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"),
|
ggplot2::aes(
|
||||||
size = 1.5, alpha = 1
|
x = .data[[x_var]],
|
||||||
|
y = .data[["ci_value"]],
|
||||||
|
col = .data[["season"]],
|
||||||
|
group = .data[["season"]]
|
||||||
|
),
|
||||||
|
linewidth = 1.5, alpha = 1
|
||||||
) +
|
) +
|
||||||
ggplot2::labs(title = paste("Plot of", y_label, "for Field", pivotName, title_suffix),
|
ggplot2::labs(title = paste("Plot of", y_label, "for Field", pivotName, title_suffix),
|
||||||
color = "Season",
|
color = "Season",
|
||||||
|
|
@ -515,17 +553,19 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
x = x_label) +
|
x = x_label) +
|
||||||
color_scale +
|
color_scale +
|
||||||
{
|
{
|
||||||
if (x_var == "DOY") {
|
if (x_var == "DAH") {
|
||||||
ggplot2::scale_x_continuous(breaks = seq(0, max_doy, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
||||||
} else if (x_var == "week") {
|
} else if (x_var == "week") {
|
||||||
ggplot2::scale_x_continuous(breaks = seq(0, max_week, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
ggplot2::scale_x_continuous(breaks = seq(0, max_week, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
||||||
}
|
}
|
||||||
} +
|
} +
|
||||||
ggplot2::theme_minimal() +
|
ggplot2::theme_minimal() +
|
||||||
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
|
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
|
||||||
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
|
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
|
||||||
axis.title.x.top = ggplot2::element_text(size = 8),
|
axis.title.x.top = ggplot2::element_text(size = 8),
|
||||||
legend.justification = c(1, 0), legend.position = c(1, 0),
|
legend.justification = c(1, 0),
|
||||||
|
legend.position = "inside",
|
||||||
|
legend.position.inside = c(1, 0),
|
||||||
legend.title = ggplot2::element_text(size = 8),
|
legend.title = ggplot2::element_text(size = 8),
|
||||||
legend.text = ggplot2::element_text(size = 8)) +
|
legend.text = ggplot2::element_text(size = 8)) +
|
||||||
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
|
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
|
||||||
|
|
@ -561,7 +601,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
|
|
||||||
# Determine x-axis variable based on x_unit parameter
|
# Determine x-axis variable based on x_unit parameter
|
||||||
x_var <- if (x_unit == "days") {
|
x_var <- if (x_unit == "days") {
|
||||||
if (facet_on) "Date" else "DOY"
|
if (facet_on) "Date" else "DAH"
|
||||||
} else {
|
} else {
|
||||||
"week"
|
"week"
|
||||||
}
|
}
|
||||||
|
|
@ -578,7 +618,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
}
|
}
|
||||||
|
|
||||||
# Calculate dynamic max values for breaks
|
# Calculate dynamic max values for breaks
|
||||||
max_doy_both <- max(plot_data_both$DOY, na.rm = TRUE) + 20
|
max_dah_both <- max(plot_data_both$DAH, na.rm = TRUE) + 20
|
||||||
max_week_both <- max(as.numeric(plot_data_both$week), na.rm = TRUE) + ceiling(20 / 7)
|
max_week_both <- max(as.numeric(plot_data_both$week), na.rm = TRUE) + ceiling(20 / 7)
|
||||||
|
|
||||||
# Create the faceted plot
|
# Create the faceted plot
|
||||||
|
|
@ -588,12 +628,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
if (!is.null(benchmark_data)) {
|
if (!is.null(benchmark_data)) {
|
||||||
benchmark_subset <- benchmark_data %>%
|
benchmark_subset <- benchmark_data %>%
|
||||||
dplyr::mutate(
|
dplyr::mutate(
|
||||||
benchmark_x = if (x_var == "DOY") {
|
benchmark_x = if (x_var == "DAH") {
|
||||||
DOY
|
DAH
|
||||||
} else if (x_var == "week") {
|
} else if (x_var == "week") {
|
||||||
DOY / 7
|
DAH / 7
|
||||||
} else {
|
} else {
|
||||||
DOY
|
DAH
|
||||||
},
|
},
|
||||||
ci_type_label = case_when(
|
ci_type_label = case_when(
|
||||||
ci_type == "value" ~ "10-Day Rolling Mean CI",
|
ci_type == "value" ~ "10-Day Rolling Mean CI",
|
||||||
|
|
@ -603,8 +643,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
)
|
)
|
||||||
ggplot2::geom_smooth(
|
ggplot2::geom_smooth(
|
||||||
data = benchmark_subset,
|
data = benchmark_subset,
|
||||||
ggplot2::aes_string(x = "benchmark_x", y = "benchmark_value", group = "factor(percentile)"),
|
ggplot2::aes(
|
||||||
color = "gray70", size = 0.5, se = FALSE, inherit.aes = FALSE
|
x = .data[["benchmark_x"]],
|
||||||
|
y = .data[["benchmark_value"]],
|
||||||
|
group = factor(.data[["percentile"]])
|
||||||
|
),
|
||||||
|
color = "gray70", linewidth = 0.5, se = FALSE, inherit.aes = FALSE
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
} +
|
} +
|
||||||
|
|
@ -612,14 +656,24 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
# Plot older seasons with lighter lines
|
# Plot older seasons with lighter lines
|
||||||
ggplot2::geom_line(
|
ggplot2::geom_line(
|
||||||
data = plot_data_both %>% dplyr::filter(!is_latest),
|
data = plot_data_both %>% dplyr::filter(!is_latest),
|
||||||
ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"),
|
ggplot2::aes(
|
||||||
size = 0.7, alpha = 0.4
|
x = .data[[x_var]],
|
||||||
|
y = .data[["ci_value"]],
|
||||||
|
col = .data[["season"]],
|
||||||
|
group = .data[["season"]]
|
||||||
|
),
|
||||||
|
linewidth = 0.7, alpha = 0.4
|
||||||
) +
|
) +
|
||||||
# Plot latest season with thicker, more prominent line
|
# Plot latest season with thicker, more prominent line
|
||||||
ggplot2::geom_line(
|
ggplot2::geom_line(
|
||||||
data = plot_data_both %>% dplyr::filter(is_latest),
|
data = plot_data_both %>% dplyr::filter(is_latest),
|
||||||
ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"),
|
ggplot2::aes(
|
||||||
size = 1.5, alpha = 1
|
x = .data[[x_var]],
|
||||||
|
y = .data[["ci_value"]],
|
||||||
|
col = .data[["season"]],
|
||||||
|
group = .data[["season"]]
|
||||||
|
),
|
||||||
|
linewidth = 1.5, alpha = 1
|
||||||
) +
|
) +
|
||||||
ggplot2::labs(title = paste("CI Analysis for Field", pivotName),
|
ggplot2::labs(title = paste("CI Analysis for Field", pivotName),
|
||||||
color = "Season",
|
color = "Season",
|
||||||
|
|
@ -627,8 +681,8 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
x = x_label) +
|
x = x_label) +
|
||||||
color_scale +
|
color_scale +
|
||||||
{
|
{
|
||||||
if (x_var == "DOY") {
|
if (x_var == "DAH") {
|
||||||
ggplot2::scale_x_continuous(breaks = seq(0, max_doy_both, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
||||||
} else if (x_var == "week") {
|
} else if (x_var == "week") {
|
||||||
ggplot2::scale_x_continuous(breaks = seq(0, max_week_both, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
ggplot2::scale_x_continuous(breaks = seq(0, max_week_both, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
||||||
} else if (x_var == "Date") {
|
} else if (x_var == "Date") {
|
||||||
|
|
@ -636,12 +690,14 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
}
|
}
|
||||||
} +
|
} +
|
||||||
ggplot2::theme_minimal() +
|
ggplot2::theme_minimal() +
|
||||||
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
|
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
|
||||||
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
|
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
|
||||||
axis.title.x.top = ggplot2::element_text(size = 8),
|
axis.title.x.top = ggplot2::element_text(size = 8),
|
||||||
legend.justification = c(1, 0), legend.position = c(1, 0),
|
legend.justification = c(1, 0),
|
||||||
legend.title = ggplot2::element_text(size = 8),
|
legend.position = "inside",
|
||||||
legend.text = ggplot2::element_text(size = 8)) +
|
legend.position.inside = c(1, 0),
|
||||||
|
legend.title = ggplot2::element_text(size = 8),
|
||||||
|
legend.text = ggplot2::element_text(size = 8)) +
|
||||||
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
|
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
|
||||||
|
|
||||||
# For the rolling mean data, we want to set reasonable y-axis limits
|
# For the rolling mean data, we want to set reasonable y-axis limits
|
||||||
|
|
@ -659,9 +715,11 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
dummy_data[["season"]] <- factor("dummy", levels = levels(plot_data_both[["season"]]))
|
dummy_data[["season"]] <- factor("dummy", levels = levels(plot_data_both[["season"]]))
|
||||||
|
|
||||||
g_both <- g_both +
|
g_both <- g_both +
|
||||||
ggplot2::geom_point(data = dummy_data,
|
ggplot2::geom_point(
|
||||||
ggplot2::aes_string(x = x_var, y = "ci_value"),
|
data = dummy_data,
|
||||||
alpha = 0, size = 0) # Invisible points to set scale
|
ggplot2::aes(x = .data[[x_var]], y = .data[["ci_value"]]),
|
||||||
|
alpha = 0, size = 0
|
||||||
|
) # Invisible points to set scale
|
||||||
|
|
||||||
# Display the combined faceted plot
|
# Display the combined faceted plot
|
||||||
subchunkify(g_both, 2.8, 10)
|
subchunkify(g_both, 2.8, 10)
|
||||||
|
|
@ -698,9 +756,11 @@ cum_ci_plot2 <- function(pivotName){
|
||||||
x = "Date", y = "CI Rate") +
|
x = "Date", y = "CI Rate") +
|
||||||
theme_minimal() +
|
theme_minimal() +
|
||||||
theme(axis.text.x = element_text(hjust = 0.5),
|
theme(axis.text.x = element_text(hjust = 0.5),
|
||||||
legend.justification = c(1, 0), legend.position = c(1, 0),
|
legend.justification = c(1, 0),
|
||||||
legend.title = element_text(size = 8),
|
legend.position = "inside",
|
||||||
legend.text = element_text(size = 8)) +
|
legend.position.inside = c(1, 0),
|
||||||
|
legend.title = element_text(size = 8),
|
||||||
|
legend.text = element_text(size = 8)) +
|
||||||
annotate("text", x = midpoint_date, y = 2, label = "No data available", size = 6, hjust = 0.5)
|
annotate("text", x = midpoint_date, y = 2, label = "No data available", size = 6, hjust = 0.5)
|
||||||
|
|
||||||
subchunkify(g, 3.2, 10)
|
subchunkify(g, 3.2, 10)
|
||||||
|
|
@ -750,30 +810,9 @@ get_week_path <- function(mosaic_path, input_date, week_offset) {
|
||||||
target_week <- sprintf("%02d", lubridate::isoweek(target_date)) # Left-pad week number with a zero if needed
|
target_week <- sprintf("%02d", lubridate::isoweek(target_date)) # Left-pad week number with a zero if needed
|
||||||
target_year <- lubridate::isoyear(target_date)
|
target_year <- lubridate::isoyear(target_date)
|
||||||
|
|
||||||
# Primary approach: Try single-file mosaic path first
|
# Load single-file mosaic for the given week
|
||||||
path_to_week <- here::here(mosaic_path, paste0("week_", target_week, "_", target_year, ".tif"))
|
path_to_week <- here::here(mosaic_path, paste0("week_", target_week, "_", target_year, ".tif"))
|
||||||
|
|
||||||
# Smart fallback: If single-file doesn't exist AND path contains "weekly_mosaic", check for tiles
|
|
||||||
if (!file.exists(path_to_week) && grepl("weekly_mosaic", mosaic_path)) {
|
|
||||||
# Try to locate tile-based mosaics in weekly_tile_max instead
|
|
||||||
tile_mosaic_path <- sub("weekly_mosaic", "weekly_tile_max", mosaic_path)
|
|
||||||
|
|
||||||
# Look for any tile files matching the week pattern (e.g., week_XX_YYYY_00.tif, week_XX_YYYY_01.tif, etc.)
|
|
||||||
if (dir.exists(tile_mosaic_path)) {
|
|
||||||
tile_files <- list.files(tile_mosaic_path,
|
|
||||||
pattern = paste0("^week_", target_week, "_", target_year, "_(\\d{2})\\.tif$"),
|
|
||||||
full.names = TRUE)
|
|
||||||
|
|
||||||
if (length(tile_files) > 0) {
|
|
||||||
# Found tiles - return the first tile as primary, note that multiple tiles exist
|
|
||||||
safe_log(paste("Single-file mosaic not found for week", target_week, target_year,
|
|
||||||
"but found", length(tile_files), "tile files in weekly_tile_max. Using tile approach."), "INFO")
|
|
||||||
# Return first tile - caller should aggregate if needed
|
|
||||||
path_to_week <- tile_files[1] # Return first tile; downstream can handle multiple tiles
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Log the path calculation
|
# Log the path calculation
|
||||||
safe_log(paste("Calculated path for week", target_week, "of year", target_year, ":", path_to_week), "INFO")
|
safe_log(paste("Calculated path for week", target_week, "of year", target_year, ":", path_to_week), "INFO")
|
||||||
|
|
||||||
|
|
@ -788,11 +827,11 @@ get_week_path <- function(mosaic_path, input_date, week_offset) {
|
||||||
|
|
||||||
#' Computes historical percentile benchmarks for CI data per estate
|
#' Computes historical percentile benchmarks for CI data per estate
|
||||||
#'
|
#'
|
||||||
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DOY, cumulative_CI, value, season columns
|
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DAH, cumulative_CI, value, season columns
|
||||||
#' @param estate_name Name of the estate/client to filter data for
|
#' @param estate_name Name of the estate/client to filter data for
|
||||||
#' @param percentiles Vector of percentiles to compute (e.g., c(10, 50, 90))
|
#' @param percentiles Vector of percentiles to compute (e.g., c(10, 50, 90))
|
||||||
#' @param min_seasons Minimum number of seasons required for reliable benchmarks (default: 3)
|
#' @param min_seasons Minimum number of seasons required for reliable benchmarks (default: 3)
|
||||||
#' @return Data frame with DOY, percentile, ci_type, benchmark_value, or NULL if insufficient data
|
#' @return Data frame with DAH, percentile, ci_type, benchmark_value, or NULL if insufficient data
|
||||||
#'
|
#'
|
||||||
compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c(10, 50, 90), min_seasons = 3) {
|
compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c(10, 50, 90), min_seasons = 3) {
|
||||||
# Input validation
|
# Input validation
|
||||||
|
|
@ -821,7 +860,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
|
||||||
# Prepare data for both CI types
|
# Prepare data for both CI types
|
||||||
data_prepared <- data_filtered %>%
|
data_prepared <- data_filtered %>%
|
||||||
dplyr::ungroup() %>% # Ensure no existing groupings
|
dplyr::ungroup() %>% # Ensure no existing groupings
|
||||||
dplyr::select(DOY, value, cumulative_CI, season) %>%
|
dplyr::select(DAH, value, cumulative_CI, season) %>%
|
||||||
tidyr::pivot_longer(
|
tidyr::pivot_longer(
|
||||||
cols = c("value", "cumulative_CI"),
|
cols = c("value", "cumulative_CI"),
|
||||||
names_to = "ci_type",
|
names_to = "ci_type",
|
||||||
|
|
@ -829,9 +868,9 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
|
||||||
) %>%
|
) %>%
|
||||||
dplyr::filter(!is.na(ci_value)) # Remove NA values
|
dplyr::filter(!is.na(ci_value)) # Remove NA values
|
||||||
|
|
||||||
# Compute percentiles for each DOY and ci_type
|
# Compute percentiles for each DAH and ci_type
|
||||||
benchmarks <- data_prepared %>%
|
benchmarks <- data_prepared %>%
|
||||||
dplyr::group_by(DOY, ci_type) %>%
|
dplyr::group_by(DAH, ci_type) %>%
|
||||||
dplyr::summarise(
|
dplyr::summarise(
|
||||||
p10 = tryCatch(quantile(ci_value, 0.1, na.rm = TRUE), error = function(e) NA_real_),
|
p10 = tryCatch(quantile(ci_value, 0.1, na.rm = TRUE), error = function(e) NA_real_),
|
||||||
p50 = tryCatch(quantile(ci_value, 0.5, na.rm = TRUE), error = function(e) NA_real_),
|
p50 = tryCatch(quantile(ci_value, 0.5, na.rm = TRUE), error = function(e) NA_real_),
|
||||||
|
|
@ -839,7 +878,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
|
||||||
n_observations = n(),
|
n_observations = n(),
|
||||||
.groups = 'drop'
|
.groups = 'drop'
|
||||||
) %>%
|
) %>%
|
||||||
dplyr::filter(n_observations >= min_seasons) %>% # Only include DOYs with sufficient data
|
dplyr::filter(n_observations >= min_seasons) %>% # Only include DAHs with sufficient data
|
||||||
tidyr::pivot_longer(
|
tidyr::pivot_longer(
|
||||||
cols = c(p10, p50, p90),
|
cols = c(p10, p50, p90),
|
||||||
names_to = "percentile",
|
names_to = "percentile",
|
||||||
|
|
@ -856,7 +895,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
|
||||||
|
|
||||||
# Rename columns for clarity
|
# Rename columns for clarity
|
||||||
benchmarks <- benchmarks %>%
|
benchmarks <- benchmarks %>%
|
||||||
dplyr::select(DOY, ci_type, percentile, benchmark_value)
|
dplyr::select(DAH, ci_type, percentile, benchmark_value)
|
||||||
|
|
||||||
safe_log(paste("Computed CI benchmarks for estate", estate_name, "with", length(unique_seasons), "seasons and", nrow(benchmarks), "benchmark points"), "INFO")
|
safe_log(paste("Computed CI benchmarks for estate", estate_name, "with", length(unique_seasons), "seasons and", nrow(benchmarks), "benchmark points"), "INFO")
|
||||||
|
|
||||||
|
|
@ -1043,7 +1082,7 @@ get_field_priority_level <- function(cv, morans_i) {
|
||||||
#'
|
#'
|
||||||
#' @param field_name Name of the field to summarize
|
#' @param field_name Name of the field to summarize
|
||||||
#' @param field_details_table Data frame with field-level KPI details
|
#' @param field_details_table Data frame with field-level KPI details
|
||||||
#' @param CI_quadrant Data frame containing CI quadrant data with Date, DOY, season columns
|
#' @param CI_quadrant Data frame containing CI quadrant data with Date, DAH, season columns
|
||||||
#' @param report_date Report date (used for filtering current season data)
|
#' @param report_date Report date (used for filtering current season data)
|
||||||
#' @return Formatted text string with field KPI summary
|
#' @return Formatted text string with field KPI summary
|
||||||
#'
|
#'
|
||||||
|
|
@ -1064,10 +1103,10 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
|
||||||
}
|
}
|
||||||
current_season <- current_season_data %>% pull(season)
|
current_season <- current_season_data %>% pull(season)
|
||||||
|
|
||||||
# Get the most recent DOY from the current season
|
# Get the most recent DAH from the current season
|
||||||
field_age_data <- CI_quadrant %>%
|
field_age_data <- CI_quadrant %>%
|
||||||
filter(field == field_name, season == current_season) %>%
|
filter(field == field_name, season == current_season) %>%
|
||||||
pull(DOY)
|
pull(DAH)
|
||||||
|
|
||||||
field_age <- if (length(field_age_data) > 0) max(field_age_data, na.rm = TRUE) else NA_real_
|
field_age <- if (length(field_age_data) > 0) max(field_age_data, na.rm = TRUE) else NA_real_
|
||||||
# Filter data for this specific field
|
# Filter data for this specific field
|
||||||
|
|
@ -1082,7 +1121,7 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
|
||||||
# For categorical data, take the most common value or highest risk level
|
# For categorical data, take the most common value or highest risk level
|
||||||
field_summary <- field_data %>%
|
field_summary <- field_data %>%
|
||||||
summarise(
|
summarise(
|
||||||
field_size = sum(`Field Size (ha)`, na.rm = TRUE),
|
field_size = sum(`Field Size (acres)`, na.rm = TRUE),
|
||||||
uniformity_levels = paste(unique(`Growth Uniformity`), collapse = "/"),
|
uniformity_levels = paste(unique(`Growth Uniformity`), collapse = "/"),
|
||||||
avg_yield_forecast = ifelse(is.na(`Yield Forecast (t/ha)`[1]), NA, mean(`Yield Forecast (t/ha)`, na.rm = TRUE)),
|
avg_yield_forecast = ifelse(is.na(`Yield Forecast (t/ha)`[1]), NA, mean(`Yield Forecast (t/ha)`, na.rm = TRUE)),
|
||||||
max_gap_score = max(`Gap Score`, na.rm = TRUE),
|
max_gap_score = max(`Gap Score`, na.rm = TRUE),
|
||||||
|
|
@ -1117,10 +1156,10 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
|
||||||
}
|
}
|
||||||
|
|
||||||
kpi_text <- paste0(
|
kpi_text <- paste0(
|
||||||
"Size: ", round(field_summary$field_size, 1), " ha | Growth Uniformity: ", field_summary$uniformity_levels,
|
"Size: ", round(field_summary$field_size * 0.404686, 1), " ha | Mean CI: ", round(field_summary$avg_mean_ci, 2),
|
||||||
|
" | Growth Uniformity: ", field_summary$uniformity_levels,
|
||||||
" | ", yield_text, " | Gap Score: ", round(field_summary$max_gap_score, 1),
|
" | ", yield_text, " | Gap Score: ", round(field_summary$max_gap_score, 1),
|
||||||
" | Decline Risk: ", field_summary$highest_decline_risk, " | Weed Risk: ", field_summary$highest_weed_risk,
|
" | Decline Risk: ", field_summary$highest_decline_risk, " | Weed Risk: ", field_summary$highest_weed_risk
|
||||||
" | Mean CI: ", round(field_summary$avg_mean_ci, 2)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Wrap in smaller text HTML tags for Word output
|
# Wrap in smaller text HTML tags for Word output
|
||||||
|
|
@ -1134,4 +1173,33 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#' Normalize field_details_table column structure
|
||||||
|
#'
|
||||||
|
#' Standardizes column names and ensures all expected KPI columns exist.
|
||||||
|
#' Handles Field → Field_id rename and injects missing columns as NA.
|
||||||
|
#'
|
||||||
|
#' @param field_details_table data.frame to normalize
|
||||||
|
#' @return data.frame with standardized column structure
|
||||||
|
normalize_field_details_columns <- function(field_details_table) {
|
||||||
|
if (is.null(field_details_table) || nrow(field_details_table) == 0) {
|
||||||
|
return(field_details_table)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Rename Field → Field_id if needed
|
||||||
|
if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
|
||||||
|
field_details_table <- field_details_table %>%
|
||||||
|
dplyr::rename(Field_id = Field)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Ensure all expected KPI columns exist; add as NA if missing
|
||||||
|
expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score",
|
||||||
|
"Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation",
|
||||||
|
"Decline_Severity", "Patchiness_Risk")
|
||||||
|
for (col in expected_cols) {
|
||||||
|
if (!col %in% names(field_details_table)) {
|
||||||
|
field_details_table[[col]] <- NA
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return(field_details_table)
|
||||||
|
}
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -15,5 +15,12 @@
|
||||||
"vcs.ignore.cellar": true,
|
"vcs.ignore.cellar": true,
|
||||||
"vcs.ignore.library": true,
|
"vcs.ignore.library": true,
|
||||||
"vcs.ignore.local": true,
|
"vcs.ignore.local": true,
|
||||||
"vcs.manage.ignores": true
|
"vcs.manage.ignores": true,
|
||||||
|
"ignored.directories": [
|
||||||
|
"old_sh",
|
||||||
|
"r_app/old_scripts",
|
||||||
|
"r_app/experiments",
|
||||||
|
"python_app/experiments",
|
||||||
|
"webapps"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue