Update terminology from DOY to DAH across multiple scripts and reports for consistency in crop age calculations

2026-02-18 09:36:54 +01:00 · 2026-02-18 09:36:54 +01:00 · 1f677f2626
parent f0a3afad52
commit 1f677f2626
10 changed files with 82 additions and 76 deletions
--- a/python_app/22_harvest_baseline_prediction.py
+++ b/python_app/22_harvest_baseline_prediction.py
@ -111,7 +111,7 @@ def main():
    
    # [3/4] Run model predictions with two-step detection
    print("\n[3/4] Running two-step harvest detection...")
-    print("      (Using threshold=0.3, consecutive_days=2 - tuned baseline with DOY reset)")
+    print("      (Using threshold=0.3, consecutive_days=2 - tuned baseline with DAH reset)")
    refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device,
                                             phase1_threshold=0.3, phase1_consecutive=2)

--- a/python_app/harvest_date_pred_utils.py
+++ b/python_app/harvest_date_pred_utils.py
@ -144,7 +144,7 @@ def create_model(model_type: str, input_size: int, hidden_size: int = 128,
 # FEATURE ENGINEERING (from src/feature_engineering.py, simplified for inline)
 # ============================================================================

-def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> pd.DataFrame:
+def compute_ci_features(ci_series: pd.Series, dah_series: pd.Series = None) -> pd.DataFrame:
    """Compute all CI-based features (state, velocity, acceleration, min/max/range/std/CV)."""
    features = pd.DataFrame(index=ci_series.index)
    
@ -177,9 +177,9 @@ def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> p
        ma = ci_series.rolling(window=window, min_periods=1).mean()
        features[f'{window}d_CV'] = features[f'{window}d_std'] / (ma + 1e-6)
    
-    # DOY normalized
-    if doy_series is not None:
-        features['DOY_normalized'] = doy_series / 450.0
+    # DAH normalized (Days After Harvest)
+    if dah_series is not None:
+        features['DAH_normalized'] = dah_series / 450.0
    
    return features.fillna(0)

@ -193,8 +193,8 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column:
        data_df: DataFrame with Date and CI data (may be a window after a harvest)
        feature_names: List of feature names to extract
        ci_column: Name of CI column
-        season_anchor_day: Day in FULL sequence where this season started (for DOY reset)
-                          DOY will be recalculated as: 1, 2, 3, ... from this point
+        season_anchor_day: Day in FULL sequence where this season started (for DAH reset)
+                          DAH will be recalculated as: 1, 2, 3, ... from this point
        lookback_start: Starting index in original full data (for season reset calculation)
    
    Returns:
@ -203,23 +203,23 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column:
    # Compute all CI features
    ci_series = data_df[ci_column].astype(float)
    
-    # Compute DOY (age/days since season start) - NOT day-of-year!
-    # DOY is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365)
+    # Compute DAH (age/days since season start) - NOT day-of-year!
+    # DAH is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365)
    # It only resets to 1 after a harvest is detected (new season)
-    doy_series = None
-    if 'DOY_normalized' in feature_names:
+    dah_series = None
+    if 'DAH_normalized' in feature_names:
        if season_anchor_day is not None and lookback_start >= season_anchor_day:
-            # Season was reset after harvest. Recalculate DOY as simple counter from 1
-            # This is a window starting at or after harvest, so DOY should be: 1, 2, 3, ...
-            doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
-        elif 'DOY' in data_df.columns:
-            # Use DOY directly from CSV - already calculated as continuous age counter
-            doy_series = pd.Series(data_df['DOY'].astype(float).values, index=data_df.index)
+            # Season was reset after harvest. Recalculate DAH as simple counter from 1
+            # This is a window starting at or after harvest, so DAH should be: 1, 2, 3, ...
+            dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
+        elif 'DAH' in data_df.columns:
+            # Use DAH directly from CSV - already calculated as continuous age counter
+            dah_series = pd.Series(data_df['DAH'].astype(float).values, index=data_df.index)
        else:
            # Fallback: create continuous age counter (1, 2, 3, ...)
-            doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
+            dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
    
-    all_features = compute_ci_features(ci_series, doy_series)
+    all_features = compute_ci_features(ci_series, dah_series)
    
    # Select requested features
    requested = [f for f in feature_names if f in all_features.columns]
--- a/r_app/21_convert_ci_rds_to_csv.R
+++ b/r_app/21_convert_ci_rds_to_csv.R
@ -14,7 +14,7 @@
 # OUTPUT DATA:
 #   - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/
 #   - Format: CSV (long format)
-#   - Columns: field, sub_field, Date, FitData, DOY, value
+#   - Columns: field, sub_field, Date, FitData, DAH, value
 #
 # USAGE:
 #   Rscript 21_convert_ci_rds_to_csv.R [project]
@ -38,7 +38,7 @@
 # NOTES:
 #   - Data source: Uses interpolated CI data from Script 30 (growth model output)
 #   - Handles both wide format and long format inputs from growth model
-#   - DOY (Day of Year): Calculated from date for seasonal analysis
+#   - DAH (Days After Harvest): Calculated from date; represents crop age in days
 #   - Python integration: CSV format compatible with pandas/scikit-learn workflows
 #   - Used by: Python harvest detection models (harvest_date_prediction.py)
 #   - Exports complete growth curves with interpolated values for ML training
@ -82,13 +82,13 @@ wide_to_long_ci_data <- function(ci_data_wide) {
    filter(!is.na(FitData))
 }

-#' Create daily interpolated sequences with DOY for each field
+#' Create daily interpolated sequences with DAH for each field
 #'
 #' For each field/sub_field combination, creates complete daily sequences from first to last date,
 #' fills in measurements, and interpolates missing dates.
 #'
 #' @param ci_data_long Long format tibble: field, sub_field, Date, FitData
-#' @return Tibble with: field, sub_field, Date, FitData, DOY, value
+#' @return Tibble with: field, sub_field, Date, FitData, DAH, value
 create_interpolated_daily_sequences <- function(ci_data_long) {
  ci_data_long %>%
    group_by(field, sub_field) %>%
@ -106,7 +106,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) {
          Date = date_seq,
          value = NA_real_,
          FitData = NA_real_,
-          DOY = seq_along(date_seq)  # Continuous day counter: 1, 2, 3, ...
+          DAH = seq_along(date_seq)  # Continuous day counter: 1, 2, 3, ...
        )
        
        # Fill in actual measurement values
@ -124,7 +124,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) {
      })
    ) %>%
    unnest(data) %>%
-    select(field, sub_field, Date, FitData, DOY, value) %>%
+    select(field, sub_field, Date, FitData, DAH, value) %>%
    arrange(field, Date)
 }

--- a/r_app/30_growth_model_utils.R
+++ b/r_app/30_growth_model_utils.R
@ -208,7 +208,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season,
    # Add additional columns
    CI <- CI %>%
      dplyr::mutate(
-        DOY = seq(1, n(), 1),
+        DAH = seq(1, n(), 1),
        model = paste0("Data", season, " : ", field_name),
        season = season,
        subField = field_name
--- a/r_app/80_utils_cane_supply.R
+++ b/r_app/80_utils_cane_supply.R
@ -173,10 +173,10 @@ calculate_status_alert <- function(imminent_prob, age_week, weekly_ci_change, me
 #' 
 #' Uses Random Forest with Forward Feature Selection trained on:
 #'   - Cumulative Canopy Index (CI) from growth model
-#'   - Days of Year (DOY) / crop age
+#'   - Days After Harvest (DAH) / crop age
 #'   - CI-per-day (growth velocity)
 #'
-#' Predicts yields for mature fields (DOY >= 240, ~8 months) into quartiles:
+#' Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD, ~8 months) into quartiles:
 #'   - Top 25%: High-yield fields
 #'   - Average: Mid-range yield fields
 #'   - Lowest 25%: Lower-yield fields
--- a/r_app/80_utils_common.R
+++ b/r_app/80_utils_common.R
@ -1457,7 +1457,7 @@ prepare_predictions <- function(predictions, newdata) {
    dplyr::mutate(
      sub_field = newdata$sub_field,
      field = newdata$field,
-      Age_days = newdata$DOY,
+      Age_days = newdata$DAH,
      total_CI = round(newdata$cumulative_CI, 0),
      predicted_Tcha = round(predicted_Tcha, 0),
      season = newdata$season
@ -1506,8 +1506,8 @@ create_fallback_result <- function(field_boundaries) {
 #' Calculate yield prediction KPI using Random Forest with Feature Selection
 #'
 #' Trains a Random Forest model on historical harvest data with cumulative CI,
-#' days of year (DOY), and CI-per-day as predictors. Uses CAST::ffs() for
-#' Forward Feature Selection. Predicts yields for mature fields (DOY >= 240).
+#' days after harvest (DAH), and CI-per-day as predictors. Uses CAST::ffs() for
+#' Forward Feature Selection. Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD).
 #' 
 #' @param field_boundaries Field boundaries (sf or SpatVector)
 #' @param harvesting_data Data frame with harvest data including tonnage_ha column
@ -1527,8 +1527,8 @@ create_fallback_result <- function(field_boundaries) {
 #'   - Algorithm: Random Forest (caret + CAST)
 #'   - Feature Selection: Forward Feature Selection (CAST::ffs)
 #'   - Cross-validation: 5-fold CV
-#'   - Predictors: cumulative_CI, DOY, CI_per_day
-#'   - Mature field threshold: DOY >= 240 (8 months)
+#'   - Predictors: cumulative_CI, DAH, CI_per_day
+#'   - Mature field threshold: DAH >= DAH_MATURITY_THRESHOLD (8 months, ~240 days)
 #'   - Output: Field-level yield forecasts grouped by quartile
 #'
 #' **Error Handling:**
@ -1568,12 +1568,12 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
    CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed, 
                                      by = c("field", "sub_field", "season")) %>%
      dplyr::group_by(sub_field, season) %>%
-      dplyr::slice(which.max(DOY)) %>%
-      dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>%
-      dplyr::mutate(CI_per_day = cumulative_CI / DOY)
+      dplyr::slice(which.max(DAH)) %>%
+      dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DAH, season, sub_area) %>%
+      dplyr::mutate(CI_per_day = cumulative_CI / DAH)

    # Define predictors and response variables
-    predictors <- c("cumulative_CI", "DOY", "CI_per_day")
+    predictors <- c("cumulative_CI", "DAH", "CI_per_day")
    response <- "tonnage_ha"

    # Prepare training dataset (fields with harvest data)
@ -1600,7 +1600,7 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
    # Prepare prediction dataset (fields without harvest data, mature fields only)
    prediction_yields <- CI_and_yield %>%
      as.data.frame() %>%
-      dplyr::filter(is.na(tonnage_ha) & DOY >= 240)  # Mature fields only
+      dplyr::filter(is.na(tonnage_ha) & DAH >= DAH_MATURITY_THRESHOLD)  # Mature fields only

    # Configure model training parameters
    ctrl <- caret::trainControl(
@ -1643,13 +1643,13 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
      safe_log(paste("Yield prediction RMSE (in-sample/training):", round(rmse_value, 2), "t/ha"))
    }

-    # Predict yields for current season (mature fields >= 240 days)
+    # Predict yields for current season (mature fields >= DAH_MATURITY_THRESHOLD days)
    if (nrow(prediction_yields) > 0) {
      pred_rf_current_season <- prepare_predictions(
        stats::predict(model_ffs_rf, newdata = prediction_yields), 
        prediction_yields
      ) %>%
-        dplyr::filter(Age_days >= 240) %>%
+        dplyr::filter(Age_days >= DAH_MATURITY_THRESHOLD) %>%
        dplyr::select(c("field", "Age_days", "predicted_Tcha", "season"))
    } else {
      pred_rf_current_season <- data.frame()
@ -1710,7 +1710,7 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
      safe_log("✓ Yield prediction complete")
      return(list(summary = result, field_results = field_level_results))
    } else {
-      safe_log("No fields meet maturity threshold (DOY >= 240) for prediction", "WARNING")
+      safe_log(paste("No fields meet maturity threshold (DAH >=", DAH_MATURITY_THRESHOLD, ") for prediction"), "WARNING")
      return(list(summary = create_fallback_result(field_boundaries)$summary, 
                 field_results = data.frame()))
    }
--- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd
+++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd
@ -300,7 +300,7 @@ if (dir.exists(kpi_data_dir)) {
            }
          }
          
-          # 6. Gap filling summary - GROUP BY Gap_Level and COUNT
+          # 6. Gaps summary - GROUP BY Gap_Level and COUNT
          if ("Gap_Level" %in% names(field_details_table)) {
            summary_tables$gap_filling <- field_details_table %>%
              group_by(gap_level = Gap_Level) %>%
@ -581,7 +581,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table
      growth_decline = list(display = "Growth Decline (4-Week Trend)", level_col = "trend_interpretation", count_col = "field_count"),
      patchiness = list(display = "Field Patchiness", level_col = "gini_category", count_col = "field_count", detail_col = "patchiness_risk"),
      tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", detail_col = "range", count_col = "field_count"),
-      gap_filling = list(display = "Gap Filling", level_col = "gap_level", count_col = "field_count")
+      gap_filling = list(display = "Gaps", level_col = "gap_level", count_col = "field_count")
    )

    standardize_kpi <- function(df, level_col, count_col, detail_col = NULL) {
@ -1349,8 +1349,8 @@ tryCatch({
      ci_quadrant_data <- if (project_dir == "esa" && field_name == "00F25") {
        CI_quadrant %>%
          dplyr::filter(field == "00F25") %>%
-          dplyr::arrange(DOY) %>%
-          dplyr::group_by(DOY) %>%
+          dplyr::arrange(DAH) %>%
+          dplyr::group_by(DAH) %>%
          dplyr::slice(1) %>%
          dplyr::ungroup()
      } else {
@ -1494,10 +1494,10 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
      CI_quadrant %>%
        filter(Date <= as.Date(report_date)) %>%
        group_by(field, season) %>%
-        summarise(last_date = max(Date), last_doy = max(DOY), .groups = 'drop') %>%
+        summarise(last_date = max(Date), last_dah = max(DAH), .groups = 'drop') %>%
        group_by(field) %>%
        filter(season == max(season)) %>%
-        select(field, Age_days = last_doy)
+        select(field, Age_days = last_dah)
    }, error = function(e) {
      data.frame(field = character(), Age_days = numeric())
    })
@ -1640,7 +1640,7 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
     - **High:** Gini > 0.12 (poor uniformity, recommend field scouting)
     - **Note:** Young crops (< 3 months) naturally show higher patchiness as they establish; this decreases with canopy closure.
   
-   - **Gap Filling Score:** Indicates the proportion of a field with low CI values (lowest 25% of the distribution), highlighting areas with poor crop establishment or gaps that may need replanting.
+   - **Gap Score:** Indicates the proportion of a field with low CI values (lowest 25% of the distribution), highlighting areas with poor crop establishment or gaps that may need replanting.

 2. **Overview Map: Growth on Farm:**  
   Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting.
--- a/r_app/MANUAL_PIPELINE_RUNNER.R
+++ b/r_app/MANUAL_PIPELINE_RUNNER.R
@ -239,7 +239,7 @@
 #
 # OUTPUT:
 #   - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
-#   - Columns: field, sub_field, Date, FitData, DOY, value
+#   - Columns: field, sub_field, Date, FitData, DAH, value
 #
 # PARAMETERS:
 #   PROJECT:  angata, chemba, xinavane, esa, simba
--- a/r_app/parameters_project.R
+++ b/r_app/parameters_project.R
@ -33,8 +33,14 @@ suppressPackageStartupMessages({
 })

 # ==============================================================================
-# SECTION 2: CLIENT TYPE MAPPING & CONFIGURATION
+# SECTION 2: GLOBAL AGRONOMIC THRESHOLDS & CLIENT TYPE MAPPING
 # ==============================================================================
+
+# Maturity threshold for yield prediction: crop age in Days After Harvest (DAH)
+# Only fields >= DAH_MATURITY_THRESHOLD days old receive yield forecasts
+# ~240 days ≈ 8 months, typical sugarcane maturity window
+DAH_MATURITY_THRESHOLD <- 240
+
 # Maps project names to client types for pipeline control
 # This determines which scripts run and what outputs they produce

--- a/r_app/report_utils.R
+++ b/r_app/report_utils.R
@ -357,10 +357,10 @@ ci_plot <- function(pivotName,
 #' Creates a plot showing Chlorophyll Index data over time for a pivot field
 #'
 #' @param pivotName The name or ID of the pivot field to visualize
-#' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DOY, cumulative_CI, value and season columns
+#' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DAH, cumulative_CI, value and season columns
 #' @param plot_type Type of plot to generate: "absolute", "cumulative", or "both"
 #' @param facet_on Whether to facet the plot by season (TRUE) or overlay all seasons (FALSE)
-#' @param x_unit Unit for x-axis: "days" for DOY or "weeks" for week number (default: "days")
+#' @param x_unit Unit for x-axis: "days" for DAH or "weeks" for week number (default: "days")
 #' @param colorblind_friendly Whether to use colorblind-friendly color schemes (default: FALSE)
 #' @param show_benchmarks Whether to show historical benchmark lines (default: FALSE)
 #' @param estate_name Name of the estate for benchmark calculation (required if show_benchmarks = TRUE)
@ -393,7 +393,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
    
    # Process data
    data_ci2 <- data_ci %>% 
-      dplyr::mutate(CI_rate = cumulative_CI / DOY,
+      dplyr::mutate(CI_rate = cumulative_CI / DAH,
             week = lubridate::week(Date)) %>% 
      dplyr::group_by(field) %>%
      dplyr::mutate(mean_CIrate_rolling_10_days = zoo::rollapplyr(CI_rate, width = 10, FUN = mean, partial = TRUE),
@ -448,7 +448,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
      
      # Determine x-axis variable based on x_unit parameter
      x_var <- if (x_unit == "days") {
-        if (facet_on) "Date" else "DOY"
+        if (facet_on) "Date" else "DAH"
      } else {
        "week"
      }
@ -457,7 +457,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
                       "days" = if (facet_on) "Date" else "Age of Crop (Days)",
                       "weeks" = "Week Number")
      
-      # Create plot with either facets by season or overlay by DOY/week
+      # Create plot with either facets by season or overlay by DAH/week
      if (facet_on) {
        g <- ggplot2::ggplot(data = plot_data) +
          ggplot2::facet_wrap(~season, scales = "free_x") +
@ -502,12 +502,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
              benchmark_subset <- benchmark_data %>% 
                dplyr::filter(ci_type == ci_type_filter) %>%
                dplyr::mutate(
-                  benchmark_x = if (x_var == "DOY") {
-                    DOY
+                  benchmark_x = if (x_var == "DAH") {
+                    DAH
                  } else if (x_var == "week") {
-                    DOY / 7  # Approximate conversion
+                    DAH / 7  # Approximate conversion
                  } else {
-                    DOY  # For Date, use DOY as is (may not align perfectly)
+                    DAH  # For Date, use DAH as is (may not align perfectly)
                  }
                )
              ggplot2::geom_smooth(
@ -549,7 +549,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
               x = x_label) +
          color_scale +
          {
-            if (x_var == "DOY") {
+            if (x_var == "DAH") {
              ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
            } else if (x_var == "week") {
              ggplot2::scale_x_continuous(breaks = seq(0, 64, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
@ -597,7 +597,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
      
      # Determine x-axis variable based on x_unit parameter
      x_var <- if (x_unit == "days") {
-        if (facet_on) "Date" else "DOY"
+        if (facet_on) "Date" else "DAH"
      } else {
        "week"
      }
@ -620,12 +620,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
          if (!is.null(benchmark_data)) {
            benchmark_subset <- benchmark_data %>%
              dplyr::mutate(
-                benchmark_x = if (x_var == "DOY") {
-                  DOY
+                benchmark_x = if (x_var == "DAH") {
+                  DAH
                } else if (x_var == "week") {
-                  DOY / 7
+                  DAH / 7
                } else {
-                  DOY
+                  DAH
                },
                ci_type_label = case_when(
                  ci_type == "value" ~ "10-Day Rolling Mean CI",
@ -673,7 +673,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
             x = x_label) +
        color_scale +
        {
-          if (x_var == "DOY") {
+          if (x_var == "DAH") {
            ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
          } else if (x_var == "week") {
            ggplot2::scale_x_continuous(breaks = seq(0, 64, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
@ -840,11 +840,11 @@ get_week_path <- function(mosaic_path, input_date, week_offset) {

 #' Computes historical percentile benchmarks for CI data per estate
 #'
-#' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DOY, cumulative_CI, value, season columns
+#' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DAH, cumulative_CI, value, season columns
 #' @param estate_name Name of the estate/client to filter data for
 #' @param percentiles Vector of percentiles to compute (e.g., c(10, 50, 90))
 #' @param min_seasons Minimum number of seasons required for reliable benchmarks (default: 3)
-#' @return Data frame with DOY, percentile, ci_type, benchmark_value, or NULL if insufficient data
+#' @return Data frame with DAH, percentile, ci_type, benchmark_value, or NULL if insufficient data
 #'
 compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c(10, 50, 90), min_seasons = 3) {
  # Input validation
@ -873,7 +873,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
    # Prepare data for both CI types
    data_prepared <- data_filtered %>%
      dplyr::ungroup() %>%  # Ensure no existing groupings
-      dplyr::select(DOY, value, cumulative_CI, season) %>%
+      dplyr::select(DAH, value, cumulative_CI, season) %>%
      tidyr::pivot_longer(
        cols = c("value", "cumulative_CI"),
        names_to = "ci_type",
@ -881,9 +881,9 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
      ) %>%
      dplyr::filter(!is.na(ci_value))  # Remove NA values
    
-    # Compute percentiles for each DOY and ci_type
+    # Compute percentiles for each DAH and ci_type
    benchmarks <- data_prepared %>%
-      dplyr::group_by(DOY, ci_type) %>%
+      dplyr::group_by(DAH, ci_type) %>%
      dplyr::summarise(
        p10 = tryCatch(quantile(ci_value, 0.1, na.rm = TRUE), error = function(e) NA_real_),
        p50 = tryCatch(quantile(ci_value, 0.5, na.rm = TRUE), error = function(e) NA_real_),
@ -891,7 +891,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
        n_observations = n(),
        .groups = 'drop'
      ) %>%
-      dplyr::filter(n_observations >= min_seasons) %>%  # Only include DOYs with sufficient data
+      dplyr::filter(n_observations >= min_seasons) %>%  # Only include DAHs with sufficient data
      tidyr::pivot_longer(
        cols = c(p10, p50, p90),
        names_to = "percentile",
@ -908,7 +908,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
    
    # Rename columns for clarity
    benchmarks <- benchmarks %>%
-      dplyr::select(DOY, ci_type, percentile, benchmark_value)
+      dplyr::select(DAH, ci_type, percentile, benchmark_value)
    
    safe_log(paste("Computed CI benchmarks for estate", estate_name, "with", length(unique_seasons), "seasons and", nrow(benchmarks), "benchmark points"), "INFO")
    
@ -1095,7 +1095,7 @@ get_field_priority_level <- function(cv, morans_i) {
 #'
 #' @param field_name Name of the field to summarize
 #' @param field_details_table Data frame with field-level KPI details
-#' @param CI_quadrant Data frame containing CI quadrant data with Date, DOY, season columns
+#' @param CI_quadrant Data frame containing CI quadrant data with Date, DAH, season columns
 #' @param report_date Report date (used for filtering current season data)
 #' @return Formatted text string with field KPI summary
 #'
@ -1116,10 +1116,10 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
    }
    current_season <- current_season_data %>% pull(season)
    
-    # Get the most recent DOY from the current season
+    # Get the most recent DAH from the current season
    field_age_data <- CI_quadrant %>%
      filter(field == field_name, season == current_season) %>%
-      pull(DOY)
+      pull(DAH)
    
    field_age <- if (length(field_age_data) > 0) max(field_age_data, na.rm = TRUE) else NA_real_    
    # Filter data for this specific field