diff --git a/python_app/22_harvest_baseline_prediction.py b/python_app/22_harvest_baseline_prediction.py index 4184608..f20f937 100644 --- a/python_app/22_harvest_baseline_prediction.py +++ b/python_app/22_harvest_baseline_prediction.py @@ -111,7 +111,7 @@ def main(): # [3/4] Run model predictions with two-step detection print("\n[3/4] Running two-step harvest detection...") - print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DOY reset)") + print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DAH reset)") refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device, phase1_threshold=0.3, phase1_consecutive=2) diff --git a/python_app/harvest_date_pred_utils.py b/python_app/harvest_date_pred_utils.py index 012a9f2..eacb11f 100644 --- a/python_app/harvest_date_pred_utils.py +++ b/python_app/harvest_date_pred_utils.py @@ -144,7 +144,7 @@ def create_model(model_type: str, input_size: int, hidden_size: int = 128, # FEATURE ENGINEERING (from src/feature_engineering.py, simplified for inline) # ============================================================================ -def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> pd.DataFrame: +def compute_ci_features(ci_series: pd.Series, dah_series: pd.Series = None) -> pd.DataFrame: """Compute all CI-based features (state, velocity, acceleration, min/max/range/std/CV).""" features = pd.DataFrame(index=ci_series.index) @@ -177,9 +177,9 @@ def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> p ma = ci_series.rolling(window=window, min_periods=1).mean() features[f'{window}d_CV'] = features[f'{window}d_std'] / (ma + 1e-6) - # DOY normalized - if doy_series is not None: - features['DOY_normalized'] = doy_series / 450.0 + # DAH normalized (Days After Harvest) + if dah_series is not None: + features['DAH_normalized'] = dah_series / 450.0 return features.fillna(0) @@ -193,8 +193,8 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column: data_df: DataFrame with Date and CI data (may be a window after a harvest) feature_names: List of feature names to extract ci_column: Name of CI column - season_anchor_day: Day in FULL sequence where this season started (for DOY reset) - DOY will be recalculated as: 1, 2, 3, ... from this point + season_anchor_day: Day in FULL sequence where this season started (for DAH reset) + DAH will be recalculated as: 1, 2, 3, ... from this point lookback_start: Starting index in original full data (for season reset calculation) Returns: @@ -203,23 +203,23 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column: # Compute all CI features ci_series = data_df[ci_column].astype(float) - # Compute DOY (age/days since season start) - NOT day-of-year! - # DOY is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365) + # Compute DAH (age/days since season start) - NOT day-of-year! + # DAH is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365) # It only resets to 1 after a harvest is detected (new season) - doy_series = None - if 'DOY_normalized' in feature_names: + dah_series = None + if 'DAH_normalized' in feature_names: if season_anchor_day is not None and lookback_start >= season_anchor_day: - # Season was reset after harvest. Recalculate DOY as simple counter from 1 - # This is a window starting at or after harvest, so DOY should be: 1, 2, 3, ... - doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index) - elif 'DOY' in data_df.columns: - # Use DOY directly from CSV - already calculated as continuous age counter - doy_series = pd.Series(data_df['DOY'].astype(float).values, index=data_df.index) + # Season was reset after harvest. Recalculate DAH as simple counter from 1 + # This is a window starting at or after harvest, so DAH should be: 1, 2, 3, ... + dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index) + elif 'DAH' in data_df.columns: + # Use DAH directly from CSV - already calculated as continuous age counter + dah_series = pd.Series(data_df['DAH'].astype(float).values, index=data_df.index) else: # Fallback: create continuous age counter (1, 2, 3, ...) - doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index) + dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index) - all_features = compute_ci_features(ci_series, doy_series) + all_features = compute_ci_features(ci_series, dah_series) # Select requested features requested = [f for f in feature_names if f in all_features.columns] diff --git a/r_app/21_convert_ci_rds_to_csv.R b/r_app/21_convert_ci_rds_to_csv.R index 78af1bb..282e87f 100644 --- a/r_app/21_convert_ci_rds_to_csv.R +++ b/r_app/21_convert_ci_rds_to_csv.R @@ -14,7 +14,7 @@ # OUTPUT DATA: # - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/ # - Format: CSV (long format) -# - Columns: field, sub_field, Date, FitData, DOY, value +# - Columns: field, sub_field, Date, FitData, DAH, value # # USAGE: # Rscript 21_convert_ci_rds_to_csv.R [project] @@ -38,7 +38,7 @@ # NOTES: # - Data source: Uses interpolated CI data from Script 30 (growth model output) # - Handles both wide format and long format inputs from growth model -# - DOY (Day of Year): Calculated from date for seasonal analysis +# - DAH (Days After Harvest): Calculated from date; represents crop age in days # - Python integration: CSV format compatible with pandas/scikit-learn workflows # - Used by: Python harvest detection models (harvest_date_prediction.py) # - Exports complete growth curves with interpolated values for ML training @@ -82,13 +82,13 @@ wide_to_long_ci_data <- function(ci_data_wide) { filter(!is.na(FitData)) } -#' Create daily interpolated sequences with DOY for each field +#' Create daily interpolated sequences with DAH for each field #' #' For each field/sub_field combination, creates complete daily sequences from first to last date, #' fills in measurements, and interpolates missing dates. #' #' @param ci_data_long Long format tibble: field, sub_field, Date, FitData -#' @return Tibble with: field, sub_field, Date, FitData, DOY, value +#' @return Tibble with: field, sub_field, Date, FitData, DAH, value create_interpolated_daily_sequences <- function(ci_data_long) { ci_data_long %>% group_by(field, sub_field) %>% @@ -106,7 +106,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) { Date = date_seq, value = NA_real_, FitData = NA_real_, - DOY = seq_along(date_seq) # Continuous day counter: 1, 2, 3, ... + DAH = seq_along(date_seq) # Continuous day counter: 1, 2, 3, ... ) # Fill in actual measurement values @@ -124,7 +124,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) { }) ) %>% unnest(data) %>% - select(field, sub_field, Date, FitData, DOY, value) %>% + select(field, sub_field, Date, FitData, DAH, value) %>% arrange(field, Date) } diff --git a/r_app/30_growth_model_utils.R b/r_app/30_growth_model_utils.R index 647b811..cca107e 100644 --- a/r_app/30_growth_model_utils.R +++ b/r_app/30_growth_model_utils.R @@ -208,7 +208,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season, # Add additional columns CI <- CI %>% dplyr::mutate( - DOY = seq(1, n(), 1), + DAH = seq(1, n(), 1), model = paste0("Data", season, " : ", field_name), season = season, subField = field_name diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R index 100d231..3877c1c 100644 --- a/r_app/80_utils_cane_supply.R +++ b/r_app/80_utils_cane_supply.R @@ -173,10 +173,10 @@ calculate_status_alert <- function(imminent_prob, age_week, weekly_ci_change, me #' #' Uses Random Forest with Forward Feature Selection trained on: #' - Cumulative Canopy Index (CI) from growth model -#' - Days of Year (DOY) / crop age +#' - Days After Harvest (DAH) / crop age #' - CI-per-day (growth velocity) #' -#' Predicts yields for mature fields (DOY >= 240, ~8 months) into quartiles: +#' Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD, ~8 months) into quartiles: #' - Top 25%: High-yield fields #' - Average: Mid-range yield fields #' - Lowest 25%: Lower-yield fields diff --git a/r_app/80_utils_common.R b/r_app/80_utils_common.R index 29739cf..37ca957 100644 --- a/r_app/80_utils_common.R +++ b/r_app/80_utils_common.R @@ -1457,7 +1457,7 @@ prepare_predictions <- function(predictions, newdata) { dplyr::mutate( sub_field = newdata$sub_field, field = newdata$field, - Age_days = newdata$DOY, + Age_days = newdata$DAH, total_CI = round(newdata$cumulative_CI, 0), predicted_Tcha = round(predicted_Tcha, 0), season = newdata$season @@ -1506,8 +1506,8 @@ create_fallback_result <- function(field_boundaries) { #' Calculate yield prediction KPI using Random Forest with Feature Selection #' #' Trains a Random Forest model on historical harvest data with cumulative CI, -#' days of year (DOY), and CI-per-day as predictors. Uses CAST::ffs() for -#' Forward Feature Selection. Predicts yields for mature fields (DOY >= 240). +#' days after harvest (DAH), and CI-per-day as predictors. Uses CAST::ffs() for +#' Forward Feature Selection. Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD). #' #' @param field_boundaries Field boundaries (sf or SpatVector) #' @param harvesting_data Data frame with harvest data including tonnage_ha column @@ -1527,8 +1527,8 @@ create_fallback_result <- function(field_boundaries) { #' - Algorithm: Random Forest (caret + CAST) #' - Feature Selection: Forward Feature Selection (CAST::ffs) #' - Cross-validation: 5-fold CV -#' - Predictors: cumulative_CI, DOY, CI_per_day -#' - Mature field threshold: DOY >= 240 (8 months) +#' - Predictors: cumulative_CI, DAH, CI_per_day +#' - Mature field threshold: DAH >= DAH_MATURITY_THRESHOLD (8 months, ~240 days) #' - Output: Field-level yield forecasts grouped by quartile #' #' **Error Handling:** @@ -1568,12 +1568,12 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed, by = c("field", "sub_field", "season")) %>% dplyr::group_by(sub_field, season) %>% - dplyr::slice(which.max(DOY)) %>% - dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>% - dplyr::mutate(CI_per_day = cumulative_CI / DOY) + dplyr::slice(which.max(DAH)) %>% + dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DAH, season, sub_area) %>% + dplyr::mutate(CI_per_day = cumulative_CI / DAH) # Define predictors and response variables - predictors <- c("cumulative_CI", "DOY", "CI_per_day") + predictors <- c("cumulative_CI", "DAH", "CI_per_day") response <- "tonnage_ha" # Prepare training dataset (fields with harvest data) @@ -1600,7 +1600,7 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu # Prepare prediction dataset (fields without harvest data, mature fields only) prediction_yields <- CI_and_yield %>% as.data.frame() %>% - dplyr::filter(is.na(tonnage_ha) & DOY >= 240) # Mature fields only + dplyr::filter(is.na(tonnage_ha) & DAH >= DAH_MATURITY_THRESHOLD) # Mature fields only # Configure model training parameters ctrl <- caret::trainControl( @@ -1643,13 +1643,13 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu safe_log(paste("Yield prediction RMSE (in-sample/training):", round(rmse_value, 2), "t/ha")) } - # Predict yields for current season (mature fields >= 240 days) + # Predict yields for current season (mature fields >= DAH_MATURITY_THRESHOLD days) if (nrow(prediction_yields) > 0) { pred_rf_current_season <- prepare_predictions( stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields ) %>% - dplyr::filter(Age_days >= 240) %>% + dplyr::filter(Age_days >= DAH_MATURITY_THRESHOLD) %>% dplyr::select(c("field", "Age_days", "predicted_Tcha", "season")) } else { pred_rf_current_season <- data.frame() @@ -1710,7 +1710,7 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu safe_log("✓ Yield prediction complete") return(list(summary = result, field_results = field_level_results)) } else { - safe_log("No fields meet maturity threshold (DOY >= 240) for prediction", "WARNING") + safe_log(paste("No fields meet maturity threshold (DAH >=", DAH_MATURITY_THRESHOLD, ") for prediction"), "WARNING") return(list(summary = create_fallback_result(field_boundaries)$summary, field_results = data.frame())) } diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 216d722..55b81de 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -300,7 +300,7 @@ if (dir.exists(kpi_data_dir)) { } } - # 6. Gap filling summary - GROUP BY Gap_Level and COUNT + # 6. Gaps summary - GROUP BY Gap_Level and COUNT if ("Gap_Level" %in% names(field_details_table)) { summary_tables$gap_filling <- field_details_table %>% group_by(gap_level = Gap_Level) %>% @@ -581,7 +581,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table growth_decline = list(display = "Growth Decline (4-Week Trend)", level_col = "trend_interpretation", count_col = "field_count"), patchiness = list(display = "Field Patchiness", level_col = "gini_category", count_col = "field_count", detail_col = "patchiness_risk"), tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", detail_col = "range", count_col = "field_count"), - gap_filling = list(display = "Gap Filling", level_col = "gap_level", count_col = "field_count") + gap_filling = list(display = "Gaps", level_col = "gap_level", count_col = "field_count") ) standardize_kpi <- function(df, level_col, count_col, detail_col = NULL) { @@ -1349,8 +1349,8 @@ tryCatch({ ci_quadrant_data <- if (project_dir == "esa" && field_name == "00F25") { CI_quadrant %>% dplyr::filter(field == "00F25") %>% - dplyr::arrange(DOY) %>% - dplyr::group_by(DOY) %>% + dplyr::arrange(DAH) %>% + dplyr::group_by(DAH) %>% dplyr::slice(1) %>% dplyr::ungroup() } else { @@ -1494,10 +1494,10 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field CI_quadrant %>% filter(Date <= as.Date(report_date)) %>% group_by(field, season) %>% - summarise(last_date = max(Date), last_doy = max(DOY), .groups = 'drop') %>% + summarise(last_date = max(Date), last_dah = max(DAH), .groups = 'drop') %>% group_by(field) %>% filter(season == max(season)) %>% - select(field, Age_days = last_doy) + select(field, Age_days = last_dah) }, error = function(e) { data.frame(field = character(), Age_days = numeric()) }) @@ -1640,7 +1640,7 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to - **High:** Gini > 0.12 (poor uniformity, recommend field scouting) - **Note:** Young crops (< 3 months) naturally show higher patchiness as they establish; this decreases with canopy closure. - - **Gap Filling Score:** Indicates the proportion of a field with low CI values (lowest 25% of the distribution), highlighting areas with poor crop establishment or gaps that may need replanting. + - **Gap Score:** Indicates the proportion of a field with low CI values (lowest 25% of the distribution), highlighting areas with poor crop establishment or gaps that may need replanting. 2. **Overview Map: Growth on Farm:** Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting. diff --git a/r_app/MANUAL_PIPELINE_RUNNER.R b/r_app/MANUAL_PIPELINE_RUNNER.R index 7ea203e..0a6f7d6 100644 --- a/r_app/MANUAL_PIPELINE_RUNNER.R +++ b/r_app/MANUAL_PIPELINE_RUNNER.R @@ -239,7 +239,7 @@ # # OUTPUT: # - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv -# - Columns: field, sub_field, Date, FitData, DOY, value +# - Columns: field, sub_field, Date, FitData, DAH, value # # PARAMETERS: # PROJECT: angata, chemba, xinavane, esa, simba diff --git a/r_app/parameters_project.R b/r_app/parameters_project.R index f0b5802..daae442 100644 --- a/r_app/parameters_project.R +++ b/r_app/parameters_project.R @@ -33,8 +33,14 @@ suppressPackageStartupMessages({ }) # ============================================================================== -# SECTION 2: CLIENT TYPE MAPPING & CONFIGURATION +# SECTION 2: GLOBAL AGRONOMIC THRESHOLDS & CLIENT TYPE MAPPING # ============================================================================== + +# Maturity threshold for yield prediction: crop age in Days After Harvest (DAH) +# Only fields >= DAH_MATURITY_THRESHOLD days old receive yield forecasts +# ~240 days ≈ 8 months, typical sugarcane maturity window +DAH_MATURITY_THRESHOLD <- 240 + # Maps project names to client types for pipeline control # This determines which scripts run and what outputs they produce diff --git a/r_app/report_utils.R b/r_app/report_utils.R index f4346a6..3827593 100644 --- a/r_app/report_utils.R +++ b/r_app/report_utils.R @@ -357,10 +357,10 @@ ci_plot <- function(pivotName, #' Creates a plot showing Chlorophyll Index data over time for a pivot field #' #' @param pivotName The name or ID of the pivot field to visualize -#' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DOY, cumulative_CI, value and season columns +#' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DAH, cumulative_CI, value and season columns #' @param plot_type Type of plot to generate: "absolute", "cumulative", or "both" #' @param facet_on Whether to facet the plot by season (TRUE) or overlay all seasons (FALSE) -#' @param x_unit Unit for x-axis: "days" for DOY or "weeks" for week number (default: "days") +#' @param x_unit Unit for x-axis: "days" for DAH or "weeks" for week number (default: "days") #' @param colorblind_friendly Whether to use colorblind-friendly color schemes (default: FALSE) #' @param show_benchmarks Whether to show historical benchmark lines (default: FALSE) #' @param estate_name Name of the estate for benchmark calculation (required if show_benchmarks = TRUE) @@ -393,7 +393,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " # Process data data_ci2 <- data_ci %>% - dplyr::mutate(CI_rate = cumulative_CI / DOY, + dplyr::mutate(CI_rate = cumulative_CI / DAH, week = lubridate::week(Date)) %>% dplyr::group_by(field) %>% dplyr::mutate(mean_CIrate_rolling_10_days = zoo::rollapplyr(CI_rate, width = 10, FUN = mean, partial = TRUE), @@ -448,7 +448,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " # Determine x-axis variable based on x_unit parameter x_var <- if (x_unit == "days") { - if (facet_on) "Date" else "DOY" + if (facet_on) "Date" else "DAH" } else { "week" } @@ -457,7 +457,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " "days" = if (facet_on) "Date" else "Age of Crop (Days)", "weeks" = "Week Number") - # Create plot with either facets by season or overlay by DOY/week + # Create plot with either facets by season or overlay by DAH/week if (facet_on) { g <- ggplot2::ggplot(data = plot_data) + ggplot2::facet_wrap(~season, scales = "free_x") + @@ -502,12 +502,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " benchmark_subset <- benchmark_data %>% dplyr::filter(ci_type == ci_type_filter) %>% dplyr::mutate( - benchmark_x = if (x_var == "DOY") { - DOY + benchmark_x = if (x_var == "DAH") { + DAH } else if (x_var == "week") { - DOY / 7 # Approximate conversion + DAH / 7 # Approximate conversion } else { - DOY # For Date, use DOY as is (may not align perfectly) + DAH # For Date, use DAH as is (may not align perfectly) } ) ggplot2::geom_smooth( @@ -549,7 +549,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " x = x_label) + color_scale + { - if (x_var == "DOY") { + if (x_var == "DAH") { ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1))) } else if (x_var == "week") { ggplot2::scale_x_continuous(breaks = seq(0, 64, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1))) @@ -597,7 +597,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " # Determine x-axis variable based on x_unit parameter x_var <- if (x_unit == "days") { - if (facet_on) "Date" else "DOY" + if (facet_on) "Date" else "DAH" } else { "week" } @@ -620,12 +620,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " if (!is.null(benchmark_data)) { benchmark_subset <- benchmark_data %>% dplyr::mutate( - benchmark_x = if (x_var == "DOY") { - DOY + benchmark_x = if (x_var == "DAH") { + DAH } else if (x_var == "week") { - DOY / 7 + DAH / 7 } else { - DOY + DAH }, ci_type_label = case_when( ci_type == "value" ~ "10-Day Rolling Mean CI", @@ -673,7 +673,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " x = x_label) + color_scale + { - if (x_var == "DOY") { + if (x_var == "DAH") { ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1))) } else if (x_var == "week") { ggplot2::scale_x_continuous(breaks = seq(0, 64, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1))) @@ -840,11 +840,11 @@ get_week_path <- function(mosaic_path, input_date, week_offset) { #' Computes historical percentile benchmarks for CI data per estate #' -#' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DOY, cumulative_CI, value, season columns +#' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DAH, cumulative_CI, value, season columns #' @param estate_name Name of the estate/client to filter data for #' @param percentiles Vector of percentiles to compute (e.g., c(10, 50, 90)) #' @param min_seasons Minimum number of seasons required for reliable benchmarks (default: 3) -#' @return Data frame with DOY, percentile, ci_type, benchmark_value, or NULL if insufficient data +#' @return Data frame with DAH, percentile, ci_type, benchmark_value, or NULL if insufficient data #' compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c(10, 50, 90), min_seasons = 3) { # Input validation @@ -873,7 +873,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c # Prepare data for both CI types data_prepared <- data_filtered %>% dplyr::ungroup() %>% # Ensure no existing groupings - dplyr::select(DOY, value, cumulative_CI, season) %>% + dplyr::select(DAH, value, cumulative_CI, season) %>% tidyr::pivot_longer( cols = c("value", "cumulative_CI"), names_to = "ci_type", @@ -881,9 +881,9 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c ) %>% dplyr::filter(!is.na(ci_value)) # Remove NA values - # Compute percentiles for each DOY and ci_type + # Compute percentiles for each DAH and ci_type benchmarks <- data_prepared %>% - dplyr::group_by(DOY, ci_type) %>% + dplyr::group_by(DAH, ci_type) %>% dplyr::summarise( p10 = tryCatch(quantile(ci_value, 0.1, na.rm = TRUE), error = function(e) NA_real_), p50 = tryCatch(quantile(ci_value, 0.5, na.rm = TRUE), error = function(e) NA_real_), @@ -891,7 +891,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c n_observations = n(), .groups = 'drop' ) %>% - dplyr::filter(n_observations >= min_seasons) %>% # Only include DOYs with sufficient data + dplyr::filter(n_observations >= min_seasons) %>% # Only include DAHs with sufficient data tidyr::pivot_longer( cols = c(p10, p50, p90), names_to = "percentile", @@ -908,7 +908,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c # Rename columns for clarity benchmarks <- benchmarks %>% - dplyr::select(DOY, ci_type, percentile, benchmark_value) + dplyr::select(DAH, ci_type, percentile, benchmark_value) safe_log(paste("Computed CI benchmarks for estate", estate_name, "with", length(unique_seasons), "seasons and", nrow(benchmarks), "benchmark points"), "INFO") @@ -1095,7 +1095,7 @@ get_field_priority_level <- function(cv, morans_i) { #' #' @param field_name Name of the field to summarize #' @param field_details_table Data frame with field-level KPI details -#' @param CI_quadrant Data frame containing CI quadrant data with Date, DOY, season columns +#' @param CI_quadrant Data frame containing CI quadrant data with Date, DAH, season columns #' @param report_date Report date (used for filtering current season data) #' @return Formatted text string with field KPI summary #' @@ -1116,10 +1116,10 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr } current_season <- current_season_data %>% pull(season) - # Get the most recent DOY from the current season + # Get the most recent DAH from the current season field_age_data <- CI_quadrant %>% filter(field == field_name, season == current_season) %>% - pull(DOY) + pull(DAH) field_age <- if (length(field_age_data) > 0) max(field_age_data, na.rm = TRUE) else NA_real_ # Filter data for this specific field