Update terminology from DOY to DAH across multiple scripts and reports for consistency in crop age calculations

This commit is contained in:
Timon 2026-02-18 09:36:54 +01:00
parent f0a3afad52
commit 1f677f2626
10 changed files with 82 additions and 76 deletions

View file

@@ -111,7 +111,7 @@ def main():
# [3/4] Run model predictions with two-step detection # [3/4] Run model predictions with two-step detection
print("\n[3/4] Running two-step harvest detection...") print("\n[3/4] Running two-step harvest detection...")
print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DOY reset)") print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DAH reset)")
refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device, refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device,
phase1_threshold=0.3, phase1_consecutive=2) phase1_threshold=0.3, phase1_consecutive=2)

View file

@@ -144,7 +144,7 @@ def create_model(model_type: str, input_size: int, hidden_size: int = 128,
# FEATURE ENGINEERING (from src/feature_engineering.py, simplified for inline) # FEATURE ENGINEERING (from src/feature_engineering.py, simplified for inline)
# ============================================================================ # ============================================================================
def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> pd.DataFrame: def compute_ci_features(ci_series: pd.Series, dah_series: pd.Series = None) -> pd.DataFrame:
"""Compute all CI-based features (state, velocity, acceleration, min/max/range/std/CV).""" """Compute all CI-based features (state, velocity, acceleration, min/max/range/std/CV)."""
features = pd.DataFrame(index=ci_series.index) features = pd.DataFrame(index=ci_series.index)
@@ -177,9 +177,9 @@ def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> p
ma = ci_series.rolling(window=window, min_periods=1).mean() ma = ci_series.rolling(window=window, min_periods=1).mean()
features[f'{window}d_CV'] = features[f'{window}d_std'] / (ma + 1e-6) features[f'{window}d_CV'] = features[f'{window}d_std'] / (ma + 1e-6)
# DOY normalized # DAH normalized (Days After Harvest)
if doy_series is not None: if dah_series is not None:
features['DOY_normalized'] = doy_series / 450.0 features['DAH_normalized'] = dah_series / 450.0
return features.fillna(0) return features.fillna(0)
@@ -193,8 +193,8 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column:
data_df: DataFrame with Date and CI data (may be a window after a harvest) data_df: DataFrame with Date and CI data (may be a window after a harvest)
feature_names: List of feature names to extract feature_names: List of feature names to extract
ci_column: Name of CI column ci_column: Name of CI column
season_anchor_day: Day in FULL sequence where this season started (for DOY reset) season_anchor_day: Day in FULL sequence where this season started (for DAH reset)
DOY will be recalculated as: 1, 2, 3, ... from this point DAH will be recalculated as: 1, 2, 3, ... from this point
lookback_start: Starting index in original full data (for season reset calculation) lookback_start: Starting index in original full data (for season reset calculation)
Returns: Returns:
@@ -203,23 +203,23 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column:
# Compute all CI features # Compute all CI features
ci_series = data_df[ci_column].astype(float) ci_series = data_df[ci_column].astype(float)
# Compute DOY (age/days since season start) - NOT day-of-year! # Compute DAH (age/days since season start) - NOT day-of-year!
# DOY is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365) # DAH is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365)
# It only resets to 1 after a harvest is detected (new season) # It only resets to 1 after a harvest is detected (new season)
doy_series = None dah_series = None
if 'DOY_normalized' in feature_names: if 'DAH_normalized' in feature_names:
if season_anchor_day is not None and lookback_start >= season_anchor_day: if season_anchor_day is not None and lookback_start >= season_anchor_day:
# Season was reset after harvest. Recalculate DOY as simple counter from 1 # Season was reset after harvest. Recalculate DAH as simple counter from 1
# This is a window starting at or after harvest, so DOY should be: 1, 2, 3, ... # This is a window starting at or after harvest, so DAH should be: 1, 2, 3, ...
doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index) dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
elif 'DOY' in data_df.columns: elif 'DAH' in data_df.columns:
# Use DOY directly from CSV - already calculated as continuous age counter # Use DAH directly from CSV - already calculated as continuous age counter
doy_series = pd.Series(data_df['DOY'].astype(float).values, index=data_df.index) dah_series = pd.Series(data_df['DAH'].astype(float).values, index=data_df.index)
else: else:
# Fallback: create continuous age counter (1, 2, 3, ...) # Fallback: create continuous age counter (1, 2, 3, ...)
doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index) dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
all_features = compute_ci_features(ci_series, doy_series) all_features = compute_ci_features(ci_series, dah_series)
# Select requested features # Select requested features
requested = [f for f in feature_names if f in all_features.columns] requested = [f for f in feature_names if f in all_features.columns]

View file

@@ -14,7 +14,7 @@
# OUTPUT DATA: # OUTPUT DATA:
# - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/ # - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/
# - Format: CSV (long format) # - Format: CSV (long format)
# - Columns: field, sub_field, Date, FitData, DOY, value # - Columns: field, sub_field, Date, FitData, DAH, value
# #
# USAGE: # USAGE:
# Rscript 21_convert_ci_rds_to_csv.R [project] # Rscript 21_convert_ci_rds_to_csv.R [project]
@@ -38,7 +38,7 @@
# NOTES: # NOTES:
# - Data source: Uses interpolated CI data from Script 30 (growth model output) # - Data source: Uses interpolated CI data from Script 30 (growth model output)
# - Handles both wide format and long format inputs from growth model # - Handles both wide format and long format inputs from growth model
# - DOY (Day of Year): Calculated from date for seasonal analysis # - DAH (Days After Harvest): Calculated from date; represents crop age in days
# - Python integration: CSV format compatible with pandas/scikit-learn workflows # - Python integration: CSV format compatible with pandas/scikit-learn workflows
# - Used by: Python harvest detection models (harvest_date_prediction.py) # - Used by: Python harvest detection models (harvest_date_prediction.py)
# - Exports complete growth curves with interpolated values for ML training # - Exports complete growth curves with interpolated values for ML training
@@ -82,13 +82,13 @@ wide_to_long_ci_data <- function(ci_data_wide) {
filter(!is.na(FitData)) filter(!is.na(FitData))
} }
#' Create daily interpolated sequences with DOY for each field #' Create daily interpolated sequences with DAH for each field
#' #'
#' For each field/sub_field combination, creates complete daily sequences from first to last date, #' For each field/sub_field combination, creates complete daily sequences from first to last date,
#' fills in measurements, and interpolates missing dates. #' fills in measurements, and interpolates missing dates.
#' #'
#' @param ci_data_long Long format tibble: field, sub_field, Date, FitData #' @param ci_data_long Long format tibble: field, sub_field, Date, FitData
#' @return Tibble with: field, sub_field, Date, FitData, DOY, value #' @return Tibble with: field, sub_field, Date, FitData, DAH, value
create_interpolated_daily_sequences <- function(ci_data_long) { create_interpolated_daily_sequences <- function(ci_data_long) {
ci_data_long %>% ci_data_long %>%
group_by(field, sub_field) %>% group_by(field, sub_field) %>%
@@ -106,7 +106,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) {
Date = date_seq, Date = date_seq,
value = NA_real_, value = NA_real_,
FitData = NA_real_, FitData = NA_real_,
DOY = seq_along(date_seq) # Continuous day counter: 1, 2, 3, ... DAH = seq_along(date_seq) # Continuous day counter: 1, 2, 3, ...
) )
# Fill in actual measurement values # Fill in actual measurement values
@@ -124,7 +124,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) {
}) })
) %>% ) %>%
unnest(data) %>% unnest(data) %>%
select(field, sub_field, Date, FitData, DOY, value) %>% select(field, sub_field, Date, FitData, DAH, value) %>%
arrange(field, Date) arrange(field, Date)
} }

View file

@@ -208,7 +208,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season,
# Add additional columns # Add additional columns
CI <- CI %>% CI <- CI %>%
dplyr::mutate( dplyr::mutate(
DOY = seq(1, n(), 1), DAH = seq(1, n(), 1),
model = paste0("Data", season, " : ", field_name), model = paste0("Data", season, " : ", field_name),
season = season, season = season,
subField = field_name subField = field_name

View file

@@ -173,10 +173,10 @@ calculate_status_alert <- function(imminent_prob, age_week, weekly_ci_change, me
#' #'
#' Uses Random Forest with Forward Feature Selection trained on: #' Uses Random Forest with Forward Feature Selection trained on:
#' - Cumulative Canopy Index (CI) from growth model #' - Cumulative Canopy Index (CI) from growth model
#' - Days of Year (DOY) / crop age #' - Days After Harvest (DAH) / crop age
#' - CI-per-day (growth velocity) #' - CI-per-day (growth velocity)
#' #'
#' Predicts yields for mature fields (DOY >= 240, ~8 months) into quartiles: #' Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD, ~8 months) into quartiles:
#' - Top 25%: High-yield fields #' - Top 25%: High-yield fields
#' - Average: Mid-range yield fields #' - Average: Mid-range yield fields
#' - Lowest 25%: Lower-yield fields #' - Lowest 25%: Lower-yield fields

View file

@@ -1457,7 +1457,7 @@ prepare_predictions <- function(predictions, newdata) {
dplyr::mutate( dplyr::mutate(
sub_field = newdata$sub_field, sub_field = newdata$sub_field,
field = newdata$field, field = newdata$field,
Age_days = newdata$DOY, Age_days = newdata$DAH,
total_CI = round(newdata$cumulative_CI, 0), total_CI = round(newdata$cumulative_CI, 0),
predicted_Tcha = round(predicted_Tcha, 0), predicted_Tcha = round(predicted_Tcha, 0),
season = newdata$season season = newdata$season
@@ -1506,8 +1506,8 @@ create_fallback_result <- function(field_boundaries) {
#' Calculate yield prediction KPI using Random Forest with Feature Selection #' Calculate yield prediction KPI using Random Forest with Feature Selection
#' #'
#' Trains a Random Forest model on historical harvest data with cumulative CI, #' Trains a Random Forest model on historical harvest data with cumulative CI,
#' days of year (DOY), and CI-per-day as predictors. Uses CAST::ffs() for #' days after harvest (DAH), and CI-per-day as predictors. Uses CAST::ffs() for
#' Forward Feature Selection. Predicts yields for mature fields (DOY >= 240). #' Forward Feature Selection. Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD).
#' #'
#' @param field_boundaries Field boundaries (sf or SpatVector) #' @param field_boundaries Field boundaries (sf or SpatVector)
#' @param harvesting_data Data frame with harvest data including tonnage_ha column #' @param harvesting_data Data frame with harvest data including tonnage_ha column
@@ -1527,8 +1527,8 @@ create_fallback_result <- function(field_boundaries) {
#' - Algorithm: Random Forest (caret + CAST) #' - Algorithm: Random Forest (caret + CAST)
#' - Feature Selection: Forward Feature Selection (CAST::ffs) #' - Feature Selection: Forward Feature Selection (CAST::ffs)
#' - Cross-validation: 5-fold CV #' - Cross-validation: 5-fold CV
#' - Predictors: cumulative_CI, DOY, CI_per_day #' - Predictors: cumulative_CI, DAH, CI_per_day
#' - Mature field threshold: DOY >= 240 (8 months) #' - Mature field threshold: DAH >= DAH_MATURITY_THRESHOLD (8 months, ~240 days)
#' - Output: Field-level yield forecasts grouped by quartile #' - Output: Field-level yield forecasts grouped by quartile
#' #'
#' **Error Handling:** #' **Error Handling:**
@@ -1568,12 +1568,12 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed, CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed,
by = c("field", "sub_field", "season")) %>% by = c("field", "sub_field", "season")) %>%
dplyr::group_by(sub_field, season) %>% dplyr::group_by(sub_field, season) %>%
dplyr::slice(which.max(DOY)) %>% dplyr::slice(which.max(DAH)) %>%
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>% dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DAH, season, sub_area) %>%
dplyr::mutate(CI_per_day = cumulative_CI / DOY) dplyr::mutate(CI_per_day = cumulative_CI / DAH)
# Define predictors and response variables # Define predictors and response variables
predictors <- c("cumulative_CI", "DOY", "CI_per_day") predictors <- c("cumulative_CI", "DAH", "CI_per_day")
response <- "tonnage_ha" response <- "tonnage_ha"
# Prepare training dataset (fields with harvest data) # Prepare training dataset (fields with harvest data)
@@ -1600,7 +1600,7 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
# Prepare prediction dataset (fields without harvest data, mature fields only) # Prepare prediction dataset (fields without harvest data, mature fields only)
prediction_yields <- CI_and_yield %>% prediction_yields <- CI_and_yield %>%
as.data.frame() %>% as.data.frame() %>%
dplyr::filter(is.na(tonnage_ha) & DOY >= 240) # Mature fields only dplyr::filter(is.na(tonnage_ha) & DAH >= DAH_MATURITY_THRESHOLD) # Mature fields only
# Configure model training parameters # Configure model training parameters
ctrl <- caret::trainControl( ctrl <- caret::trainControl(
@@ -1643,13 +1643,13 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
safe_log(paste("Yield prediction RMSE (in-sample/training):", round(rmse_value, 2), "t/ha")) safe_log(paste("Yield prediction RMSE (in-sample/training):", round(rmse_value, 2), "t/ha"))
} }
# Predict yields for current season (mature fields >= 240 days) # Predict yields for current season (mature fields >= DAH_MATURITY_THRESHOLD days)
if (nrow(prediction_yields) > 0) { if (nrow(prediction_yields) > 0) {
pred_rf_current_season <- prepare_predictions( pred_rf_current_season <- prepare_predictions(
stats::predict(model_ffs_rf, newdata = prediction_yields), stats::predict(model_ffs_rf, newdata = prediction_yields),
prediction_yields prediction_yields
) %>% ) %>%
dplyr::filter(Age_days >= 240) %>% dplyr::filter(Age_days >= DAH_MATURITY_THRESHOLD) %>%
dplyr::select(c("field", "Age_days", "predicted_Tcha", "season")) dplyr::select(c("field", "Age_days", "predicted_Tcha", "season"))
} else { } else {
pred_rf_current_season <- data.frame() pred_rf_current_season <- data.frame()
@@ -1710,7 +1710,7 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
safe_log("✓ Yield prediction complete") safe_log("✓ Yield prediction complete")
return(list(summary = result, field_results = field_level_results)) return(list(summary = result, field_results = field_level_results))
} else { } else {
safe_log("No fields meet maturity threshold (DOY >= 240) for prediction", "WARNING") safe_log(paste("No fields meet maturity threshold (DAH >=", DAH_MATURITY_THRESHOLD, ") for prediction"), "WARNING")
return(list(summary = create_fallback_result(field_boundaries)$summary, return(list(summary = create_fallback_result(field_boundaries)$summary,
field_results = data.frame())) field_results = data.frame()))
} }

View file

@@ -300,7 +300,7 @@ if (dir.exists(kpi_data_dir)) {
} }
} }
# 6. Gap filling summary - GROUP BY Gap_Level and COUNT # 6. Gaps summary - GROUP BY Gap_Level and COUNT
if ("Gap_Level" %in% names(field_details_table)) { if ("Gap_Level" %in% names(field_details_table)) {
summary_tables$gap_filling <- field_details_table %>% summary_tables$gap_filling <- field_details_table %>%
group_by(gap_level = Gap_Level) %>% group_by(gap_level = Gap_Level) %>%
@@ -581,7 +581,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table
growth_decline = list(display = "Growth Decline (4-Week Trend)", level_col = "trend_interpretation", count_col = "field_count"), growth_decline = list(display = "Growth Decline (4-Week Trend)", level_col = "trend_interpretation", count_col = "field_count"),
patchiness = list(display = "Field Patchiness", level_col = "gini_category", count_col = "field_count", detail_col = "patchiness_risk"), patchiness = list(display = "Field Patchiness", level_col = "gini_category", count_col = "field_count", detail_col = "patchiness_risk"),
tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", detail_col = "range", count_col = "field_count"), tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", detail_col = "range", count_col = "field_count"),
gap_filling = list(display = "Gap Filling", level_col = "gap_level", count_col = "field_count") gap_filling = list(display = "Gaps", level_col = "gap_level", count_col = "field_count")
) )
standardize_kpi <- function(df, level_col, count_col, detail_col = NULL) { standardize_kpi <- function(df, level_col, count_col, detail_col = NULL) {
@@ -1349,8 +1349,8 @@ tryCatch({
ci_quadrant_data <- if (project_dir == "esa" && field_name == "00F25") { ci_quadrant_data <- if (project_dir == "esa" && field_name == "00F25") {
CI_quadrant %>% CI_quadrant %>%
dplyr::filter(field == "00F25") %>% dplyr::filter(field == "00F25") %>%
dplyr::arrange(DOY) %>% dplyr::arrange(DAH) %>%
dplyr::group_by(DOY) %>% dplyr::group_by(DAH) %>%
dplyr::slice(1) %>% dplyr::slice(1) %>%
dplyr::ungroup() dplyr::ungroup()
} else { } else {
@@ -1494,10 +1494,10 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
CI_quadrant %>% CI_quadrant %>%
filter(Date <= as.Date(report_date)) %>% filter(Date <= as.Date(report_date)) %>%
group_by(field, season) %>% group_by(field, season) %>%
summarise(last_date = max(Date), last_doy = max(DOY), .groups = 'drop') %>% summarise(last_date = max(Date), last_dah = max(DAH), .groups = 'drop') %>%
group_by(field) %>% group_by(field) %>%
filter(season == max(season)) %>% filter(season == max(season)) %>%
select(field, Age_days = last_doy) select(field, Age_days = last_dah)
}, error = function(e) { }, error = function(e) {
data.frame(field = character(), Age_days = numeric()) data.frame(field = character(), Age_days = numeric())
}) })
@@ -1640,7 +1640,7 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
- **High:** Gini > 0.12 (poor uniformity, recommend field scouting) - **High:** Gini > 0.12 (poor uniformity, recommend field scouting)
- **Note:** Young crops (< 3 months) naturally show higher patchiness as they establish; this decreases with canopy closure. - **Note:** Young crops (< 3 months) naturally show higher patchiness as they establish; this decreases with canopy closure.
- **Gap Filling Score:** Indicates the proportion of a field with low CI values (lowest 25% of the distribution), highlighting areas with poor crop establishment or gaps that may need replanting. - **Gap Score:** Indicates the proportion of a field with low CI values (lowest 25% of the distribution), highlighting areas with poor crop establishment or gaps that may need replanting.
2. **Overview Map: Growth on Farm:** 2. **Overview Map: Growth on Farm:**
Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting. Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting.

View file

@@ -239,7 +239,7 @@
# #
# OUTPUT: # OUTPUT:
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv # - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
# - Columns: field, sub_field, Date, FitData, DOY, value # - Columns: field, sub_field, Date, FitData, DAH, value
# #
# PARAMETERS: # PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba # PROJECT: angata, chemba, xinavane, esa, simba

View file

@@ -33,8 +33,14 @@ suppressPackageStartupMessages({
}) })
# ============================================================================== # ==============================================================================
# SECTION 2: CLIENT TYPE MAPPING & CONFIGURATION # SECTION 2: GLOBAL AGRONOMIC THRESHOLDS & CLIENT TYPE MAPPING
# ============================================================================== # ==============================================================================
# Maturity threshold for yield prediction: crop age in Days After Harvest (DAH)
# Only fields >= DAH_MATURITY_THRESHOLD days old receive yield forecasts
# ~240 days ≈ 8 months, typical sugarcane maturity window
DAH_MATURITY_THRESHOLD <- 240
# Maps project names to client types for pipeline control # Maps project names to client types for pipeline control
# This determines which scripts run and what outputs they produce # This determines which scripts run and what outputs they produce

View file

@@ -357,10 +357,10 @@ ci_plot <- function(pivotName,
#' Creates a plot showing Chlorophyll Index data over time for a pivot field #' Creates a plot showing Chlorophyll Index data over time for a pivot field
#' #'
#' @param pivotName The name or ID of the pivot field to visualize #' @param pivotName The name or ID of the pivot field to visualize
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DOY, cumulative_CI, value and season columns #' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DAH, cumulative_CI, value and season columns
#' @param plot_type Type of plot to generate: "absolute", "cumulative", or "both" #' @param plot_type Type of plot to generate: "absolute", "cumulative", or "both"
#' @param facet_on Whether to facet the plot by season (TRUE) or overlay all seasons (FALSE) #' @param facet_on Whether to facet the plot by season (TRUE) or overlay all seasons (FALSE)
#' @param x_unit Unit for x-axis: "days" for DOY or "weeks" for week number (default: "days") #' @param x_unit Unit for x-axis: "days" for DAH or "weeks" for week number (default: "days")
#' @param colorblind_friendly Whether to use colorblind-friendly color schemes (default: FALSE) #' @param colorblind_friendly Whether to use colorblind-friendly color schemes (default: FALSE)
#' @param show_benchmarks Whether to show historical benchmark lines (default: FALSE) #' @param show_benchmarks Whether to show historical benchmark lines (default: FALSE)
#' @param estate_name Name of the estate for benchmark calculation (required if show_benchmarks = TRUE) #' @param estate_name Name of the estate for benchmark calculation (required if show_benchmarks = TRUE)
@@ -393,7 +393,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
# Process data # Process data
data_ci2 <- data_ci %>% data_ci2 <- data_ci %>%
dplyr::mutate(CI_rate = cumulative_CI / DOY, dplyr::mutate(CI_rate = cumulative_CI / DAH,
week = lubridate::week(Date)) %>% week = lubridate::week(Date)) %>%
dplyr::group_by(field) %>% dplyr::group_by(field) %>%
dplyr::mutate(mean_CIrate_rolling_10_days = zoo::rollapplyr(CI_rate, width = 10, FUN = mean, partial = TRUE), dplyr::mutate(mean_CIrate_rolling_10_days = zoo::rollapplyr(CI_rate, width = 10, FUN = mean, partial = TRUE),
@@ -448,7 +448,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
# Determine x-axis variable based on x_unit parameter # Determine x-axis variable based on x_unit parameter
x_var <- if (x_unit == "days") { x_var <- if (x_unit == "days") {
if (facet_on) "Date" else "DOY" if (facet_on) "Date" else "DAH"
} else { } else {
"week" "week"
} }
@@ -457,7 +457,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
"days" = if (facet_on) "Date" else "Age of Crop (Days)", "days" = if (facet_on) "Date" else "Age of Crop (Days)",
"weeks" = "Week Number") "weeks" = "Week Number")
# Create plot with either facets by season or overlay by DOY/week # Create plot with either facets by season or overlay by DAH/week
if (facet_on) { if (facet_on) {
g <- ggplot2::ggplot(data = plot_data) + g <- ggplot2::ggplot(data = plot_data) +
ggplot2::facet_wrap(~season, scales = "free_x") + ggplot2::facet_wrap(~season, scales = "free_x") +
@@ -502,12 +502,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
benchmark_subset <- benchmark_data %>% benchmark_subset <- benchmark_data %>%
dplyr::filter(ci_type == ci_type_filter) %>% dplyr::filter(ci_type == ci_type_filter) %>%
dplyr::mutate( dplyr::mutate(
benchmark_x = if (x_var == "DOY") { benchmark_x = if (x_var == "DAH") {
DOY DAH
} else if (x_var == "week") { } else if (x_var == "week") {
DOY / 7 # Approximate conversion DAH / 7 # Approximate conversion
} else { } else {
DOY # For Date, use DOY as is (may not align perfectly) DAH # For Date, use DAH as is (may not align perfectly)
} }
) )
ggplot2::geom_smooth( ggplot2::geom_smooth(
@@ -549,7 +549,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
x = x_label) + x = x_label) +
color_scale + color_scale +
{ {
if (x_var == "DOY") { if (x_var == "DAH") {
ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1))) ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
} else if (x_var == "week") { } else if (x_var == "week") {
ggplot2::scale_x_continuous(breaks = seq(0, 64, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1))) ggplot2::scale_x_continuous(breaks = seq(0, 64, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
@@ -597,7 +597,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
# Determine x-axis variable based on x_unit parameter # Determine x-axis variable based on x_unit parameter
x_var <- if (x_unit == "days") { x_var <- if (x_unit == "days") {
if (facet_on) "Date" else "DOY" if (facet_on) "Date" else "DAH"
} else { } else {
"week" "week"
} }
@@ -620,12 +620,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
if (!is.null(benchmark_data)) { if (!is.null(benchmark_data)) {
benchmark_subset <- benchmark_data %>% benchmark_subset <- benchmark_data %>%
dplyr::mutate( dplyr::mutate(
benchmark_x = if (x_var == "DOY") { benchmark_x = if (x_var == "DAH") {
DOY DAH
} else if (x_var == "week") { } else if (x_var == "week") {
DOY / 7 DAH / 7
} else { } else {
DOY DAH
}, },
ci_type_label = case_when( ci_type_label = case_when(
ci_type == "value" ~ "10-Day Rolling Mean CI", ci_type == "value" ~ "10-Day Rolling Mean CI",
@@ -673,7 +673,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
x = x_label) + x = x_label) +
color_scale + color_scale +
{ {
if (x_var == "DOY") { if (x_var == "DAH") {
ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1))) ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
} else if (x_var == "week") { } else if (x_var == "week") {
ggplot2::scale_x_continuous(breaks = seq(0, 64, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1))) ggplot2::scale_x_continuous(breaks = seq(0, 64, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
@@ -840,11 +840,11 @@ get_week_path <- function(mosaic_path, input_date, week_offset) {
#' Computes historical percentile benchmarks for CI data per estate #' Computes historical percentile benchmarks for CI data per estate
#' #'
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DOY, cumulative_CI, value, season columns #' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DAH, cumulative_CI, value, season columns
#' @param estate_name Name of the estate/client to filter data for #' @param estate_name Name of the estate/client to filter data for
#' @param percentiles Vector of percentiles to compute (e.g., c(10, 50, 90)) #' @param percentiles Vector of percentiles to compute (e.g., c(10, 50, 90))
#' @param min_seasons Minimum number of seasons required for reliable benchmarks (default: 3) #' @param min_seasons Minimum number of seasons required for reliable benchmarks (default: 3)
#' @return Data frame with DOY, percentile, ci_type, benchmark_value, or NULL if insufficient data #' @return Data frame with DAH, percentile, ci_type, benchmark_value, or NULL if insufficient data
#' #'
compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c(10, 50, 90), min_seasons = 3) { compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c(10, 50, 90), min_seasons = 3) {
# Input validation # Input validation
@@ -873,7 +873,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
# Prepare data for both CI types # Prepare data for both CI types
data_prepared <- data_filtered %>% data_prepared <- data_filtered %>%
dplyr::ungroup() %>% # Ensure no existing groupings dplyr::ungroup() %>% # Ensure no existing groupings
dplyr::select(DOY, value, cumulative_CI, season) %>% dplyr::select(DAH, value, cumulative_CI, season) %>%
tidyr::pivot_longer( tidyr::pivot_longer(
cols = c("value", "cumulative_CI"), cols = c("value", "cumulative_CI"),
names_to = "ci_type", names_to = "ci_type",
@@ -881,9 +881,9 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
) %>% ) %>%
dplyr::filter(!is.na(ci_value)) # Remove NA values dplyr::filter(!is.na(ci_value)) # Remove NA values
# Compute percentiles for each DOY and ci_type # Compute percentiles for each DAH and ci_type
benchmarks <- data_prepared %>% benchmarks <- data_prepared %>%
dplyr::group_by(DOY, ci_type) %>% dplyr::group_by(DAH, ci_type) %>%
dplyr::summarise( dplyr::summarise(
p10 = tryCatch(quantile(ci_value, 0.1, na.rm = TRUE), error = function(e) NA_real_), p10 = tryCatch(quantile(ci_value, 0.1, na.rm = TRUE), error = function(e) NA_real_),
p50 = tryCatch(quantile(ci_value, 0.5, na.rm = TRUE), error = function(e) NA_real_), p50 = tryCatch(quantile(ci_value, 0.5, na.rm = TRUE), error = function(e) NA_real_),
@@ -891,7 +891,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
n_observations = n(), n_observations = n(),
.groups = 'drop' .groups = 'drop'
) %>% ) %>%
dplyr::filter(n_observations >= min_seasons) %>% # Only include DOYs with sufficient data dplyr::filter(n_observations >= min_seasons) %>% # Only include DAHs with sufficient data
tidyr::pivot_longer( tidyr::pivot_longer(
cols = c(p10, p50, p90), cols = c(p10, p50, p90),
names_to = "percentile", names_to = "percentile",
@@ -908,7 +908,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
# Rename columns for clarity # Rename columns for clarity
benchmarks <- benchmarks %>% benchmarks <- benchmarks %>%
dplyr::select(DOY, ci_type, percentile, benchmark_value) dplyr::select(DAH, ci_type, percentile, benchmark_value)
safe_log(paste("Computed CI benchmarks for estate", estate_name, "with", length(unique_seasons), "seasons and", nrow(benchmarks), "benchmark points"), "INFO") safe_log(paste("Computed CI benchmarks for estate", estate_name, "with", length(unique_seasons), "seasons and", nrow(benchmarks), "benchmark points"), "INFO")
@@ -1095,7 +1095,7 @@ get_field_priority_level <- function(cv, morans_i) {
#' #'
#' @param field_name Name of the field to summarize #' @param field_name Name of the field to summarize
#' @param field_details_table Data frame with field-level KPI details #' @param field_details_table Data frame with field-level KPI details
#' @param CI_quadrant Data frame containing CI quadrant data with Date, DOY, season columns #' @param CI_quadrant Data frame containing CI quadrant data with Date, DAH, season columns
#' @param report_date Report date (used for filtering current season data) #' @param report_date Report date (used for filtering current season data)
#' @return Formatted text string with field KPI summary #' @return Formatted text string with field KPI summary
#' #'
@@ -1116,10 +1116,10 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
} }
current_season <- current_season_data %>% pull(season) current_season <- current_season_data %>% pull(season)
# Get the most recent DOY from the current season # Get the most recent DAH from the current season
field_age_data <- CI_quadrant %>% field_age_data <- CI_quadrant %>%
filter(field == field_name, season == current_season) %>% filter(field == field_name, season == current_season) %>%
pull(DOY) pull(DAH)
field_age <- if (length(field_age_data) > 0) max(field_age_data, na.rm = TRUE) else NA_real_ field_age <- if (length(field_age_data) > 0) max(field_age_data, na.rm = TRUE) else NA_real_
# Filter data for this specific field # Filter data for this specific field