Update terminology from DOY to DAH across multiple scripts and reports for consistency in crop age calculations
This commit is contained in:
parent
f0a3afad52
commit
1f677f2626
|
|
@ -111,7 +111,7 @@ def main():
|
|||
|
||||
# [3/4] Run model predictions with two-step detection
|
||||
print("\n[3/4] Running two-step harvest detection...")
|
||||
print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DOY reset)")
|
||||
print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DAH reset)")
|
||||
refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device,
|
||||
phase1_threshold=0.3, phase1_consecutive=2)
|
||||
|
||||
|
|
|
|||
|
|
@ -144,7 +144,7 @@ def create_model(model_type: str, input_size: int, hidden_size: int = 128,
|
|||
# FEATURE ENGINEERING (from src/feature_engineering.py, simplified for inline)
|
||||
# ============================================================================
|
||||
|
||||
def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> pd.DataFrame:
|
||||
def compute_ci_features(ci_series: pd.Series, dah_series: pd.Series = None) -> pd.DataFrame:
|
||||
"""Compute all CI-based features (state, velocity, acceleration, min/max/range/std/CV)."""
|
||||
features = pd.DataFrame(index=ci_series.index)
|
||||
|
||||
|
|
@ -177,9 +177,9 @@ def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> p
|
|||
ma = ci_series.rolling(window=window, min_periods=1).mean()
|
||||
features[f'{window}d_CV'] = features[f'{window}d_std'] / (ma + 1e-6)
|
||||
|
||||
# DOY normalized
|
||||
if doy_series is not None:
|
||||
features['DOY_normalized'] = doy_series / 450.0
|
||||
# DAH normalized (Days After Harvest)
|
||||
if dah_series is not None:
|
||||
features['DAH_normalized'] = dah_series / 450.0
|
||||
|
||||
return features.fillna(0)
|
||||
|
||||
|
|
@ -193,8 +193,8 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column:
|
|||
data_df: DataFrame with Date and CI data (may be a window after a harvest)
|
||||
feature_names: List of feature names to extract
|
||||
ci_column: Name of CI column
|
||||
season_anchor_day: Day in FULL sequence where this season started (for DOY reset)
|
||||
DOY will be recalculated as: 1, 2, 3, ... from this point
|
||||
season_anchor_day: Day in FULL sequence where this season started (for DAH reset)
|
||||
DAH will be recalculated as: 1, 2, 3, ... from this point
|
||||
lookback_start: Starting index in original full data (for season reset calculation)
|
||||
|
||||
Returns:
|
||||
|
|
@ -203,23 +203,23 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column:
|
|||
# Compute all CI features
|
||||
ci_series = data_df[ci_column].astype(float)
|
||||
|
||||
# Compute DOY (age/days since season start) - NOT day-of-year!
|
||||
# DOY is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365)
|
||||
# Compute DAH (age/days since season start) - NOT day-of-year!
|
||||
# DAH is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365)
|
||||
# It only resets to 1 after a harvest is detected (new season)
|
||||
doy_series = None
|
||||
if 'DOY_normalized' in feature_names:
|
||||
dah_series = None
|
||||
if 'DAH_normalized' in feature_names:
|
||||
if season_anchor_day is not None and lookback_start >= season_anchor_day:
|
||||
# Season was reset after harvest. Recalculate DOY as simple counter from 1
|
||||
# This is a window starting at or after harvest, so DOY should be: 1, 2, 3, ...
|
||||
doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
|
||||
elif 'DOY' in data_df.columns:
|
||||
# Use DOY directly from CSV - already calculated as continuous age counter
|
||||
doy_series = pd.Series(data_df['DOY'].astype(float).values, index=data_df.index)
|
||||
# Season was reset after harvest. Recalculate DAH as simple counter from 1
|
||||
# This is a window starting at or after harvest, so DAH should be: 1, 2, 3, ...
|
||||
dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
|
||||
elif 'DAH' in data_df.columns:
|
||||
# Use DAH directly from CSV - already calculated as continuous age counter
|
||||
dah_series = pd.Series(data_df['DAH'].astype(float).values, index=data_df.index)
|
||||
else:
|
||||
# Fallback: create continuous age counter (1, 2, 3, ...)
|
||||
doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
|
||||
dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
|
||||
|
||||
all_features = compute_ci_features(ci_series, doy_series)
|
||||
all_features = compute_ci_features(ci_series, dah_series)
|
||||
|
||||
# Select requested features
|
||||
requested = [f for f in feature_names if f in all_features.columns]
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@
|
|||
# OUTPUT DATA:
|
||||
# - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/
|
||||
# - Format: CSV (long format)
|
||||
# - Columns: field, sub_field, Date, FitData, DOY, value
|
||||
# - Columns: field, sub_field, Date, FitData, DAH, value
|
||||
#
|
||||
# USAGE:
|
||||
# Rscript 21_convert_ci_rds_to_csv.R [project]
|
||||
|
|
@ -38,7 +38,7 @@
|
|||
# NOTES:
|
||||
# - Data source: Uses interpolated CI data from Script 30 (growth model output)
|
||||
# - Handles both wide format and long format inputs from growth model
|
||||
# - DOY (Day of Year): Calculated from date for seasonal analysis
|
||||
# - DAH (Days After Harvest): Calculated from date; represents crop age in days
|
||||
# - Python integration: CSV format compatible with pandas/scikit-learn workflows
|
||||
# - Used by: Python harvest detection models (harvest_date_prediction.py)
|
||||
# - Exports complete growth curves with interpolated values for ML training
|
||||
|
|
@ -82,13 +82,13 @@ wide_to_long_ci_data <- function(ci_data_wide) {
|
|||
filter(!is.na(FitData))
|
||||
}
|
||||
|
||||
#' Create daily interpolated sequences with DOY for each field
|
||||
#' Create daily interpolated sequences with DAH for each field
|
||||
#'
|
||||
#' For each field/sub_field combination, creates complete daily sequences from first to last date,
|
||||
#' fills in measurements, and interpolates missing dates.
|
||||
#'
|
||||
#' @param ci_data_long Long format tibble: field, sub_field, Date, FitData
|
||||
#' @return Tibble with: field, sub_field, Date, FitData, DOY, value
|
||||
#' @return Tibble with: field, sub_field, Date, FitData, DAH, value
|
||||
create_interpolated_daily_sequences <- function(ci_data_long) {
|
||||
ci_data_long %>%
|
||||
group_by(field, sub_field) %>%
|
||||
|
|
@ -106,7 +106,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) {
|
|||
Date = date_seq,
|
||||
value = NA_real_,
|
||||
FitData = NA_real_,
|
||||
DOY = seq_along(date_seq) # Continuous day counter: 1, 2, 3, ...
|
||||
DAH = seq_along(date_seq) # Continuous day counter: 1, 2, 3, ...
|
||||
)
|
||||
|
||||
# Fill in actual measurement values
|
||||
|
|
@ -124,7 +124,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) {
|
|||
})
|
||||
) %>%
|
||||
unnest(data) %>%
|
||||
select(field, sub_field, Date, FitData, DOY, value) %>%
|
||||
select(field, sub_field, Date, FitData, DAH, value) %>%
|
||||
arrange(field, Date)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -208,7 +208,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season,
|
|||
# Add additional columns
|
||||
CI <- CI %>%
|
||||
dplyr::mutate(
|
||||
DOY = seq(1, n(), 1),
|
||||
DAH = seq(1, n(), 1),
|
||||
model = paste0("Data", season, " : ", field_name),
|
||||
season = season,
|
||||
subField = field_name
|
||||
|
|
|
|||
|
|
@ -173,10 +173,10 @@ calculate_status_alert <- function(imminent_prob, age_week, weekly_ci_change, me
|
|||
#'
|
||||
#' Uses Random Forest with Forward Feature Selection trained on:
|
||||
#' - Cumulative Canopy Index (CI) from growth model
|
||||
#' - Days of Year (DOY) / crop age
|
||||
#' - Days After Harvest (DAH) / crop age
|
||||
#' - CI-per-day (growth velocity)
|
||||
#'
|
||||
#' Predicts yields for mature fields (DOY >= 240, ~8 months) into quartiles:
|
||||
#' Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD, ~8 months) into quartiles:
|
||||
#' - Top 25%: High-yield fields
|
||||
#' - Average: Mid-range yield fields
|
||||
#' - Lowest 25%: Lower-yield fields
|
||||
|
|
|
|||
|
|
@ -1457,7 +1457,7 @@ prepare_predictions <- function(predictions, newdata) {
|
|||
dplyr::mutate(
|
||||
sub_field = newdata$sub_field,
|
||||
field = newdata$field,
|
||||
Age_days = newdata$DOY,
|
||||
Age_days = newdata$DAH,
|
||||
total_CI = round(newdata$cumulative_CI, 0),
|
||||
predicted_Tcha = round(predicted_Tcha, 0),
|
||||
season = newdata$season
|
||||
|
|
@ -1506,8 +1506,8 @@ create_fallback_result <- function(field_boundaries) {
|
|||
#' Calculate yield prediction KPI using Random Forest with Feature Selection
|
||||
#'
|
||||
#' Trains a Random Forest model on historical harvest data with cumulative CI,
|
||||
#' days of year (DOY), and CI-per-day as predictors. Uses CAST::ffs() for
|
||||
#' Forward Feature Selection. Predicts yields for mature fields (DOY >= 240).
|
||||
#' days after harvest (DAH), and CI-per-day as predictors. Uses CAST::ffs() for
|
||||
#' Forward Feature Selection. Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD).
|
||||
#'
|
||||
#' @param field_boundaries Field boundaries (sf or SpatVector)
|
||||
#' @param harvesting_data Data frame with harvest data including tonnage_ha column
|
||||
|
|
@ -1527,8 +1527,8 @@ create_fallback_result <- function(field_boundaries) {
|
|||
#' - Algorithm: Random Forest (caret + CAST)
|
||||
#' - Feature Selection: Forward Feature Selection (CAST::ffs)
|
||||
#' - Cross-validation: 5-fold CV
|
||||
#' - Predictors: cumulative_CI, DOY, CI_per_day
|
||||
#' - Mature field threshold: DOY >= 240 (8 months)
|
||||
#' - Predictors: cumulative_CI, DAH, CI_per_day
|
||||
#' - Mature field threshold: DAH >= DAH_MATURITY_THRESHOLD (8 months, ~240 days)
|
||||
#' - Output: Field-level yield forecasts grouped by quartile
|
||||
#'
|
||||
#' **Error Handling:**
|
||||
|
|
@ -1568,12 +1568,12 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
|
|||
CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed,
|
||||
by = c("field", "sub_field", "season")) %>%
|
||||
dplyr::group_by(sub_field, season) %>%
|
||||
dplyr::slice(which.max(DOY)) %>%
|
||||
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>%
|
||||
dplyr::mutate(CI_per_day = cumulative_CI / DOY)
|
||||
dplyr::slice(which.max(DAH)) %>%
|
||||
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DAH, season, sub_area) %>%
|
||||
dplyr::mutate(CI_per_day = cumulative_CI / DAH)
|
||||
|
||||
# Define predictors and response variables
|
||||
predictors <- c("cumulative_CI", "DOY", "CI_per_day")
|
||||
predictors <- c("cumulative_CI", "DAH", "CI_per_day")
|
||||
response <- "tonnage_ha"
|
||||
|
||||
# Prepare training dataset (fields with harvest data)
|
||||
|
|
@ -1600,7 +1600,7 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
|
|||
# Prepare prediction dataset (fields without harvest data, mature fields only)
|
||||
prediction_yields <- CI_and_yield %>%
|
||||
as.data.frame() %>%
|
||||
dplyr::filter(is.na(tonnage_ha) & DOY >= 240) # Mature fields only
|
||||
dplyr::filter(is.na(tonnage_ha) & DAH >= DAH_MATURITY_THRESHOLD) # Mature fields only
|
||||
|
||||
# Configure model training parameters
|
||||
ctrl <- caret::trainControl(
|
||||
|
|
@ -1643,13 +1643,13 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
|
|||
safe_log(paste("Yield prediction RMSE (in-sample/training):", round(rmse_value, 2), "t/ha"))
|
||||
}
|
||||
|
||||
# Predict yields for current season (mature fields >= 240 days)
|
||||
# Predict yields for current season (mature fields >= DAH_MATURITY_THRESHOLD days)
|
||||
if (nrow(prediction_yields) > 0) {
|
||||
pred_rf_current_season <- prepare_predictions(
|
||||
stats::predict(model_ffs_rf, newdata = prediction_yields),
|
||||
prediction_yields
|
||||
) %>%
|
||||
dplyr::filter(Age_days >= 240) %>%
|
||||
dplyr::filter(Age_days >= DAH_MATURITY_THRESHOLD) %>%
|
||||
dplyr::select(c("field", "Age_days", "predicted_Tcha", "season"))
|
||||
} else {
|
||||
pred_rf_current_season <- data.frame()
|
||||
|
|
@ -1710,7 +1710,7 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
|
|||
safe_log("✓ Yield prediction complete")
|
||||
return(list(summary = result, field_results = field_level_results))
|
||||
} else {
|
||||
safe_log("No fields meet maturity threshold (DOY >= 240) for prediction", "WARNING")
|
||||
safe_log(paste("No fields meet maturity threshold (DAH >=", DAH_MATURITY_THRESHOLD, ") for prediction"), "WARNING")
|
||||
return(list(summary = create_fallback_result(field_boundaries)$summary,
|
||||
field_results = data.frame()))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -300,7 +300,7 @@ if (dir.exists(kpi_data_dir)) {
|
|||
}
|
||||
}
|
||||
|
||||
# 6. Gap filling summary - GROUP BY Gap_Level and COUNT
|
||||
# 6. Gaps summary - GROUP BY Gap_Level and COUNT
|
||||
if ("Gap_Level" %in% names(field_details_table)) {
|
||||
summary_tables$gap_filling <- field_details_table %>%
|
||||
group_by(gap_level = Gap_Level) %>%
|
||||
|
|
@ -581,7 +581,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table
|
|||
growth_decline = list(display = "Growth Decline (4-Week Trend)", level_col = "trend_interpretation", count_col = "field_count"),
|
||||
patchiness = list(display = "Field Patchiness", level_col = "gini_category", count_col = "field_count", detail_col = "patchiness_risk"),
|
||||
tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", detail_col = "range", count_col = "field_count"),
|
||||
gap_filling = list(display = "Gap Filling", level_col = "gap_level", count_col = "field_count")
|
||||
gap_filling = list(display = "Gaps", level_col = "gap_level", count_col = "field_count")
|
||||
)
|
||||
|
||||
standardize_kpi <- function(df, level_col, count_col, detail_col = NULL) {
|
||||
|
|
@ -1349,8 +1349,8 @@ tryCatch({
|
|||
ci_quadrant_data <- if (project_dir == "esa" && field_name == "00F25") {
|
||||
CI_quadrant %>%
|
||||
dplyr::filter(field == "00F25") %>%
|
||||
dplyr::arrange(DOY) %>%
|
||||
dplyr::group_by(DOY) %>%
|
||||
dplyr::arrange(DAH) %>%
|
||||
dplyr::group_by(DAH) %>%
|
||||
dplyr::slice(1) %>%
|
||||
dplyr::ungroup()
|
||||
} else {
|
||||
|
|
@ -1494,10 +1494,10 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
|
|||
CI_quadrant %>%
|
||||
filter(Date <= as.Date(report_date)) %>%
|
||||
group_by(field, season) %>%
|
||||
summarise(last_date = max(Date), last_doy = max(DOY), .groups = 'drop') %>%
|
||||
summarise(last_date = max(Date), last_dah = max(DAH), .groups = 'drop') %>%
|
||||
group_by(field) %>%
|
||||
filter(season == max(season)) %>%
|
||||
select(field, Age_days = last_doy)
|
||||
select(field, Age_days = last_dah)
|
||||
}, error = function(e) {
|
||||
data.frame(field = character(), Age_days = numeric())
|
||||
})
|
||||
|
|
@ -1640,7 +1640,7 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
|
|||
- **High:** Gini > 0.12 (poor uniformity, recommend field scouting)
|
||||
- **Note:** Young crops (< 3 months) naturally show higher patchiness as they establish; this decreases with canopy closure.
|
||||
|
||||
- **Gap Filling Score:** Indicates the proportion of a field with low CI values (lowest 25% of the distribution), highlighting areas with poor crop establishment or gaps that may need replanting.
|
||||
- **Gap Score:** Indicates the proportion of a field with low CI values (lowest 25% of the distribution), highlighting areas with poor crop establishment or gaps that may need replanting.
|
||||
|
||||
2. **Overview Map: Growth on Farm:**
|
||||
Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting.
|
||||
|
|
|
|||
|
|
@ -239,7 +239,7 @@
|
|||
#
|
||||
# OUTPUT:
|
||||
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
|
||||
# - Columns: field, sub_field, Date, FitData, DOY, value
|
||||
# - Columns: field, sub_field, Date, FitData, DAH, value
|
||||
#
|
||||
# PARAMETERS:
|
||||
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||
|
|
|
|||
|
|
@ -33,8 +33,14 @@ suppressPackageStartupMessages({
|
|||
})
|
||||
|
||||
# ==============================================================================
|
||||
# SECTION 2: CLIENT TYPE MAPPING & CONFIGURATION
|
||||
# SECTION 2: GLOBAL AGRONOMIC THRESHOLDS & CLIENT TYPE MAPPING
|
||||
# ==============================================================================
|
||||
|
||||
# Maturity threshold for yield prediction: crop age in Days After Harvest (DAH)
|
||||
# Only fields >= DAH_MATURITY_THRESHOLD days old receive yield forecasts
|
||||
# ~240 days ≈ 8 months, typical sugarcane maturity window
|
||||
DAH_MATURITY_THRESHOLD <- 240
|
||||
|
||||
# Maps project names to client types for pipeline control
|
||||
# This determines which scripts run and what outputs they produce
|
||||
|
||||
|
|
|
|||
|
|
@ -357,10 +357,10 @@ ci_plot <- function(pivotName,
|
|||
#' Creates a plot showing Chlorophyll Index data over time for a pivot field
|
||||
#'
|
||||
#' @param pivotName The name or ID of the pivot field to visualize
|
||||
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DOY, cumulative_CI, value and season columns
|
||||
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DAH, cumulative_CI, value and season columns
|
||||
#' @param plot_type Type of plot to generate: "absolute", "cumulative", or "both"
|
||||
#' @param facet_on Whether to facet the plot by season (TRUE) or overlay all seasons (FALSE)
|
||||
#' @param x_unit Unit for x-axis: "days" for DOY or "weeks" for week number (default: "days")
|
||||
#' @param x_unit Unit for x-axis: "days" for DAH or "weeks" for week number (default: "days")
|
||||
#' @param colorblind_friendly Whether to use colorblind-friendly color schemes (default: FALSE)
|
||||
#' @param show_benchmarks Whether to show historical benchmark lines (default: FALSE)
|
||||
#' @param estate_name Name of the estate for benchmark calculation (required if show_benchmarks = TRUE)
|
||||
|
|
@ -393,7 +393,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
|||
|
||||
# Process data
|
||||
data_ci2 <- data_ci %>%
|
||||
dplyr::mutate(CI_rate = cumulative_CI / DOY,
|
||||
dplyr::mutate(CI_rate = cumulative_CI / DAH,
|
||||
week = lubridate::week(Date)) %>%
|
||||
dplyr::group_by(field) %>%
|
||||
dplyr::mutate(mean_CIrate_rolling_10_days = zoo::rollapplyr(CI_rate, width = 10, FUN = mean, partial = TRUE),
|
||||
|
|
@ -448,7 +448,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
|||
|
||||
# Determine x-axis variable based on x_unit parameter
|
||||
x_var <- if (x_unit == "days") {
|
||||
if (facet_on) "Date" else "DOY"
|
||||
if (facet_on) "Date" else "DAH"
|
||||
} else {
|
||||
"week"
|
||||
}
|
||||
|
|
@ -457,7 +457,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
|||
"days" = if (facet_on) "Date" else "Age of Crop (Days)",
|
||||
"weeks" = "Week Number")
|
||||
|
||||
# Create plot with either facets by season or overlay by DOY/week
|
||||
# Create plot with either facets by season or overlay by DAH/week
|
||||
if (facet_on) {
|
||||
g <- ggplot2::ggplot(data = plot_data) +
|
||||
ggplot2::facet_wrap(~season, scales = "free_x") +
|
||||
|
|
@ -502,12 +502,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
|||
benchmark_subset <- benchmark_data %>%
|
||||
dplyr::filter(ci_type == ci_type_filter) %>%
|
||||
dplyr::mutate(
|
||||
benchmark_x = if (x_var == "DOY") {
|
||||
DOY
|
||||
benchmark_x = if (x_var == "DAH") {
|
||||
DAH
|
||||
} else if (x_var == "week") {
|
||||
DOY / 7 # Approximate conversion
|
||||
DAH / 7 # Approximate conversion
|
||||
} else {
|
||||
DOY # For Date, use DOY as is (may not align perfectly)
|
||||
DAH # For Date, use DAH as is (may not align perfectly)
|
||||
}
|
||||
)
|
||||
ggplot2::geom_smooth(
|
||||
|
|
@ -549,7 +549,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
|||
x = x_label) +
|
||||
color_scale +
|
||||
{
|
||||
if (x_var == "DOY") {
|
||||
if (x_var == "DAH") {
|
||||
ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
||||
} else if (x_var == "week") {
|
||||
ggplot2::scale_x_continuous(breaks = seq(0, 64, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
||||
|
|
@ -597,7 +597,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
|||
|
||||
# Determine x-axis variable based on x_unit parameter
|
||||
x_var <- if (x_unit == "days") {
|
||||
if (facet_on) "Date" else "DOY"
|
||||
if (facet_on) "Date" else "DAH"
|
||||
} else {
|
||||
"week"
|
||||
}
|
||||
|
|
@ -620,12 +620,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
|||
if (!is.null(benchmark_data)) {
|
||||
benchmark_subset <- benchmark_data %>%
|
||||
dplyr::mutate(
|
||||
benchmark_x = if (x_var == "DOY") {
|
||||
DOY
|
||||
benchmark_x = if (x_var == "DAH") {
|
||||
DAH
|
||||
} else if (x_var == "week") {
|
||||
DOY / 7
|
||||
DAH / 7
|
||||
} else {
|
||||
DOY
|
||||
DAH
|
||||
},
|
||||
ci_type_label = case_when(
|
||||
ci_type == "value" ~ "10-Day Rolling Mean CI",
|
||||
|
|
@ -673,7 +673,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
|||
x = x_label) +
|
||||
color_scale +
|
||||
{
|
||||
if (x_var == "DOY") {
|
||||
if (x_var == "DAH") {
|
||||
ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
||||
} else if (x_var == "week") {
|
||||
ggplot2::scale_x_continuous(breaks = seq(0, 64, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
||||
|
|
@ -840,11 +840,11 @@ get_week_path <- function(mosaic_path, input_date, week_offset) {
|
|||
|
||||
#' Computes historical percentile benchmarks for CI data per estate
|
||||
#'
|
||||
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DOY, cumulative_CI, value, season columns
|
||||
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DAH, cumulative_CI, value, season columns
|
||||
#' @param estate_name Name of the estate/client to filter data for
|
||||
#' @param percentiles Vector of percentiles to compute (e.g., c(10, 50, 90))
|
||||
#' @param min_seasons Minimum number of seasons required for reliable benchmarks (default: 3)
|
||||
#' @return Data frame with DOY, percentile, ci_type, benchmark_value, or NULL if insufficient data
|
||||
#' @return Data frame with DAH, percentile, ci_type, benchmark_value, or NULL if insufficient data
|
||||
#'
|
||||
compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c(10, 50, 90), min_seasons = 3) {
|
||||
# Input validation
|
||||
|
|
@ -873,7 +873,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
|
|||
# Prepare data for both CI types
|
||||
data_prepared <- data_filtered %>%
|
||||
dplyr::ungroup() %>% # Ensure no existing groupings
|
||||
dplyr::select(DOY, value, cumulative_CI, season) %>%
|
||||
dplyr::select(DAH, value, cumulative_CI, season) %>%
|
||||
tidyr::pivot_longer(
|
||||
cols = c("value", "cumulative_CI"),
|
||||
names_to = "ci_type",
|
||||
|
|
@ -881,9 +881,9 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
|
|||
) %>%
|
||||
dplyr::filter(!is.na(ci_value)) # Remove NA values
|
||||
|
||||
# Compute percentiles for each DOY and ci_type
|
||||
# Compute percentiles for each DAH and ci_type
|
||||
benchmarks <- data_prepared %>%
|
||||
dplyr::group_by(DOY, ci_type) %>%
|
||||
dplyr::group_by(DAH, ci_type) %>%
|
||||
dplyr::summarise(
|
||||
p10 = tryCatch(quantile(ci_value, 0.1, na.rm = TRUE), error = function(e) NA_real_),
|
||||
p50 = tryCatch(quantile(ci_value, 0.5, na.rm = TRUE), error = function(e) NA_real_),
|
||||
|
|
@ -891,7 +891,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
|
|||
n_observations = n(),
|
||||
.groups = 'drop'
|
||||
) %>%
|
||||
dplyr::filter(n_observations >= min_seasons) %>% # Only include DOYs with sufficient data
|
||||
dplyr::filter(n_observations >= min_seasons) %>% # Only include DAHs with sufficient data
|
||||
tidyr::pivot_longer(
|
||||
cols = c(p10, p50, p90),
|
||||
names_to = "percentile",
|
||||
|
|
@ -908,7 +908,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
|
|||
|
||||
# Rename columns for clarity
|
||||
benchmarks <- benchmarks %>%
|
||||
dplyr::select(DOY, ci_type, percentile, benchmark_value)
|
||||
dplyr::select(DAH, ci_type, percentile, benchmark_value)
|
||||
|
||||
safe_log(paste("Computed CI benchmarks for estate", estate_name, "with", length(unique_seasons), "seasons and", nrow(benchmarks), "benchmark points"), "INFO")
|
||||
|
||||
|
|
@ -1095,7 +1095,7 @@ get_field_priority_level <- function(cv, morans_i) {
|
|||
#'
|
||||
#' @param field_name Name of the field to summarize
|
||||
#' @param field_details_table Data frame with field-level KPI details
|
||||
#' @param CI_quadrant Data frame containing CI quadrant data with Date, DOY, season columns
|
||||
#' @param CI_quadrant Data frame containing CI quadrant data with Date, DAH, season columns
|
||||
#' @param report_date Report date (used for filtering current season data)
|
||||
#' @return Formatted text string with field KPI summary
|
||||
#'
|
||||
|
|
@ -1116,10 +1116,10 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
|
|||
}
|
||||
current_season <- current_season_data %>% pull(season)
|
||||
|
||||
# Get the most recent DOY from the current season
|
||||
# Get the most recent DAH from the current season
|
||||
field_age_data <- CI_quadrant %>%
|
||||
filter(field == field_name, season == current_season) %>%
|
||||
pull(DOY)
|
||||
pull(DAH)
|
||||
|
||||
field_age <- if (length(field_age_data) > 0) max(field_age_data, na.rm = TRUE) else NA_real_
|
||||
# Filter data for this specific field
|
||||
|
|
|
|||
Loading…
Reference in a new issue