Update terminology from DOY to DAH across multiple scripts and reports for consistency in crop age calculations
This commit is contained in:
parent
f0a3afad52
commit
1f677f2626
|
|
@ -111,7 +111,7 @@ def main():
|
||||||
|
|
||||||
# [3/4] Run model predictions with two-step detection
|
# [3/4] Run model predictions with two-step detection
|
||||||
print("\n[3/4] Running two-step harvest detection...")
|
print("\n[3/4] Running two-step harvest detection...")
|
||||||
print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DOY reset)")
|
print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DAH reset)")
|
||||||
refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device,
|
refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device,
|
||||||
phase1_threshold=0.3, phase1_consecutive=2)
|
phase1_threshold=0.3, phase1_consecutive=2)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -144,7 +144,7 @@ def create_model(model_type: str, input_size: int, hidden_size: int = 128,
|
||||||
# FEATURE ENGINEERING (from src/feature_engineering.py, simplified for inline)
|
# FEATURE ENGINEERING (from src/feature_engineering.py, simplified for inline)
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> pd.DataFrame:
|
def compute_ci_features(ci_series: pd.Series, dah_series: pd.Series = None) -> pd.DataFrame:
|
||||||
"""Compute all CI-based features (state, velocity, acceleration, min/max/range/std/CV)."""
|
"""Compute all CI-based features (state, velocity, acceleration, min/max/range/std/CV)."""
|
||||||
features = pd.DataFrame(index=ci_series.index)
|
features = pd.DataFrame(index=ci_series.index)
|
||||||
|
|
||||||
|
|
@ -177,9 +177,9 @@ def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> p
|
||||||
ma = ci_series.rolling(window=window, min_periods=1).mean()
|
ma = ci_series.rolling(window=window, min_periods=1).mean()
|
||||||
features[f'{window}d_CV'] = features[f'{window}d_std'] / (ma + 1e-6)
|
features[f'{window}d_CV'] = features[f'{window}d_std'] / (ma + 1e-6)
|
||||||
|
|
||||||
# DOY normalized
|
# DAH normalized (Days After Harvest)
|
||||||
if doy_series is not None:
|
if dah_series is not None:
|
||||||
features['DOY_normalized'] = doy_series / 450.0
|
features['DAH_normalized'] = dah_series / 450.0
|
||||||
|
|
||||||
return features.fillna(0)
|
return features.fillna(0)
|
||||||
|
|
||||||
|
|
@ -193,8 +193,8 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column:
|
||||||
data_df: DataFrame with Date and CI data (may be a window after a harvest)
|
data_df: DataFrame with Date and CI data (may be a window after a harvest)
|
||||||
feature_names: List of feature names to extract
|
feature_names: List of feature names to extract
|
||||||
ci_column: Name of CI column
|
ci_column: Name of CI column
|
||||||
season_anchor_day: Day in FULL sequence where this season started (for DOY reset)
|
season_anchor_day: Day in FULL sequence where this season started (for DAH reset)
|
||||||
DOY will be recalculated as: 1, 2, 3, ... from this point
|
DAH will be recalculated as: 1, 2, 3, ... from this point
|
||||||
lookback_start: Starting index in original full data (for season reset calculation)
|
lookback_start: Starting index in original full data (for season reset calculation)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
|
|
@ -203,23 +203,23 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column:
|
||||||
# Compute all CI features
|
# Compute all CI features
|
||||||
ci_series = data_df[ci_column].astype(float)
|
ci_series = data_df[ci_column].astype(float)
|
||||||
|
|
||||||
# Compute DOY (age/days since season start) - NOT day-of-year!
|
# Compute DAH (age/days since season start) - NOT day-of-year!
|
||||||
# DOY is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365)
|
# DAH is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365)
|
||||||
# It only resets to 1 after a harvest is detected (new season)
|
# It only resets to 1 after a harvest is detected (new season)
|
||||||
doy_series = None
|
dah_series = None
|
||||||
if 'DOY_normalized' in feature_names:
|
if 'DAH_normalized' in feature_names:
|
||||||
if season_anchor_day is not None and lookback_start >= season_anchor_day:
|
if season_anchor_day is not None and lookback_start >= season_anchor_day:
|
||||||
# Season was reset after harvest. Recalculate DOY as simple counter from 1
|
# Season was reset after harvest. Recalculate DAH as simple counter from 1
|
||||||
# This is a window starting at or after harvest, so DOY should be: 1, 2, 3, ...
|
# This is a window starting at or after harvest, so DAH should be: 1, 2, 3, ...
|
||||||
doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
|
dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
|
||||||
elif 'DOY' in data_df.columns:
|
elif 'DAH' in data_df.columns:
|
||||||
# Use DOY directly from CSV - already calculated as continuous age counter
|
# Use DAH directly from CSV - already calculated as continuous age counter
|
||||||
doy_series = pd.Series(data_df['DOY'].astype(float).values, index=data_df.index)
|
dah_series = pd.Series(data_df['DAH'].astype(float).values, index=data_df.index)
|
||||||
else:
|
else:
|
||||||
# Fallback: create continuous age counter (1, 2, 3, ...)
|
# Fallback: create continuous age counter (1, 2, 3, ...)
|
||||||
doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
|
dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
|
||||||
|
|
||||||
all_features = compute_ci_features(ci_series, doy_series)
|
all_features = compute_ci_features(ci_series, dah_series)
|
||||||
|
|
||||||
# Select requested features
|
# Select requested features
|
||||||
requested = [f for f in feature_names if f in all_features.columns]
|
requested = [f for f in feature_names if f in all_features.columns]
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@
|
||||||
# OUTPUT DATA:
|
# OUTPUT DATA:
|
||||||
# - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/
|
# - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/
|
||||||
# - Format: CSV (long format)
|
# - Format: CSV (long format)
|
||||||
# - Columns: field, sub_field, Date, FitData, DOY, value
|
# - Columns: field, sub_field, Date, FitData, DAH, value
|
||||||
#
|
#
|
||||||
# USAGE:
|
# USAGE:
|
||||||
# Rscript 21_convert_ci_rds_to_csv.R [project]
|
# Rscript 21_convert_ci_rds_to_csv.R [project]
|
||||||
|
|
@ -38,7 +38,7 @@
|
||||||
# NOTES:
|
# NOTES:
|
||||||
# - Data source: Uses interpolated CI data from Script 30 (growth model output)
|
# - Data source: Uses interpolated CI data from Script 30 (growth model output)
|
||||||
# - Handles both wide format and long format inputs from growth model
|
# - Handles both wide format and long format inputs from growth model
|
||||||
# - DOY (Day of Year): Calculated from date for seasonal analysis
|
# - DAH (Days After Harvest): Calculated from date; represents crop age in days
|
||||||
# - Python integration: CSV format compatible with pandas/scikit-learn workflows
|
# - Python integration: CSV format compatible with pandas/scikit-learn workflows
|
||||||
# - Used by: Python harvest detection models (harvest_date_prediction.py)
|
# - Used by: Python harvest detection models (harvest_date_prediction.py)
|
||||||
# - Exports complete growth curves with interpolated values for ML training
|
# - Exports complete growth curves with interpolated values for ML training
|
||||||
|
|
@ -82,13 +82,13 @@ wide_to_long_ci_data <- function(ci_data_wide) {
|
||||||
filter(!is.na(FitData))
|
filter(!is.na(FitData))
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Create daily interpolated sequences with DOY for each field
|
#' Create daily interpolated sequences with DAH for each field
|
||||||
#'
|
#'
|
||||||
#' For each field/sub_field combination, creates complete daily sequences from first to last date,
|
#' For each field/sub_field combination, creates complete daily sequences from first to last date,
|
||||||
#' fills in measurements, and interpolates missing dates.
|
#' fills in measurements, and interpolates missing dates.
|
||||||
#'
|
#'
|
||||||
#' @param ci_data_long Long format tibble: field, sub_field, Date, FitData
|
#' @param ci_data_long Long format tibble: field, sub_field, Date, FitData
|
||||||
#' @return Tibble with: field, sub_field, Date, FitData, DOY, value
|
#' @return Tibble with: field, sub_field, Date, FitData, DAH, value
|
||||||
create_interpolated_daily_sequences <- function(ci_data_long) {
|
create_interpolated_daily_sequences <- function(ci_data_long) {
|
||||||
ci_data_long %>%
|
ci_data_long %>%
|
||||||
group_by(field, sub_field) %>%
|
group_by(field, sub_field) %>%
|
||||||
|
|
@ -106,7 +106,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) {
|
||||||
Date = date_seq,
|
Date = date_seq,
|
||||||
value = NA_real_,
|
value = NA_real_,
|
||||||
FitData = NA_real_,
|
FitData = NA_real_,
|
||||||
DOY = seq_along(date_seq) # Continuous day counter: 1, 2, 3, ...
|
DAH = seq_along(date_seq) # Continuous day counter: 1, 2, 3, ...
|
||||||
)
|
)
|
||||||
|
|
||||||
# Fill in actual measurement values
|
# Fill in actual measurement values
|
||||||
|
|
@ -124,7 +124,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) {
|
||||||
})
|
})
|
||||||
) %>%
|
) %>%
|
||||||
unnest(data) %>%
|
unnest(data) %>%
|
||||||
select(field, sub_field, Date, FitData, DOY, value) %>%
|
select(field, sub_field, Date, FitData, DAH, value) %>%
|
||||||
arrange(field, Date)
|
arrange(field, Date)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -208,7 +208,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season,
|
||||||
# Add additional columns
|
# Add additional columns
|
||||||
CI <- CI %>%
|
CI <- CI %>%
|
||||||
dplyr::mutate(
|
dplyr::mutate(
|
||||||
DOY = seq(1, n(), 1),
|
DAH = seq(1, n(), 1),
|
||||||
model = paste0("Data", season, " : ", field_name),
|
model = paste0("Data", season, " : ", field_name),
|
||||||
season = season,
|
season = season,
|
||||||
subField = field_name
|
subField = field_name
|
||||||
|
|
|
||||||
|
|
@ -173,10 +173,10 @@ calculate_status_alert <- function(imminent_prob, age_week, weekly_ci_change, me
|
||||||
#'
|
#'
|
||||||
#' Uses Random Forest with Forward Feature Selection trained on:
|
#' Uses Random Forest with Forward Feature Selection trained on:
|
||||||
#' - Cumulative Canopy Index (CI) from growth model
|
#' - Cumulative Canopy Index (CI) from growth model
|
||||||
#' - Days of Year (DOY) / crop age
|
#' - Days After Harvest (DAH) / crop age
|
||||||
#' - CI-per-day (growth velocity)
|
#' - CI-per-day (growth velocity)
|
||||||
#'
|
#'
|
||||||
#' Predicts yields for mature fields (DOY >= 240, ~8 months) into quartiles:
|
#' Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD, ~8 months) into quartiles:
|
||||||
#' - Top 25%: High-yield fields
|
#' - Top 25%: High-yield fields
|
||||||
#' - Average: Mid-range yield fields
|
#' - Average: Mid-range yield fields
|
||||||
#' - Lowest 25%: Lower-yield fields
|
#' - Lowest 25%: Lower-yield fields
|
||||||
|
|
|
||||||
|
|
@ -1457,7 +1457,7 @@ prepare_predictions <- function(predictions, newdata) {
|
||||||
dplyr::mutate(
|
dplyr::mutate(
|
||||||
sub_field = newdata$sub_field,
|
sub_field = newdata$sub_field,
|
||||||
field = newdata$field,
|
field = newdata$field,
|
||||||
Age_days = newdata$DOY,
|
Age_days = newdata$DAH,
|
||||||
total_CI = round(newdata$cumulative_CI, 0),
|
total_CI = round(newdata$cumulative_CI, 0),
|
||||||
predicted_Tcha = round(predicted_Tcha, 0),
|
predicted_Tcha = round(predicted_Tcha, 0),
|
||||||
season = newdata$season
|
season = newdata$season
|
||||||
|
|
@ -1506,8 +1506,8 @@ create_fallback_result <- function(field_boundaries) {
|
||||||
#' Calculate yield prediction KPI using Random Forest with Feature Selection
|
#' Calculate yield prediction KPI using Random Forest with Feature Selection
|
||||||
#'
|
#'
|
||||||
#' Trains a Random Forest model on historical harvest data with cumulative CI,
|
#' Trains a Random Forest model on historical harvest data with cumulative CI,
|
||||||
#' days of year (DOY), and CI-per-day as predictors. Uses CAST::ffs() for
|
#' days after harvest (DAH), and CI-per-day as predictors. Uses CAST::ffs() for
|
||||||
#' Forward Feature Selection. Predicts yields for mature fields (DOY >= 240).
|
#' Forward Feature Selection. Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD).
|
||||||
#'
|
#'
|
||||||
#' @param field_boundaries Field boundaries (sf or SpatVector)
|
#' @param field_boundaries Field boundaries (sf or SpatVector)
|
||||||
#' @param harvesting_data Data frame with harvest data including tonnage_ha column
|
#' @param harvesting_data Data frame with harvest data including tonnage_ha column
|
||||||
|
|
@ -1527,8 +1527,8 @@ create_fallback_result <- function(field_boundaries) {
|
||||||
#' - Algorithm: Random Forest (caret + CAST)
|
#' - Algorithm: Random Forest (caret + CAST)
|
||||||
#' - Feature Selection: Forward Feature Selection (CAST::ffs)
|
#' - Feature Selection: Forward Feature Selection (CAST::ffs)
|
||||||
#' - Cross-validation: 5-fold CV
|
#' - Cross-validation: 5-fold CV
|
||||||
#' - Predictors: cumulative_CI, DOY, CI_per_day
|
#' - Predictors: cumulative_CI, DAH, CI_per_day
|
||||||
#' - Mature field threshold: DOY >= 240 (8 months)
|
#' - Mature field threshold: DAH >= DAH_MATURITY_THRESHOLD (8 months, ~240 days)
|
||||||
#' - Output: Field-level yield forecasts grouped by quartile
|
#' - Output: Field-level yield forecasts grouped by quartile
|
||||||
#'
|
#'
|
||||||
#' **Error Handling:**
|
#' **Error Handling:**
|
||||||
|
|
@ -1568,12 +1568,12 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
|
||||||
CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed,
|
CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed,
|
||||||
by = c("field", "sub_field", "season")) %>%
|
by = c("field", "sub_field", "season")) %>%
|
||||||
dplyr::group_by(sub_field, season) %>%
|
dplyr::group_by(sub_field, season) %>%
|
||||||
dplyr::slice(which.max(DOY)) %>%
|
dplyr::slice(which.max(DAH)) %>%
|
||||||
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>%
|
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DAH, season, sub_area) %>%
|
||||||
dplyr::mutate(CI_per_day = cumulative_CI / DOY)
|
dplyr::mutate(CI_per_day = cumulative_CI / DAH)
|
||||||
|
|
||||||
# Define predictors and response variables
|
# Define predictors and response variables
|
||||||
predictors <- c("cumulative_CI", "DOY", "CI_per_day")
|
predictors <- c("cumulative_CI", "DAH", "CI_per_day")
|
||||||
response <- "tonnage_ha"
|
response <- "tonnage_ha"
|
||||||
|
|
||||||
# Prepare training dataset (fields with harvest data)
|
# Prepare training dataset (fields with harvest data)
|
||||||
|
|
@ -1600,7 +1600,7 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
|
||||||
# Prepare prediction dataset (fields without harvest data, mature fields only)
|
# Prepare prediction dataset (fields without harvest data, mature fields only)
|
||||||
prediction_yields <- CI_and_yield %>%
|
prediction_yields <- CI_and_yield %>%
|
||||||
as.data.frame() %>%
|
as.data.frame() %>%
|
||||||
dplyr::filter(is.na(tonnage_ha) & DOY >= 240) # Mature fields only
|
dplyr::filter(is.na(tonnage_ha) & DAH >= DAH_MATURITY_THRESHOLD) # Mature fields only
|
||||||
|
|
||||||
# Configure model training parameters
|
# Configure model training parameters
|
||||||
ctrl <- caret::trainControl(
|
ctrl <- caret::trainControl(
|
||||||
|
|
@ -1643,13 +1643,13 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
|
||||||
safe_log(paste("Yield prediction RMSE (in-sample/training):", round(rmse_value, 2), "t/ha"))
|
safe_log(paste("Yield prediction RMSE (in-sample/training):", round(rmse_value, 2), "t/ha"))
|
||||||
}
|
}
|
||||||
|
|
||||||
# Predict yields for current season (mature fields >= 240 days)
|
# Predict yields for current season (mature fields >= DAH_MATURITY_THRESHOLD days)
|
||||||
if (nrow(prediction_yields) > 0) {
|
if (nrow(prediction_yields) > 0) {
|
||||||
pred_rf_current_season <- prepare_predictions(
|
pred_rf_current_season <- prepare_predictions(
|
||||||
stats::predict(model_ffs_rf, newdata = prediction_yields),
|
stats::predict(model_ffs_rf, newdata = prediction_yields),
|
||||||
prediction_yields
|
prediction_yields
|
||||||
) %>%
|
) %>%
|
||||||
dplyr::filter(Age_days >= 240) %>%
|
dplyr::filter(Age_days >= DAH_MATURITY_THRESHOLD) %>%
|
||||||
dplyr::select(c("field", "Age_days", "predicted_Tcha", "season"))
|
dplyr::select(c("field", "Age_days", "predicted_Tcha", "season"))
|
||||||
} else {
|
} else {
|
||||||
pred_rf_current_season <- data.frame()
|
pred_rf_current_season <- data.frame()
|
||||||
|
|
@ -1710,7 +1710,7 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
|
||||||
safe_log("✓ Yield prediction complete")
|
safe_log("✓ Yield prediction complete")
|
||||||
return(list(summary = result, field_results = field_level_results))
|
return(list(summary = result, field_results = field_level_results))
|
||||||
} else {
|
} else {
|
||||||
safe_log("No fields meet maturity threshold (DOY >= 240) for prediction", "WARNING")
|
safe_log(paste("No fields meet maturity threshold (DAH >=", DAH_MATURITY_THRESHOLD, ") for prediction"), "WARNING")
|
||||||
return(list(summary = create_fallback_result(field_boundaries)$summary,
|
return(list(summary = create_fallback_result(field_boundaries)$summary,
|
||||||
field_results = data.frame()))
|
field_results = data.frame()))
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -300,7 +300,7 @@ if (dir.exists(kpi_data_dir)) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# 6. Gap filling summary - GROUP BY Gap_Level and COUNT
|
# 6. Gaps summary - GROUP BY Gap_Level and COUNT
|
||||||
if ("Gap_Level" %in% names(field_details_table)) {
|
if ("Gap_Level" %in% names(field_details_table)) {
|
||||||
summary_tables$gap_filling <- field_details_table %>%
|
summary_tables$gap_filling <- field_details_table %>%
|
||||||
group_by(gap_level = Gap_Level) %>%
|
group_by(gap_level = Gap_Level) %>%
|
||||||
|
|
@ -581,7 +581,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table
|
||||||
growth_decline = list(display = "Growth Decline (4-Week Trend)", level_col = "trend_interpretation", count_col = "field_count"),
|
growth_decline = list(display = "Growth Decline (4-Week Trend)", level_col = "trend_interpretation", count_col = "field_count"),
|
||||||
patchiness = list(display = "Field Patchiness", level_col = "gini_category", count_col = "field_count", detail_col = "patchiness_risk"),
|
patchiness = list(display = "Field Patchiness", level_col = "gini_category", count_col = "field_count", detail_col = "patchiness_risk"),
|
||||||
tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", detail_col = "range", count_col = "field_count"),
|
tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", detail_col = "range", count_col = "field_count"),
|
||||||
gap_filling = list(display = "Gap Filling", level_col = "gap_level", count_col = "field_count")
|
gap_filling = list(display = "Gaps", level_col = "gap_level", count_col = "field_count")
|
||||||
)
|
)
|
||||||
|
|
||||||
standardize_kpi <- function(df, level_col, count_col, detail_col = NULL) {
|
standardize_kpi <- function(df, level_col, count_col, detail_col = NULL) {
|
||||||
|
|
@ -1349,8 +1349,8 @@ tryCatch({
|
||||||
ci_quadrant_data <- if (project_dir == "esa" && field_name == "00F25") {
|
ci_quadrant_data <- if (project_dir == "esa" && field_name == "00F25") {
|
||||||
CI_quadrant %>%
|
CI_quadrant %>%
|
||||||
dplyr::filter(field == "00F25") %>%
|
dplyr::filter(field == "00F25") %>%
|
||||||
dplyr::arrange(DOY) %>%
|
dplyr::arrange(DAH) %>%
|
||||||
dplyr::group_by(DOY) %>%
|
dplyr::group_by(DAH) %>%
|
||||||
dplyr::slice(1) %>%
|
dplyr::slice(1) %>%
|
||||||
dplyr::ungroup()
|
dplyr::ungroup()
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -1494,10 +1494,10 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
|
||||||
CI_quadrant %>%
|
CI_quadrant %>%
|
||||||
filter(Date <= as.Date(report_date)) %>%
|
filter(Date <= as.Date(report_date)) %>%
|
||||||
group_by(field, season) %>%
|
group_by(field, season) %>%
|
||||||
summarise(last_date = max(Date), last_doy = max(DOY), .groups = 'drop') %>%
|
summarise(last_date = max(Date), last_dah = max(DAH), .groups = 'drop') %>%
|
||||||
group_by(field) %>%
|
group_by(field) %>%
|
||||||
filter(season == max(season)) %>%
|
filter(season == max(season)) %>%
|
||||||
select(field, Age_days = last_doy)
|
select(field, Age_days = last_dah)
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
data.frame(field = character(), Age_days = numeric())
|
data.frame(field = character(), Age_days = numeric())
|
||||||
})
|
})
|
||||||
|
|
@ -1640,7 +1640,7 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
|
||||||
- **High:** Gini > 0.12 (poor uniformity, recommend field scouting)
|
- **High:** Gini > 0.12 (poor uniformity, recommend field scouting)
|
||||||
- **Note:** Young crops (< 3 months) naturally show higher patchiness as they establish; this decreases with canopy closure.
|
- **Note:** Young crops (< 3 months) naturally show higher patchiness as they establish; this decreases with canopy closure.
|
||||||
|
|
||||||
- **Gap Filling Score:** Indicates the proportion of a field with low CI values (lowest 25% of the distribution), highlighting areas with poor crop establishment or gaps that may need replanting.
|
- **Gap Score:** Indicates the proportion of a field with low CI values (lowest 25% of the distribution), highlighting areas with poor crop establishment or gaps that may need replanting.
|
||||||
|
|
||||||
2. **Overview Map: Growth on Farm:**
|
2. **Overview Map: Growth on Farm:**
|
||||||
Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting.
|
Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting.
|
||||||
|
|
|
||||||
|
|
@ -239,7 +239,7 @@
|
||||||
#
|
#
|
||||||
# OUTPUT:
|
# OUTPUT:
|
||||||
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
|
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
|
||||||
# - Columns: field, sub_field, Date, FitData, DOY, value
|
# - Columns: field, sub_field, Date, FitData, DAH, value
|
||||||
#
|
#
|
||||||
# PARAMETERS:
|
# PARAMETERS:
|
||||||
# PROJECT: angata, chemba, xinavane, esa, simba
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
|
|
||||||
|
|
@ -33,8 +33,14 @@ suppressPackageStartupMessages({
|
||||||
})
|
})
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# SECTION 2: CLIENT TYPE MAPPING & CONFIGURATION
|
# SECTION 2: GLOBAL AGRONOMIC THRESHOLDS & CLIENT TYPE MAPPING
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
|
# Maturity threshold for yield prediction: crop age in Days After Harvest (DAH)
|
||||||
|
# Only fields >= DAH_MATURITY_THRESHOLD days old receive yield forecasts
|
||||||
|
# ~240 days ≈ 8 months, typical sugarcane maturity window
|
||||||
|
DAH_MATURITY_THRESHOLD <- 240
|
||||||
|
|
||||||
# Maps project names to client types for pipeline control
|
# Maps project names to client types for pipeline control
|
||||||
# This determines which scripts run and what outputs they produce
|
# This determines which scripts run and what outputs they produce
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -357,10 +357,10 @@ ci_plot <- function(pivotName,
|
||||||
#' Creates a plot showing Chlorophyll Index data over time for a pivot field
|
#' Creates a plot showing Chlorophyll Index data over time for a pivot field
|
||||||
#'
|
#'
|
||||||
#' @param pivotName The name or ID of the pivot field to visualize
|
#' @param pivotName The name or ID of the pivot field to visualize
|
||||||
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DOY, cumulative_CI, value and season columns
|
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DAH, cumulative_CI, value and season columns
|
||||||
#' @param plot_type Type of plot to generate: "absolute", "cumulative", or "both"
|
#' @param plot_type Type of plot to generate: "absolute", "cumulative", or "both"
|
||||||
#' @param facet_on Whether to facet the plot by season (TRUE) or overlay all seasons (FALSE)
|
#' @param facet_on Whether to facet the plot by season (TRUE) or overlay all seasons (FALSE)
|
||||||
#' @param x_unit Unit for x-axis: "days" for DOY or "weeks" for week number (default: "days")
|
#' @param x_unit Unit for x-axis: "days" for DAH or "weeks" for week number (default: "days")
|
||||||
#' @param colorblind_friendly Whether to use colorblind-friendly color schemes (default: FALSE)
|
#' @param colorblind_friendly Whether to use colorblind-friendly color schemes (default: FALSE)
|
||||||
#' @param show_benchmarks Whether to show historical benchmark lines (default: FALSE)
|
#' @param show_benchmarks Whether to show historical benchmark lines (default: FALSE)
|
||||||
#' @param estate_name Name of the estate for benchmark calculation (required if show_benchmarks = TRUE)
|
#' @param estate_name Name of the estate for benchmark calculation (required if show_benchmarks = TRUE)
|
||||||
|
|
@ -393,7 +393,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
|
|
||||||
# Process data
|
# Process data
|
||||||
data_ci2 <- data_ci %>%
|
data_ci2 <- data_ci %>%
|
||||||
dplyr::mutate(CI_rate = cumulative_CI / DOY,
|
dplyr::mutate(CI_rate = cumulative_CI / DAH,
|
||||||
week = lubridate::week(Date)) %>%
|
week = lubridate::week(Date)) %>%
|
||||||
dplyr::group_by(field) %>%
|
dplyr::group_by(field) %>%
|
||||||
dplyr::mutate(mean_CIrate_rolling_10_days = zoo::rollapplyr(CI_rate, width = 10, FUN = mean, partial = TRUE),
|
dplyr::mutate(mean_CIrate_rolling_10_days = zoo::rollapplyr(CI_rate, width = 10, FUN = mean, partial = TRUE),
|
||||||
|
|
@ -448,7 +448,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
|
|
||||||
# Determine x-axis variable based on x_unit parameter
|
# Determine x-axis variable based on x_unit parameter
|
||||||
x_var <- if (x_unit == "days") {
|
x_var <- if (x_unit == "days") {
|
||||||
if (facet_on) "Date" else "DOY"
|
if (facet_on) "Date" else "DAH"
|
||||||
} else {
|
} else {
|
||||||
"week"
|
"week"
|
||||||
}
|
}
|
||||||
|
|
@ -457,7 +457,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
"days" = if (facet_on) "Date" else "Age of Crop (Days)",
|
"days" = if (facet_on) "Date" else "Age of Crop (Days)",
|
||||||
"weeks" = "Week Number")
|
"weeks" = "Week Number")
|
||||||
|
|
||||||
# Create plot with either facets by season or overlay by DOY/week
|
# Create plot with either facets by season or overlay by DAH/week
|
||||||
if (facet_on) {
|
if (facet_on) {
|
||||||
g <- ggplot2::ggplot(data = plot_data) +
|
g <- ggplot2::ggplot(data = plot_data) +
|
||||||
ggplot2::facet_wrap(~season, scales = "free_x") +
|
ggplot2::facet_wrap(~season, scales = "free_x") +
|
||||||
|
|
@ -502,12 +502,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
benchmark_subset <- benchmark_data %>%
|
benchmark_subset <- benchmark_data %>%
|
||||||
dplyr::filter(ci_type == ci_type_filter) %>%
|
dplyr::filter(ci_type == ci_type_filter) %>%
|
||||||
dplyr::mutate(
|
dplyr::mutate(
|
||||||
benchmark_x = if (x_var == "DOY") {
|
benchmark_x = if (x_var == "DAH") {
|
||||||
DOY
|
DAH
|
||||||
} else if (x_var == "week") {
|
} else if (x_var == "week") {
|
||||||
DOY / 7 # Approximate conversion
|
DAH / 7 # Approximate conversion
|
||||||
} else {
|
} else {
|
||||||
DOY # For Date, use DOY as is (may not align perfectly)
|
DAH # For Date, use DAH as is (may not align perfectly)
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
ggplot2::geom_smooth(
|
ggplot2::geom_smooth(
|
||||||
|
|
@ -549,7 +549,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
x = x_label) +
|
x = x_label) +
|
||||||
color_scale +
|
color_scale +
|
||||||
{
|
{
|
||||||
if (x_var == "DOY") {
|
if (x_var == "DAH") {
|
||||||
ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
||||||
} else if (x_var == "week") {
|
} else if (x_var == "week") {
|
||||||
ggplot2::scale_x_continuous(breaks = seq(0, 64, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
ggplot2::scale_x_continuous(breaks = seq(0, 64, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
||||||
|
|
@ -597,7 +597,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
|
|
||||||
# Determine x-axis variable based on x_unit parameter
|
# Determine x-axis variable based on x_unit parameter
|
||||||
x_var <- if (x_unit == "days") {
|
x_var <- if (x_unit == "days") {
|
||||||
if (facet_on) "Date" else "DOY"
|
if (facet_on) "Date" else "DAH"
|
||||||
} else {
|
} else {
|
||||||
"week"
|
"week"
|
||||||
}
|
}
|
||||||
|
|
@ -620,12 +620,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
if (!is.null(benchmark_data)) {
|
if (!is.null(benchmark_data)) {
|
||||||
benchmark_subset <- benchmark_data %>%
|
benchmark_subset <- benchmark_data %>%
|
||||||
dplyr::mutate(
|
dplyr::mutate(
|
||||||
benchmark_x = if (x_var == "DOY") {
|
benchmark_x = if (x_var == "DAH") {
|
||||||
DOY
|
DAH
|
||||||
} else if (x_var == "week") {
|
} else if (x_var == "week") {
|
||||||
DOY / 7
|
DAH / 7
|
||||||
} else {
|
} else {
|
||||||
DOY
|
DAH
|
||||||
},
|
},
|
||||||
ci_type_label = case_when(
|
ci_type_label = case_when(
|
||||||
ci_type == "value" ~ "10-Day Rolling Mean CI",
|
ci_type == "value" ~ "10-Day Rolling Mean CI",
|
||||||
|
|
@ -673,7 +673,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
x = x_label) +
|
x = x_label) +
|
||||||
color_scale +
|
color_scale +
|
||||||
{
|
{
|
||||||
if (x_var == "DOY") {
|
if (x_var == "DAH") {
|
||||||
ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
||||||
} else if (x_var == "week") {
|
} else if (x_var == "week") {
|
||||||
ggplot2::scale_x_continuous(breaks = seq(0, 64, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
ggplot2::scale_x_continuous(breaks = seq(0, 64, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
||||||
|
|
@ -840,11 +840,11 @@ get_week_path <- function(mosaic_path, input_date, week_offset) {
|
||||||
|
|
||||||
#' Computes historical percentile benchmarks for CI data per estate
|
#' Computes historical percentile benchmarks for CI data per estate
|
||||||
#'
|
#'
|
||||||
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DOY, cumulative_CI, value, season columns
|
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DAH, cumulative_CI, value, season columns
|
||||||
#' @param estate_name Name of the estate/client to filter data for
|
#' @param estate_name Name of the estate/client to filter data for
|
||||||
#' @param percentiles Vector of percentiles to compute (e.g., c(10, 50, 90))
|
#' @param percentiles Vector of percentiles to compute (e.g., c(10, 50, 90))
|
||||||
#' @param min_seasons Minimum number of seasons required for reliable benchmarks (default: 3)
|
#' @param min_seasons Minimum number of seasons required for reliable benchmarks (default: 3)
|
||||||
#' @return Data frame with DOY, percentile, ci_type, benchmark_value, or NULL if insufficient data
|
#' @return Data frame with DAH, percentile, ci_type, benchmark_value, or NULL if insufficient data
|
||||||
#'
|
#'
|
||||||
compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c(10, 50, 90), min_seasons = 3) {
|
compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c(10, 50, 90), min_seasons = 3) {
|
||||||
# Input validation
|
# Input validation
|
||||||
|
|
@ -873,7 +873,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
|
||||||
# Prepare data for both CI types
|
# Prepare data for both CI types
|
||||||
data_prepared <- data_filtered %>%
|
data_prepared <- data_filtered %>%
|
||||||
dplyr::ungroup() %>% # Ensure no existing groupings
|
dplyr::ungroup() %>% # Ensure no existing groupings
|
||||||
dplyr::select(DOY, value, cumulative_CI, season) %>%
|
dplyr::select(DAH, value, cumulative_CI, season) %>%
|
||||||
tidyr::pivot_longer(
|
tidyr::pivot_longer(
|
||||||
cols = c("value", "cumulative_CI"),
|
cols = c("value", "cumulative_CI"),
|
||||||
names_to = "ci_type",
|
names_to = "ci_type",
|
||||||
|
|
@ -881,9 +881,9 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
|
||||||
) %>%
|
) %>%
|
||||||
dplyr::filter(!is.na(ci_value)) # Remove NA values
|
dplyr::filter(!is.na(ci_value)) # Remove NA values
|
||||||
|
|
||||||
# Compute percentiles for each DOY and ci_type
|
# Compute percentiles for each DAH and ci_type
|
||||||
benchmarks <- data_prepared %>%
|
benchmarks <- data_prepared %>%
|
||||||
dplyr::group_by(DOY, ci_type) %>%
|
dplyr::group_by(DAH, ci_type) %>%
|
||||||
dplyr::summarise(
|
dplyr::summarise(
|
||||||
p10 = tryCatch(quantile(ci_value, 0.1, na.rm = TRUE), error = function(e) NA_real_),
|
p10 = tryCatch(quantile(ci_value, 0.1, na.rm = TRUE), error = function(e) NA_real_),
|
||||||
p50 = tryCatch(quantile(ci_value, 0.5, na.rm = TRUE), error = function(e) NA_real_),
|
p50 = tryCatch(quantile(ci_value, 0.5, na.rm = TRUE), error = function(e) NA_real_),
|
||||||
|
|
@ -891,7 +891,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
|
||||||
n_observations = n(),
|
n_observations = n(),
|
||||||
.groups = 'drop'
|
.groups = 'drop'
|
||||||
) %>%
|
) %>%
|
||||||
dplyr::filter(n_observations >= min_seasons) %>% # Only include DOYs with sufficient data
|
dplyr::filter(n_observations >= min_seasons) %>% # Only include DAHs with sufficient data
|
||||||
tidyr::pivot_longer(
|
tidyr::pivot_longer(
|
||||||
cols = c(p10, p50, p90),
|
cols = c(p10, p50, p90),
|
||||||
names_to = "percentile",
|
names_to = "percentile",
|
||||||
|
|
@ -908,7 +908,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
|
||||||
|
|
||||||
# Rename columns for clarity
|
# Rename columns for clarity
|
||||||
benchmarks <- benchmarks %>%
|
benchmarks <- benchmarks %>%
|
||||||
dplyr::select(DOY, ci_type, percentile, benchmark_value)
|
dplyr::select(DAH, ci_type, percentile, benchmark_value)
|
||||||
|
|
||||||
safe_log(paste("Computed CI benchmarks for estate", estate_name, "with", length(unique_seasons), "seasons and", nrow(benchmarks), "benchmark points"), "INFO")
|
safe_log(paste("Computed CI benchmarks for estate", estate_name, "with", length(unique_seasons), "seasons and", nrow(benchmarks), "benchmark points"), "INFO")
|
||||||
|
|
||||||
|
|
@ -1095,7 +1095,7 @@ get_field_priority_level <- function(cv, morans_i) {
|
||||||
#'
|
#'
|
||||||
#' @param field_name Name of the field to summarize
|
#' @param field_name Name of the field to summarize
|
||||||
#' @param field_details_table Data frame with field-level KPI details
|
#' @param field_details_table Data frame with field-level KPI details
|
||||||
#' @param CI_quadrant Data frame containing CI quadrant data with Date, DOY, season columns
|
#' @param CI_quadrant Data frame containing CI quadrant data with Date, DAH, season columns
|
||||||
#' @param report_date Report date (used for filtering current season data)
|
#' @param report_date Report date (used for filtering current season data)
|
||||||
#' @return Formatted text string with field KPI summary
|
#' @return Formatted text string with field KPI summary
|
||||||
#'
|
#'
|
||||||
|
|
@ -1116,10 +1116,10 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
|
||||||
}
|
}
|
||||||
current_season <- current_season_data %>% pull(season)
|
current_season <- current_season_data %>% pull(season)
|
||||||
|
|
||||||
# Get the most recent DOY from the current season
|
# Get the most recent DAH from the current season
|
||||||
field_age_data <- CI_quadrant %>%
|
field_age_data <- CI_quadrant %>%
|
||||||
filter(field == field_name, season == current_season) %>%
|
filter(field == field_name, season == current_season) %>%
|
||||||
pull(DOY)
|
pull(DAH)
|
||||||
|
|
||||||
field_age <- if (length(field_age_data) > 0) max(field_age_data, na.rm = TRUE) else NA_real_
|
field_age <- if (length(field_age_data) > 0) max(field_age_data, na.rm = TRUE) else NA_real_
|
||||||
# Filter data for this specific field
|
# Filter data for this specific field
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue