From 1500bbcb1ce4dcf49bbda253a53cdb376fc11208 Mon Sep 17 00:00:00 2001 From: Timon Date: Wed, 18 Feb 2026 13:26:32 +0100 Subject: [PATCH] Refactor KPI calculations and reporting utilities; normalize field details columns, update area change metrics, and enhance .gitignore for PNG exceptions. --- python_app/harvest_date_pred_utils.py | 4 +- r_app/.gitignore | 3 +- r_app/80_utils_agronomic_support.R | 37 +++++----- r_app/80_utils_cane_supply.R | 34 +++------ r_app/80_utils_common.R | 8 ++- ..._CI_report_with_kpis_agronomic_support.Rmd | 69 +++++++------------ r_app/report_utils.R | 31 ++++++++- 7 files changed, 93 insertions(+), 93 deletions(-) diff --git a/python_app/harvest_date_pred_utils.py b/python_app/harvest_date_pred_utils.py index eacb11f..d746e3a 100644 --- a/python_app/harvest_date_pred_utils.py +++ b/python_app/harvest_date_pred_utils.py @@ -303,9 +303,9 @@ def load_harvest_data(data_file: Path) -> pd.DataFrame: def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device, threshold=0.45, consecutive_days=2): """ - Phase 1: Growing window detection with DOY season reset. + Phase 1: Growing window detection with DAH season reset. - For each detected harvest, reset DOY counter for the next season. + For each detected harvest, reset DAH counter for the next season. This allows the model to detect multiple consecutive harvests in multi-year data. """ harvest_dates = [] diff --git a/r_app/.gitignore b/r_app/.gitignore index 7cd7b7e..3efde43 100644 --- a/r_app/.gitignore +++ b/r_app/.gitignore @@ -14,8 +14,7 @@ renv # EXCEPTIONS: Explicitly track intentional PNG assets # Uncomment or add lines below for PNG files that should be committed to git -!r_app/CI_graph_example.png - +!CI_graph_example.png # Ignore files related to Rproj .Rproj.user/ .Rhistory diff --git a/r_app/80_utils_agronomic_support.R b/r_app/80_utils_agronomic_support.R index 9dcf3db..9354d44 100644 --- a/r_app/80_utils_agronomic_support.R +++ b/r_app/80_utils_agronomic_support.R @@ -181,12 +181,18 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_ #' @param previous_stats Previous week field statistics #' #' @return Data frame with field-level CI changes -calculate_area_change_kpi <- function(current_stats, previous_stats) { +calculate_area_change_kpi <- function(current_stats, previous_stats, field_boundaries_sf = NULL) { + + # Initialize field index vector + field_idx_vec <- seq_len(nrow(current_stats)) + if (!is.null(field_boundaries_sf) && "Field_id" %in% names(current_stats)) { + field_idx_vec <- match(current_stats$Field_id, field_boundaries_sf$field) + } # Initialize result data frame result <- data.frame( - field_idx = seq_len(nrow(current_stats)), - mean_ci_pct_change = NA_real_, + field_idx = field_idx_vec, + mean_ci_abs_change = NA_real_, interpretation = NA_character_, stringsAsFactors = FALSE ) @@ -223,19 +229,19 @@ calculate_area_change_kpi <- function(current_stats, previous_stats) { # Find matching previous CI value prev_ci <- prev_lookup[[as.character(current_field_id)]] - if (!is.null(prev_ci) && !is.na(prev_ci) && !is.na(current_ci) && prev_ci > 0) { - # Calculate percentage change - pct_change <- ((current_ci - prev_ci) / prev_ci) * 100 - result$mean_ci_pct_change[i] <- round(pct_change, 2) + if (!is.null(prev_ci) && !is.na(prev_ci) && !is.na(current_ci)) { + # Calculate absolute change (CI units) + abs_change <- current_ci - prev_ci + result$mean_ci_abs_change[i] <- round(abs_change, 2) # Add interpretation - if (pct_change > 15) { + if (abs_change > 0.5) { result$interpretation[i] <- "Rapid growth" - } else if (pct_change > 5) { + } else if (abs_change > 0.2) { result$interpretation[i] <- "Positive growth" - } else if (pct_change > -5) { + } else if (abs_change >= -0.2) { result$interpretation[i] <- "Stable" - } else if (pct_change > -15) { + } else if (abs_change >= -0.5) { result$interpretation[i] <- "Declining" } else { result$interpretation[i] <- "Rapid decline" @@ -243,8 +249,7 @@ calculate_area_change_kpi <- function(current_stats, previous_stats) { } else { result$interpretation[i] <- "No previous data" } - } - + } return(result) } @@ -557,7 +562,7 @@ create_summary_tables <- function(all_kpis) { select(field_idx, cv_value, uniformity_category, interpretation), area_change = all_kpis$area_change %>% - select(field_idx, mean_ci_pct_change, interpretation), + select(field_idx, mean_ci_abs_change, interpretation), tch_forecast = all_kpis$tch_forecasted %>% select(field_idx, tch_forecasted), @@ -633,7 +638,7 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee result <- result %>% left_join( all_kpis$area_change %>% - select(field_idx, Weekly_CI_Change = mean_ci_pct_change, + select(field_idx, Weekly_CI_Change = mean_ci_abs_change, Area_Change_Interpretation = interpretation), by = "field_idx" ) @@ -918,7 +923,7 @@ calculate_all_field_analysis_agronomic_support <- function( } else { area_change_kpi <- data.frame( field_idx = seq_len(nrow(field_boundaries_sf)), - mean_ci_pct_change = NA_real_, + mean_ci_abs_change = NA_real_, interpretation = rep("No previous data", nrow(field_boundaries_sf)) ) } diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R index 6de2778..693fe0f 100644 --- a/r_app/80_utils_cane_supply.R +++ b/r_app/80_utils_cane_supply.R @@ -45,13 +45,16 @@ CI_CHANGE_INCREASE_THRESHOLD <- 0.5 # Weekly CI change threshold for increase #' @return data.frame with field and acreage columns calculate_field_acreages <- function(field_boundaries_sf) { tryCatch({ - lookup_df <- field_boundaries_sf %>% + # Project to equal-area CRS (EPSG:6933) for accurate area calculations + field_boundaries_proj <- sf::st_transform(field_boundaries_sf, "EPSG:6933") + + lookup_df <- field_boundaries_proj %>% sf::st_drop_geometry() %>% as.data.frame() %>% mutate( - geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) { + geometry_valid = sapply(seq_len(nrow(field_boundaries_proj)), function(idx) { tryCatch({ - sf::st_is_valid(field_boundaries_sf[idx, ]) + sf::st_is_valid(field_boundaries_proj[idx, ]) }, error = function(e) FALSE) }), area_ha = 0 @@ -61,7 +64,7 @@ calculate_field_acreages <- function(field_boundaries_sf) { valid_indices <- which(lookup_df$geometry_valid) areas_ha <- vapply(valid_indices, function(idx) { tryCatch({ - area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ])) + area_m2 <- as.numeric(sf::st_area(field_boundaries_proj[idx, ])) area_m2 / 10000 }, error = function(e) NA_real_) }, numeric(1)) @@ -555,23 +558,7 @@ calculate_field_analysis_cane_supply <- function(setup, # ========== PHASE 3: LOAD PLANTING DATES ========== message("\nLoading harvest data from harvest.xlsx for planting dates...") - harvest_file_path <- file.path(data_dir, "harvest.xlsx") - - harvesting_data <- tryCatch({ - if (file.exists(harvest_file_path)) { - harvest_raw <- readxl::read_excel(harvest_file_path) - harvest_raw$season_start <- as.Date(harvest_raw$season_start) - harvest_raw$season_end <- as.Date(harvest_raw$season_end) - message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows")) - harvest_raw - } else { - message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path)) - NULL - } - }, error = function(e) { - message(paste(" ERROR loading harvest.xlsx:", e$message)) - NULL - }) + harvesting_data <- load_harvesting_data(data_dir) planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf) @@ -628,9 +615,8 @@ calculate_field_analysis_cane_supply <- function(setup, # ========== PHASE 6: LOAD HARVEST PROBABILITIES ========== message("\n4. Loading harvest probabilities from script 31...") - harvest_prob_dir <- setup$kpi_field_stats_dir - harvest_prob_file <- file.path(harvest_prob_dir, - sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year)) + # Use get_harvest_output_path() to safely build path (avoids NULL setup$kpi_field_stats_dir) + harvest_prob_file <- get_harvest_output_path(project_dir, current_week, current_year) message(paste(" Looking for:", harvest_prob_file)) imminent_prob_data <- tryCatch({ diff --git a/r_app/80_utils_common.R b/r_app/80_utils_common.R index d3d2ca2..071b025 100644 --- a/r_app/80_utils_common.R +++ b/r_app/80_utils_common.R @@ -406,7 +406,7 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { # Gap score using 2σ below median to detect outliers median_ci <- median(valid_values) sd_ci <- sd(valid_values) - outlier_threshold <- median_ci - (2 * sd_ci) + outlier_threshold <- median_ci - (1 * sd_ci) low_ci_pixels <- sum(valid_values < outlier_threshold) total_pixels <- length(valid_values) gap_score <- round((low_ci_pixels / total_pixels) * 100, 2) @@ -415,7 +415,8 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { gap_level <- dplyr::case_when( gap_score < 10 ~ "Minimal", gap_score < 25 ~ "Moderate", - TRUE ~ "Significant" + gap_score >= 25 ~ "Significant", + TRUE ~ NA_character_ ) field_results <- rbind(field_results, data.frame( @@ -1603,7 +1604,8 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu dplyr::group_by(sub_field, season) %>% dplyr::slice(which.max(DAH)) %>% dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DAH, season, sub_area) %>% - dplyr::mutate(CI_per_day = cumulative_CI / DAH) + dplyr::mutate(CI_per_day = ifelse(DAH > 0, cumulative_CI / DAH, NA_real_)) + # Define predictors and response variables predictors <- c("cumulative_CI", "DAH", "CI_per_day") diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 60f9d93..5b8f268 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -179,24 +179,15 @@ if (dir.exists(kpi_data_dir)) { safe_log(paste("✓ Loaded field_details_table with", nrow(field_details_table), "fields")) safe_log(paste(" Columns:", paste(names(field_details_table), collapse=", "))) - # NORMALIZATION: Ensure critical column names match downstream expectations - # Handle both "Field" and "Field_id" naming conventions - if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) { - field_details_table <- field_details_table %>% - dplyr::rename(Field_id = Field) - safe_log(" ✓ Normalized: renamed Field → Field_id") - } + # NORMALIZATION: Normalize column structure (Field→Field_id rename + expected_cols injection) + field_details_table <- normalize_field_details_columns(field_details_table) # Normalize other common column name variations column_mappings <- list( c("CV Value", "CV"), - c("CV", "CV"), # Keep as-is c("Mean CI", "Mean_CI"), - c("Mean_CI", "Mean_CI"), # Keep as-is c("Yield Forecast (t/ha)", "TCH_Forecasted"), - c("TCH_Forecasted", "TCH_Forecasted"), # Keep as-is c("Gap Score", "Gap_Score"), - c("Gap_Score", "Gap_Score"), # Keep as-is c("Growth Uniformity", "Growth_Uniformity"), c("Decline Risk", "Decline_Risk"), c("Patchiness Risk", "Patchiness_Risk"), @@ -721,6 +712,7 @@ generate_field_alerts <- function(field_details_table) { highest_patchiness_risk = case_when( any(`Patchiness Risk` == "High") ~ "High", any(`Patchiness Risk` == "Medium") ~ "Medium", + any(`Patchiness Risk` == "Low") ~ "Low", any(`Patchiness Risk` == "Minimal") ~ "Minimal", TRUE ~ "Unknown" ), @@ -1510,25 +1502,9 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field } # Join field sizes and ages to KPI data, simplified column selection - # DEFENSIVE: Normalize field_details_table column names one more time before joining - # Ensure it has Field_id column (regardless of whether it was from RDS or fallback) - if (!is.null(field_details_table) && nrow(field_details_table) > 0) { - # If Field exists but Field_id doesn't, rename it - if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) { - field_details_table <- field_details_table %>% - dplyr::rename(Field_id = Field) - } - - # Ensure all expected KPI columns exist; add as NA if missing - expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score", - "Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation", - "Decline_Severity", "Patchiness_Risk") - for (col in expected_cols) { - if (!col %in% names(field_details_table)) { - field_details_table[[col]] <- NA - } - } - } + # DEFENSIVE: Normalize field_details_table column structure before joining + # Uses shared normalization function to ensure Field_id exists and all expected columns are present + field_details_table <- normalize_field_details_columns(field_details_table) field_details_clean <- field_details_table %>% left_join(field_sizes_df, by = c("Field_id" = "field")) %>% @@ -1540,7 +1516,7 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field field_size_acres = round(field_size_acres, 1), Mean_CI = round(Mean_CI, 2), CV = round(CV, 2), - Gap_Score = round(Gap_Score, 0), + Gap_Score = round(Gap_Score, 2), TCH_Forecasted = round(TCH_Forecasted, 1) ) @@ -1554,7 +1530,7 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field `Mean CI` = Mean_CI, `Weekly CI Change` = Weekly_CI_Change, `Yield Forecast (t/ha)` = TCH_Forecasted, - `Gap Score` = Gap_Score, + `Gap Score %` = Gap_Score, `Decline Risk` = Decline_Severity, `Patchiness Risk` = Patchiness_Risk, `CV Value` = CV @@ -1566,7 +1542,7 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field `Field Size (acres)` = field_size_acres, `Mean CI` = Mean_CI, `Yield Forecast (t/ha)` = TCH_Forecasted, - `Gap Score` = Gap_Score, + `Gap Score %` = Gap_Score, `Decline Risk` = Decline_Severity, `Patchiness Risk` = Patchiness_Risk, `CV Value` = CV @@ -1639,13 +1615,13 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to - **Why it matters:** Uniform fields are easier to manage and typically produce better yields. Uneven growth suggests irrigation problems, fertility gaps, pests, or disease. - **KPI 2: Area Change (Weekly Growth)** — Tracks week-over-week CI changes to detect rapid improvements or declines - - **Calculation:** ((Current Mean CI − Previous Mean CI) / Previous Mean CI) × 100 + - **Calculation:** Current Mean CI − Previous Mean CI (absolute change in CI units) - **Categories:** - - **Rapid growth:** > 15% increase (excellent weekly progress) - - **Positive growth:** 5–15% increase (steady improvement) - - **Stable:** −5% to +5% (field maintained, no significant change) - - **Declining:** −5% to −15% (slow decline, warrant closer monitoring) - - **Rapid decline:** < −15% (alert: urgent issue requiring investigation) + - **Rapid growth:** > +0.5 (excellent weekly progress) + - **Positive growth:** +0.2 to +0.5 (steady improvement) + - **Stable:** −0.2 to +0.2 (field maintained, no significant change) + - **Declining:** −0.5 to −0.2 (slow decline, warrant closer monitoring) + - **Rapid decline:** < −0.5 (alert: urgent issue requiring investigation) - **Why it matters:** Week-to-week changes reveal developing problems early, enabling timely intervention. - **KPI 3: TCH Forecasted (Yield Prediction)** — Predicts final harvest tonnage for mature fields @@ -1692,12 +1668,15 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to Your scouting and remediation strategy should differ: Field A might need systemic irrigation adjustment or disease management; Field B might need soil remediation in the affected corner. **Patchiness tells you *where to focus your effort*.** - **KPI 6: Gap Score (Establishment Quality)** — Quantifies field gaps and areas of poor crop establishment - - **Metric:** Percentage of field area with very low CI values (lowest 25% of pixel distribution) - - **Levels:** - - **Minimal:** < 10% of field (good establishment, limited replanting needed) - - **Moderate:** 10–25% of field (monitor gap expansion, coordinate with agronomy) - - **Significant:** ≥ 25% of field (consider targeted replanting or rehabilitation) - - **Why it matters:** High gap scores indicate potential yield losses and may warrant management intervention. + - **Calculation Method:** Statistical outlier detection (2σ method) + - Identifies pixels with CI below: **Median CI − (2 × Standard Deviation)** + - Calculates: **Gap Score = (Outlier Pixels / Total Pixels) × 100** + - Example: If 2 of 100 pixels fall below threshold → Gap Score = 2% + - **Score Ranges & Interpretation:** + - **0–10%:** Minimal gaps (excellent establishment, healthy field) + - **10–25%:** Moderate gaps (monitor for expansion, coordinate with agronomy) + - **≥ 25%:** Significant gaps (consider targeted replanting or rehabilitation) + - **Why it matters:** Gap scores reveal areas of poor establishment that may indicate early growth problems or harvest-related residue issues. Lower is better (0–3% is typical for healthy fields). 2. **Overview Map: Growth on Farm:** Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting. diff --git a/r_app/report_utils.R b/r_app/report_utils.R index 8b3afa1..38ecbd2 100644 --- a/r_app/report_utils.R +++ b/r_app/report_utils.R @@ -1165,4 +1165,33 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr }) } - +#' Normalize field_details_table column structure +#' +#' Standardizes column names and ensures all expected KPI columns exist. +#' Handles Field → Field_id rename and injects missing columns as NA. +#' +#' @param field_details_table data.frame to normalize +#' @return data.frame with standardized column structure +normalize_field_details_columns <- function(field_details_table) { + if (is.null(field_details_table) || nrow(field_details_table) == 0) { + return(field_details_table) + } + + # Rename Field → Field_id if needed + if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) { + field_details_table <- field_details_table %>% + dplyr::rename(Field_id = Field) + } + + # Ensure all expected KPI columns exist; add as NA if missing + expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score", + "Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation", + "Decline_Severity", "Patchiness_Risk") + for (col in expected_cols) { + if (!col %in% names(field_details_table)) { + field_details_table[[col]] <- NA + } + } + + return(field_details_table) +}