Refactor KPI calculations and reporting utilities; normalize field details columns, update area change metrics, and enhance .gitignore for PNG exceptions.

This commit is contained in:
Timon 2026-02-18 13:26:32 +01:00
parent b2d4093601
commit 1500bbcb1c
7 changed files with 93 additions and 93 deletions

View file

@ -303,9 +303,9 @@ def load_harvest_data(data_file: Path) -> pd.DataFrame:
def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device,
threshold=0.45, consecutive_days=2):
"""
Phase 1: Growing window detection with DOY season reset.
Phase 1: Growing window detection with DAH season reset.
For each detected harvest, reset DOY counter for the next season.
For each detected harvest, reset DAH counter for the next season.
This allows the model to detect multiple consecutive harvests in multi-year data.
"""
harvest_dates = []

3
r_app/.gitignore vendored
View file

@ -14,8 +14,7 @@ renv
# EXCEPTIONS: Explicitly track intentional PNG assets
# Uncomment or add lines below for PNG files that should be committed to git
!r_app/CI_graph_example.png
!CI_graph_example.png
# Ignore files related to Rproj
.Rproj.user/
.Rhistory

View file

@ -181,12 +181,18 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_
#' @param previous_stats Previous week field statistics
#'
#' @return Data frame with field-level CI changes
calculate_area_change_kpi <- function(current_stats, previous_stats) {
calculate_area_change_kpi <- function(current_stats, previous_stats, field_boundaries_sf = NULL) {
# Initialize field index vector
field_idx_vec <- seq_len(nrow(current_stats))
if (!is.null(field_boundaries_sf) && "Field_id" %in% names(current_stats)) {
field_idx_vec <- match(current_stats$Field_id, field_boundaries_sf$field)
}
# Initialize result data frame
result <- data.frame(
field_idx = seq_len(nrow(current_stats)),
mean_ci_pct_change = NA_real_,
field_idx = field_idx_vec,
mean_ci_abs_change = NA_real_,
interpretation = NA_character_,
stringsAsFactors = FALSE
)
@ -223,19 +229,19 @@ calculate_area_change_kpi <- function(current_stats, previous_stats) {
# Find matching previous CI value
prev_ci <- prev_lookup[[as.character(current_field_id)]]
if (!is.null(prev_ci) && !is.na(prev_ci) && !is.na(current_ci) && prev_ci > 0) {
# Calculate percentage change
pct_change <- ((current_ci - prev_ci) / prev_ci) * 100
result$mean_ci_pct_change[i] <- round(pct_change, 2)
if (!is.null(prev_ci) && !is.na(prev_ci) && !is.na(current_ci)) {
# Calculate absolute change (CI units)
abs_change <- current_ci - prev_ci
result$mean_ci_abs_change[i] <- round(abs_change, 2)
# Add interpretation
if (pct_change > 15) {
if (abs_change > 0.5) {
result$interpretation[i] <- "Rapid growth"
} else if (pct_change > 5) {
} else if (abs_change > 0.2) {
result$interpretation[i] <- "Positive growth"
} else if (pct_change > -5) {
} else if (abs_change >= -0.2) {
result$interpretation[i] <- "Stable"
} else if (pct_change > -15) {
} else if (abs_change >= -0.5) {
result$interpretation[i] <- "Declining"
} else {
result$interpretation[i] <- "Rapid decline"
@ -243,8 +249,7 @@ calculate_area_change_kpi <- function(current_stats, previous_stats) {
} else {
result$interpretation[i] <- "No previous data"
}
}
}
return(result)
}
@ -557,7 +562,7 @@ create_summary_tables <- function(all_kpis) {
select(field_idx, cv_value, uniformity_category, interpretation),
area_change = all_kpis$area_change %>%
select(field_idx, mean_ci_pct_change, interpretation),
select(field_idx, mean_ci_abs_change, interpretation),
tch_forecast = all_kpis$tch_forecasted %>%
select(field_idx, tch_forecasted),
@ -633,7 +638,7 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee
result <- result %>%
left_join(
all_kpis$area_change %>%
select(field_idx, Weekly_CI_Change = mean_ci_pct_change,
select(field_idx, Weekly_CI_Change = mean_ci_abs_change,
Area_Change_Interpretation = interpretation),
by = "field_idx"
)
@ -918,7 +923,7 @@ calculate_all_field_analysis_agronomic_support <- function(
} else {
area_change_kpi <- data.frame(
field_idx = seq_len(nrow(field_boundaries_sf)),
mean_ci_pct_change = NA_real_,
mean_ci_abs_change = NA_real_,
interpretation = rep("No previous data", nrow(field_boundaries_sf))
)
}

View file

@ -45,13 +45,16 @@ CI_CHANGE_INCREASE_THRESHOLD <- 0.5 # Weekly CI change threshold for increase
#' @return data.frame with field and acreage columns
calculate_field_acreages <- function(field_boundaries_sf) {
tryCatch({
lookup_df <- field_boundaries_sf %>%
# Project to equal-area CRS (EPSG:6933) for accurate area calculations
field_boundaries_proj <- sf::st_transform(field_boundaries_sf, "EPSG:6933")
lookup_df <- field_boundaries_proj %>%
sf::st_drop_geometry() %>%
as.data.frame() %>%
mutate(
geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) {
geometry_valid = sapply(seq_len(nrow(field_boundaries_proj)), function(idx) {
tryCatch({
sf::st_is_valid(field_boundaries_sf[idx, ])
sf::st_is_valid(field_boundaries_proj[idx, ])
}, error = function(e) FALSE)
}),
area_ha = 0
@ -61,7 +64,7 @@ calculate_field_acreages <- function(field_boundaries_sf) {
valid_indices <- which(lookup_df$geometry_valid)
areas_ha <- vapply(valid_indices, function(idx) {
tryCatch({
area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ]))
area_m2 <- as.numeric(sf::st_area(field_boundaries_proj[idx, ]))
area_m2 / 10000
}, error = function(e) NA_real_)
}, numeric(1))
@ -555,23 +558,7 @@ calculate_field_analysis_cane_supply <- function(setup,
# ========== PHASE 3: LOAD PLANTING DATES ==========
message("\nLoading harvest data from harvest.xlsx for planting dates...")
harvest_file_path <- file.path(data_dir, "harvest.xlsx")
harvesting_data <- tryCatch({
if (file.exists(harvest_file_path)) {
harvest_raw <- readxl::read_excel(harvest_file_path)
harvest_raw$season_start <- as.Date(harvest_raw$season_start)
harvest_raw$season_end <- as.Date(harvest_raw$season_end)
message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows"))
harvest_raw
} else {
message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path))
NULL
}
}, error = function(e) {
message(paste(" ERROR loading harvest.xlsx:", e$message))
NULL
})
harvesting_data <- load_harvesting_data(data_dir)
planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
@ -628,9 +615,8 @@ calculate_field_analysis_cane_supply <- function(setup,
# ========== PHASE 6: LOAD HARVEST PROBABILITIES ==========
message("\n4. Loading harvest probabilities from script 31...")
harvest_prob_dir <- setup$kpi_field_stats_dir
harvest_prob_file <- file.path(harvest_prob_dir,
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year))
# Use get_harvest_output_path() to safely build path (avoids NULL setup$kpi_field_stats_dir)
harvest_prob_file <- get_harvest_output_path(project_dir, current_week, current_year)
message(paste(" Looking for:", harvest_prob_file))
imminent_prob_data <- tryCatch({

View file

@ -406,7 +406,7 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
# Gap score using 2σ below median to detect outliers
median_ci <- median(valid_values)
sd_ci <- sd(valid_values)
outlier_threshold <- median_ci - (2 * sd_ci)
outlier_threshold <- median_ci - (1 * sd_ci)
low_ci_pixels <- sum(valid_values < outlier_threshold)
total_pixels <- length(valid_values)
gap_score <- round((low_ci_pixels / total_pixels) * 100, 2)
@ -415,7 +415,8 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
gap_level <- dplyr::case_when(
gap_score < 10 ~ "Minimal",
gap_score < 25 ~ "Moderate",
TRUE ~ "Significant"
gap_score >= 25 ~ "Significant",
TRUE ~ NA_character_
)
field_results <- rbind(field_results, data.frame(
@ -1603,7 +1604,8 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
dplyr::group_by(sub_field, season) %>%
dplyr::slice(which.max(DAH)) %>%
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DAH, season, sub_area) %>%
dplyr::mutate(CI_per_day = cumulative_CI / DAH)
dplyr::mutate(CI_per_day = ifelse(DAH > 0, cumulative_CI / DAH, NA_real_))
# Define predictors and response variables
predictors <- c("cumulative_CI", "DAH", "CI_per_day")

View file

@ -179,24 +179,15 @@ if (dir.exists(kpi_data_dir)) {
safe_log(paste("✓ Loaded field_details_table with", nrow(field_details_table), "fields"))
safe_log(paste(" Columns:", paste(names(field_details_table), collapse=", ")))
# NORMALIZATION: Ensure critical column names match downstream expectations
# Handle both "Field" and "Field_id" naming conventions
if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
field_details_table <- field_details_table %>%
dplyr::rename(Field_id = Field)
safe_log(" ✓ Normalized: renamed Field → Field_id")
}
# NORMALIZATION: Normalize column structure (Field→Field_id rename + expected_cols injection)
field_details_table <- normalize_field_details_columns(field_details_table)
# Normalize other common column name variations
column_mappings <- list(
c("CV Value", "CV"),
c("CV", "CV"), # Keep as-is
c("Mean CI", "Mean_CI"),
c("Mean_CI", "Mean_CI"), # Keep as-is
c("Yield Forecast (t/ha)", "TCH_Forecasted"),
c("TCH_Forecasted", "TCH_Forecasted"), # Keep as-is
c("Gap Score", "Gap_Score"),
c("Gap_Score", "Gap_Score"), # Keep as-is
c("Growth Uniformity", "Growth_Uniformity"),
c("Decline Risk", "Decline_Risk"),
c("Patchiness Risk", "Patchiness_Risk"),
@ -721,6 +712,7 @@ generate_field_alerts <- function(field_details_table) {
highest_patchiness_risk = case_when(
any(`Patchiness Risk` == "High") ~ "High",
any(`Patchiness Risk` == "Medium") ~ "Medium",
any(`Patchiness Risk` == "Low") ~ "Low",
any(`Patchiness Risk` == "Minimal") ~ "Minimal",
TRUE ~ "Unknown"
),
@ -1510,25 +1502,9 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
}
# Join field sizes and ages to KPI data, simplified column selection
# DEFENSIVE: Normalize field_details_table column names one more time before joining
# Ensure it has Field_id column (regardless of whether it was from RDS or fallback)
if (!is.null(field_details_table) && nrow(field_details_table) > 0) {
# If Field exists but Field_id doesn't, rename it
if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
field_details_table <- field_details_table %>%
dplyr::rename(Field_id = Field)
}
# Ensure all expected KPI columns exist; add as NA if missing
expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score",
"Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation",
"Decline_Severity", "Patchiness_Risk")
for (col in expected_cols) {
if (!col %in% names(field_details_table)) {
field_details_table[[col]] <- NA
}
}
}
# DEFENSIVE: Normalize field_details_table column structure before joining
# Uses shared normalization function to ensure Field_id exists and all expected columns are present
field_details_table <- normalize_field_details_columns(field_details_table)
field_details_clean <- field_details_table %>%
left_join(field_sizes_df, by = c("Field_id" = "field")) %>%
@ -1540,7 +1516,7 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
field_size_acres = round(field_size_acres, 1),
Mean_CI = round(Mean_CI, 2),
CV = round(CV, 2),
Gap_Score = round(Gap_Score, 0),
Gap_Score = round(Gap_Score, 2),
TCH_Forecasted = round(TCH_Forecasted, 1)
)
@ -1554,7 +1530,7 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
`Mean CI` = Mean_CI,
`Weekly CI Change` = Weekly_CI_Change,
`Yield Forecast (t/ha)` = TCH_Forecasted,
`Gap Score` = Gap_Score,
`Gap Score %` = Gap_Score,
`Decline Risk` = Decline_Severity,
`Patchiness Risk` = Patchiness_Risk,
`CV Value` = CV
@ -1566,7 +1542,7 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
`Field Size (acres)` = field_size_acres,
`Mean CI` = Mean_CI,
`Yield Forecast (t/ha)` = TCH_Forecasted,
`Gap Score` = Gap_Score,
`Gap Score %` = Gap_Score,
`Decline Risk` = Decline_Severity,
`Patchiness Risk` = Patchiness_Risk,
`CV Value` = CV
@ -1639,13 +1615,13 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
- **Why it matters:** Uniform fields are easier to manage and typically produce better yields. Uneven growth suggests irrigation problems, fertility gaps, pests, or disease.
- **KPI 2: Area Change (Weekly Growth)** — Tracks week-over-week CI changes to detect rapid improvements or declines
- **Calculation:** ((Current Mean CI Previous Mean CI) / Previous Mean CI) × 100
- **Calculation:** Current Mean CI Previous Mean CI (absolute change in CI units)
- **Categories:**
- **Rapid growth:** > 15% increase (excellent weekly progress)
- **Positive growth:** 515% increase (steady improvement)
- **Stable:** 5% to +5% (field maintained, no significant change)
- **Declining:** 5% to 15% (slow decline, warrant closer monitoring)
- **Rapid decline:** < 15% (alert: urgent issue requiring investigation)
- **Rapid growth:** > +0.5 (excellent weekly progress)
- **Positive growth:** +0.2 to +0.5 (steady improvement)
- **Stable:** 0.2 to +0.2 (field maintained, no significant change)
- **Declining:** 0.5 to 0.2 (slow decline, warrant closer monitoring)
- **Rapid decline:** < 0.5 (alert: urgent issue requiring investigation)
- **Why it matters:** Week-to-week changes reveal developing problems early, enabling timely intervention.
- **KPI 3: TCH Forecasted (Yield Prediction)** — Predicts final harvest tonnage for mature fields
@ -1692,12 +1668,15 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
Your scouting and remediation strategy should differ: Field A might need systemic irrigation adjustment or disease management; Field B might need soil remediation in the affected corner. **Patchiness tells you *where to focus your effort*.**
- **KPI 6: Gap Score (Establishment Quality)** — Quantifies field gaps and areas of poor crop establishment
- **Metric:** Percentage of field area with very low CI values (lowest 25% of pixel distribution)
- **Levels:**
- **Minimal:** < 10% of field (good establishment, limited replanting needed)
- **Moderate:** 1025% of field (monitor gap expansion, coordinate with agronomy)
- **Significant:** ≥ 25% of field (consider targeted replanting or rehabilitation)
- **Why it matters:** High gap scores indicate potential yield losses and may warrant management intervention.
- **Calculation Method:** Statistical outlier detection (2σ method)
- Identifies pixels with CI below: **Median CI (2 × Standard Deviation)**
- Calculates: **Gap Score = (Outlier Pixels / Total Pixels) × 100**
- Example: If 2 of 100 pixels fall below threshold → Gap Score = 2%
- **Score Ranges & Interpretation:**
- **010%:** Minimal gaps (excellent establishment, healthy field)
- **1025%:** Moderate gaps (monitor for expansion, coordinate with agronomy)
- **≥ 25%:** Significant gaps (consider targeted replanting or rehabilitation)
- **Why it matters:** Gap scores reveal areas of poor establishment that may indicate early growth problems or harvest-related residue issues. Lower is better (03% is typical for healthy fields).
2. **Overview Map: Growth on Farm:**
Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting.

View file

@ -1165,4 +1165,33 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
})
}
#' Normalize field_details_table column structure
#'
#' Standardizes column names and ensures all expected KPI columns exist.
#' Handles Field → Field_id rename and injects missing columns as NA.
#'
#' @param field_details_table data.frame to normalize
#' @return data.frame with standardized column structure
normalize_field_details_columns <- function(field_details_table) {
if (is.null(field_details_table) || nrow(field_details_table) == 0) {
return(field_details_table)
}
# Rename Field → Field_id if needed
if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
field_details_table <- field_details_table %>%
dplyr::rename(Field_id = Field)
}
# Ensure all expected KPI columns exist; add as NA if missing
expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score",
"Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation",
"Decline_Severity", "Patchiness_Risk")
for (col in expected_cols) {
if (!col %in% names(field_details_table)) {
field_details_table[[col]] <- NA
}
}
return(field_details_table)
}