Refactor KPI calculations and reporting utilities; normalize field details columns, update area change metrics, and enhance .gitignore for PNG exceptions.

This commit is contained in:
Timon 2026-02-18 13:26:32 +01:00
parent b2d4093601
commit 1500bbcb1c
7 changed files with 93 additions and 93 deletions

View file

@ -303,9 +303,9 @@ def load_harvest_data(data_file: Path) -> pd.DataFrame:
def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device, def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device,
threshold=0.45, consecutive_days=2): threshold=0.45, consecutive_days=2):
""" """
Phase 1: Growing window detection with DOY season reset. Phase 1: Growing window detection with DAH season reset.
For each detected harvest, reset DOY counter for the next season. For each detected harvest, reset DAH counter for the next season.
This allows the model to detect multiple consecutive harvests in multi-year data. This allows the model to detect multiple consecutive harvests in multi-year data.
""" """
harvest_dates = [] harvest_dates = []

3
r_app/.gitignore vendored
View file

@ -14,8 +14,7 @@ renv
# EXCEPTIONS: Explicitly track intentional PNG assets # EXCEPTIONS: Explicitly track intentional PNG assets
# Uncomment or add lines below for PNG files that should be committed to git # Uncomment or add lines below for PNG files that should be committed to git
!r_app/CI_graph_example.png !CI_graph_example.png
# Ignore files related to Rproj # Ignore files related to Rproj
.Rproj.user/ .Rproj.user/
.Rhistory .Rhistory

View file

@ -181,12 +181,18 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_
#' @param previous_stats Previous week field statistics #' @param previous_stats Previous week field statistics
#' #'
#' @return Data frame with field-level CI changes #' @return Data frame with field-level CI changes
calculate_area_change_kpi <- function(current_stats, previous_stats) { calculate_area_change_kpi <- function(current_stats, previous_stats, field_boundaries_sf = NULL) {
# Initialize field index vector
field_idx_vec <- seq_len(nrow(current_stats))
if (!is.null(field_boundaries_sf) && "Field_id" %in% names(current_stats)) {
field_idx_vec <- match(current_stats$Field_id, field_boundaries_sf$field)
}
# Initialize result data frame # Initialize result data frame
result <- data.frame( result <- data.frame(
field_idx = seq_len(nrow(current_stats)), field_idx = field_idx_vec,
mean_ci_pct_change = NA_real_, mean_ci_abs_change = NA_real_,
interpretation = NA_character_, interpretation = NA_character_,
stringsAsFactors = FALSE stringsAsFactors = FALSE
) )
@ -223,19 +229,19 @@ calculate_area_change_kpi <- function(current_stats, previous_stats) {
# Find matching previous CI value # Find matching previous CI value
prev_ci <- prev_lookup[[as.character(current_field_id)]] prev_ci <- prev_lookup[[as.character(current_field_id)]]
if (!is.null(prev_ci) && !is.na(prev_ci) && !is.na(current_ci) && prev_ci > 0) { if (!is.null(prev_ci) && !is.na(prev_ci) && !is.na(current_ci)) {
# Calculate percentage change # Calculate absolute change (CI units)
pct_change <- ((current_ci - prev_ci) / prev_ci) * 100 abs_change <- current_ci - prev_ci
result$mean_ci_pct_change[i] <- round(pct_change, 2) result$mean_ci_abs_change[i] <- round(abs_change, 2)
# Add interpretation # Add interpretation
if (pct_change > 15) { if (abs_change > 0.5) {
result$interpretation[i] <- "Rapid growth" result$interpretation[i] <- "Rapid growth"
} else if (pct_change > 5) { } else if (abs_change > 0.2) {
result$interpretation[i] <- "Positive growth" result$interpretation[i] <- "Positive growth"
} else if (pct_change > -5) { } else if (abs_change >= -0.2) {
result$interpretation[i] <- "Stable" result$interpretation[i] <- "Stable"
} else if (pct_change > -15) { } else if (abs_change >= -0.5) {
result$interpretation[i] <- "Declining" result$interpretation[i] <- "Declining"
} else { } else {
result$interpretation[i] <- "Rapid decline" result$interpretation[i] <- "Rapid decline"
@ -243,8 +249,7 @@ calculate_area_change_kpi <- function(current_stats, previous_stats) {
} else { } else {
result$interpretation[i] <- "No previous data" result$interpretation[i] <- "No previous data"
} }
} }
return(result) return(result)
} }
@ -557,7 +562,7 @@ create_summary_tables <- function(all_kpis) {
select(field_idx, cv_value, uniformity_category, interpretation), select(field_idx, cv_value, uniformity_category, interpretation),
area_change = all_kpis$area_change %>% area_change = all_kpis$area_change %>%
select(field_idx, mean_ci_pct_change, interpretation), select(field_idx, mean_ci_abs_change, interpretation),
tch_forecast = all_kpis$tch_forecasted %>% tch_forecast = all_kpis$tch_forecasted %>%
select(field_idx, tch_forecasted), select(field_idx, tch_forecasted),
@ -633,7 +638,7 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee
result <- result %>% result <- result %>%
left_join( left_join(
all_kpis$area_change %>% all_kpis$area_change %>%
select(field_idx, Weekly_CI_Change = mean_ci_pct_change, select(field_idx, Weekly_CI_Change = mean_ci_abs_change,
Area_Change_Interpretation = interpretation), Area_Change_Interpretation = interpretation),
by = "field_idx" by = "field_idx"
) )
@ -918,7 +923,7 @@ calculate_all_field_analysis_agronomic_support <- function(
} else { } else {
area_change_kpi <- data.frame( area_change_kpi <- data.frame(
field_idx = seq_len(nrow(field_boundaries_sf)), field_idx = seq_len(nrow(field_boundaries_sf)),
mean_ci_pct_change = NA_real_, mean_ci_abs_change = NA_real_,
interpretation = rep("No previous data", nrow(field_boundaries_sf)) interpretation = rep("No previous data", nrow(field_boundaries_sf))
) )
} }

View file

@ -45,13 +45,16 @@ CI_CHANGE_INCREASE_THRESHOLD <- 0.5 # Weekly CI change threshold for increase
#' @return data.frame with field and acreage columns #' @return data.frame with field and acreage columns
calculate_field_acreages <- function(field_boundaries_sf) { calculate_field_acreages <- function(field_boundaries_sf) {
tryCatch({ tryCatch({
lookup_df <- field_boundaries_sf %>% # Project to equal-area CRS (EPSG:6933) for accurate area calculations
field_boundaries_proj <- sf::st_transform(field_boundaries_sf, "EPSG:6933")
lookup_df <- field_boundaries_proj %>%
sf::st_drop_geometry() %>% sf::st_drop_geometry() %>%
as.data.frame() %>% as.data.frame() %>%
mutate( mutate(
geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) { geometry_valid = sapply(seq_len(nrow(field_boundaries_proj)), function(idx) {
tryCatch({ tryCatch({
sf::st_is_valid(field_boundaries_sf[idx, ]) sf::st_is_valid(field_boundaries_proj[idx, ])
}, error = function(e) FALSE) }, error = function(e) FALSE)
}), }),
area_ha = 0 area_ha = 0
@ -61,7 +64,7 @@ calculate_field_acreages <- function(field_boundaries_sf) {
valid_indices <- which(lookup_df$geometry_valid) valid_indices <- which(lookup_df$geometry_valid)
areas_ha <- vapply(valid_indices, function(idx) { areas_ha <- vapply(valid_indices, function(idx) {
tryCatch({ tryCatch({
area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ])) area_m2 <- as.numeric(sf::st_area(field_boundaries_proj[idx, ]))
area_m2 / 10000 area_m2 / 10000
}, error = function(e) NA_real_) }, error = function(e) NA_real_)
}, numeric(1)) }, numeric(1))
@ -555,23 +558,7 @@ calculate_field_analysis_cane_supply <- function(setup,
# ========== PHASE 3: LOAD PLANTING DATES ========== # ========== PHASE 3: LOAD PLANTING DATES ==========
message("\nLoading harvest data from harvest.xlsx for planting dates...") message("\nLoading harvest data from harvest.xlsx for planting dates...")
harvest_file_path <- file.path(data_dir, "harvest.xlsx") harvesting_data <- load_harvesting_data(data_dir)
harvesting_data <- tryCatch({
if (file.exists(harvest_file_path)) {
harvest_raw <- readxl::read_excel(harvest_file_path)
harvest_raw$season_start <- as.Date(harvest_raw$season_start)
harvest_raw$season_end <- as.Date(harvest_raw$season_end)
message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows"))
harvest_raw
} else {
message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path))
NULL
}
}, error = function(e) {
message(paste(" ERROR loading harvest.xlsx:", e$message))
NULL
})
planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf) planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
@ -628,9 +615,8 @@ calculate_field_analysis_cane_supply <- function(setup,
# ========== PHASE 6: LOAD HARVEST PROBABILITIES ========== # ========== PHASE 6: LOAD HARVEST PROBABILITIES ==========
message("\n4. Loading harvest probabilities from script 31...") message("\n4. Loading harvest probabilities from script 31...")
harvest_prob_dir <- setup$kpi_field_stats_dir # Use get_harvest_output_path() to safely build path (avoids NULL setup$kpi_field_stats_dir)
harvest_prob_file <- file.path(harvest_prob_dir, harvest_prob_file <- get_harvest_output_path(project_dir, current_week, current_year)
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year))
message(paste(" Looking for:", harvest_prob_file)) message(paste(" Looking for:", harvest_prob_file))
imminent_prob_data <- tryCatch({ imminent_prob_data <- tryCatch({

View file

@ -406,7 +406,7 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
# Gap score using 2σ below median to detect outliers # Gap score using 2σ below median to detect outliers
median_ci <- median(valid_values) median_ci <- median(valid_values)
sd_ci <- sd(valid_values) sd_ci <- sd(valid_values)
outlier_threshold <- median_ci - (2 * sd_ci) outlier_threshold <- median_ci - (1 * sd_ci)
low_ci_pixels <- sum(valid_values < outlier_threshold) low_ci_pixels <- sum(valid_values < outlier_threshold)
total_pixels <- length(valid_values) total_pixels <- length(valid_values)
gap_score <- round((low_ci_pixels / total_pixels) * 100, 2) gap_score <- round((low_ci_pixels / total_pixels) * 100, 2)
@ -415,7 +415,8 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
gap_level <- dplyr::case_when( gap_level <- dplyr::case_when(
gap_score < 10 ~ "Minimal", gap_score < 10 ~ "Minimal",
gap_score < 25 ~ "Moderate", gap_score < 25 ~ "Moderate",
TRUE ~ "Significant" gap_score >= 25 ~ "Significant",
TRUE ~ NA_character_
) )
field_results <- rbind(field_results, data.frame( field_results <- rbind(field_results, data.frame(
@ -1603,7 +1604,8 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
dplyr::group_by(sub_field, season) %>% dplyr::group_by(sub_field, season) %>%
dplyr::slice(which.max(DAH)) %>% dplyr::slice(which.max(DAH)) %>%
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DAH, season, sub_area) %>% dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DAH, season, sub_area) %>%
dplyr::mutate(CI_per_day = cumulative_CI / DAH) dplyr::mutate(CI_per_day = ifelse(DAH > 0, cumulative_CI / DAH, NA_real_))
# Define predictors and response variables # Define predictors and response variables
predictors <- c("cumulative_CI", "DAH", "CI_per_day") predictors <- c("cumulative_CI", "DAH", "CI_per_day")

View file

@ -179,24 +179,15 @@ if (dir.exists(kpi_data_dir)) {
safe_log(paste("✓ Loaded field_details_table with", nrow(field_details_table), "fields")) safe_log(paste("✓ Loaded field_details_table with", nrow(field_details_table), "fields"))
safe_log(paste(" Columns:", paste(names(field_details_table), collapse=", "))) safe_log(paste(" Columns:", paste(names(field_details_table), collapse=", ")))
# NORMALIZATION: Ensure critical column names match downstream expectations # NORMALIZATION: Normalize column structure (Field→Field_id rename + expected_cols injection)
# Handle both "Field" and "Field_id" naming conventions field_details_table <- normalize_field_details_columns(field_details_table)
if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
field_details_table <- field_details_table %>%
dplyr::rename(Field_id = Field)
safe_log(" ✓ Normalized: renamed Field → Field_id")
}
# Normalize other common column name variations # Normalize other common column name variations
column_mappings <- list( column_mappings <- list(
c("CV Value", "CV"), c("CV Value", "CV"),
c("CV", "CV"), # Keep as-is
c("Mean CI", "Mean_CI"), c("Mean CI", "Mean_CI"),
c("Mean_CI", "Mean_CI"), # Keep as-is
c("Yield Forecast (t/ha)", "TCH_Forecasted"), c("Yield Forecast (t/ha)", "TCH_Forecasted"),
c("TCH_Forecasted", "TCH_Forecasted"), # Keep as-is
c("Gap Score", "Gap_Score"), c("Gap Score", "Gap_Score"),
c("Gap_Score", "Gap_Score"), # Keep as-is
c("Growth Uniformity", "Growth_Uniformity"), c("Growth Uniformity", "Growth_Uniformity"),
c("Decline Risk", "Decline_Risk"), c("Decline Risk", "Decline_Risk"),
c("Patchiness Risk", "Patchiness_Risk"), c("Patchiness Risk", "Patchiness_Risk"),
@ -721,6 +712,7 @@ generate_field_alerts <- function(field_details_table) {
highest_patchiness_risk = case_when( highest_patchiness_risk = case_when(
any(`Patchiness Risk` == "High") ~ "High", any(`Patchiness Risk` == "High") ~ "High",
any(`Patchiness Risk` == "Medium") ~ "Medium", any(`Patchiness Risk` == "Medium") ~ "Medium",
any(`Patchiness Risk` == "Low") ~ "Low",
any(`Patchiness Risk` == "Minimal") ~ "Minimal", any(`Patchiness Risk` == "Minimal") ~ "Minimal",
TRUE ~ "Unknown" TRUE ~ "Unknown"
), ),
@ -1510,25 +1502,9 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
} }
# Join field sizes and ages to KPI data, simplified column selection # Join field sizes and ages to KPI data, simplified column selection
# DEFENSIVE: Normalize field_details_table column names one more time before joining # DEFENSIVE: Normalize field_details_table column structure before joining
# Ensure it has Field_id column (regardless of whether it was from RDS or fallback) # Uses shared normalization function to ensure Field_id exists and all expected columns are present
if (!is.null(field_details_table) && nrow(field_details_table) > 0) { field_details_table <- normalize_field_details_columns(field_details_table)
# If Field exists but Field_id doesn't, rename it
if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
field_details_table <- field_details_table %>%
dplyr::rename(Field_id = Field)
}
# Ensure all expected KPI columns exist; add as NA if missing
expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score",
"Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation",
"Decline_Severity", "Patchiness_Risk")
for (col in expected_cols) {
if (!col %in% names(field_details_table)) {
field_details_table[[col]] <- NA
}
}
}
field_details_clean <- field_details_table %>% field_details_clean <- field_details_table %>%
left_join(field_sizes_df, by = c("Field_id" = "field")) %>% left_join(field_sizes_df, by = c("Field_id" = "field")) %>%
@ -1540,7 +1516,7 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
field_size_acres = round(field_size_acres, 1), field_size_acres = round(field_size_acres, 1),
Mean_CI = round(Mean_CI, 2), Mean_CI = round(Mean_CI, 2),
CV = round(CV, 2), CV = round(CV, 2),
Gap_Score = round(Gap_Score, 0), Gap_Score = round(Gap_Score, 2),
TCH_Forecasted = round(TCH_Forecasted, 1) TCH_Forecasted = round(TCH_Forecasted, 1)
) )
@ -1554,7 +1530,7 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
`Mean CI` = Mean_CI, `Mean CI` = Mean_CI,
`Weekly CI Change` = Weekly_CI_Change, `Weekly CI Change` = Weekly_CI_Change,
`Yield Forecast (t/ha)` = TCH_Forecasted, `Yield Forecast (t/ha)` = TCH_Forecasted,
`Gap Score` = Gap_Score, `Gap Score %` = Gap_Score,
`Decline Risk` = Decline_Severity, `Decline Risk` = Decline_Severity,
`Patchiness Risk` = Patchiness_Risk, `Patchiness Risk` = Patchiness_Risk,
`CV Value` = CV `CV Value` = CV
@ -1566,7 +1542,7 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
`Field Size (acres)` = field_size_acres, `Field Size (acres)` = field_size_acres,
`Mean CI` = Mean_CI, `Mean CI` = Mean_CI,
`Yield Forecast (t/ha)` = TCH_Forecasted, `Yield Forecast (t/ha)` = TCH_Forecasted,
`Gap Score` = Gap_Score, `Gap Score %` = Gap_Score,
`Decline Risk` = Decline_Severity, `Decline Risk` = Decline_Severity,
`Patchiness Risk` = Patchiness_Risk, `Patchiness Risk` = Patchiness_Risk,
`CV Value` = CV `CV Value` = CV
@ -1639,13 +1615,13 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
- **Why it matters:** Uniform fields are easier to manage and typically produce better yields. Uneven growth suggests irrigation problems, fertility gaps, pests, or disease. - **Why it matters:** Uniform fields are easier to manage and typically produce better yields. Uneven growth suggests irrigation problems, fertility gaps, pests, or disease.
- **KPI 2: Area Change (Weekly Growth)** — Tracks week-over-week CI changes to detect rapid improvements or declines - **KPI 2: Area Change (Weekly Growth)** — Tracks week-over-week CI changes to detect rapid improvements or declines
- **Calculation:** ((Current Mean CI Previous Mean CI) / Previous Mean CI) × 100 - **Calculation:** Current Mean CI Previous Mean CI (absolute change in CI units)
- **Categories:** - **Categories:**
- **Rapid growth:** > 15% increase (excellent weekly progress) - **Rapid growth:** > +0.5 (excellent weekly progress)
- **Positive growth:** 515% increase (steady improvement) - **Positive growth:** +0.2 to +0.5 (steady improvement)
- **Stable:** 5% to +5% (field maintained, no significant change) - **Stable:** 0.2 to +0.2 (field maintained, no significant change)
- **Declining:** 5% to 15% (slow decline, warrant closer monitoring) - **Declining:** 0.5 to 0.2 (slow decline, warrant closer monitoring)
- **Rapid decline:** < 15% (alert: urgent issue requiring investigation) - **Rapid decline:** < 0.5 (alert: urgent issue requiring investigation)
- **Why it matters:** Week-to-week changes reveal developing problems early, enabling timely intervention. - **Why it matters:** Week-to-week changes reveal developing problems early, enabling timely intervention.
- **KPI 3: TCH Forecasted (Yield Prediction)** — Predicts final harvest tonnage for mature fields - **KPI 3: TCH Forecasted (Yield Prediction)** — Predicts final harvest tonnage for mature fields
@ -1692,12 +1668,15 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
Your scouting and remediation strategy should differ: Field A might need systemic irrigation adjustment or disease management; Field B might need soil remediation in the affected corner. **Patchiness tells you *where to focus your effort*.** Your scouting and remediation strategy should differ: Field A might need systemic irrigation adjustment or disease management; Field B might need soil remediation in the affected corner. **Patchiness tells you *where to focus your effort*.**
- **KPI 6: Gap Score (Establishment Quality)** — Quantifies field gaps and areas of poor crop establishment - **KPI 6: Gap Score (Establishment Quality)** — Quantifies field gaps and areas of poor crop establishment
- **Metric:** Percentage of field area with very low CI values (lowest 25% of pixel distribution) - **Calculation Method:** Statistical outlier detection (2σ method)
- **Levels:** - Identifies pixels with CI below: **Median CI (2 × Standard Deviation)**
- **Minimal:** < 10% of field (good establishment, limited replanting needed) - Calculates: **Gap Score = (Outlier Pixels / Total Pixels) × 100**
- **Moderate:** 1025% of field (monitor gap expansion, coordinate with agronomy) - Example: If 2 of 100 pixels fall below threshold → Gap Score = 2%
- **Significant:** ≥ 25% of field (consider targeted replanting or rehabilitation) - **Score Ranges & Interpretation:**
- **Why it matters:** High gap scores indicate potential yield losses and may warrant management intervention. - **010%:** Minimal gaps (excellent establishment, healthy field)
- **1025%:** Moderate gaps (monitor for expansion, coordinate with agronomy)
- **≥ 25%:** Significant gaps (consider targeted replanting or rehabilitation)
- **Why it matters:** Gap scores reveal areas of poor establishment that may indicate early growth problems or harvest-related residue issues. Lower is better (03% is typical for healthy fields).
2. **Overview Map: Growth on Farm:** 2. **Overview Map: Growth on Farm:**
Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting. Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting.

View file

@ -1165,4 +1165,33 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
}) })
} }
#' Normalize field_details_table column structure
#'
#' Standardizes column names and ensures all expected KPI columns exist.
#' Handles Field → Field_id rename and injects missing columns as NA.
#'
#' @param field_details_table data.frame to normalize
#' @return data.frame with standardized column structure
normalize_field_details_columns <- function(field_details_table) {
if (is.null(field_details_table) || nrow(field_details_table) == 0) {
return(field_details_table)
}
# Rename Field → Field_id if needed
if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
field_details_table <- field_details_table %>%
dplyr::rename(Field_id = Field)
}
# Ensure all expected KPI columns exist; add as NA if missing
expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score",
"Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation",
"Decline_Severity", "Patchiness_Risk")
for (col in expected_cols) {
if (!col %in% names(field_details_table)) {
field_details_table[[col]] <- NA
}
}
return(field_details_table)
}