Refactor KPI calculations and reporting utilities; normalize field details columns, update area change metrics, and enhance .gitignore for PNG exceptions.
This commit is contained in:
parent
b2d4093601
commit
1500bbcb1c
|
|
@ -303,9 +303,9 @@ def load_harvest_data(data_file: Path) -> pd.DataFrame:
|
|||
def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device,
|
||||
threshold=0.45, consecutive_days=2):
|
||||
"""
|
||||
Phase 1: Growing window detection with DOY season reset.
|
||||
Phase 1: Growing window detection with DAH season reset.
|
||||
|
||||
For each detected harvest, reset DOY counter for the next season.
|
||||
For each detected harvest, reset DAH counter for the next season.
|
||||
This allows the model to detect multiple consecutive harvests in multi-year data.
|
||||
"""
|
||||
harvest_dates = []
|
||||
|
|
|
|||
3
r_app/.gitignore
vendored
3
r_app/.gitignore
vendored
|
|
@ -14,8 +14,7 @@ renv
|
|||
|
||||
# EXCEPTIONS: Explicitly track intentional PNG assets
|
||||
# Uncomment or add lines below for PNG files that should be committed to git
|
||||
!r_app/CI_graph_example.png
|
||||
|
||||
!CI_graph_example.png
|
||||
# Ignore files related to Rproj
|
||||
.Rproj.user/
|
||||
.Rhistory
|
||||
|
|
|
|||
|
|
@ -181,12 +181,18 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_
|
|||
#' @param previous_stats Previous week field statistics
|
||||
#'
|
||||
#' @return Data frame with field-level CI changes
|
||||
calculate_area_change_kpi <- function(current_stats, previous_stats) {
|
||||
calculate_area_change_kpi <- function(current_stats, previous_stats, field_boundaries_sf = NULL) {
|
||||
|
||||
# Initialize field index vector
|
||||
field_idx_vec <- seq_len(nrow(current_stats))
|
||||
if (!is.null(field_boundaries_sf) && "Field_id" %in% names(current_stats)) {
|
||||
field_idx_vec <- match(current_stats$Field_id, field_boundaries_sf$field)
|
||||
}
|
||||
|
||||
# Initialize result data frame
|
||||
result <- data.frame(
|
||||
field_idx = seq_len(nrow(current_stats)),
|
||||
mean_ci_pct_change = NA_real_,
|
||||
field_idx = field_idx_vec,
|
||||
mean_ci_abs_change = NA_real_,
|
||||
interpretation = NA_character_,
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
|
|
@ -223,19 +229,19 @@ calculate_area_change_kpi <- function(current_stats, previous_stats) {
|
|||
# Find matching previous CI value
|
||||
prev_ci <- prev_lookup[[as.character(current_field_id)]]
|
||||
|
||||
if (!is.null(prev_ci) && !is.na(prev_ci) && !is.na(current_ci) && prev_ci > 0) {
|
||||
# Calculate percentage change
|
||||
pct_change <- ((current_ci - prev_ci) / prev_ci) * 100
|
||||
result$mean_ci_pct_change[i] <- round(pct_change, 2)
|
||||
if (!is.null(prev_ci) && !is.na(prev_ci) && !is.na(current_ci)) {
|
||||
# Calculate absolute change (CI units)
|
||||
abs_change <- current_ci - prev_ci
|
||||
result$mean_ci_abs_change[i] <- round(abs_change, 2)
|
||||
|
||||
# Add interpretation
|
||||
if (pct_change > 15) {
|
||||
if (abs_change > 0.5) {
|
||||
result$interpretation[i] <- "Rapid growth"
|
||||
} else if (pct_change > 5) {
|
||||
} else if (abs_change > 0.2) {
|
||||
result$interpretation[i] <- "Positive growth"
|
||||
} else if (pct_change > -5) {
|
||||
} else if (abs_change >= -0.2) {
|
||||
result$interpretation[i] <- "Stable"
|
||||
} else if (pct_change > -15) {
|
||||
} else if (abs_change >= -0.5) {
|
||||
result$interpretation[i] <- "Declining"
|
||||
} else {
|
||||
result$interpretation[i] <- "Rapid decline"
|
||||
|
|
@ -244,7 +250,6 @@ calculate_area_change_kpi <- function(current_stats, previous_stats) {
|
|||
result$interpretation[i] <- "No previous data"
|
||||
}
|
||||
}
|
||||
|
||||
return(result)
|
||||
}
|
||||
|
||||
|
|
@ -557,7 +562,7 @@ create_summary_tables <- function(all_kpis) {
|
|||
select(field_idx, cv_value, uniformity_category, interpretation),
|
||||
|
||||
area_change = all_kpis$area_change %>%
|
||||
select(field_idx, mean_ci_pct_change, interpretation),
|
||||
select(field_idx, mean_ci_abs_change, interpretation),
|
||||
|
||||
tch_forecast = all_kpis$tch_forecasted %>%
|
||||
select(field_idx, tch_forecasted),
|
||||
|
|
@ -633,7 +638,7 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee
|
|||
result <- result %>%
|
||||
left_join(
|
||||
all_kpis$area_change %>%
|
||||
select(field_idx, Weekly_CI_Change = mean_ci_pct_change,
|
||||
select(field_idx, Weekly_CI_Change = mean_ci_abs_change,
|
||||
Area_Change_Interpretation = interpretation),
|
||||
by = "field_idx"
|
||||
)
|
||||
|
|
@ -918,7 +923,7 @@ calculate_all_field_analysis_agronomic_support <- function(
|
|||
} else {
|
||||
area_change_kpi <- data.frame(
|
||||
field_idx = seq_len(nrow(field_boundaries_sf)),
|
||||
mean_ci_pct_change = NA_real_,
|
||||
mean_ci_abs_change = NA_real_,
|
||||
interpretation = rep("No previous data", nrow(field_boundaries_sf))
|
||||
)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,13 +45,16 @@ CI_CHANGE_INCREASE_THRESHOLD <- 0.5 # Weekly CI change threshold for increase
|
|||
#' @return data.frame with field and acreage columns
|
||||
calculate_field_acreages <- function(field_boundaries_sf) {
|
||||
tryCatch({
|
||||
lookup_df <- field_boundaries_sf %>%
|
||||
# Project to equal-area CRS (EPSG:6933) for accurate area calculations
|
||||
field_boundaries_proj <- sf::st_transform(field_boundaries_sf, "EPSG:6933")
|
||||
|
||||
lookup_df <- field_boundaries_proj %>%
|
||||
sf::st_drop_geometry() %>%
|
||||
as.data.frame() %>%
|
||||
mutate(
|
||||
geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) {
|
||||
geometry_valid = sapply(seq_len(nrow(field_boundaries_proj)), function(idx) {
|
||||
tryCatch({
|
||||
sf::st_is_valid(field_boundaries_sf[idx, ])
|
||||
sf::st_is_valid(field_boundaries_proj[idx, ])
|
||||
}, error = function(e) FALSE)
|
||||
}),
|
||||
area_ha = 0
|
||||
|
|
@ -61,7 +64,7 @@ calculate_field_acreages <- function(field_boundaries_sf) {
|
|||
valid_indices <- which(lookup_df$geometry_valid)
|
||||
areas_ha <- vapply(valid_indices, function(idx) {
|
||||
tryCatch({
|
||||
area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ]))
|
||||
area_m2 <- as.numeric(sf::st_area(field_boundaries_proj[idx, ]))
|
||||
area_m2 / 10000
|
||||
}, error = function(e) NA_real_)
|
||||
}, numeric(1))
|
||||
|
|
@ -555,23 +558,7 @@ calculate_field_analysis_cane_supply <- function(setup,
|
|||
|
||||
# ========== PHASE 3: LOAD PLANTING DATES ==========
|
||||
message("\nLoading harvest data from harvest.xlsx for planting dates...")
|
||||
harvest_file_path <- file.path(data_dir, "harvest.xlsx")
|
||||
|
||||
harvesting_data <- tryCatch({
|
||||
if (file.exists(harvest_file_path)) {
|
||||
harvest_raw <- readxl::read_excel(harvest_file_path)
|
||||
harvest_raw$season_start <- as.Date(harvest_raw$season_start)
|
||||
harvest_raw$season_end <- as.Date(harvest_raw$season_end)
|
||||
message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows"))
|
||||
harvest_raw
|
||||
} else {
|
||||
message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path))
|
||||
NULL
|
||||
}
|
||||
}, error = function(e) {
|
||||
message(paste(" ERROR loading harvest.xlsx:", e$message))
|
||||
NULL
|
||||
})
|
||||
harvesting_data <- load_harvesting_data(data_dir)
|
||||
|
||||
planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
|
||||
|
||||
|
|
@ -628,9 +615,8 @@ calculate_field_analysis_cane_supply <- function(setup,
|
|||
|
||||
# ========== PHASE 6: LOAD HARVEST PROBABILITIES ==========
|
||||
message("\n4. Loading harvest probabilities from script 31...")
|
||||
harvest_prob_dir <- setup$kpi_field_stats_dir
|
||||
harvest_prob_file <- file.path(harvest_prob_dir,
|
||||
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year))
|
||||
# Use get_harvest_output_path() to safely build path (avoids NULL setup$kpi_field_stats_dir)
|
||||
harvest_prob_file <- get_harvest_output_path(project_dir, current_week, current_year)
|
||||
message(paste(" Looking for:", harvest_prob_file))
|
||||
|
||||
imminent_prob_data <- tryCatch({
|
||||
|
|
|
|||
|
|
@ -406,7 +406,7 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
|||
# Gap score using 2σ below median to detect outliers
|
||||
median_ci <- median(valid_values)
|
||||
sd_ci <- sd(valid_values)
|
||||
outlier_threshold <- median_ci - (2 * sd_ci)
|
||||
outlier_threshold <- median_ci - (1 * sd_ci)
|
||||
low_ci_pixels <- sum(valid_values < outlier_threshold)
|
||||
total_pixels <- length(valid_values)
|
||||
gap_score <- round((low_ci_pixels / total_pixels) * 100, 2)
|
||||
|
|
@ -415,7 +415,8 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
|||
gap_level <- dplyr::case_when(
|
||||
gap_score < 10 ~ "Minimal",
|
||||
gap_score < 25 ~ "Moderate",
|
||||
TRUE ~ "Significant"
|
||||
gap_score >= 25 ~ "Significant",
|
||||
TRUE ~ NA_character_
|
||||
)
|
||||
|
||||
field_results <- rbind(field_results, data.frame(
|
||||
|
|
@ -1603,7 +1604,8 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu
|
|||
dplyr::group_by(sub_field, season) %>%
|
||||
dplyr::slice(which.max(DAH)) %>%
|
||||
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DAH, season, sub_area) %>%
|
||||
dplyr::mutate(CI_per_day = cumulative_CI / DAH)
|
||||
dplyr::mutate(CI_per_day = ifelse(DAH > 0, cumulative_CI / DAH, NA_real_))
|
||||
|
||||
|
||||
# Define predictors and response variables
|
||||
predictors <- c("cumulative_CI", "DAH", "CI_per_day")
|
||||
|
|
|
|||
|
|
@ -179,24 +179,15 @@ if (dir.exists(kpi_data_dir)) {
|
|||
safe_log(paste("✓ Loaded field_details_table with", nrow(field_details_table), "fields"))
|
||||
safe_log(paste(" Columns:", paste(names(field_details_table), collapse=", ")))
|
||||
|
||||
# NORMALIZATION: Ensure critical column names match downstream expectations
|
||||
# Handle both "Field" and "Field_id" naming conventions
|
||||
if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
|
||||
field_details_table <- field_details_table %>%
|
||||
dplyr::rename(Field_id = Field)
|
||||
safe_log(" ✓ Normalized: renamed Field → Field_id")
|
||||
}
|
||||
# NORMALIZATION: Normalize column structure (Field→Field_id rename + expected_cols injection)
|
||||
field_details_table <- normalize_field_details_columns(field_details_table)
|
||||
|
||||
# Normalize other common column name variations
|
||||
column_mappings <- list(
|
||||
c("CV Value", "CV"),
|
||||
c("CV", "CV"), # Keep as-is
|
||||
c("Mean CI", "Mean_CI"),
|
||||
c("Mean_CI", "Mean_CI"), # Keep as-is
|
||||
c("Yield Forecast (t/ha)", "TCH_Forecasted"),
|
||||
c("TCH_Forecasted", "TCH_Forecasted"), # Keep as-is
|
||||
c("Gap Score", "Gap_Score"),
|
||||
c("Gap_Score", "Gap_Score"), # Keep as-is
|
||||
c("Growth Uniformity", "Growth_Uniformity"),
|
||||
c("Decline Risk", "Decline_Risk"),
|
||||
c("Patchiness Risk", "Patchiness_Risk"),
|
||||
|
|
@ -721,6 +712,7 @@ generate_field_alerts <- function(field_details_table) {
|
|||
highest_patchiness_risk = case_when(
|
||||
any(`Patchiness Risk` == "High") ~ "High",
|
||||
any(`Patchiness Risk` == "Medium") ~ "Medium",
|
||||
any(`Patchiness Risk` == "Low") ~ "Low",
|
||||
any(`Patchiness Risk` == "Minimal") ~ "Minimal",
|
||||
TRUE ~ "Unknown"
|
||||
),
|
||||
|
|
@ -1510,25 +1502,9 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
|
|||
}
|
||||
|
||||
# Join field sizes and ages to KPI data, simplified column selection
|
||||
# DEFENSIVE: Normalize field_details_table column names one more time before joining
|
||||
# Ensure it has Field_id column (regardless of whether it was from RDS or fallback)
|
||||
if (!is.null(field_details_table) && nrow(field_details_table) > 0) {
|
||||
# If Field exists but Field_id doesn't, rename it
|
||||
if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
|
||||
field_details_table <- field_details_table %>%
|
||||
dplyr::rename(Field_id = Field)
|
||||
}
|
||||
|
||||
# Ensure all expected KPI columns exist; add as NA if missing
|
||||
expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score",
|
||||
"Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation",
|
||||
"Decline_Severity", "Patchiness_Risk")
|
||||
for (col in expected_cols) {
|
||||
if (!col %in% names(field_details_table)) {
|
||||
field_details_table[[col]] <- NA
|
||||
}
|
||||
}
|
||||
}
|
||||
# DEFENSIVE: Normalize field_details_table column structure before joining
|
||||
# Uses shared normalization function to ensure Field_id exists and all expected columns are present
|
||||
field_details_table <- normalize_field_details_columns(field_details_table)
|
||||
|
||||
field_details_clean <- field_details_table %>%
|
||||
left_join(field_sizes_df, by = c("Field_id" = "field")) %>%
|
||||
|
|
@ -1540,7 +1516,7 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
|
|||
field_size_acres = round(field_size_acres, 1),
|
||||
Mean_CI = round(Mean_CI, 2),
|
||||
CV = round(CV, 2),
|
||||
Gap_Score = round(Gap_Score, 0),
|
||||
Gap_Score = round(Gap_Score, 2),
|
||||
TCH_Forecasted = round(TCH_Forecasted, 1)
|
||||
)
|
||||
|
||||
|
|
@ -1554,7 +1530,7 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
|
|||
`Mean CI` = Mean_CI,
|
||||
`Weekly CI Change` = Weekly_CI_Change,
|
||||
`Yield Forecast (t/ha)` = TCH_Forecasted,
|
||||
`Gap Score` = Gap_Score,
|
||||
`Gap Score %` = Gap_Score,
|
||||
`Decline Risk` = Decline_Severity,
|
||||
`Patchiness Risk` = Patchiness_Risk,
|
||||
`CV Value` = CV
|
||||
|
|
@ -1566,7 +1542,7 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
|
|||
`Field Size (acres)` = field_size_acres,
|
||||
`Mean CI` = Mean_CI,
|
||||
`Yield Forecast (t/ha)` = TCH_Forecasted,
|
||||
`Gap Score` = Gap_Score,
|
||||
`Gap Score %` = Gap_Score,
|
||||
`Decline Risk` = Decline_Severity,
|
||||
`Patchiness Risk` = Patchiness_Risk,
|
||||
`CV Value` = CV
|
||||
|
|
@ -1639,13 +1615,13 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
|
|||
- **Why it matters:** Uniform fields are easier to manage and typically produce better yields. Uneven growth suggests irrigation problems, fertility gaps, pests, or disease.
|
||||
|
||||
- **KPI 2: Area Change (Weekly Growth)** — Tracks week-over-week CI changes to detect rapid improvements or declines
|
||||
- **Calculation:** ((Current Mean CI − Previous Mean CI) / Previous Mean CI) × 100
|
||||
- **Calculation:** Current Mean CI − Previous Mean CI (absolute change in CI units)
|
||||
- **Categories:**
|
||||
- **Rapid growth:** > 15% increase (excellent weekly progress)
|
||||
- **Positive growth:** 5–15% increase (steady improvement)
|
||||
- **Stable:** −5% to +5% (field maintained, no significant change)
|
||||
- **Declining:** −5% to −15% (slow decline, warrant closer monitoring)
|
||||
- **Rapid decline:** < −15% (alert: urgent issue requiring investigation)
|
||||
- **Rapid growth:** > +0.5 (excellent weekly progress)
|
||||
- **Positive growth:** +0.2 to +0.5 (steady improvement)
|
||||
- **Stable:** −0.2 to +0.2 (field maintained, no significant change)
|
||||
- **Declining:** −0.5 to −0.2 (slow decline, warrant closer monitoring)
|
||||
- **Rapid decline:** < −0.5 (alert: urgent issue requiring investigation)
|
||||
- **Why it matters:** Week-to-week changes reveal developing problems early, enabling timely intervention.
|
||||
|
||||
- **KPI 3: TCH Forecasted (Yield Prediction)** — Predicts final harvest tonnage for mature fields
|
||||
|
|
@ -1692,12 +1668,15 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
|
|||
Your scouting and remediation strategy should differ: Field A might need systemic irrigation adjustment or disease management; Field B might need soil remediation in the affected corner. **Patchiness tells you *where to focus your effort*.**
|
||||
|
||||
- **KPI 6: Gap Score (Establishment Quality)** — Quantifies field gaps and areas of poor crop establishment
|
||||
- **Metric:** Percentage of field area with very low CI values (lowest 25% of pixel distribution)
|
||||
- **Levels:**
|
||||
- **Minimal:** < 10% of field (good establishment, limited replanting needed)
|
||||
- **Moderate:** 10–25% of field (monitor gap expansion, coordinate with agronomy)
|
||||
- **Significant:** ≥ 25% of field (consider targeted replanting or rehabilitation)
|
||||
- **Why it matters:** High gap scores indicate potential yield losses and may warrant management intervention.
|
||||
- **Calculation Method:** Statistical outlier detection (2σ method)
|
||||
- Identifies pixels with CI below: **Median CI − (2 × Standard Deviation)**
|
||||
- Calculates: **Gap Score = (Outlier Pixels / Total Pixels) × 100**
|
||||
- Example: If 2 of 100 pixels fall below threshold → Gap Score = 2%
|
||||
- **Score Ranges & Interpretation:**
|
||||
- **0–10%:** Minimal gaps (excellent establishment, healthy field)
|
||||
- **10–25%:** Moderate gaps (monitor for expansion, coordinate with agronomy)
|
||||
- **≥ 25%:** Significant gaps (consider targeted replanting or rehabilitation)
|
||||
- **Why it matters:** Gap scores reveal areas of poor establishment that may indicate early growth problems or harvest-related residue issues. Lower is better (0–3% is typical for healthy fields).
|
||||
|
||||
2. **Overview Map: Growth on Farm:**
|
||||
Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting.
|
||||
|
|
|
|||
|
|
@ -1165,4 +1165,33 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
|
|||
})
|
||||
}
|
||||
|
||||
#' Normalize field_details_table column structure
|
||||
#'
|
||||
#' Standardizes column names and ensures all expected KPI columns exist.
|
||||
#' Handles Field → Field_id rename and injects missing columns as NA.
|
||||
#'
|
||||
#' @param field_details_table data.frame to normalize
|
||||
#' @return data.frame with standardized column structure
|
||||
normalize_field_details_columns <- function(field_details_table) {
|
||||
if (is.null(field_details_table) || nrow(field_details_table) == 0) {
|
||||
return(field_details_table)
|
||||
}
|
||||
|
||||
# Rename Field → Field_id if needed
|
||||
if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
|
||||
field_details_table <- field_details_table %>%
|
||||
dplyr::rename(Field_id = Field)
|
||||
}
|
||||
|
||||
# Ensure all expected KPI columns exist; add as NA if missing
|
||||
expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score",
|
||||
"Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation",
|
||||
"Decline_Severity", "Patchiness_Risk")
|
||||
for (col in expected_cols) {
|
||||
if (!col %in% names(field_details_table)) {
|
||||
field_details_table[[col]] <- NA
|
||||
}
|
||||
}
|
||||
|
||||
return(field_details_table)
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue