Refactor patchiness KPI calculation to combine Gini Coefficient and Moran's I for comprehensive field heterogeneity assessment; update report to reflect new metrics and interpretations.

This commit is contained in:
Timon 2026-02-18 10:09:10 +01:00
parent 1f677f2626
commit af5c53e084
2 changed files with 195 additions and 141 deletions

View file

@ -400,121 +400,143 @@ calculate_growth_decline_kpi <- function(ci_values_list) {
return(result)
}
#' KPI 5: Calculate weed presence indicator
#'
#' Detects field fragmentation/patchiness (potential weed/pest pressure)
#' Combines two complementary metrics for comprehensive heterogeneity assessment:
#' - Gini Coefficient: Distribution inequality of CI values (0=uniform, 1=unequal)
#' - Moran's I: Spatial autocorrelation (-1 to +1, indicates clustering vs dispersal)
#'
#' @param ci_pixels_by_field List of CI pixel arrays for each field
#' @param field_boundaries_sf SF object with field geometries
#' @param mosaic_dir Directory path to per-field mosaic files (for Moran's I)
#' @param week_file Week file pattern (for Moran's I calculation)
#' @param mean_ci_values Optional vector of mean CI values per field
#'
#' @return Data frame with fragmentation indicators
calculate_weed_presence_kpi <- function(ci_pixels_by_field) {
#' @return Data frame with gini_coefficient, morans_i, patchiness_risk, patchiness_interpretation
calculate_patchiness_kpi <- function(ci_pixels_by_field, field_boundaries_sf = NULL,
mosaic_dir = NULL, week_file = NULL, mean_ci_values = NULL) {
n_fields <- length(ci_pixels_by_field)
result <- data.frame(
field_idx = seq_len(length(ci_pixels_by_field)),
cv_value = NA_real_,
low_ci_percent = NA_real_,
fragmentation_index = NA_real_,
weed_pressure_risk = NA_character_,
field_idx = seq_len(n_fields),
gini_coefficient = NA_real_,
morans_i = NA_real_,
patchiness_risk = NA_character_,
patchiness_interpretation = NA_character_,
stringsAsFactors = FALSE
)
for (field_idx in seq_len(length(ci_pixels_by_field))) {
ci_pixels <- ci_pixels_by_field[[field_idx]]
# Determine if per-field structure available
is_per_field <- !is.null(mosaic_dir) && !is.null(week_file) && !is.null(field_boundaries_sf)
for (i in seq_len(n_fields)) {
ci_pixels <- ci_pixels_by_field[[i]]
if (is.null(ci_pixels) || length(ci_pixels) == 0) {
result$weed_pressure_risk[field_idx] <- "No data"
result$patchiness_risk[i] <- "No data"
result$patchiness_interpretation[i] <- "No data"
next
}
ci_pixels <- ci_pixels[!is.na(ci_pixels)]
if (length(ci_pixels) == 0) {
result$weed_pressure_risk[field_idx] <- "No data"
result$patchiness_risk[i] <- "No data"
result$patchiness_interpretation[i] <- "No data"
next
}
cv_val <- calculate_cv(ci_pixels)
low_ci_pct <- sum(ci_pixels < 1.5) / length(ci_pixels) * 100
fragmentation <- cv_val * low_ci_pct / 100
result$cv_value[field_idx] <- cv_val
result$low_ci_percent[field_idx] <- round(low_ci_pct, 2)
result$fragmentation_index[field_idx] <- round(fragmentation, 3)
if (is.na(fragmentation)) {
result$weed_pressure_risk[field_idx] <- "No data"
} else if (fragmentation > 0.15) {
result$weed_pressure_risk[field_idx] <- "High"
} else if (fragmentation > 0.08) {
result$weed_pressure_risk[field_idx] <- "Medium"
} else if (fragmentation > 0.04) {
result$weed_pressure_risk[field_idx] <- "Low"
} else {
result$weed_pressure_risk[field_idx] <- "Minimal"
# =========================================
# METRIC 1: Calculate Gini Coefficient
# =========================================
gini <- NA_real_
if (length(ci_pixels) > 1) {
ci_sorted <- sort(ci_pixels)
n <- length(ci_sorted)
numerator <- 2 * sum(seq_len(n) * ci_sorted)
denominator <- n * sum(ci_sorted)
gini <- (numerator / denominator) - (n + 1) / n
gini <- max(0, min(1, gini)) # Clamp to 0-1
}
}
return(result)
}
#' KPI 5: Calculate field patchiness (combines fragmentation into Gini-like metric + risk)
#'
#' @param weed_kpi_result Data frame from calculate_weed_presence_kpi()
#' @param mean_ci_values Optional vector of mean CI values per field
#'
#' @return Data frame with patchiness indicators (gini_coefficient, patchiness_risk, interpretation)
calculate_patchiness_kpi <- function(weed_kpi_result, ci_pixels_by_field = NULL, mean_ci_values = NULL) {
result <- weed_kpi_result %>%
mutate(
# Calculate Gini coefficient from CI pixels (proper calculation)
gini_coefficient = NA_real_,
mean_ci = if (!is.null(mean_ci_values)) mean_ci_values[field_idx] else NA_real_,
# Map weed_pressure_risk to patchiness_risk
patchiness_risk = weed_pressure_risk,
# Create interpretation based on gini and risk
patchiness_interpretation = case_when(
is.na(gini_coefficient) ~ "No data",
gini_coefficient < 0.2 & patchiness_risk %in% c("Low", "Minimal") ~ "Uniform growth",
gini_coefficient < 0.4 & patchiness_risk %in% c("Low", "Medium") ~ "Moderate patchiness",
gini_coefficient >= 0.4 & patchiness_risk %in% c("High", "Medium") ~ "High patchiness",
TRUE ~ "Mixed heterogeneity"
)
) %>%
select(field_idx, gini_coefficient, mean_ci,
patchiness_interpretation, patchiness_risk)
# Calculate actual Gini coefficients if CI pixels provided
if (!is.null(ci_pixels_by_field)) {
for (i in seq_len(nrow(result))) {
ci_pixels <- ci_pixels_by_field[[i]]
result$gini_coefficient[i] <- gini
# =========================================
# METRIC 2: Calculate Moran's I (spatial clustering)
# =========================================
morans_i <- NA_real_
if (is_per_field) {
field_name <- field_boundaries_sf$field[i]
field_mosaic_path <- file.path(mosaic_dir, field_name, week_file)
if (!is.null(ci_pixels) && length(ci_pixels) > 0) {
ci_pixels <- ci_pixels[!is.na(ci_pixels)]
if (length(ci_pixels) > 1) {
# Calculate Gini coefficient
# Formula: Gini = (2 * sum(i * x_i)) / (n * sum(x_i)) - (n+1)/n
# where x_i are sorted values
ci_sorted <- sort(ci_pixels)
n <- length(ci_sorted)
if (file.exists(field_mosaic_path)) {
tryCatch({
field_raster <- terra::rast(field_mosaic_path)[["CI"]]
single_field <- field_boundaries_sf[i, ]
morans_result <- calculate_spatial_autocorrelation(field_raster, single_field)
numerator <- 2 * sum(seq_len(n) * ci_sorted)
denominator <- n * sum(ci_sorted)
gini <- (numerator / denominator) - (n + 1) / n
# Clamp to 0-1 range (should be within this by formula but guard against numerical errors)
gini <- max(0, min(1, gini))
result$gini_coefficient[i] <- gini
# Update interpretation based on calculated Gini
result$patchiness_interpretation[i] <- dplyr::case_when(
gini < 0.2 ~ "Uniform growth",
gini < 0.4 & result$patchiness_risk[i] %in% c("Low", "Medium", "Minimal") ~ "Moderate patchiness",
gini >= 0.4 & result$patchiness_risk[i] %in% c("High", "Medium") ~ "High patchiness",
TRUE ~ "Mixed heterogeneity"
)
}
if (is.list(morans_result)) {
morans_i <- morans_result$morans_i
} else {
morans_i <- morans_result
}
}, error = function(e) {
safe_log(paste("Warning: Moran's I failed for field", field_name, ":", e$message), "WARNING")
})
}
}
result$morans_i[i] <- morans_i
# =========================================
# RISK DETERMINATION: Gini + Moran's I combination
# =========================================
# Logic:
# - High Gini (>0.3) + High Moran's I (>0.85) = High patchiness (localized clusters)
# - High Gini + Low Moran's I = Medium patchiness (scattered heterogeneity)
# - Low Gini (<0.15) = Minimal patchiness (uniform)
# - Moderate Gini = Low to Medium patchiness
if (is.na(gini)) {
result$patchiness_risk[i] <- "No data"
} else if (gini < 0.15) {
result$patchiness_risk[i] <- "Minimal"
} else if (gini < 0.30) {
# Low-to-moderate Gini
if (!is.na(morans_i) && morans_i > 0.85) {
result$patchiness_risk[i] <- "Medium" # Some clustering
} else {
result$patchiness_risk[i] <- "Low"
}
} else if (gini < 0.50) {
# High Gini
if (!is.na(morans_i) && morans_i > 0.85) {
result$patchiness_risk[i] <- "High" # Localized problem clusters
} else {
result$patchiness_risk[i] <- "Medium" # Scattered issues
}
} else {
# Very high Gini (>0.5)
result$patchiness_risk[i] <- "High"
}
# =========================================
# INTERPRETATION: Combined Gini + Moran's I narrative
# =========================================
result$patchiness_interpretation[i] <- dplyr::case_when(
is.na(gini) ~ "No data",
gini < 0.15 & (is.na(morans_i) | morans_i < 0.75) ~
"Excellent uniformity - minimal patchiness",
gini < 0.30 & (is.na(morans_i) | morans_i < 0.75) ~
"Good uniformity - low patchiness",
gini < 0.30 & !is.na(morans_i) & morans_i > 0.85 ~
"Moderate uniformity with localized clustering",
gini < 0.50 & (is.na(morans_i) | morans_i < 0.75) ~
"Poor uniformity - scattered heterogeneity",
gini < 0.50 & !is.na(morans_i) & morans_i > 0.85 ~
"Poor uniformity with clustered problem areas",
gini >= 0.50 ~
"Severe heterogeneity - requires field investigation",
TRUE ~ "Mixed heterogeneity"
)
}
return(result)
@ -536,13 +558,6 @@ create_summary_tables <- function(all_kpis) {
uniformity = all_kpis$uniformity %>%
select(field_idx, cv_value, uniformity_category, interpretation),
spatial_clustering = if (!is.null(all_kpis$spatial_clustering) && nrow(all_kpis$spatial_clustering) > 0) {
all_kpis$spatial_clustering %>%
select(field_idx, morans_i)
} else {
NULL
},
area_change = all_kpis$area_change %>%
select(field_idx, mean_ci_pct_change, interpretation),
@ -553,7 +568,7 @@ create_summary_tables <- function(all_kpis) {
select(field_idx, four_week_trend, trend_interpretation, decline_severity),
patchiness = all_kpis$patchiness %>%
select(field_idx, gini_coefficient, patchiness_interpretation, patchiness_risk),
select(field_idx, gini_coefficient, morans_i, patchiness_interpretation, patchiness_risk),
gap_filling = if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) {
all_kpis$gap_filling %>%
@ -621,28 +636,19 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee
)
# ============================================
# GROUP 3: FIELD HETEROGENEITY/PATCHINESS (KPI 5 + Spatial Clustering)
# GROUP 3: FIELD HETEROGENEITY/PATCHINESS (KPI 5)
# ============================================
# KPI 5: Field Patchiness
# KPI 5: Field Patchiness (Gini + Moran's I combination)
result <- result %>%
left_join(
all_kpis$patchiness %>%
select(field_idx, Gini_Coefficient = gini_coefficient, Mean_CI = mean_ci,
select(field_idx, Gini_Coefficient = gini_coefficient,
Morans_I = morans_i,
Patchiness_Interpretation = patchiness_interpretation,
Patchiness_Risk = patchiness_risk),
by = "field_idx"
)
# Moran's I (spatial clustering - used in patchiness calculation)
if (!is.null(all_kpis$spatial_clustering) && nrow(all_kpis$spatial_clustering) > 0) {
result <- result %>%
left_join(
all_kpis$spatial_clustering %>%
select(field_idx, Morans_I = morans_i),
by = "field_idx"
)
}
# ============================================
# GROUP 4: YIELD FORECAST (KPI 3)
# ============================================
@ -914,14 +920,14 @@ calculate_all_field_analysis_agronomic_support <- function(
)
message("Calculating KPI 5: Field Patchiness...")
weed_kpi <- calculate_weed_presence_kpi(ci_pixels_by_field)
# Convert weed metrics to patchiness metrics (Gini-like + risk combination)
mean_ci_values <- current_stats$Mean_CI
if (is.null(mean_ci_values) || length(mean_ci_values) != nrow(field_boundaries_sf)) {
mean_ci_values <- rep(NA_real_, nrow(field_boundaries_sf))
}
patchiness_kpi <- calculate_patchiness_kpi(weed_kpi, ci_pixels_by_field, mean_ci_values)
# Calculate patchiness using both Gini coefficient and Moran's I spatial clustering
patchiness_kpi <- calculate_patchiness_kpi(
ci_pixels_by_field,
field_boundaries_sf = field_boundaries_sf,
mosaic_dir = current_mosaic_dir,
week_file = week_file,
mean_ci_values = current_stats$Mean_CI
)
message("Calculating KPI 6: Gap Filling...")
# Build list of per-field files for this week
@ -968,7 +974,6 @@ calculate_all_field_analysis_agronomic_support <- function(
tch_forecasted = tch_kpi,
growth_decline = growth_decline_kpi,
patchiness = patchiness_kpi,
spatial_clustering = uniformity_kpi %>% select(field_idx, morans_i),
gap_filling = gap_filling_kpi
)
@ -985,10 +990,6 @@ calculate_all_field_analysis_agronomic_support <- function(
distinct(field_idx, .keep_all = TRUE)
all_kpis$patchiness <- all_kpis$patchiness %>%
distinct(field_idx, .keep_all = TRUE)
if (!is.null(all_kpis$spatial_clustering)) {
all_kpis$spatial_clustering <- all_kpis$spatial_clustering %>%
distinct(field_idx, .keep_all = TRUE)
}
all_kpis$gap_filling <- all_kpis$gap_filling %>%
distinct(field_idx, .keep_all = TRUE)

View file

@ -1622,25 +1622,79 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
### What You'll Find in This Report:
1. **Key Performance Indicators (KPIs):**
The report provides a farm-wide analysis based on the Chlorophyll Index (CI) changes. KPIs are calculated field by field and summarized in tables. The current KPIs included are:
The report provides a farm-wide analysis based on weekly Chlorophyll Index (CI) measurements. Five comprehensive KPIs are calculated field by field to assess crop health:
- **KPI 1: Field Uniformity** — Measures how consistently crop is developing across the field
- **Metric:** Coefficient of Variation (CV) of CI pixel values
- **Calculation:** CV = (Standard Deviation of CI) / (Mean CI)
- **Categories:**
- **Excellent:** CV < 0.08 (very uniform growth, minimal intervention needed)
- **Good:** CV < 0.15 (acceptable uniformity, routine monitoring)
- **Acceptable:** CV < 0.25 (moderate variation, monitor irrigation/fertility)
- **Poor:** CV < 0.4 (high variation, investigate management issues)
- **Very poor:** CV ≥ 0.4 (severe variation, immediate field scout required)
- **Why it matters:** Uniform fields are easier to manage and typically produce better yields. Uneven growth suggests irrigation problems, fertility gaps, pests, or disease.
- **Field Uniformity:** Assesses the consistency of crop growth within each field using the coefficient of variation (CV) of CI values. Uniformity levels are classified as:
- **Excellent:** CV < 0.08 (very uniform growth)
- **Good:** CV < 0.15 (acceptable uniformity)
- **Moderate:** CV < 0.25 (some variation present)
- **Poor:** CV ≥ 0.25 (high variation - investigate irrigation, fertility, or pests)
- **KPI 2: Area Change (Weekly Growth)** — Tracks week-over-week CI changes to detect rapid improvements or declines
- **Calculation:** ((Current Mean CI Previous Mean CI) / Previous Mean CI) × 100
- **Categories:**
- **Rapid growth:** > 15% increase (excellent weekly progress)
- **Positive growth:** 515% increase (steady improvement)
- **Stable:** 5% to +5% (field maintained, no significant change)
- **Declining:** 5% to 15% (slow decline, warrant closer monitoring)
- **Rapid decline:** < 15% (alert: urgent issue requiring investigation)
- **Why it matters:** Week-to-week changes reveal developing problems early, enabling timely intervention.
- **Area Change:** Summarizes the proportion of field area that is improving, stable, or declining week-over-week, based on CI changes. Helps identify fields requiring immediate attention.
- **KPI 3: TCH Forecasted (Yield Prediction)** — Predicts final harvest tonnage for mature fields
- **Applies to:** Fields ≥ 240 days old (mature stage)
- **Method:** Random Forest machine learning model trained on historical harvest data and CI trajectories
- **Inputs:** Days after harvest (DAH) and CI growth rate (CI_per_day)
- **Output:** Predicted tons of cane per hectare (t/ha)
- **Why it matters:** Helps plan harvest timing, mill throughput, and revenue forecasting for mature crops.
- **TCH Forecasted:** Provides yield predictions (tons cane per hectare) for mature fields (typically over 240 days old), using a machine learning model trained on historical CI and yield data. Helps plan harvest timing and logistics.
- **KPI 4: Growth Decline (4-Week Trend)** — Assesses short-term growth trajectory using linear regression
- **Calculation:** Linear slope of CI values over the previous 4 weeks
- **Categories:**
- **Strong growth:** Slope > 0.1 CI units/week (excellent sustained progress)
- **Weak growth:** Slope 00.1 (slow improvement, monitor closely)
- **Slight decline:** Slope 0.10 (low severity, non-urgent observation)
- **Moderate decline:** Slope 0.3 to 0.1 (medium severity, scouting recommended)
- **Strong decline:** Slope < 0.3 (high severity, immediate field investigation required)
- **Why it matters:** Trend analysis reveals whether crop is accelerating, stalling, or stressed over time.
- **Field Patchiness Score:** Measures field heterogeneity using the Gini coefficient, detecting spatial variation in crop health. High patchiness (Gini > 0.12) may indicate irrigation, pest, or fertility issues requiring targeted scouting:
- **Low:** Gini < 0.08 (excellent uniformity, minimal intervention needed)
- **Medium:** Gini 0.080.12 (acceptable variation, routine monitoring)
- **High:** Gini > 0.12 (poor uniformity, recommend field scouting)
- **Note:** Young crops (< 3 months) naturally show higher patchiness as they establish; this decreases with canopy closure.
- **KPI 5: Field Patchiness (Heterogeneity)** — Combines two complementary spatial metrics for comprehensive heterogeneity assessment
- **Metric 1: Gini Coefficient** — Statistical measure of distribution inequality in CI pixel values
- **Formula:** (2 × Σ(i × sorted_CI)) / (n × Σ(sorted_CI)) (n+1)/n
- **Range:** 0 (perfectly uniform) to 1 (highly unequal)
- **Interpretation:** Low Gini (< 0.15) = good uniformity; High Gini (> 0.3) = significant heterogeneity
- **Metric 2: Moran's I** — Spatial autocorrelation indicating whether high/low areas are clustered or scattered
- **Range:** 1 (dispersed pattern) to +1 (strong clustering)
- **Thresholds:** Moran's I > 0.85 indicates clustered problem areas; < 0.75 suggests scattered issues
- **Risk Determination (Gini + Moran's I Combined):**
- **Minimal Risk:** Gini < 0.15 (excellent uniformity regardless of spatial pattern)
- **Low Risk:** Gini 0.150.30, Moran's I < 0.85 (moderate variation, scattered distribution)
- **Medium Risk:** Gini 0.150.30, Moran's I > 0.85 OR Gini 0.300.50, Moran's I < 0.85 (notable issues)
- **High Risk:** Gini > 0.30, Moran's I > 0.85 (severe heterogeneity with localized clusters—urgent scouting needed)
- **Why it matters:** High patchiness may indicate irrigation inefficiencies, localized pest pressure, fertility variation, or disease spread. Combined Gini + Moran's I reveals not just *how much* variation exists, but also *how it's distributed* spatially. CI reflects chlorophyll = nitrogen status + plant health + vigor. High CV/Patchiness often signals N gaps, water stress, pests (borers), or ratoon decline.
- **Uniformity vs. Patchiness — What's the Difference?**
Both KPIs measure variation, but they answer different questions and drive different management actions:
- **Uniformity (CV-based)** answers: "*Is* growth even across the field?" — it detects whether a problem exists but not where.
- **Patchiness (Gini + Moran's I)** answers: "*Where* are problems and how are they arranged?" — it reveals the spatial pattern.
**Practical example:** Two fields both score "Poor" on Uniformity (CV = 0.25). However:
- Field A has scattered low-CI patches (Moran's I = 0.6) → suggests *random* stress (disease pressure, uneven irrigation)
- Field B has clustered low-CI in one corner (Moran's I = 0.95) → suggests *localized* problem (drainage, compaction, pest hotspot)
Your scouting and remediation strategy should differ: Field A might need systemic irrigation adjustment or disease management; Field B might need soil remediation in the affected corner. **Patchiness tells you *where to focus your effort*.**
- **Gap Score:** Indicates the proportion of a field with low CI values (lowest 25% of the distribution), highlighting areas with poor crop establishment or gaps that may need replanting.
- **KPI 6: Gap Score (Establishment Quality)** — Quantifies field gaps and areas of poor crop establishment
- **Metric:** Percentage of field area with very low CI values (lowest 25% of pixel distribution)
- **Levels:**
- **Minimal:** < 10% of field (good establishment, limited replanting needed)
- **Moderate:** 1025% of field (monitor gap expansion, coordinate with agronomy)
- **Significant:** ≥ 25% of field (consider targeted replanting or rehabilitation)
- **Why it matters:** High gap scores indicate potential yield losses and may warrant management intervention.
2. **Overview Map: Growth on Farm:**
Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting.
@ -1659,7 +1713,6 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
---
### Historical Benchmark Lines
The CI time series graphs include historical benchmark lines for the 10th, 50th, and 90th percentiles of CI values across all fields and seasons.