Refactor patchiness KPI calculation to combine Gini Coefficient and Moran's I for comprehensive field heterogeneity assessment; update report to reflect new metrics and interpretations.

This commit is contained in:
Timon 2026-02-18 10:09:10 +01:00
parent 1f677f2626
commit af5c53e084
2 changed files with 195 additions and 141 deletions

View file

@ -400,121 +400,143 @@ calculate_growth_decline_kpi <- function(ci_values_list) {
return(result) return(result)
} }
#' KPI 5: Calculate weed presence indicator
#' #'
#' Detects field fragmentation/patchiness (potential weed/pest pressure) #' Combines two complementary metrics for comprehensive heterogeneity assessment:
#' - Gini Coefficient: Distribution inequality of CI values (0=uniform, 1=unequal)
#' - Moran's I: Spatial autocorrelation (-1 to +1, indicates clustering vs dispersal)
#' #'
#' @param ci_pixels_by_field List of CI pixel arrays for each field #' @param ci_pixels_by_field List of CI pixel arrays for each field
#' @param field_boundaries_sf SF object with field geometries
#' @param mosaic_dir Directory path to per-field mosaic files (for Moran's I)
#' @param week_file Week file pattern (for Moran's I calculation)
#' @param mean_ci_values Optional vector of mean CI values per field
#' #'
#' @return Data frame with fragmentation indicators #' @return Data frame with gini_coefficient, morans_i, patchiness_risk, patchiness_interpretation
calculate_weed_presence_kpi <- function(ci_pixels_by_field) { calculate_patchiness_kpi <- function(ci_pixels_by_field, field_boundaries_sf = NULL,
mosaic_dir = NULL, week_file = NULL, mean_ci_values = NULL) {
n_fields <- length(ci_pixels_by_field)
result <- data.frame( result <- data.frame(
field_idx = seq_len(length(ci_pixels_by_field)), field_idx = seq_len(n_fields),
cv_value = NA_real_, gini_coefficient = NA_real_,
low_ci_percent = NA_real_, morans_i = NA_real_,
fragmentation_index = NA_real_, patchiness_risk = NA_character_,
weed_pressure_risk = NA_character_, patchiness_interpretation = NA_character_,
stringsAsFactors = FALSE stringsAsFactors = FALSE
) )
for (field_idx in seq_len(length(ci_pixels_by_field))) { # Determine if per-field structure available
ci_pixels <- ci_pixels_by_field[[field_idx]] is_per_field <- !is.null(mosaic_dir) && !is.null(week_file) && !is.null(field_boundaries_sf)
for (i in seq_len(n_fields)) {
ci_pixels <- ci_pixels_by_field[[i]]
if (is.null(ci_pixels) || length(ci_pixels) == 0) { if (is.null(ci_pixels) || length(ci_pixels) == 0) {
result$weed_pressure_risk[field_idx] <- "No data" result$patchiness_risk[i] <- "No data"
result$patchiness_interpretation[i] <- "No data"
next next
} }
ci_pixels <- ci_pixels[!is.na(ci_pixels)] ci_pixels <- ci_pixels[!is.na(ci_pixels)]
if (length(ci_pixels) == 0) { if (length(ci_pixels) == 0) {
result$weed_pressure_risk[field_idx] <- "No data" result$patchiness_risk[i] <- "No data"
result$patchiness_interpretation[i] <- "No data"
next next
} }
cv_val <- calculate_cv(ci_pixels) # =========================================
low_ci_pct <- sum(ci_pixels < 1.5) / length(ci_pixels) * 100 # METRIC 1: Calculate Gini Coefficient
fragmentation <- cv_val * low_ci_pct / 100 # =========================================
gini <- NA_real_
result$cv_value[field_idx] <- cv_val if (length(ci_pixels) > 1) {
result$low_ci_percent[field_idx] <- round(low_ci_pct, 2) ci_sorted <- sort(ci_pixels)
result$fragmentation_index[field_idx] <- round(fragmentation, 3) n <- length(ci_sorted)
numerator <- 2 * sum(seq_len(n) * ci_sorted)
if (is.na(fragmentation)) { denominator <- n * sum(ci_sorted)
result$weed_pressure_risk[field_idx] <- "No data" gini <- (numerator / denominator) - (n + 1) / n
} else if (fragmentation > 0.15) { gini <- max(0, min(1, gini)) # Clamp to 0-1
result$weed_pressure_risk[field_idx] <- "High"
} else if (fragmentation > 0.08) {
result$weed_pressure_risk[field_idx] <- "Medium"
} else if (fragmentation > 0.04) {
result$weed_pressure_risk[field_idx] <- "Low"
} else {
result$weed_pressure_risk[field_idx] <- "Minimal"
} }
} result$gini_coefficient[i] <- gini
return(result) # =========================================
} # METRIC 2: Calculate Moran's I (spatial clustering)
# =========================================
#' KPI 5: Calculate field patchiness (combines fragmentation into Gini-like metric + risk) morans_i <- NA_real_
#' if (is_per_field) {
#' @param weed_kpi_result Data frame from calculate_weed_presence_kpi() field_name <- field_boundaries_sf$field[i]
#' @param mean_ci_values Optional vector of mean CI values per field field_mosaic_path <- file.path(mosaic_dir, field_name, week_file)
#'
#' @return Data frame with patchiness indicators (gini_coefficient, patchiness_risk, interpretation)
calculate_patchiness_kpi <- function(weed_kpi_result, ci_pixels_by_field = NULL, mean_ci_values = NULL) {
result <- weed_kpi_result %>%
mutate(
# Calculate Gini coefficient from CI pixels (proper calculation)
gini_coefficient = NA_real_,
mean_ci = if (!is.null(mean_ci_values)) mean_ci_values[field_idx] else NA_real_,
# Map weed_pressure_risk to patchiness_risk
patchiness_risk = weed_pressure_risk,
# Create interpretation based on gini and risk
patchiness_interpretation = case_when(
is.na(gini_coefficient) ~ "No data",
gini_coefficient < 0.2 & patchiness_risk %in% c("Low", "Minimal") ~ "Uniform growth",
gini_coefficient < 0.4 & patchiness_risk %in% c("Low", "Medium") ~ "Moderate patchiness",
gini_coefficient >= 0.4 & patchiness_risk %in% c("High", "Medium") ~ "High patchiness",
TRUE ~ "Mixed heterogeneity"
)
) %>%
select(field_idx, gini_coefficient, mean_ci,
patchiness_interpretation, patchiness_risk)
# Calculate actual Gini coefficients if CI pixels provided
if (!is.null(ci_pixels_by_field)) {
for (i in seq_len(nrow(result))) {
ci_pixels <- ci_pixels_by_field[[i]]
if (!is.null(ci_pixels) && length(ci_pixels) > 0) { if (file.exists(field_mosaic_path)) {
ci_pixels <- ci_pixels[!is.na(ci_pixels)] tryCatch({
field_raster <- terra::rast(field_mosaic_path)[["CI"]]
if (length(ci_pixels) > 1) { single_field <- field_boundaries_sf[i, ]
# Calculate Gini coefficient morans_result <- calculate_spatial_autocorrelation(field_raster, single_field)
# Formula: Gini = (2 * sum(i * x_i)) / (n * sum(x_i)) - (n+1)/n
# where x_i are sorted values
ci_sorted <- sort(ci_pixels)
n <- length(ci_sorted)
numerator <- 2 * sum(seq_len(n) * ci_sorted) if (is.list(morans_result)) {
denominator <- n * sum(ci_sorted) morans_i <- morans_result$morans_i
} else {
gini <- (numerator / denominator) - (n + 1) / n morans_i <- morans_result
# Clamp to 0-1 range (should be within this by formula but guard against numerical errors) }
gini <- max(0, min(1, gini)) }, error = function(e) {
safe_log(paste("Warning: Moran's I failed for field", field_name, ":", e$message), "WARNING")
result$gini_coefficient[i] <- gini })
# Update interpretation based on calculated Gini
result$patchiness_interpretation[i] <- dplyr::case_when(
gini < 0.2 ~ "Uniform growth",
gini < 0.4 & result$patchiness_risk[i] %in% c("Low", "Medium", "Minimal") ~ "Moderate patchiness",
gini >= 0.4 & result$patchiness_risk[i] %in% c("High", "Medium") ~ "High patchiness",
TRUE ~ "Mixed heterogeneity"
)
}
} }
} }
result$morans_i[i] <- morans_i
# =========================================
# RISK DETERMINATION: Gini + Moran's I combination
# =========================================
# Logic:
# - High Gini (>0.3) + High Moran's I (>0.85) = High patchiness (localized clusters)
# - High Gini + Low Moran's I = Medium patchiness (scattered heterogeneity)
# - Low Gini (<0.15) = Minimal patchiness (uniform)
# - Moderate Gini = Low to Medium patchiness
if (is.na(gini)) {
result$patchiness_risk[i] <- "No data"
} else if (gini < 0.15) {
result$patchiness_risk[i] <- "Minimal"
} else if (gini < 0.30) {
# Low-to-moderate Gini
if (!is.na(morans_i) && morans_i > 0.85) {
result$patchiness_risk[i] <- "Medium" # Some clustering
} else {
result$patchiness_risk[i] <- "Low"
}
} else if (gini < 0.50) {
# High Gini
if (!is.na(morans_i) && morans_i > 0.85) {
result$patchiness_risk[i] <- "High" # Localized problem clusters
} else {
result$patchiness_risk[i] <- "Medium" # Scattered issues
}
} else {
# Very high Gini (>0.5)
result$patchiness_risk[i] <- "High"
}
# =========================================
# INTERPRETATION: Combined Gini + Moran's I narrative
# =========================================
result$patchiness_interpretation[i] <- dplyr::case_when(
is.na(gini) ~ "No data",
gini < 0.15 & (is.na(morans_i) | morans_i < 0.75) ~
"Excellent uniformity - minimal patchiness",
gini < 0.30 & (is.na(morans_i) | morans_i < 0.75) ~
"Good uniformity - low patchiness",
gini < 0.30 & !is.na(morans_i) & morans_i > 0.85 ~
"Moderate uniformity with localized clustering",
gini < 0.50 & (is.na(morans_i) | morans_i < 0.75) ~
"Poor uniformity - scattered heterogeneity",
gini < 0.50 & !is.na(morans_i) & morans_i > 0.85 ~
"Poor uniformity with clustered problem areas",
gini >= 0.50 ~
"Severe heterogeneity - requires field investigation",
TRUE ~ "Mixed heterogeneity"
)
} }
return(result) return(result)
@ -536,13 +558,6 @@ create_summary_tables <- function(all_kpis) {
uniformity = all_kpis$uniformity %>% uniformity = all_kpis$uniformity %>%
select(field_idx, cv_value, uniformity_category, interpretation), select(field_idx, cv_value, uniformity_category, interpretation),
spatial_clustering = if (!is.null(all_kpis$spatial_clustering) && nrow(all_kpis$spatial_clustering) > 0) {
all_kpis$spatial_clustering %>%
select(field_idx, morans_i)
} else {
NULL
},
area_change = all_kpis$area_change %>% area_change = all_kpis$area_change %>%
select(field_idx, mean_ci_pct_change, interpretation), select(field_idx, mean_ci_pct_change, interpretation),
@ -553,7 +568,7 @@ create_summary_tables <- function(all_kpis) {
select(field_idx, four_week_trend, trend_interpretation, decline_severity), select(field_idx, four_week_trend, trend_interpretation, decline_severity),
patchiness = all_kpis$patchiness %>% patchiness = all_kpis$patchiness %>%
select(field_idx, gini_coefficient, patchiness_interpretation, patchiness_risk), select(field_idx, gini_coefficient, morans_i, patchiness_interpretation, patchiness_risk),
gap_filling = if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) { gap_filling = if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) {
all_kpis$gap_filling %>% all_kpis$gap_filling %>%
@ -621,28 +636,19 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee
) )
# ============================================ # ============================================
# GROUP 3: FIELD HETEROGENEITY/PATCHINESS (KPI 5 + Spatial Clustering) # GROUP 3: FIELD HETEROGENEITY/PATCHINESS (KPI 5)
# ============================================ # ============================================
# KPI 5: Field Patchiness # KPI 5: Field Patchiness (Gini + Moran's I combination)
result <- result %>% result <- result %>%
left_join( left_join(
all_kpis$patchiness %>% all_kpis$patchiness %>%
select(field_idx, Gini_Coefficient = gini_coefficient, Mean_CI = mean_ci, select(field_idx, Gini_Coefficient = gini_coefficient,
Morans_I = morans_i,
Patchiness_Interpretation = patchiness_interpretation, Patchiness_Interpretation = patchiness_interpretation,
Patchiness_Risk = patchiness_risk), Patchiness_Risk = patchiness_risk),
by = "field_idx" by = "field_idx"
) )
# Moran's I (spatial clustering - used in patchiness calculation)
if (!is.null(all_kpis$spatial_clustering) && nrow(all_kpis$spatial_clustering) > 0) {
result <- result %>%
left_join(
all_kpis$spatial_clustering %>%
select(field_idx, Morans_I = morans_i),
by = "field_idx"
)
}
# ============================================ # ============================================
# GROUP 4: YIELD FORECAST (KPI 3) # GROUP 4: YIELD FORECAST (KPI 3)
# ============================================ # ============================================
@ -914,14 +920,14 @@ calculate_all_field_analysis_agronomic_support <- function(
) )
message("Calculating KPI 5: Field Patchiness...") message("Calculating KPI 5: Field Patchiness...")
weed_kpi <- calculate_weed_presence_kpi(ci_pixels_by_field) # Calculate patchiness using both Gini coefficient and Moran's I spatial clustering
patchiness_kpi <- calculate_patchiness_kpi(
# Convert weed metrics to patchiness metrics (Gini-like + risk combination) ci_pixels_by_field,
mean_ci_values <- current_stats$Mean_CI field_boundaries_sf = field_boundaries_sf,
if (is.null(mean_ci_values) || length(mean_ci_values) != nrow(field_boundaries_sf)) { mosaic_dir = current_mosaic_dir,
mean_ci_values <- rep(NA_real_, nrow(field_boundaries_sf)) week_file = week_file,
} mean_ci_values = current_stats$Mean_CI
patchiness_kpi <- calculate_patchiness_kpi(weed_kpi, ci_pixels_by_field, mean_ci_values) )
message("Calculating KPI 6: Gap Filling...") message("Calculating KPI 6: Gap Filling...")
# Build list of per-field files for this week # Build list of per-field files for this week
@ -968,7 +974,6 @@ calculate_all_field_analysis_agronomic_support <- function(
tch_forecasted = tch_kpi, tch_forecasted = tch_kpi,
growth_decline = growth_decline_kpi, growth_decline = growth_decline_kpi,
patchiness = patchiness_kpi, patchiness = patchiness_kpi,
spatial_clustering = uniformity_kpi %>% select(field_idx, morans_i),
gap_filling = gap_filling_kpi gap_filling = gap_filling_kpi
) )
@ -985,10 +990,6 @@ calculate_all_field_analysis_agronomic_support <- function(
distinct(field_idx, .keep_all = TRUE) distinct(field_idx, .keep_all = TRUE)
all_kpis$patchiness <- all_kpis$patchiness %>% all_kpis$patchiness <- all_kpis$patchiness %>%
distinct(field_idx, .keep_all = TRUE) distinct(field_idx, .keep_all = TRUE)
if (!is.null(all_kpis$spatial_clustering)) {
all_kpis$spatial_clustering <- all_kpis$spatial_clustering %>%
distinct(field_idx, .keep_all = TRUE)
}
all_kpis$gap_filling <- all_kpis$gap_filling %>% all_kpis$gap_filling <- all_kpis$gap_filling %>%
distinct(field_idx, .keep_all = TRUE) distinct(field_idx, .keep_all = TRUE)

View file

@ -1622,25 +1622,79 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
### What You'll Find in This Report: ### What You'll Find in This Report:
1. **Key Performance Indicators (KPIs):** 1. **Key Performance Indicators (KPIs):**
The report provides a farm-wide analysis based on the Chlorophyll Index (CI) changes. KPIs are calculated field by field and summarized in tables. The current KPIs included are: The report provides a farm-wide analysis based on weekly Chlorophyll Index (CI) measurements. Five comprehensive KPIs are calculated field by field to assess crop health:
- **KPI 1: Field Uniformity** — Measures how consistently crop is developing across the field
- **Metric:** Coefficient of Variation (CV) of CI pixel values
- **Calculation:** CV = (Standard Deviation of CI) / (Mean CI)
- **Categories:**
- **Excellent:** CV < 0.08 (very uniform growth, minimal intervention needed)
- **Good:** CV < 0.15 (acceptable uniformity, routine monitoring)
- **Acceptable:** CV < 0.25 (moderate variation, monitor irrigation/fertility)
- **Poor:** CV < 0.4 (high variation, investigate management issues)
- **Very poor:** CV ≥ 0.4 (severe variation, immediate field scout required)
- **Why it matters:** Uniform fields are easier to manage and typically produce better yields. Uneven growth suggests irrigation problems, fertility gaps, pests, or disease.
- **Field Uniformity:** Assesses the consistency of crop growth within each field using the coefficient of variation (CV) of CI values. Uniformity levels are classified as: - **KPI 2: Area Change (Weekly Growth)** — Tracks week-over-week CI changes to detect rapid improvements or declines
- **Excellent:** CV < 0.08 (very uniform growth) - **Calculation:** ((Current Mean CI Previous Mean CI) / Previous Mean CI) × 100
- **Good:** CV < 0.15 (acceptable uniformity) - **Categories:**
- **Moderate:** CV < 0.25 (some variation present) - **Rapid growth:** > 15% increase (excellent weekly progress)
- **Poor:** CV ≥ 0.25 (high variation - investigate irrigation, fertility, or pests) - **Positive growth:** 515% increase (steady improvement)
- **Stable:** 5% to +5% (field maintained, no significant change)
- **Declining:** 5% to 15% (slow decline, warrant closer monitoring)
- **Rapid decline:** < 15% (alert: urgent issue requiring investigation)
- **Why it matters:** Week-to-week changes reveal developing problems early, enabling timely intervention.
- **Area Change:** Summarizes the proportion of field area that is improving, stable, or declining week-over-week, based on CI changes. Helps identify fields requiring immediate attention. - **KPI 3: TCH Forecasted (Yield Prediction)** — Predicts final harvest tonnage for mature fields
- **Applies to:** Fields ≥ 240 days old (mature stage)
- **Method:** Random Forest machine learning model trained on historical harvest data and CI trajectories
- **Inputs:** Days after harvest (DAH) and CI growth rate (CI_per_day)
- **Output:** Predicted tons of cane per hectare (t/ha)
- **Why it matters:** Helps plan harvest timing, mill throughput, and revenue forecasting for mature crops.
- **TCH Forecasted:** Provides yield predictions (tons cane per hectare) for mature fields (typically over 240 days old), using a machine learning model trained on historical CI and yield data. Helps plan harvest timing and logistics. - **KPI 4: Growth Decline (4-Week Trend)** — Assesses short-term growth trajectory using linear regression
- **Calculation:** Linear slope of CI values over the previous 4 weeks
- **Categories:**
- **Strong growth:** Slope > 0.1 CI units/week (excellent sustained progress)
- **Weak growth:** Slope 00.1 (slow improvement, monitor closely)
- **Slight decline:** Slope 0.10 (low severity, non-urgent observation)
- **Moderate decline:** Slope 0.3 to 0.1 (medium severity, scouting recommended)
- **Strong decline:** Slope < 0.3 (high severity, immediate field investigation required)
- **Why it matters:** Trend analysis reveals whether crop is accelerating, stalling, or stressed over time.
- **Field Patchiness Score:** Measures field heterogeneity using the Gini coefficient, detecting spatial variation in crop health. High patchiness (Gini > 0.12) may indicate irrigation, pest, or fertility issues requiring targeted scouting: - **KPI 5: Field Patchiness (Heterogeneity)** — Combines two complementary spatial metrics for comprehensive heterogeneity assessment
- **Low:** Gini < 0.08 (excellent uniformity, minimal intervention needed) - **Metric 1: Gini Coefficient** — Statistical measure of distribution inequality in CI pixel values
- **Medium:** Gini 0.080.12 (acceptable variation, routine monitoring) - **Formula:** (2 × Σ(i × sorted_CI)) / (n × Σ(sorted_CI)) (n+1)/n
- **High:** Gini > 0.12 (poor uniformity, recommend field scouting) - **Range:** 0 (perfectly uniform) to 1 (highly unequal)
- **Note:** Young crops (< 3 months) naturally show higher patchiness as they establish; this decreases with canopy closure. - **Interpretation:** Low Gini (< 0.15) = good uniformity; High Gini (> 0.3) = significant heterogeneity
- **Metric 2: Moran's I** — Spatial autocorrelation indicating whether high/low areas are clustered or scattered
- **Range:** 1 (dispersed pattern) to +1 (strong clustering)
- **Thresholds:** Moran's I > 0.85 indicates clustered problem areas; < 0.75 suggests scattered issues
- **Risk Determination (Gini + Moran's I Combined):**
- **Minimal Risk:** Gini < 0.15 (excellent uniformity regardless of spatial pattern)
- **Low Risk:** Gini 0.150.30, Moran's I < 0.85 (moderate variation, scattered distribution)
- **Medium Risk:** Gini 0.150.30, Moran's I > 0.85 OR Gini 0.300.50, Moran's I < 0.85 (notable issues)
- **High Risk:** Gini > 0.30, Moran's I > 0.85 (severe heterogeneity with localized clusters—urgent scouting needed)
- **Why it matters:** High patchiness may indicate irrigation inefficiencies, localized pest pressure, fertility variation, or disease spread. Combined Gini + Moran's I reveals not just *how much* variation exists, but also *how it's distributed* spatially. CI reflects chlorophyll = nitrogen status + plant health + vigor. High CV/Patchiness often signals N gaps, water stress, pests (borers), or ratoon decline.
- **Uniformity vs. Patchiness — What's the Difference?**
Both KPIs measure variation, but they answer different questions and drive different management actions:
- **Uniformity (CV-based)** answers: "*Is* growth even across the field?" — it detects whether a problem exists but not where.
- **Patchiness (Gini + Moran's I)** answers: "*Where* are problems and how are they arranged?" — it reveals the spatial pattern.
**Practical example:** Two fields both score "Poor" on Uniformity (CV = 0.25). However:
- Field A has scattered low-CI patches (Moran's I = 0.6) → suggests *random* stress (disease pressure, uneven irrigation)
- Field B has clustered low-CI in one corner (Moran's I = 0.95) → suggests *localized* problem (drainage, compaction, pest hotspot)
Your scouting and remediation strategy should differ: Field A might need systemic irrigation adjustment or disease management; Field B might need soil remediation in the affected corner. **Patchiness tells you *where to focus your effort*.**
- **Gap Score:** Indicates the proportion of a field with low CI values (lowest 25% of the distribution), highlighting areas with poor crop establishment or gaps that may need replanting. - **KPI 6: Gap Score (Establishment Quality)** — Quantifies field gaps and areas of poor crop establishment
- **Metric:** Percentage of field area with very low CI values (lowest 25% of pixel distribution)
- **Levels:**
- **Minimal:** < 10% of field (good establishment, limited replanting needed)
- **Moderate:** 1025% of field (monitor gap expansion, coordinate with agronomy)
- **Significant:** ≥ 25% of field (consider targeted replanting or rehabilitation)
- **Why it matters:** High gap scores indicate potential yield losses and may warrant management intervention.
2. **Overview Map: Growth on Farm:** 2. **Overview Map: Growth on Farm:**
Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting. Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting.
@ -1659,7 +1713,6 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
--- ---
### Historical Benchmark Lines ### Historical Benchmark Lines
The CI time series graphs include historical benchmark lines for the 10th, 50th, and 90th percentiles of CI values across all fields and seasons. The CI time series graphs include historical benchmark lines for the 10th, 50th, and 90th percentiles of CI values across all fields and seasons.