Refactor patchiness KPI calculation to combine Gini Coefficient and Moran's I for comprehensive field heterogeneity assessment; update report to reflect new metrics and interpretations.

2026-02-18 10:09:10 +01:00 · 2026-02-18 10:09:10 +01:00 · af5c53e084
parent 1f677f2626
commit af5c53e084
2 changed files with 195 additions and 141 deletions
--- a/r_app/80_utils_agronomic_support.R
+++ b/r_app/80_utils_agronomic_support.R
@ -400,121 +400,143 @@ calculate_growth_decline_kpi <- function(ci_values_list) {
  return(result)
 }
-#' KPI 5: Calculate weed presence indicator
+
 #'
-#' Detects field fragmentation/patchiness (potential weed/pest pressure)
+#' Combines two complementary metrics for comprehensive heterogeneity assessment:
 #' - Gini Coefficient: Distribution inequality of CI values (0=uniform, 1=unequal)
 #' - Moran's I: Spatial autocorrelation (-1 to +1, indicates clustering vs dispersal)
 #'
 #' @param ci_pixels_by_field List of CI pixel arrays for each field
 #' @param field_boundaries_sf SF object with field geometries
 #' @param mosaic_dir Directory path to per-field mosaic files (for Moran's I)
 #' @param week_file Week file pattern (for Moran's I calculation)
 #' @param mean_ci_values Optional vector of mean CI values per field
 #'
-#' @return Data frame with fragmentation indicators
+#' @return Data frame with gini_coefficient, morans_i, patchiness_risk, patchiness_interpretation
-calculate_weed_presence_kpi <- function(ci_pixels_by_field) {
+calculate_patchiness_kpi <- function(ci_pixels_by_field, field_boundaries_sf = NULL, 
                                     mosaic_dir = NULL, week_file = NULL, mean_ci_values = NULL) {
  n_fields <- length(ci_pixels_by_field)
  result <- data.frame(
-    field_idx = seq_len(length(ci_pixels_by_field)),
+    field_idx = seq_len(n_fields),
-    cv_value = NA_real_,
+    gini_coefficient = NA_real_,
-    low_ci_percent = NA_real_,
+    morans_i = NA_real_,
-    fragmentation_index = NA_real_,
+    patchiness_risk = NA_character_,
-    weed_pressure_risk = NA_character_,
+    patchiness_interpretation = NA_character_,
    stringsAsFactors = FALSE
  )
-  for (field_idx in seq_len(length(ci_pixels_by_field))) {
+  # Determine if per-field structure available
-    ci_pixels <- ci_pixels_by_field[[field_idx]]
+  is_per_field <- !is.null(mosaic_dir) && !is.null(week_file) && !is.null(field_boundaries_sf)
  for (i in seq_len(n_fields)) {
    ci_pixels <- ci_pixels_by_field[[i]]
    if (is.null(ci_pixels) || length(ci_pixels) == 0) {
-      result$weed_pressure_risk[field_idx] <- "No data"
+      result$patchiness_risk[i] <- "No data"
      result$patchiness_interpretation[i] <- "No data"
      next
    }
    ci_pixels <- ci_pixels[!is.na(ci_pixels)]
    if (length(ci_pixels) == 0) {
-      result$weed_pressure_risk[field_idx] <- "No data"
+      result$patchiness_risk[i] <- "No data"
      result$patchiness_interpretation[i] <- "No data"
      next
    }
-    cv_val <- calculate_cv(ci_pixels)
+    # =========================================
-    low_ci_pct <- sum(ci_pixels < 1.5) / length(ci_pixels) * 100
+    # METRIC 1: Calculate Gini Coefficient
-    fragmentation <- cv_val * low_ci_pct / 100
+    # =========================================
-    
+    gini <- NA_real_
-    result$cv_value[field_idx] <- cv_val
+    if (length(ci_pixels) > 1) {
-    result$low_ci_percent[field_idx] <- round(low_ci_pct, 2)
+      ci_sorted <- sort(ci_pixels)
-    result$fragmentation_index[field_idx] <- round(fragmentation, 3)
+      n <- length(ci_sorted)
-    
+      numerator <- 2 * sum(seq_len(n) * ci_sorted)
-    if (is.na(fragmentation)) {
+      denominator <- n * sum(ci_sorted)
-      result$weed_pressure_risk[field_idx] <- "No data"
+      gini <- (numerator / denominator) - (n + 1) / n
-    } else if (fragmentation > 0.15) {
+      gini <- max(0, min(1, gini))  # Clamp to 0-1
      result$weed_pressure_risk[field_idx] <- "High"
    } else if (fragmentation > 0.08) {
      result$weed_pressure_risk[field_idx] <- "Medium"
    } else if (fragmentation > 0.04) {
      result$weed_pressure_risk[field_idx] <- "Low"
    } else {
      result$weed_pressure_risk[field_idx] <- "Minimal"
    }
-  }
+    result$gini_coefficient[i] <- gini
-  
+    
-  return(result)
+    # =========================================
-}
+    # METRIC 2: Calculate Moran's I (spatial clustering)
-
+    # =========================================
-#' KPI 5: Calculate field patchiness (combines fragmentation into Gini-like metric + risk)
+    morans_i <- NA_real_
-#'
+    if (is_per_field) {
-#' @param weed_kpi_result Data frame from calculate_weed_presence_kpi()
+      field_name <- field_boundaries_sf$field[i]
-#' @param mean_ci_values Optional vector of mean CI values per field
+      field_mosaic_path <- file.path(mosaic_dir, field_name, week_file)
 #'
 #' @return Data frame with patchiness indicators (gini_coefficient, patchiness_risk, interpretation)
 calculate_patchiness_kpi <- function(weed_kpi_result, ci_pixels_by_field = NULL, mean_ci_values = NULL) {
  result <- weed_kpi_result %>%
    mutate(
      # Calculate Gini coefficient from CI pixels (proper calculation)
      gini_coefficient = NA_real_,
      mean_ci = if (!is.null(mean_ci_values)) mean_ci_values[field_idx] else NA_real_,
      # Map weed_pressure_risk to patchiness_risk
      patchiness_risk = weed_pressure_risk,
      # Create interpretation based on gini and risk
      patchiness_interpretation = case_when(
        is.na(gini_coefficient) ~ "No data",
        gini_coefficient < 0.2 & patchiness_risk %in% c("Low", "Minimal") ~ "Uniform growth",
        gini_coefficient < 0.4 & patchiness_risk %in% c("Low", "Medium") ~ "Moderate patchiness",
        gini_coefficient >= 0.4 & patchiness_risk %in% c("High", "Medium") ~ "High patchiness",
        TRUE ~ "Mixed heterogeneity"
      )
    ) %>%
    select(field_idx, gini_coefficient, mean_ci, 
           patchiness_interpretation, patchiness_risk)
  # Calculate actual Gini coefficients if CI pixels provided
  if (!is.null(ci_pixels_by_field)) {
    for (i in seq_len(nrow(result))) {
      ci_pixels <- ci_pixels_by_field[[i]]
-      if (!is.null(ci_pixels) && length(ci_pixels) > 0) {
+      if (file.exists(field_mosaic_path)) {
-        ci_pixels <- ci_pixels[!is.na(ci_pixels)]
+        tryCatch({
-        
+          field_raster <- terra::rast(field_mosaic_path)[["CI"]]
-        if (length(ci_pixels) > 1) {
+          single_field <- field_boundaries_sf[i, ]
-          # Calculate Gini coefficient
+          morans_result <- calculate_spatial_autocorrelation(field_raster, single_field)
          # Formula: Gini = (2 * sum(i * x_i)) / (n * sum(x_i)) - (n+1)/n
          # where x_i are sorted values
          ci_sorted <- sort(ci_pixels)
          n <- length(ci_sorted)
-          numerator <- 2 * sum(seq_len(n) * ci_sorted)
+          if (is.list(morans_result)) {
-          denominator <- n * sum(ci_sorted)
+            morans_i <- morans_result$morans_i
-          
+          } else {
-          gini <- (numerator / denominator) - (n + 1) / n
+            morans_i <- morans_result
-          # Clamp to 0-1 range (should be within this by formula but guard against numerical errors)
+          }
-          gini <- max(0, min(1, gini))
+        }, error = function(e) {
-          
+          safe_log(paste("Warning: Moran's I failed for field", field_name, ":", e$message), "WARNING")
-          result$gini_coefficient[i] <- gini
+        })
          # Update interpretation based on calculated Gini
          result$patchiness_interpretation[i] <- dplyr::case_when(
            gini < 0.2 ~ "Uniform growth",
            gini < 0.4 & result$patchiness_risk[i] %in% c("Low", "Medium", "Minimal") ~ "Moderate patchiness",
            gini >= 0.4 & result$patchiness_risk[i] %in% c("High", "Medium") ~ "High patchiness",
            TRUE ~ "Mixed heterogeneity"
          )
        }
      }
    }
    result$morans_i[i] <- morans_i
    # =========================================
    # RISK DETERMINATION: Gini + Moran's I combination
    # =========================================
    # Logic:
    # - High Gini (>0.3) + High Moran's I (>0.85) = High patchiness (localized clusters)
    # - High Gini + Low Moran's I = Medium patchiness (scattered heterogeneity)
    # - Low Gini (<0.15) = Minimal patchiness (uniform)
    # - Moderate Gini = Low to Medium patchiness
    if (is.na(gini)) {
      result$patchiness_risk[i] <- "No data"
    } else if (gini < 0.15) {
      result$patchiness_risk[i] <- "Minimal"
    } else if (gini < 0.30) {
      # Low-to-moderate Gini
      if (!is.na(morans_i) && morans_i > 0.85) {
        result$patchiness_risk[i] <- "Medium"  # Some clustering
      } else {
        result$patchiness_risk[i] <- "Low"
      }
    } else if (gini < 0.50) {
      # High Gini
      if (!is.na(morans_i) && morans_i > 0.85) {
        result$patchiness_risk[i] <- "High"  # Localized problem clusters
      } else {
        result$patchiness_risk[i] <- "Medium"  # Scattered issues
      }
    } else {
      # Very high Gini (>0.5)
      result$patchiness_risk[i] <- "High"
    }
    # =========================================
    # INTERPRETATION: Combined Gini + Moran's I narrative
    # =========================================
    result$patchiness_interpretation[i] <- dplyr::case_when(
      is.na(gini) ~ "No data",
      gini < 0.15 & (is.na(morans_i) | morans_i < 0.75) ~ 
        "Excellent uniformity - minimal patchiness",
      gini < 0.30 & (is.na(morans_i) | morans_i < 0.75) ~ 
        "Good uniformity - low patchiness",
      gini < 0.30 & !is.na(morans_i) & morans_i > 0.85 ~ 
        "Moderate uniformity with localized clustering",
      gini < 0.50 & (is.na(morans_i) | morans_i < 0.75) ~ 
        "Poor uniformity - scattered heterogeneity",
      gini < 0.50 & !is.na(morans_i) & morans_i > 0.85 ~ 
        "Poor uniformity with clustered problem areas",
      gini >= 0.50 ~ 
        "Severe heterogeneity - requires field investigation",
      TRUE ~ "Mixed heterogeneity"
    )
  }
  return(result)
@ -536,13 +558,6 @@ create_summary_tables <- function(all_kpis) {
    uniformity = all_kpis$uniformity %>%
      select(field_idx, cv_value, uniformity_category, interpretation),
    spatial_clustering = if (!is.null(all_kpis$spatial_clustering) && nrow(all_kpis$spatial_clustering) > 0) {
      all_kpis$spatial_clustering %>%
        select(field_idx, morans_i)
    } else {
      NULL
    },
    area_change = all_kpis$area_change %>%
      select(field_idx, mean_ci_pct_change, interpretation),
@ -553,7 +568,7 @@ create_summary_tables <- function(all_kpis) {
      select(field_idx, four_week_trend, trend_interpretation, decline_severity),
    patchiness = all_kpis$patchiness %>%
-      select(field_idx, gini_coefficient, patchiness_interpretation, patchiness_risk),
+      select(field_idx, gini_coefficient, morans_i, patchiness_interpretation, patchiness_risk),
    gap_filling = if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) {
      all_kpis$gap_filling %>%
@ -621,28 +636,19 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee
    )
  # ============================================
-  # GROUP 3: FIELD HETEROGENEITY/PATCHINESS (KPI 5 + Spatial Clustering)
+  # GROUP 3: FIELD HETEROGENEITY/PATCHINESS (KPI 5)
  # ============================================
-  # KPI 5: Field Patchiness
+  # KPI 5: Field Patchiness (Gini + Moran's I combination)
  result <- result %>%
    left_join(
      all_kpis$patchiness %>% 
-        select(field_idx, Gini_Coefficient = gini_coefficient, Mean_CI = mean_ci,
+        select(field_idx, Gini_Coefficient = gini_coefficient, 
               Morans_I = morans_i,
               Patchiness_Interpretation = patchiness_interpretation,
               Patchiness_Risk = patchiness_risk),
      by = "field_idx"
    )
  # Moran's I (spatial clustering - used in patchiness calculation)
  if (!is.null(all_kpis$spatial_clustering) && nrow(all_kpis$spatial_clustering) > 0) {
    result <- result %>%
      left_join(
        all_kpis$spatial_clustering %>% 
          select(field_idx, Morans_I = morans_i),
        by = "field_idx"
      )
  }
  # ============================================
  # GROUP 4: YIELD FORECAST (KPI 3)
  # ============================================
@ -914,14 +920,14 @@ calculate_all_field_analysis_agronomic_support <- function(
  )
  message("Calculating KPI 5: Field Patchiness...")
-  weed_kpi <- calculate_weed_presence_kpi(ci_pixels_by_field)
+  # Calculate patchiness using both Gini coefficient and Moran's I spatial clustering
-  
+  patchiness_kpi <- calculate_patchiness_kpi(
-  # Convert weed metrics to patchiness metrics (Gini-like + risk combination)
+    ci_pixels_by_field,
-  mean_ci_values <- current_stats$Mean_CI
+    field_boundaries_sf = field_boundaries_sf,
-  if (is.null(mean_ci_values) || length(mean_ci_values) != nrow(field_boundaries_sf)) {
+    mosaic_dir = current_mosaic_dir,
-    mean_ci_values <- rep(NA_real_, nrow(field_boundaries_sf))
+    week_file = week_file,
-  }
+    mean_ci_values = current_stats$Mean_CI
-  patchiness_kpi <- calculate_patchiness_kpi(weed_kpi, ci_pixels_by_field, mean_ci_values)
+  )
  message("Calculating KPI 6: Gap Filling...")
  # Build list of per-field files for this week
@ -968,7 +974,6 @@ calculate_all_field_analysis_agronomic_support <- function(
    tch_forecasted = tch_kpi,
    growth_decline = growth_decline_kpi,
    patchiness = patchiness_kpi,
    spatial_clustering = uniformity_kpi %>% select(field_idx, morans_i),
    gap_filling = gap_filling_kpi
  )
@ -985,10 +990,6 @@ calculate_all_field_analysis_agronomic_support <- function(
    distinct(field_idx, .keep_all = TRUE)
  all_kpis$patchiness <- all_kpis$patchiness %>%
    distinct(field_idx, .keep_all = TRUE)
  if (!is.null(all_kpis$spatial_clustering)) {
    all_kpis$spatial_clustering <- all_kpis$spatial_clustering %>%
      distinct(field_idx, .keep_all = TRUE)
  }
  all_kpis$gap_filling <- all_kpis$gap_filling %>%
    distinct(field_idx, .keep_all = TRUE)
--- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd
+++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd
@ -1622,25 +1622,79 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
 ### What You'll Find in This Report:
 1. **Key Performance Indicators (KPIs):**  
-   The report provides a farm-wide analysis based on the Chlorophyll Index (CI) changes. KPIs are calculated field by field and summarized in tables. The current KPIs included are:
+   The report provides a farm-wide analysis based on weekly Chlorophyll Index (CI) measurements. Five comprehensive KPIs are calculated field by field to assess crop health:
   - **KPI 1: Field Uniformity** — Measures how consistently crop is developing across the field
     - **Metric:** Coefficient of Variation (CV) of CI pixel values
     - **Calculation:** CV = (Standard Deviation of CI) / (Mean CI)
     - **Categories:**
       - **Excellent:** CV < 0.08 (very uniform growth, minimal intervention needed)
       - **Good:** CV < 0.15 (acceptable uniformity, routine monitoring)
       - **Acceptable:** CV < 0.25 (moderate variation, monitor irrigation/fertility)
       - **Poor:** CV < 0.4 (high variation, investigate management issues)
       - **Very poor:** CV ≥ 0.4 (severe variation, immediate field scout required)
     - **Why it matters:** Uniform fields are easier to manage and typically produce better yields. Uneven growth suggests irrigation problems, fertility gaps, pests, or disease.
-   - **Field Uniformity:** Assesses the consistency of crop growth within each field using the coefficient of variation (CV) of CI values. Uniformity levels are classified as:
+   - **KPI 2: Area Change (Weekly Growth)** — Tracks week-over-week CI changes to detect rapid improvements or declines
-     - **Excellent:** CV < 0.08 (very uniform growth)
+     - **Calculation:** ((Current Mean CI − Previous Mean CI) / Previous Mean CI) × 100
-     - **Good:** CV < 0.15 (acceptable uniformity)
+     - **Categories:**
-     - **Moderate:** CV < 0.25 (some variation present)
+       - **Rapid growth:** > 15% increase (excellent weekly progress)
-     - **Poor:** CV ≥ 0.25 (high variation - investigate irrigation, fertility, or pests)
+       - **Positive growth:** 5–15% increase (steady improvement)
       - **Stable:** −5% to +5% (field maintained, no significant change)
       - **Declining:** −5% to −15% (slow decline, warrant closer monitoring)
       - **Rapid decline:** < −15% (alert: urgent issue requiring investigation)
     - **Why it matters:** Week-to-week changes reveal developing problems early, enabling timely intervention.
-   - **Area Change:** Summarizes the proportion of field area that is improving, stable, or declining week-over-week, based on CI changes. Helps identify fields requiring immediate attention.
+   - **KPI 3: TCH Forecasted (Yield Prediction)** — Predicts final harvest tonnage for mature fields
     - **Applies to:** Fields ≥ 240 days old (mature stage)
     - **Method:** Random Forest machine learning model trained on historical harvest data and CI trajectories
     - **Inputs:** Days after harvest (DAH) and CI growth rate (CI_per_day)
     - **Output:** Predicted tons of cane per hectare (t/ha)
     - **Why it matters:** Helps plan harvest timing, mill throughput, and revenue forecasting for mature crops.
-   - **TCH Forecasted:** Provides yield predictions (tons cane per hectare) for mature fields (typically over 240 days old), using a machine learning model trained on historical CI and yield data. Helps plan harvest timing and logistics.
+   - **KPI 4: Growth Decline (4-Week Trend)** — Assesses short-term growth trajectory using linear regression
     - **Calculation:** Linear slope of CI values over the previous 4 weeks
     - **Categories:**
       - **Strong growth:** Slope > 0.1 CI units/week (excellent sustained progress)
       - **Weak growth:** Slope 0–0.1 (slow improvement, monitor closely)
       - **Slight decline:** Slope −0.1–0 (low severity, non-urgent observation)
       - **Moderate decline:** Slope −0.3 to −0.1 (medium severity, scouting recommended)
       - **Strong decline:** Slope < −0.3 (high severity, immediate field investigation required)
     - **Why it matters:** Trend analysis reveals whether crop is accelerating, stalling, or stressed over time.
-   - **Field Patchiness Score:** Measures field heterogeneity using the Gini coefficient, detecting spatial variation in crop health. High patchiness (Gini > 0.12) may indicate irrigation, pest, or fertility issues requiring targeted scouting:
+   - **KPI 5: Field Patchiness (Heterogeneity)** — Combines two complementary spatial metrics for comprehensive heterogeneity assessment
-     - **Low:** Gini < 0.08 (excellent uniformity, minimal intervention needed)
+     - **Metric 1: Gini Coefficient** — Statistical measure of distribution inequality in CI pixel values
-     - **Medium:** Gini 0.08–0.12 (acceptable variation, routine monitoring)
+       - **Formula:** (2 × Σ(i × sorted_CI)) / (n × Σ(sorted_CI)) − (n+1)/n
-     - **High:** Gini > 0.12 (poor uniformity, recommend field scouting)
+       - **Range:** 0 (perfectly uniform) to 1 (highly unequal)
-     - **Note:** Young crops (< 3 months) naturally show higher patchiness as they establish; this decreases with canopy closure.
+       - **Interpretation:** Low Gini (< 0.15) = good uniformity; High Gini (> 0.3) = significant heterogeneity
     - **Metric 2: Moran's I** — Spatial autocorrelation indicating whether high/low areas are clustered or scattered
       - **Range:** −1 (dispersed pattern) to +1 (strong clustering)
       - **Thresholds:** Moran's I > 0.85 indicates clustered problem areas; < 0.75 suggests scattered issues
     - **Risk Determination (Gini + Moran's I Combined):**
       - **Minimal Risk:** Gini < 0.15 (excellent uniformity regardless of spatial pattern)
       - **Low Risk:** Gini 0.15–0.30, Moran's I < 0.85 (moderate variation, scattered distribution)
       - **Medium Risk:** Gini 0.15–0.30, Moran's I > 0.85 OR Gini 0.30–0.50, Moran's I < 0.85 (notable issues)
       - **High Risk:** Gini > 0.30, Moran's I > 0.85 (severe heterogeneity with localized clusters—urgent scouting needed)
     - **Why it matters:** High patchiness may indicate irrigation inefficiencies, localized pest pressure, fertility variation, or disease spread. Combined Gini + Moran's I reveals not just *how much* variation exists, but also *how it's distributed* spatially. CI reflects chlorophyll = nitrogen status + plant health + vigor. High CV/Patchiness often signals N gaps, water stress, pests (borers), or ratoon decline.
   - **Uniformity vs. Patchiness — What's the Difference?**
     Both KPIs measure variation, but they answer different questions and drive different management actions:
     - **Uniformity (CV-based)** answers: "*Is* growth even across the field?" — it detects whether a problem exists but not where.
     - **Patchiness (Gini + Moran's I)** answers: "*Where* are problems and how are they arranged?" — it reveals the spatial pattern.
     **Practical example:** Two fields both score "Poor" on Uniformity (CV = 0.25). However:
     - Field A has scattered low-CI patches (Moran's I = 0.6) → suggests *random* stress (disease pressure, uneven irrigation)
     - Field B has clustered low-CI in one corner (Moran's I = 0.95) → suggests *localized* problem (drainage, compaction, pest hotspot)
     Your scouting and remediation strategy should differ: Field A might need systemic irrigation adjustment or disease management; Field B might need soil remediation in the affected corner. **Patchiness tells you *where to focus your effort*.**
-   - **Gap Score:** Indicates the proportion of a field with low CI values (lowest 25% of the distribution), highlighting areas with poor crop establishment or gaps that may need replanting.
+   - **KPI 6: Gap Score (Establishment Quality)** — Quantifies field gaps and areas of poor crop establishment
     - **Metric:** Percentage of field area with very low CI values (lowest 25% of pixel distribution)
     - **Levels:**
       - **Minimal:** < 10% of field (good establishment, limited replanting needed)
       - **Moderate:** 10–25% of field (monitor gap expansion, coordinate with agronomy)
       - **Significant:** ≥ 25% of field (consider targeted replanting or rehabilitation)
     - **Why it matters:** High gap scores indicate potential yield losses and may warrant management intervention.
 2. **Overview Map: Growth on Farm:**  
   Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting.
@ -1659,7 +1713,6 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
 ---
 ### Historical Benchmark Lines 
 The CI time series graphs include historical benchmark lines for the 10th, 50th, and 90th percentiles of CI values across all fields and seasons.