Add field patchiness KPI calculation and update summaries; replace weed pressure metrics with patchiness metrics in reports

2026-02-17 21:57:34 +01:00 · 2026-02-17 21:57:34 +01:00 · 253ff51ca2
parent 34159b3003
commit 253ff51ca2
2 changed files with 238 additions and 59 deletions
--- a/r_app/80_utils_agronomic_support.R
+++ b/r_app/80_utils_agronomic_support.R
@ -64,6 +64,7 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_
    cv_value = numeric(),
    morans_i = numeric(),
    uniformity_score = numeric(),
+    uniformity_category = character(),
    interpretation = character(),
    stringsAsFactors = FALSE
  )
@ -80,6 +81,7 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_
        cv_value = NA_real_,
        morans_i = NA_real_,
        uniformity_score = NA_real_,
+        uniformity_category = "No data",
        interpretation = "No data",
        stringsAsFactors = FALSE
      ))
@ -142,16 +144,22 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_
    # Interpretation
    if (is.na(cv_val)) {
      interpretation <- "No data"
+      uniformity_category <- "No data"
    } else if (cv_val < 0.08) {
      interpretation <- "Excellent uniformity"
+      uniformity_category <- "Excellent"
    } else if (cv_val < 0.15) {
      interpretation <- "Good uniformity"
+      uniformity_category <- "Good"
    } else if (cv_val < 0.25) {
      interpretation <- "Acceptable uniformity"
+      uniformity_category <- "Acceptable"
    } else if (cv_val < 0.4) {
      interpretation <- "Poor uniformity"
+      uniformity_category <- "Poor"
    } else {
      interpretation <- "Very poor uniformity"
+      uniformity_category <- "Very poor"
    }
    
    result <- rbind(result, data.frame(
@ -159,6 +167,7 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_
      cv_value = cv_val,
      morans_i = morans_i,
      uniformity_score = round(uniformity_score, 3),
+      uniformity_category = uniformity_category,
      interpretation = interpretation,
      stringsAsFactors = FALSE
    ))
@ -446,6 +455,72 @@ calculate_weed_presence_kpi <- function(ci_pixels_by_field) {
  return(result)
 }

+#' KPI 5: Calculate field patchiness (combines fragmentation into Gini-like metric + risk)
+#'
+#' @param weed_kpi_result Data frame from calculate_weed_presence_kpi()
+#' @param mean_ci_values Optional vector of mean CI values per field
+#'
+#' @return Data frame with patchiness indicators (gini_coefficient, patchiness_risk, interpretation)
+calculate_patchiness_kpi <- function(weed_kpi_result, ci_pixels_by_field = NULL, mean_ci_values = NULL) {
+  result <- weed_kpi_result %>%
+    mutate(
+      # Calculate Gini coefficient from CI pixels (proper calculation)
+      gini_coefficient = NA_real_,
+      mean_ci = if (!is.null(mean_ci_values)) mean_ci_values[field_idx] else NA_real_,
+      # Map weed_pressure_risk to patchiness_risk
+      patchiness_risk = weed_pressure_risk,
+      # Create interpretation based on gini and risk
+      patchiness_interpretation = case_when(
+        is.na(gini_coefficient) ~ "No data",
+        gini_coefficient < 0.2 & patchiness_risk %in% c("Low", "Minimal") ~ "Uniform growth",
+        gini_coefficient < 0.4 & patchiness_risk %in% c("Low", "Medium") ~ "Moderate patchiness",
+        gini_coefficient >= 0.4 & patchiness_risk %in% c("High", "Medium") ~ "High patchiness",
+        TRUE ~ "Mixed heterogeneity"
+      )
+    ) %>%
+    select(field_idx, gini_coefficient, mean_ci, 
+           patchiness_interpretation, patchiness_risk)
+  
+  # Calculate actual Gini coefficients if CI pixels provided
+  if (!is.null(ci_pixels_by_field)) {
+    for (i in seq_len(nrow(result))) {
+      ci_pixels <- ci_pixels_by_field[[i]]
+      
+      if (!is.null(ci_pixels) && length(ci_pixels) > 0) {
+        ci_pixels <- ci_pixels[!is.na(ci_pixels)]
+        
+        if (length(ci_pixels) > 1) {
+          # Calculate Gini coefficient
+          # Formula: Gini = (2 * sum(i * x_i)) / (n * sum(x_i)) - (n+1)/n
+          # where x_i are sorted values
+          ci_sorted <- sort(ci_pixels)
+          n <- length(ci_sorted)
+          
+          numerator <- 2 * sum(seq_len(n) * ci_sorted)
+          denominator <- n * sum(ci_sorted)
+          
+          gini <- (numerator / denominator) - (n + 1) / n
+          # Clamp to 0-1 range (should be within this by formula but guard against numerical errors)
+          gini <- max(0, min(1, gini))
+          
+          result$gini_coefficient[i] <- gini
+          
+          # Update interpretation based on calculated Gini
+          result$patchiness_interpretation[i] <- dplyr::case_when(
+            gini < 0.2 ~ "Uniform growth",
+            gini < 0.4 & result$patchiness_risk[i] %in% c("Low", "Medium", "Minimal") ~ "Moderate patchiness",
+            gini >= 0.4 & result$patchiness_risk[i] %in% c("High", "Medium") ~ "High patchiness",
+            TRUE ~ "Mixed heterogeneity"
+          )
+        }
+      }
+    }
+  }
+  
+  return(result)
+}
+
+

 # ============================================================================
 # KPI ORCHESTRATOR AND REPORTING
@ -459,7 +534,14 @@ calculate_weed_presence_kpi <- function(ci_pixels_by_field) {
 create_summary_tables <- function(all_kpis) {
  kpi_summary <- list(
    uniformity = all_kpis$uniformity %>%
-      select(field_idx, cv_value, morans_i, uniformity_score, interpretation),
+      select(field_idx, cv_value, uniformity_category, interpretation),
+    
+    spatial_clustering = if (!is.null(all_kpis$spatial_clustering) && nrow(all_kpis$spatial_clustering) > 0) {
+      all_kpis$spatial_clustering %>%
+        select(field_idx, morans_i)
+    } else {
+      NULL
+    },
    
    area_change = all_kpis$area_change %>%
      select(field_idx, mean_ci_pct_change, interpretation),
@ -470,8 +552,8 @@ create_summary_tables <- function(all_kpis) {
    growth_decline = all_kpis$growth_decline %>%
      select(field_idx, four_week_trend, trend_interpretation, decline_severity),
    
-    weed_pressure = all_kpis$weed_presence %>%
-      select(field_idx, fragmentation_index, weed_pressure_risk),
+    patchiness = all_kpis$patchiness %>%
+      select(field_idx, gini_coefficient, patchiness_interpretation, patchiness_risk),
    
    gap_filling = if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) {
      all_kpis$gap_filling %>%
@ -497,47 +579,83 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee
  result <- field_boundaries_sf %>%
    sf::st_drop_geometry() %>%
    mutate(
-      field_idx = row_number(),  # ADD THIS: match the integer index used in KPI functions
+      field_idx = row_number(),
      Field_id = field,
      Field_name = field,
      Week = current_week,
      Year = current_year
    ) %>%
-    select(field_idx, Field_id, Field_name, Week, Year)  # Include field_idx first
+    select(field_idx, Field_id, Field_name, Week, Year)
  
-  # Join all KPI results (now field_idx matches on both sides)
+  # ============================================
+  # GROUP 1: FIELD UNIFORMITY (KPI 1)
+  # ============================================
  result <- result %>%
    left_join(
      all_kpis$uniformity %>% 
-        select(field_idx, CV = cv_value, Uniformity_Score = uniformity_score, 
-               Morans_I = morans_i, Uniformity_Interpretation = interpretation),
+        select(field_idx, CV = cv_value, 
+               Uniformity_Category = uniformity_category),
      by = "field_idx"
-    ) %>%
+    )
+  
+  # ============================================
+  # GROUP 2: GROWTH & TREND ANALYSIS (KPI 2 + KPI 4)
+  # ============================================
+  # KPI 2: Area Change
+  result <- result %>%
    left_join(
      all_kpis$area_change %>% 
        select(field_idx, Weekly_CI_Change = mean_ci_pct_change, 
               Area_Change_Interpretation = interpretation),
      by = "field_idx"
-    ) %>%
-    left_join(
-      all_kpis$tch_forecasted %>% 
-        select(field_idx, TCH_Forecasted = tch_forecasted),
-      by = "field_idx"
-    ) %>%
+    )
+  
+  # KPI 4: Growth Decline
+  result <- result %>%
    left_join(
      all_kpis$growth_decline %>% 
        select(field_idx, Four_Week_Trend = four_week_trend, 
               Trend_Interpretation = trend_interpretation, 
               Decline_Severity = decline_severity),
      by = "field_idx"
-    ) %>%
+    )
+  
+  # ============================================
+  # GROUP 3: FIELD HETEROGENEITY/PATCHINESS (KPI 5 + Spatial Clustering)
+  # ============================================
+  # KPI 5: Field Patchiness
+  result <- result %>%
    left_join(
-      all_kpis$weed_presence %>% 
-        select(field_idx, Fragmentation_Index = fragmentation_index, 
-               Weed_Pressure_Risk = weed_pressure_risk),
+      all_kpis$patchiness %>% 
+        select(field_idx, Gini_Coefficient = gini_coefficient, Mean_CI = mean_ci,
+               Patchiness_Interpretation = patchiness_interpretation,
+               Patchiness_Risk = patchiness_risk),
      by = "field_idx"
    )
  
+  # Moran's I (spatial clustering - used in patchiness calculation)
+  if (!is.null(all_kpis$spatial_clustering) && nrow(all_kpis$spatial_clustering) > 0) {
+    result <- result %>%
+      left_join(
+        all_kpis$spatial_clustering %>% 
+          select(field_idx, Morans_I = morans_i),
+        by = "field_idx"
+      )
+  }
+  
+  # ============================================
+  # GROUP 4: YIELD FORECAST (KPI 3)
+  # ============================================
+  result <- result %>%
+    left_join(
+      all_kpis$tch_forecasted %>% 
+        select(field_idx, TCH_Forecasted = tch_forecasted),
+      by = "field_idx"
+    )
+  
+  # ============================================
+  # GROUP 5: DATA QUALITY / GAP FILLING (KPI 6)
+  # ============================================
  # Add gap filling if available
  if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) {
    result <- result %>%
@ -548,7 +666,7 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee
      )
  }
  
-  # Remove field_idx from final output (it was only needed for joining)
+  # Remove field_idx from final output
  result <- result %>%
    select(-field_idx)
  
@ -795,9 +913,16 @@ calculate_all_field_analysis_agronomic_support <- function(
  ci_pixels_by_field  
  )
  
-  message("Calculating KPI 5: Weed Presence...")
+  message("Calculating KPI 5: Field Patchiness...")
  weed_kpi <- calculate_weed_presence_kpi(ci_pixels_by_field)
  
+  # Convert weed metrics to patchiness metrics (Gini-like + risk combination)
+  mean_ci_values <- current_stats$Mean_CI
+  if (is.null(mean_ci_values) || length(mean_ci_values) != nrow(field_boundaries_sf)) {
+    mean_ci_values <- rep(NA_real_, nrow(field_boundaries_sf))
+  }
+  patchiness_kpi <- calculate_patchiness_kpi(weed_kpi, ci_pixels_by_field, mean_ci_values)
+  
  message("Calculating KPI 6: Gap Filling...")
  # Build list of per-field files for this week
  per_field_files <- c()
@ -842,10 +967,31 @@ calculate_all_field_analysis_agronomic_support <- function(
    area_change = area_change_kpi,
    tch_forecasted = tch_kpi,
    growth_decline = growth_decline_kpi,
-    weed_presence = weed_kpi,
+    patchiness = patchiness_kpi,
+    spatial_clustering = uniformity_kpi %>% select(field_idx, morans_i),
    gap_filling = gap_filling_kpi
  )
  
+  # Deduplicate KPI dataframes to ensure one row per field_idx
+  # (sometimes joins or calculations can create duplicate rows)
+  message("Deduplicating KPI results (keeping first occurrence per field)...")
+  all_kpis$uniformity <- all_kpis$uniformity %>%
+    distinct(field_idx, .keep_all = TRUE)
+  all_kpis$area_change <- all_kpis$area_change %>%
+    distinct(field_idx, .keep_all = TRUE)
+  all_kpis$tch_forecasted <- all_kpis$tch_forecasted %>%
+    distinct(field_idx, .keep_all = TRUE)
+  all_kpis$growth_decline <- all_kpis$growth_decline %>%
+    distinct(field_idx, .keep_all = TRUE)
+  all_kpis$patchiness <- all_kpis$patchiness %>%
+    distinct(field_idx, .keep_all = TRUE)
+  if (!is.null(all_kpis$spatial_clustering)) {
+    all_kpis$spatial_clustering <- all_kpis$spatial_clustering %>%
+      distinct(field_idx, .keep_all = TRUE)
+  }
+  all_kpis$gap_filling <- all_kpis$gap_filling %>%
+    distinct(field_idx, .keep_all = TRUE)
+  
  # Built single-sheet field detail table with all KPIs
  message("\nBuilding comprehensive field detail table...")
  field_detail_df <- create_field_detail_table(
--- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd
+++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd
@ -218,10 +218,10 @@ if (dir.exists(kpi_data_dir)) {
        if (is.null(summary_tables)) {
          summary_tables <- list()
          
-          # 1. Uniformity summary - GROUP BY Uniformity_Interpretation and COUNT
-          if ("Uniformity_Interpretation" %in% names(field_details_table)) {
+          # 1. Uniformity summary - GROUP BY Uniformity_Category and COUNT
+          if ("Uniformity_Category" %in% names(field_details_table)) {
            summary_tables$uniformity <- field_details_table %>%
-              group_by(interpretation = Uniformity_Interpretation) %>%
+              group_by(interpretation = Uniformity_Category) %>%
              summarise(field_count = n(), .groups = 'drop')
            safe_log("  ✓ Created uniformity summary")
          }
@ -242,28 +242,62 @@ if (dir.exists(kpi_data_dir)) {
            safe_log("  ✓ Created growth_decline summary")
          }
          
-          # 4. Weed pressure summary - GROUP BY Weed_Pressure_Risk and COUNT
-          if ("Weed_Pressure_Risk" %in% names(field_details_table)) {
-            summary_tables$weed_pressure <- field_details_table %>%
-              group_by(weed_pressure_risk = Weed_Pressure_Risk) %>%
-              summarise(field_count = n(), .groups = 'drop')
-            safe_log("  ✓ Created weed_pressure summary")
-          }
-          
-          # 5. TCH forecast summary - bin into categories and COUNT
-          if ("TCH_Forecasted" %in% names(field_details_table)) {
-            summary_tables$tch_forecast <- field_details_table %>%
-              filter(!is.na(TCH_Forecasted)) %>%
+          # 4. Patchiness summary - GROUP BY Patchiness_Risk + Gini ranges
+          if ("Patchiness_Risk" %in% names(field_details_table) && "Gini_Coefficient" %in% names(field_details_table)) {
+            summary_tables$patchiness <- field_details_table %>%
              mutate(
-                tch_category = case_when(
-                  TCH_Forecasted >= quantile(TCH_Forecasted, 0.75, na.rm = TRUE) ~ "Top 25%",
-                  TCH_Forecasted >= quantile(TCH_Forecasted, 0.25, na.rm = TRUE) ~ "Average",
-                  TRUE ~ "Lowest 25%"
+                gini_category = case_when(
+                  Gini_Coefficient < 0.2 ~ "Uniform (Gini<0.2)",
+                  Gini_Coefficient < 0.4 ~ "Moderate (Gini 0.2-0.4)",
+                  TRUE ~ "High (Gini≥0.4)"
                )
              ) %>%
-              group_by(tch_category) %>%
+              group_by(gini_category, patchiness_risk = Patchiness_Risk) %>%
              summarise(field_count = n(), .groups = 'drop')
-            safe_log("  ✓ Created tch_forecast summary")
+            safe_log("  ✓ Created patchiness summary")
+          }
+          
+          # 5. TCH forecast summary - show actual value ranges (quartiles) instead of counts
+          if ("TCH_Forecasted" %in% names(field_details_table)) {
+            tch_values <- field_details_table %>%
+              filter(!is.na(TCH_Forecasted)) %>%
+              pull(TCH_Forecasted)
+            
+            if (length(tch_values) > 0) {
+              # Defensive check: if all TCH values are identical, handle as special case
+              if (length(unique(tch_values)) == 1) {
+                # All values are identical
+                identical_value <- tch_values[1]
+                summary_tables$tch_forecast <- tibble::tibble(
+                  tch_category = "All equal",
+                  range = paste0(round(identical_value, 1), " t/ha"),
+                  field_count = length(tch_values)
+                )
+                safe_log("  ✓ Created tch_forecast summary (all TCH values identical)")
+              } else {
+                # Multiple distinct values - use three-quartile approach
+                q25 <- quantile(tch_values, 0.25, na.rm = TRUE)
+                q50 <- quantile(tch_values, 0.50, na.rm = TRUE)
+                q75 <- quantile(tch_values, 0.75, na.rm = TRUE)
+                min_val <- min(tch_values, na.rm = TRUE)
+                max_val <- max(tch_values, na.rm = TRUE)
+                
+                summary_tables$tch_forecast <- tibble::tibble(
+                  tch_category = c("Top 25%", "Middle 50%", "Bottom 25%"),
+                  range = c(
+                    paste0(round(q75, 1), "-", round(max_val, 1), " t/ha"),
+                    paste0(round(q25, 1), "-", round(q75, 1), " t/ha"),
+                    paste0(round(min_val, 1), "-", round(q25, 1), " t/ha")
+                  ),
+                  field_count = c(
+                    nrow(field_details_table %>% filter(TCH_Forecasted >= q75, !is.na(TCH_Forecasted))),
+                    nrow(field_details_table %>% filter(TCH_Forecasted >= q25 & TCH_Forecasted < q75, !is.na(TCH_Forecasted))),
+                    nrow(field_details_table %>% filter(TCH_Forecasted < q25, !is.na(TCH_Forecasted)))
+                  )
+                )
+                safe_log("  ✓ Created tch_forecast summary with value ranges")
+              }
+            }
          }
          
          # 6. Gap filling summary - GROUP BY Gap_Level and COUNT
@ -544,22 +578,29 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table
    kpi_display_order <- list(
      uniformity = list(display = "Field Uniformity", level_col = "interpretation", count_col = "field_count"),
      area_change = list(display = "Area Change", level_col = "interpretation", count_col = "field_count"),
-      tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", count_col = "field_count"),
-      growth_decline = list(display = "Growth Decline", level_col = "trend_interpretation", count_col = "field_count"),
-      weed_pressure = list(display = "Weed Presence", level_col = "weed_pressure_risk", count_col = "field_count"),
+      growth_decline = list(display = "Growth Decline (4-Week Trend)", level_col = "trend_interpretation", count_col = "field_count"),
+      patchiness = list(display = "Field Patchiness", level_col = "gini_category", count_col = "field_count", detail_col = "patchiness_risk"),
+      tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", detail_col = "range", count_col = "field_count"),
      gap_filling = list(display = "Gap Filling", level_col = "gap_level", count_col = "field_count")
    )

-    standardize_kpi <- function(df, level_col, count_col) {
+    standardize_kpi <- function(df, level_col, count_col, detail_col = NULL) {
      if (is.null(level_col) || !(level_col %in% names(df)) || is.null(count_col) || !(count_col %in% names(df))) {
        return(NULL)
      }
      total <- sum(df[[count_col]], na.rm = TRUE)
      total <- ifelse(total == 0, NA_real_, total)

+      # If detail_col is specified, use it as the Level instead
+      if (!is.null(detail_col) && detail_col %in% names(df)) {
+        display_level <- df[[detail_col]]
+      } else {
+        display_level <- df[[level_col]]
+      }
+
      df %>%
        dplyr::transmute(
-          Level = as.character(.data[[level_col]]),
+          Level = as.character(display_level),
          Count = as.integer(round(as.numeric(.data[[count_col]]))),
          Percent = if (is.na(total)) {
            NA_real_
@ -578,17 +619,9 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table
      kpi_df <- summary_tables[[kpi_key]]
      if (is.null(kpi_df) || !is.data.frame(kpi_df) || nrow(kpi_df) == 0) next

-      kpi_rows <- standardize_kpi(kpi_df, config$level_col, config$count_col)
-      if (is.null(kpi_rows) && kpi_key %in% c("tch_forecast", "gap_filling")) {
-        numeric_cols <- names(kpi_df)[vapply(kpi_df, is.numeric, logical(1))]
-        if (length(numeric_cols) > 0) {
-          kpi_rows <- tibble::tibble(
-            Level = numeric_cols,
-            Count = round(as.numeric(kpi_df[1, numeric_cols]), 2),
-            Percent = NA_real_
-          )
-        }
-      }
+      # Pass detail_col if it exists in config
+      detail_col <- if (!is.null(config$detail_col)) config$detail_col else NULL
+      kpi_rows <- standardize_kpi(kpi_df, config$level_col, config$count_col, detail_col)

      if (!is.null(kpi_rows)) {
        kpi_rows$KPI <- config$display