diff --git a/r_app/80_utils_agronomic_support.R b/r_app/80_utils_agronomic_support.R index df5232a..adb629a 100644 --- a/r_app/80_utils_agronomic_support.R +++ b/r_app/80_utils_agronomic_support.R @@ -400,121 +400,143 @@ calculate_growth_decline_kpi <- function(ci_values_list) { return(result) } -#' KPI 5: Calculate weed presence indicator + #' -#' Detects field fragmentation/patchiness (potential weed/pest pressure) +#' Combines two complementary metrics for comprehensive heterogeneity assessment: +#' - Gini Coefficient: Distribution inequality of CI values (0=uniform, 1=unequal) +#' - Moran's I: Spatial autocorrelation (-1 to +1, indicates clustering vs dispersal) #' #' @param ci_pixels_by_field List of CI pixel arrays for each field +#' @param field_boundaries_sf SF object with field geometries +#' @param mosaic_dir Directory path to per-field mosaic files (for Moran's I) +#' @param week_file Week file pattern (for Moran's I calculation) +#' @param mean_ci_values Optional vector of mean CI values per field #' -#' @return Data frame with fragmentation indicators -calculate_weed_presence_kpi <- function(ci_pixels_by_field) { +#' @return Data frame with gini_coefficient, morans_i, patchiness_risk, patchiness_interpretation +calculate_patchiness_kpi <- function(ci_pixels_by_field, field_boundaries_sf = NULL, + mosaic_dir = NULL, week_file = NULL, mean_ci_values = NULL) { + + n_fields <- length(ci_pixels_by_field) + result <- data.frame( - field_idx = seq_len(length(ci_pixels_by_field)), - cv_value = NA_real_, - low_ci_percent = NA_real_, - fragmentation_index = NA_real_, - weed_pressure_risk = NA_character_, + field_idx = seq_len(n_fields), + gini_coefficient = NA_real_, + morans_i = NA_real_, + patchiness_risk = NA_character_, + patchiness_interpretation = NA_character_, stringsAsFactors = FALSE ) - for (field_idx in seq_len(length(ci_pixels_by_field))) { - ci_pixels <- ci_pixels_by_field[[field_idx]] + # Determine if per-field structure available + is_per_field <- !is.null(mosaic_dir) && !is.null(week_file) && !is.null(field_boundaries_sf) + + for (i in seq_len(n_fields)) { + ci_pixels <- ci_pixels_by_field[[i]] if (is.null(ci_pixels) || length(ci_pixels) == 0) { - result$weed_pressure_risk[field_idx] <- "No data" + result$patchiness_risk[i] <- "No data" + result$patchiness_interpretation[i] <- "No data" next } ci_pixels <- ci_pixels[!is.na(ci_pixels)] if (length(ci_pixels) == 0) { - result$weed_pressure_risk[field_idx] <- "No data" + result$patchiness_risk[i] <- "No data" + result$patchiness_interpretation[i] <- "No data" next } - cv_val <- calculate_cv(ci_pixels) - low_ci_pct <- sum(ci_pixels < 1.5) / length(ci_pixels) * 100 - fragmentation <- cv_val * low_ci_pct / 100 - - result$cv_value[field_idx] <- cv_val - result$low_ci_percent[field_idx] <- round(low_ci_pct, 2) - result$fragmentation_index[field_idx] <- round(fragmentation, 3) - - if (is.na(fragmentation)) { - result$weed_pressure_risk[field_idx] <- "No data" - } else if (fragmentation > 0.15) { - result$weed_pressure_risk[field_idx] <- "High" - } else if (fragmentation > 0.08) { - result$weed_pressure_risk[field_idx] <- "Medium" - } else if (fragmentation > 0.04) { - result$weed_pressure_risk[field_idx] <- "Low" - } else { - result$weed_pressure_risk[field_idx] <- "Minimal" + # ========================================= + # METRIC 1: Calculate Gini Coefficient + # ========================================= + gini <- NA_real_ + if (length(ci_pixels) > 1) { + ci_sorted <- sort(ci_pixels) + n <- length(ci_sorted) + numerator <- 2 * sum(seq_len(n) * ci_sorted) + denominator <- n * sum(ci_sorted) + gini <- (numerator / denominator) - (n + 1) / n + gini <- max(0, min(1, gini)) # Clamp to 0-1 } - } - - return(result) -} - -#' KPI 5: Calculate field patchiness (combines fragmentation into Gini-like metric + risk) -#' -#' @param weed_kpi_result Data frame from calculate_weed_presence_kpi() -#' @param mean_ci_values Optional vector of mean CI values per field -#' -#' @return Data frame with patchiness indicators (gini_coefficient, patchiness_risk, interpretation) -calculate_patchiness_kpi <- function(weed_kpi_result, ci_pixels_by_field = NULL, mean_ci_values = NULL) { - result <- weed_kpi_result %>% - mutate( - # Calculate Gini coefficient from CI pixels (proper calculation) - gini_coefficient = NA_real_, - mean_ci = if (!is.null(mean_ci_values)) mean_ci_values[field_idx] else NA_real_, - # Map weed_pressure_risk to patchiness_risk - patchiness_risk = weed_pressure_risk, - # Create interpretation based on gini and risk - patchiness_interpretation = case_when( - is.na(gini_coefficient) ~ "No data", - gini_coefficient < 0.2 & patchiness_risk %in% c("Low", "Minimal") ~ "Uniform growth", - gini_coefficient < 0.4 & patchiness_risk %in% c("Low", "Medium") ~ "Moderate patchiness", - gini_coefficient >= 0.4 & patchiness_risk %in% c("High", "Medium") ~ "High patchiness", - TRUE ~ "Mixed heterogeneity" - ) - ) %>% - select(field_idx, gini_coefficient, mean_ci, - patchiness_interpretation, patchiness_risk) - - # Calculate actual Gini coefficients if CI pixels provided - if (!is.null(ci_pixels_by_field)) { - for (i in seq_len(nrow(result))) { - ci_pixels <- ci_pixels_by_field[[i]] + result$gini_coefficient[i] <- gini + + # ========================================= + # METRIC 2: Calculate Moran's I (spatial clustering) + # ========================================= + morans_i <- NA_real_ + if (is_per_field) { + field_name <- field_boundaries_sf$field[i] + field_mosaic_path <- file.path(mosaic_dir, field_name, week_file) - if (!is.null(ci_pixels) && length(ci_pixels) > 0) { - ci_pixels <- ci_pixels[!is.na(ci_pixels)] - - if (length(ci_pixels) > 1) { - # Calculate Gini coefficient - # Formula: Gini = (2 * sum(i * x_i)) / (n * sum(x_i)) - (n+1)/n - # where x_i are sorted values - ci_sorted <- sort(ci_pixels) - n <- length(ci_sorted) + if (file.exists(field_mosaic_path)) { + tryCatch({ + field_raster <- terra::rast(field_mosaic_path)[["CI"]] + single_field <- field_boundaries_sf[i, ] + morans_result <- calculate_spatial_autocorrelation(field_raster, single_field) - numerator <- 2 * sum(seq_len(n) * ci_sorted) - denominator <- n * sum(ci_sorted) - - gini <- (numerator / denominator) - (n + 1) / n - # Clamp to 0-1 range (should be within this by formula but guard against numerical errors) - gini <- max(0, min(1, gini)) - - result$gini_coefficient[i] <- gini - - # Update interpretation based on calculated Gini - result$patchiness_interpretation[i] <- dplyr::case_when( - gini < 0.2 ~ "Uniform growth", - gini < 0.4 & result$patchiness_risk[i] %in% c("Low", "Medium", "Minimal") ~ "Moderate patchiness", - gini >= 0.4 & result$patchiness_risk[i] %in% c("High", "Medium") ~ "High patchiness", - TRUE ~ "Mixed heterogeneity" - ) - } + if (is.list(morans_result)) { + morans_i <- morans_result$morans_i + } else { + morans_i <- morans_result + } + }, error = function(e) { + safe_log(paste("Warning: Moran's I failed for field", field_name, ":", e$message), "WARNING") + }) } } + result$morans_i[i] <- morans_i + + # ========================================= + # RISK DETERMINATION: Gini + Moran's I combination + # ========================================= + # Logic: + # - High Gini (>0.3) + High Moran's I (>0.85) = High patchiness (localized clusters) + # - High Gini + Low Moran's I = Medium patchiness (scattered heterogeneity) + # - Low Gini (<0.15) = Minimal patchiness (uniform) + # - Moderate Gini = Low to Medium patchiness + + if (is.na(gini)) { + result$patchiness_risk[i] <- "No data" + } else if (gini < 0.15) { + result$patchiness_risk[i] <- "Minimal" + } else if (gini < 0.30) { + # Low-to-moderate Gini + if (!is.na(morans_i) && morans_i > 0.85) { + result$patchiness_risk[i] <- "Medium" # Some clustering + } else { + result$patchiness_risk[i] <- "Low" + } + } else if (gini < 0.50) { + # High Gini + if (!is.na(morans_i) && morans_i > 0.85) { + result$patchiness_risk[i] <- "High" # Localized problem clusters + } else { + result$patchiness_risk[i] <- "Medium" # Scattered issues + } + } else { + # Very high Gini (>0.5) + result$patchiness_risk[i] <- "High" + } + + # ========================================= + # INTERPRETATION: Combined Gini + Moran's I narrative + # ========================================= + result$patchiness_interpretation[i] <- dplyr::case_when( + is.na(gini) ~ "No data", + gini < 0.15 & (is.na(morans_i) | morans_i < 0.75) ~ + "Excellent uniformity - minimal patchiness", + gini < 0.30 & (is.na(morans_i) | morans_i < 0.75) ~ + "Good uniformity - low patchiness", + gini < 0.30 & !is.na(morans_i) & morans_i > 0.85 ~ + "Moderate uniformity with localized clustering", + gini < 0.50 & (is.na(morans_i) | morans_i < 0.75) ~ + "Poor uniformity - scattered heterogeneity", + gini < 0.50 & !is.na(morans_i) & morans_i > 0.85 ~ + "Poor uniformity with clustered problem areas", + gini >= 0.50 ~ + "Severe heterogeneity - requires field investigation", + TRUE ~ "Mixed heterogeneity" + ) } return(result) @@ -536,13 +558,6 @@ create_summary_tables <- function(all_kpis) { uniformity = all_kpis$uniformity %>% select(field_idx, cv_value, uniformity_category, interpretation), - spatial_clustering = if (!is.null(all_kpis$spatial_clustering) && nrow(all_kpis$spatial_clustering) > 0) { - all_kpis$spatial_clustering %>% - select(field_idx, morans_i) - } else { - NULL - }, - area_change = all_kpis$area_change %>% select(field_idx, mean_ci_pct_change, interpretation), @@ -553,7 +568,7 @@ create_summary_tables <- function(all_kpis) { select(field_idx, four_week_trend, trend_interpretation, decline_severity), patchiness = all_kpis$patchiness %>% - select(field_idx, gini_coefficient, patchiness_interpretation, patchiness_risk), + select(field_idx, gini_coefficient, morans_i, patchiness_interpretation, patchiness_risk), gap_filling = if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) { all_kpis$gap_filling %>% @@ -621,28 +636,19 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee ) # ============================================ - # GROUP 3: FIELD HETEROGENEITY/PATCHINESS (KPI 5 + Spatial Clustering) + # GROUP 3: FIELD HETEROGENEITY/PATCHINESS (KPI 5) # ============================================ - # KPI 5: Field Patchiness + # KPI 5: Field Patchiness (Gini + Moran's I combination) result <- result %>% left_join( all_kpis$patchiness %>% - select(field_idx, Gini_Coefficient = gini_coefficient, Mean_CI = mean_ci, + select(field_idx, Gini_Coefficient = gini_coefficient, + Morans_I = morans_i, Patchiness_Interpretation = patchiness_interpretation, Patchiness_Risk = patchiness_risk), by = "field_idx" ) - # Moran's I (spatial clustering - used in patchiness calculation) - if (!is.null(all_kpis$spatial_clustering) && nrow(all_kpis$spatial_clustering) > 0) { - result <- result %>% - left_join( - all_kpis$spatial_clustering %>% - select(field_idx, Morans_I = morans_i), - by = "field_idx" - ) - } - # ============================================ # GROUP 4: YIELD FORECAST (KPI 3) # ============================================ @@ -914,14 +920,14 @@ calculate_all_field_analysis_agronomic_support <- function( ) message("Calculating KPI 5: Field Patchiness...") - weed_kpi <- calculate_weed_presence_kpi(ci_pixels_by_field) - - # Convert weed metrics to patchiness metrics (Gini-like + risk combination) - mean_ci_values <- current_stats$Mean_CI - if (is.null(mean_ci_values) || length(mean_ci_values) != nrow(field_boundaries_sf)) { - mean_ci_values <- rep(NA_real_, nrow(field_boundaries_sf)) - } - patchiness_kpi <- calculate_patchiness_kpi(weed_kpi, ci_pixels_by_field, mean_ci_values) + # Calculate patchiness using both Gini coefficient and Moran's I spatial clustering + patchiness_kpi <- calculate_patchiness_kpi( + ci_pixels_by_field, + field_boundaries_sf = field_boundaries_sf, + mosaic_dir = current_mosaic_dir, + week_file = week_file, + mean_ci_values = current_stats$Mean_CI + ) message("Calculating KPI 6: Gap Filling...") # Build list of per-field files for this week @@ -968,7 +974,6 @@ calculate_all_field_analysis_agronomic_support <- function( tch_forecasted = tch_kpi, growth_decline = growth_decline_kpi, patchiness = patchiness_kpi, - spatial_clustering = uniformity_kpi %>% select(field_idx, morans_i), gap_filling = gap_filling_kpi ) @@ -985,10 +990,6 @@ calculate_all_field_analysis_agronomic_support <- function( distinct(field_idx, .keep_all = TRUE) all_kpis$patchiness <- all_kpis$patchiness %>% distinct(field_idx, .keep_all = TRUE) - if (!is.null(all_kpis$spatial_clustering)) { - all_kpis$spatial_clustering <- all_kpis$spatial_clustering %>% - distinct(field_idx, .keep_all = TRUE) - } all_kpis$gap_filling <- all_kpis$gap_filling %>% distinct(field_idx, .keep_all = TRUE) diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 55b81de..4725229 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -1622,25 +1622,79 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to ### What You'll Find in This Report: 1. **Key Performance Indicators (KPIs):** - The report provides a farm-wide analysis based on the Chlorophyll Index (CI) changes. KPIs are calculated field by field and summarized in tables. The current KPIs included are: + The report provides a farm-wide analysis based on weekly Chlorophyll Index (CI) measurements. Five comprehensive KPIs are calculated field by field to assess crop health: + + - **KPI 1: Field Uniformity** — Measures how consistently crop is developing across the field + - **Metric:** Coefficient of Variation (CV) of CI pixel values + - **Calculation:** CV = (Standard Deviation of CI) / (Mean CI) + - **Categories:** + - **Excellent:** CV < 0.08 (very uniform growth, minimal intervention needed) + - **Good:** CV < 0.15 (acceptable uniformity, routine monitoring) + - **Acceptable:** CV < 0.25 (moderate variation, monitor irrigation/fertility) + - **Poor:** CV < 0.4 (high variation, investigate management issues) + - **Very poor:** CV ≥ 0.4 (severe variation, immediate field scout required) + - **Why it matters:** Uniform fields are easier to manage and typically produce better yields. Uneven growth suggests irrigation problems, fertility gaps, pests, or disease. - - **Field Uniformity:** Assesses the consistency of crop growth within each field using the coefficient of variation (CV) of CI values. Uniformity levels are classified as: - - **Excellent:** CV < 0.08 (very uniform growth) - - **Good:** CV < 0.15 (acceptable uniformity) - - **Moderate:** CV < 0.25 (some variation present) - - **Poor:** CV ≥ 0.25 (high variation - investigate irrigation, fertility, or pests) + - **KPI 2: Area Change (Weekly Growth)** — Tracks week-over-week CI changes to detect rapid improvements or declines + - **Calculation:** ((Current Mean CI − Previous Mean CI) / Previous Mean CI) × 100 + - **Categories:** + - **Rapid growth:** > 15% increase (excellent weekly progress) + - **Positive growth:** 5–15% increase (steady improvement) + - **Stable:** −5% to +5% (field maintained, no significant change) + - **Declining:** −5% to −15% (slow decline, warrant closer monitoring) + - **Rapid decline:** < −15% (alert: urgent issue requiring investigation) + - **Why it matters:** Week-to-week changes reveal developing problems early, enabling timely intervention. - - **Area Change:** Summarizes the proportion of field area that is improving, stable, or declining week-over-week, based on CI changes. Helps identify fields requiring immediate attention. + - **KPI 3: TCH Forecasted (Yield Prediction)** — Predicts final harvest tonnage for mature fields + - **Applies to:** Fields ≥ 240 days old (mature stage) + - **Method:** Random Forest machine learning model trained on historical harvest data and CI trajectories + - **Inputs:** Days after harvest (DAH) and CI growth rate (CI_per_day) + - **Output:** Predicted tons of cane per hectare (t/ha) + - **Why it matters:** Helps plan harvest timing, mill throughput, and revenue forecasting for mature crops. - - **TCH Forecasted:** Provides yield predictions (tons cane per hectare) for mature fields (typically over 240 days old), using a machine learning model trained on historical CI and yield data. Helps plan harvest timing and logistics. + - **KPI 4: Growth Decline (4-Week Trend)** — Assesses short-term growth trajectory using linear regression + - **Calculation:** Linear slope of CI values over the previous 4 weeks + - **Categories:** + - **Strong growth:** Slope > 0.1 CI units/week (excellent sustained progress) + - **Weak growth:** Slope 0–0.1 (slow improvement, monitor closely) + - **Slight decline:** Slope −0.1–0 (low severity, non-urgent observation) + - **Moderate decline:** Slope −0.3 to −0.1 (medium severity, scouting recommended) + - **Strong decline:** Slope < −0.3 (high severity, immediate field investigation required) + - **Why it matters:** Trend analysis reveals whether crop is accelerating, stalling, or stressed over time. - - **Field Patchiness Score:** Measures field heterogeneity using the Gini coefficient, detecting spatial variation in crop health. High patchiness (Gini > 0.12) may indicate irrigation, pest, or fertility issues requiring targeted scouting: - - **Low:** Gini < 0.08 (excellent uniformity, minimal intervention needed) - - **Medium:** Gini 0.08–0.12 (acceptable variation, routine monitoring) - - **High:** Gini > 0.12 (poor uniformity, recommend field scouting) - - **Note:** Young crops (< 3 months) naturally show higher patchiness as they establish; this decreases with canopy closure. + - **KPI 5: Field Patchiness (Heterogeneity)** — Combines two complementary spatial metrics for comprehensive heterogeneity assessment + - **Metric 1: Gini Coefficient** — Statistical measure of distribution inequality in CI pixel values + - **Formula:** (2 × Σ(i × sorted_CI)) / (n × Σ(sorted_CI)) − (n+1)/n + - **Range:** 0 (perfectly uniform) to 1 (highly unequal) + - **Interpretation:** Low Gini (< 0.15) = good uniformity; High Gini (> 0.3) = significant heterogeneity + - **Metric 2: Moran's I** — Spatial autocorrelation indicating whether high/low areas are clustered or scattered + - **Range:** −1 (dispersed pattern) to +1 (strong clustering) + - **Thresholds:** Moran's I > 0.85 indicates clustered problem areas; < 0.75 suggests scattered issues + - **Risk Determination (Gini + Moran's I Combined):** + - **Minimal Risk:** Gini < 0.15 (excellent uniformity regardless of spatial pattern) + - **Low Risk:** Gini 0.15–0.30, Moran's I < 0.85 (moderate variation, scattered distribution) + - **Medium Risk:** Gini 0.15–0.30, Moran's I > 0.85 OR Gini 0.30–0.50, Moran's I < 0.85 (notable issues) + - **High Risk:** Gini > 0.30, Moran's I > 0.85 (severe heterogeneity with localized clusters—urgent scouting needed) + - **Why it matters:** High patchiness may indicate irrigation inefficiencies, localized pest pressure, fertility variation, or disease spread. Combined Gini + Moran's I reveals not just *how much* variation exists, but also *how it's distributed* spatially. CI reflects chlorophyll = nitrogen status + plant health + vigor. High CV/Patchiness often signals N gaps, water stress, pests (borers), or ratoon decline. + + - **Uniformity vs. Patchiness — What's the Difference?** + Both KPIs measure variation, but they answer different questions and drive different management actions: + - **Uniformity (CV-based)** answers: "*Is* growth even across the field?" — it detects whether a problem exists but not where. + - **Patchiness (Gini + Moran's I)** answers: "*Where* are problems and how are they arranged?" — it reveals the spatial pattern. + + **Practical example:** Two fields both score "Poor" on Uniformity (CV = 0.25). However: + - Field A has scattered low-CI patches (Moran's I = 0.6) → suggests *random* stress (disease pressure, uneven irrigation) + - Field B has clustered low-CI in one corner (Moran's I = 0.95) → suggests *localized* problem (drainage, compaction, pest hotspot) + + Your scouting and remediation strategy should differ: Field A might need systemic irrigation adjustment or disease management; Field B might need soil remediation in the affected corner. **Patchiness tells you *where to focus your effort*.** - - **Gap Score:** Indicates the proportion of a field with low CI values (lowest 25% of the distribution), highlighting areas with poor crop establishment or gaps that may need replanting. + - **KPI 6: Gap Score (Establishment Quality)** — Quantifies field gaps and areas of poor crop establishment + - **Metric:** Percentage of field area with very low CI values (lowest 25% of pixel distribution) + - **Levels:** + - **Minimal:** < 10% of field (good establishment, limited replanting needed) + - **Moderate:** 10–25% of field (monitor gap expansion, coordinate with agronomy) + - **Significant:** ≥ 25% of field (consider targeted replanting or rehabilitation) + - **Why it matters:** High gap scores indicate potential yield losses and may warrant management intervention. 2. **Overview Map: Growth on Farm:** Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting. @@ -1659,7 +1713,6 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to --- - ### Historical Benchmark Lines The CI time series graphs include historical benchmark lines for the 10th, 50th, and 90th percentiles of CI values across all fields and seasons.