diff --git a/r_app/80_utils_agronomic_support.R b/r_app/80_utils_agronomic_support.R index a71ea28..df5232a 100644 --- a/r_app/80_utils_agronomic_support.R +++ b/r_app/80_utils_agronomic_support.R @@ -64,6 +64,7 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_ cv_value = numeric(), morans_i = numeric(), uniformity_score = numeric(), + uniformity_category = character(), interpretation = character(), stringsAsFactors = FALSE ) @@ -80,6 +81,7 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_ cv_value = NA_real_, morans_i = NA_real_, uniformity_score = NA_real_, + uniformity_category = "No data", interpretation = "No data", stringsAsFactors = FALSE )) @@ -142,16 +144,22 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_ # Interpretation if (is.na(cv_val)) { interpretation <- "No data" + uniformity_category <- "No data" } else if (cv_val < 0.08) { interpretation <- "Excellent uniformity" + uniformity_category <- "Excellent" } else if (cv_val < 0.15) { interpretation <- "Good uniformity" + uniformity_category <- "Good" } else if (cv_val < 0.25) { interpretation <- "Acceptable uniformity" + uniformity_category <- "Acceptable" } else if (cv_val < 0.4) { interpretation <- "Poor uniformity" + uniformity_category <- "Poor" } else { interpretation <- "Very poor uniformity" + uniformity_category <- "Very poor" } result <- rbind(result, data.frame( @@ -159,6 +167,7 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_ cv_value = cv_val, morans_i = morans_i, uniformity_score = round(uniformity_score, 3), + uniformity_category = uniformity_category, interpretation = interpretation, stringsAsFactors = FALSE )) @@ -446,6 +455,72 @@ calculate_weed_presence_kpi <- function(ci_pixels_by_field) { return(result) } +#' KPI 5: Calculate field patchiness (combines fragmentation into Gini-like metric + risk) +#' +#' @param weed_kpi_result Data frame from calculate_weed_presence_kpi() +#' @param mean_ci_values Optional vector of mean CI values per field +#' +#' @return Data frame with patchiness indicators (gini_coefficient, patchiness_risk, interpretation) +calculate_patchiness_kpi <- function(weed_kpi_result, ci_pixels_by_field = NULL, mean_ci_values = NULL) { + result <- weed_kpi_result %>% + mutate( + # Calculate Gini coefficient from CI pixels (proper calculation) + gini_coefficient = NA_real_, + mean_ci = if (!is.null(mean_ci_values)) mean_ci_values[field_idx] else NA_real_, + # Map weed_pressure_risk to patchiness_risk + patchiness_risk = weed_pressure_risk, + # Create interpretation based on gini and risk + patchiness_interpretation = case_when( + is.na(gini_coefficient) ~ "No data", + gini_coefficient < 0.2 & patchiness_risk %in% c("Low", "Minimal") ~ "Uniform growth", + gini_coefficient < 0.4 & patchiness_risk %in% c("Low", "Medium") ~ "Moderate patchiness", + gini_coefficient >= 0.4 & patchiness_risk %in% c("High", "Medium") ~ "High patchiness", + TRUE ~ "Mixed heterogeneity" + ) + ) %>% + select(field_idx, gini_coefficient, mean_ci, + patchiness_interpretation, patchiness_risk) + + # Calculate actual Gini coefficients if CI pixels provided + if (!is.null(ci_pixels_by_field)) { + for (i in seq_len(nrow(result))) { + ci_pixels <- ci_pixels_by_field[[i]] + + if (!is.null(ci_pixels) && length(ci_pixels) > 0) { + ci_pixels <- ci_pixels[!is.na(ci_pixels)] + + if (length(ci_pixels) > 1) { + # Calculate Gini coefficient + # Formula: Gini = (2 * sum(i * x_i)) / (n * sum(x_i)) - (n+1)/n + # where x_i are sorted values + ci_sorted <- sort(ci_pixels) + n <- length(ci_sorted) + + numerator <- 2 * sum(seq_len(n) * ci_sorted) + denominator <- n * sum(ci_sorted) + + gini <- (numerator / denominator) - (n + 1) / n + # Clamp to 0-1 range (should be within this by formula but guard against numerical errors) + gini <- max(0, min(1, gini)) + + result$gini_coefficient[i] <- gini + + # Update interpretation based on calculated Gini + result$patchiness_interpretation[i] <- dplyr::case_when( + gini < 0.2 ~ "Uniform growth", + gini < 0.4 & result$patchiness_risk[i] %in% c("Low", "Medium", "Minimal") ~ "Moderate patchiness", + gini >= 0.4 & result$patchiness_risk[i] %in% c("High", "Medium") ~ "High patchiness", + TRUE ~ "Mixed heterogeneity" + ) + } + } + } + } + + return(result) +} + + # ============================================================================ # KPI ORCHESTRATOR AND REPORTING @@ -459,7 +534,14 @@ calculate_weed_presence_kpi <- function(ci_pixels_by_field) { create_summary_tables <- function(all_kpis) { kpi_summary <- list( uniformity = all_kpis$uniformity %>% - select(field_idx, cv_value, morans_i, uniformity_score, interpretation), + select(field_idx, cv_value, uniformity_category, interpretation), + + spatial_clustering = if (!is.null(all_kpis$spatial_clustering) && nrow(all_kpis$spatial_clustering) > 0) { + all_kpis$spatial_clustering %>% + select(field_idx, morans_i) + } else { + NULL + }, area_change = all_kpis$area_change %>% select(field_idx, mean_ci_pct_change, interpretation), @@ -470,8 +552,8 @@ create_summary_tables <- function(all_kpis) { growth_decline = all_kpis$growth_decline %>% select(field_idx, four_week_trend, trend_interpretation, decline_severity), - weed_pressure = all_kpis$weed_presence %>% - select(field_idx, fragmentation_index, weed_pressure_risk), + patchiness = all_kpis$patchiness %>% + select(field_idx, gini_coefficient, patchiness_interpretation, patchiness_risk), gap_filling = if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) { all_kpis$gap_filling %>% @@ -497,47 +579,83 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee result <- field_boundaries_sf %>% sf::st_drop_geometry() %>% mutate( - field_idx = row_number(), # ADD THIS: match the integer index used in KPI functions + field_idx = row_number(), Field_id = field, Field_name = field, Week = current_week, Year = current_year ) %>% - select(field_idx, Field_id, Field_name, Week, Year) # Include field_idx first + select(field_idx, Field_id, Field_name, Week, Year) - # Join all KPI results (now field_idx matches on both sides) + # ============================================ + # GROUP 1: FIELD UNIFORMITY (KPI 1) + # ============================================ result <- result %>% left_join( all_kpis$uniformity %>% - select(field_idx, CV = cv_value, Uniformity_Score = uniformity_score, - Morans_I = morans_i, Uniformity_Interpretation = interpretation), + select(field_idx, CV = cv_value, + Uniformity_Category = uniformity_category), by = "field_idx" - ) %>% + ) + + # ============================================ + # GROUP 2: GROWTH & TREND ANALYSIS (KPI 2 + KPI 4) + # ============================================ + # KPI 2: Area Change + result <- result %>% left_join( all_kpis$area_change %>% select(field_idx, Weekly_CI_Change = mean_ci_pct_change, Area_Change_Interpretation = interpretation), by = "field_idx" - ) %>% - left_join( - all_kpis$tch_forecasted %>% - select(field_idx, TCH_Forecasted = tch_forecasted), - by = "field_idx" - ) %>% + ) + + # KPI 4: Growth Decline + result <- result %>% left_join( all_kpis$growth_decline %>% select(field_idx, Four_Week_Trend = four_week_trend, Trend_Interpretation = trend_interpretation, Decline_Severity = decline_severity), by = "field_idx" - ) %>% + ) + + # ============================================ + # GROUP 3: FIELD HETEROGENEITY/PATCHINESS (KPI 5 + Spatial Clustering) + # ============================================ + # KPI 5: Field Patchiness + result <- result %>% left_join( - all_kpis$weed_presence %>% - select(field_idx, Fragmentation_Index = fragmentation_index, - Weed_Pressure_Risk = weed_pressure_risk), + all_kpis$patchiness %>% + select(field_idx, Gini_Coefficient = gini_coefficient, Mean_CI = mean_ci, + Patchiness_Interpretation = patchiness_interpretation, + Patchiness_Risk = patchiness_risk), by = "field_idx" ) + # Moran's I (spatial clustering - used in patchiness calculation) + if (!is.null(all_kpis$spatial_clustering) && nrow(all_kpis$spatial_clustering) > 0) { + result <- result %>% + left_join( + all_kpis$spatial_clustering %>% + select(field_idx, Morans_I = morans_i), + by = "field_idx" + ) + } + + # ============================================ + # GROUP 4: YIELD FORECAST (KPI 3) + # ============================================ + result <- result %>% + left_join( + all_kpis$tch_forecasted %>% + select(field_idx, TCH_Forecasted = tch_forecasted), + by = "field_idx" + ) + + # ============================================ + # GROUP 5: DATA QUALITY / GAP FILLING (KPI 6) + # ============================================ # Add gap filling if available if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) { result <- result %>% @@ -548,7 +666,7 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee ) } - # Remove field_idx from final output (it was only needed for joining) + # Remove field_idx from final output result <- result %>% select(-field_idx) @@ -795,9 +913,16 @@ calculate_all_field_analysis_agronomic_support <- function( ci_pixels_by_field ) - message("Calculating KPI 5: Weed Presence...") + message("Calculating KPI 5: Field Patchiness...") weed_kpi <- calculate_weed_presence_kpi(ci_pixels_by_field) + # Convert weed metrics to patchiness metrics (Gini-like + risk combination) + mean_ci_values <- current_stats$Mean_CI + if (is.null(mean_ci_values) || length(mean_ci_values) != nrow(field_boundaries_sf)) { + mean_ci_values <- rep(NA_real_, nrow(field_boundaries_sf)) + } + patchiness_kpi <- calculate_patchiness_kpi(weed_kpi, ci_pixels_by_field, mean_ci_values) + message("Calculating KPI 6: Gap Filling...") # Build list of per-field files for this week per_field_files <- c() @@ -842,10 +967,31 @@ calculate_all_field_analysis_agronomic_support <- function( area_change = area_change_kpi, tch_forecasted = tch_kpi, growth_decline = growth_decline_kpi, - weed_presence = weed_kpi, + patchiness = patchiness_kpi, + spatial_clustering = uniformity_kpi %>% select(field_idx, morans_i), gap_filling = gap_filling_kpi ) + # Deduplicate KPI dataframes to ensure one row per field_idx + # (sometimes joins or calculations can create duplicate rows) + message("Deduplicating KPI results (keeping first occurrence per field)...") + all_kpis$uniformity <- all_kpis$uniformity %>% + distinct(field_idx, .keep_all = TRUE) + all_kpis$area_change <- all_kpis$area_change %>% + distinct(field_idx, .keep_all = TRUE) + all_kpis$tch_forecasted <- all_kpis$tch_forecasted %>% + distinct(field_idx, .keep_all = TRUE) + all_kpis$growth_decline <- all_kpis$growth_decline %>% + distinct(field_idx, .keep_all = TRUE) + all_kpis$patchiness <- all_kpis$patchiness %>% + distinct(field_idx, .keep_all = TRUE) + if (!is.null(all_kpis$spatial_clustering)) { + all_kpis$spatial_clustering <- all_kpis$spatial_clustering %>% + distinct(field_idx, .keep_all = TRUE) + } + all_kpis$gap_filling <- all_kpis$gap_filling %>% + distinct(field_idx, .keep_all = TRUE) + # Built single-sheet field detail table with all KPIs message("\nBuilding comprehensive field detail table...") field_detail_df <- create_field_detail_table( diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 9945571..4d873b2 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -218,10 +218,10 @@ if (dir.exists(kpi_data_dir)) { if (is.null(summary_tables)) { summary_tables <- list() - # 1. Uniformity summary - GROUP BY Uniformity_Interpretation and COUNT - if ("Uniformity_Interpretation" %in% names(field_details_table)) { + # 1. Uniformity summary - GROUP BY Uniformity_Category and COUNT + if ("Uniformity_Category" %in% names(field_details_table)) { summary_tables$uniformity <- field_details_table %>% - group_by(interpretation = Uniformity_Interpretation) %>% + group_by(interpretation = Uniformity_Category) %>% summarise(field_count = n(), .groups = 'drop') safe_log(" ✓ Created uniformity summary") } @@ -242,28 +242,62 @@ if (dir.exists(kpi_data_dir)) { safe_log(" ✓ Created growth_decline summary") } - # 4. Weed pressure summary - GROUP BY Weed_Pressure_Risk and COUNT - if ("Weed_Pressure_Risk" %in% names(field_details_table)) { - summary_tables$weed_pressure <- field_details_table %>% - group_by(weed_pressure_risk = Weed_Pressure_Risk) %>% - summarise(field_count = n(), .groups = 'drop') - safe_log(" ✓ Created weed_pressure summary") - } - - # 5. TCH forecast summary - bin into categories and COUNT - if ("TCH_Forecasted" %in% names(field_details_table)) { - summary_tables$tch_forecast <- field_details_table %>% - filter(!is.na(TCH_Forecasted)) %>% + # 4. Patchiness summary - GROUP BY Patchiness_Risk + Gini ranges + if ("Patchiness_Risk" %in% names(field_details_table) && "Gini_Coefficient" %in% names(field_details_table)) { + summary_tables$patchiness <- field_details_table %>% mutate( - tch_category = case_when( - TCH_Forecasted >= quantile(TCH_Forecasted, 0.75, na.rm = TRUE) ~ "Top 25%", - TCH_Forecasted >= quantile(TCH_Forecasted, 0.25, na.rm = TRUE) ~ "Average", - TRUE ~ "Lowest 25%" + gini_category = case_when( + Gini_Coefficient < 0.2 ~ "Uniform (Gini<0.2)", + Gini_Coefficient < 0.4 ~ "Moderate (Gini 0.2-0.4)", + TRUE ~ "High (Gini≥0.4)" ) ) %>% - group_by(tch_category) %>% + group_by(gini_category, patchiness_risk = Patchiness_Risk) %>% summarise(field_count = n(), .groups = 'drop') - safe_log(" ✓ Created tch_forecast summary") + safe_log(" ✓ Created patchiness summary") + } + + # 5. TCH forecast summary - show actual value ranges (quartiles) instead of counts + if ("TCH_Forecasted" %in% names(field_details_table)) { + tch_values <- field_details_table %>% + filter(!is.na(TCH_Forecasted)) %>% + pull(TCH_Forecasted) + + if (length(tch_values) > 0) { + # Defensive check: if all TCH values are identical, handle as special case + if (length(unique(tch_values)) == 1) { + # All values are identical + identical_value <- tch_values[1] + summary_tables$tch_forecast <- tibble::tibble( + tch_category = "All equal", + range = paste0(round(identical_value, 1), " t/ha"), + field_count = length(tch_values) + ) + safe_log(" ✓ Created tch_forecast summary (all TCH values identical)") + } else { + # Multiple distinct values - use three-quartile approach + q25 <- quantile(tch_values, 0.25, na.rm = TRUE) + q50 <- quantile(tch_values, 0.50, na.rm = TRUE) + q75 <- quantile(tch_values, 0.75, na.rm = TRUE) + min_val <- min(tch_values, na.rm = TRUE) + max_val <- max(tch_values, na.rm = TRUE) + + summary_tables$tch_forecast <- tibble::tibble( + tch_category = c("Top 25%", "Middle 50%", "Bottom 25%"), + range = c( + paste0(round(q75, 1), "-", round(max_val, 1), " t/ha"), + paste0(round(q25, 1), "-", round(q75, 1), " t/ha"), + paste0(round(min_val, 1), "-", round(q25, 1), " t/ha") + ), + field_count = c( + nrow(field_details_table %>% filter(TCH_Forecasted >= q75, !is.na(TCH_Forecasted))), + nrow(field_details_table %>% filter(TCH_Forecasted >= q25 & TCH_Forecasted < q75, !is.na(TCH_Forecasted))), + nrow(field_details_table %>% filter(TCH_Forecasted < q25, !is.na(TCH_Forecasted))) + ) + ) + safe_log(" ✓ Created tch_forecast summary with value ranges") + } + } } # 6. Gap filling summary - GROUP BY Gap_Level and COUNT @@ -544,22 +578,29 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table kpi_display_order <- list( uniformity = list(display = "Field Uniformity", level_col = "interpretation", count_col = "field_count"), area_change = list(display = "Area Change", level_col = "interpretation", count_col = "field_count"), - tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", count_col = "field_count"), - growth_decline = list(display = "Growth Decline", level_col = "trend_interpretation", count_col = "field_count"), - weed_pressure = list(display = "Weed Presence", level_col = "weed_pressure_risk", count_col = "field_count"), + growth_decline = list(display = "Growth Decline (4-Week Trend)", level_col = "trend_interpretation", count_col = "field_count"), + patchiness = list(display = "Field Patchiness", level_col = "gini_category", count_col = "field_count", detail_col = "patchiness_risk"), + tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", detail_col = "range", count_col = "field_count"), gap_filling = list(display = "Gap Filling", level_col = "gap_level", count_col = "field_count") ) - standardize_kpi <- function(df, level_col, count_col) { + standardize_kpi <- function(df, level_col, count_col, detail_col = NULL) { if (is.null(level_col) || !(level_col %in% names(df)) || is.null(count_col) || !(count_col %in% names(df))) { return(NULL) } total <- sum(df[[count_col]], na.rm = TRUE) total <- ifelse(total == 0, NA_real_, total) + # If detail_col is specified, use it as the Level instead + if (!is.null(detail_col) && detail_col %in% names(df)) { + display_level <- df[[detail_col]] + } else { + display_level <- df[[level_col]] + } + df %>% dplyr::transmute( - Level = as.character(.data[[level_col]]), + Level = as.character(display_level), Count = as.integer(round(as.numeric(.data[[count_col]]))), Percent = if (is.na(total)) { NA_real_ @@ -578,17 +619,9 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table kpi_df <- summary_tables[[kpi_key]] if (is.null(kpi_df) || !is.data.frame(kpi_df) || nrow(kpi_df) == 0) next - kpi_rows <- standardize_kpi(kpi_df, config$level_col, config$count_col) - if (is.null(kpi_rows) && kpi_key %in% c("tch_forecast", "gap_filling")) { - numeric_cols <- names(kpi_df)[vapply(kpi_df, is.numeric, logical(1))] - if (length(numeric_cols) > 0) { - kpi_rows <- tibble::tibble( - Level = numeric_cols, - Count = round(as.numeric(kpi_df[1, numeric_cols]), 2), - Percent = NA_real_ - ) - } - } + # Pass detail_col if it exists in config + detail_col <- if (!is.null(config$detail_col)) config$detail_col else NULL + kpi_rows <- standardize_kpi(kpi_df, config$level_col, config$count_col, detail_col) if (!is.null(kpi_rows)) { kpi_rows$KPI <- config$display