Add field patchiness KPI calculation and update summaries; replace weed pressure metrics with patchiness metrics in reports

This commit is contained in:
Timon 2026-02-17 21:57:34 +01:00
parent 34159b3003
commit 253ff51ca2
2 changed files with 238 additions and 59 deletions

View file

@ -64,6 +64,7 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_
cv_value = numeric(),
morans_i = numeric(),
uniformity_score = numeric(),
uniformity_category = character(),
interpretation = character(),
stringsAsFactors = FALSE
)
@ -80,6 +81,7 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_
cv_value = NA_real_,
morans_i = NA_real_,
uniformity_score = NA_real_,
uniformity_category = "No data",
interpretation = "No data",
stringsAsFactors = FALSE
))
@ -142,16 +144,22 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_
# Interpretation
if (is.na(cv_val)) {
interpretation <- "No data"
uniformity_category <- "No data"
} else if (cv_val < 0.08) {
interpretation <- "Excellent uniformity"
uniformity_category <- "Excellent"
} else if (cv_val < 0.15) {
interpretation <- "Good uniformity"
uniformity_category <- "Good"
} else if (cv_val < 0.25) {
interpretation <- "Acceptable uniformity"
uniformity_category <- "Acceptable"
} else if (cv_val < 0.4) {
interpretation <- "Poor uniformity"
uniformity_category <- "Poor"
} else {
interpretation <- "Very poor uniformity"
uniformity_category <- "Very poor"
}
result <- rbind(result, data.frame(
@ -159,6 +167,7 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_
cv_value = cv_val,
morans_i = morans_i,
uniformity_score = round(uniformity_score, 3),
uniformity_category = uniformity_category,
interpretation = interpretation,
stringsAsFactors = FALSE
))
@ -446,6 +455,72 @@ calculate_weed_presence_kpi <- function(ci_pixels_by_field) {
return(result)
}
#' KPI 5: Calculate field patchiness (combines fragmentation into Gini-like metric + risk)
#'
#' @param weed_kpi_result Data frame from calculate_weed_presence_kpi()
#' @param mean_ci_values Optional vector of mean CI values per field
#'
#' @return Data frame with patchiness indicators (gini_coefficient, patchiness_risk, interpretation)
calculate_patchiness_kpi <- function(weed_kpi_result, ci_pixels_by_field = NULL, mean_ci_values = NULL) {
result <- weed_kpi_result %>%
mutate(
# Calculate Gini coefficient from CI pixels (proper calculation)
gini_coefficient = NA_real_,
mean_ci = if (!is.null(mean_ci_values)) mean_ci_values[field_idx] else NA_real_,
# Map weed_pressure_risk to patchiness_risk
patchiness_risk = weed_pressure_risk,
# Create interpretation based on gini and risk
patchiness_interpretation = case_when(
is.na(gini_coefficient) ~ "No data",
gini_coefficient < 0.2 & patchiness_risk %in% c("Low", "Minimal") ~ "Uniform growth",
gini_coefficient < 0.4 & patchiness_risk %in% c("Low", "Medium") ~ "Moderate patchiness",
gini_coefficient >= 0.4 & patchiness_risk %in% c("High", "Medium") ~ "High patchiness",
TRUE ~ "Mixed heterogeneity"
)
) %>%
select(field_idx, gini_coefficient, mean_ci,
patchiness_interpretation, patchiness_risk)
# Calculate actual Gini coefficients if CI pixels provided
if (!is.null(ci_pixels_by_field)) {
for (i in seq_len(nrow(result))) {
ci_pixels <- ci_pixels_by_field[[i]]
if (!is.null(ci_pixels) && length(ci_pixels) > 0) {
ci_pixels <- ci_pixels[!is.na(ci_pixels)]
if (length(ci_pixels) > 1) {
# Calculate Gini coefficient
# Formula: Gini = (2 * sum(i * x_i)) / (n * sum(x_i)) - (n+1)/n
# where x_i are sorted values
ci_sorted <- sort(ci_pixels)
n <- length(ci_sorted)
numerator <- 2 * sum(seq_len(n) * ci_sorted)
denominator <- n * sum(ci_sorted)
gini <- (numerator / denominator) - (n + 1) / n
# Clamp to 0-1 range (should be within this by formula but guard against numerical errors)
gini <- max(0, min(1, gini))
result$gini_coefficient[i] <- gini
# Update interpretation based on calculated Gini
result$patchiness_interpretation[i] <- dplyr::case_when(
gini < 0.2 ~ "Uniform growth",
gini < 0.4 & result$patchiness_risk[i] %in% c("Low", "Medium", "Minimal") ~ "Moderate patchiness",
gini >= 0.4 & result$patchiness_risk[i] %in% c("High", "Medium") ~ "High patchiness",
TRUE ~ "Mixed heterogeneity"
)
}
}
}
}
return(result)
}
# ============================================================================
# KPI ORCHESTRATOR AND REPORTING
@ -459,7 +534,14 @@ calculate_weed_presence_kpi <- function(ci_pixels_by_field) {
create_summary_tables <- function(all_kpis) {
kpi_summary <- list(
uniformity = all_kpis$uniformity %>%
select(field_idx, cv_value, morans_i, uniformity_score, interpretation),
select(field_idx, cv_value, uniformity_category, interpretation),
spatial_clustering = if (!is.null(all_kpis$spatial_clustering) && nrow(all_kpis$spatial_clustering) > 0) {
all_kpis$spatial_clustering %>%
select(field_idx, morans_i)
} else {
NULL
},
area_change = all_kpis$area_change %>%
select(field_idx, mean_ci_pct_change, interpretation),
@ -470,8 +552,8 @@ create_summary_tables <- function(all_kpis) {
growth_decline = all_kpis$growth_decline %>%
select(field_idx, four_week_trend, trend_interpretation, decline_severity),
weed_pressure = all_kpis$weed_presence %>%
select(field_idx, fragmentation_index, weed_pressure_risk),
patchiness = all_kpis$patchiness %>%
select(field_idx, gini_coefficient, patchiness_interpretation, patchiness_risk),
gap_filling = if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) {
all_kpis$gap_filling %>%
@ -497,47 +579,83 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee
result <- field_boundaries_sf %>%
sf::st_drop_geometry() %>%
mutate(
field_idx = row_number(), # ADD THIS: match the integer index used in KPI functions
field_idx = row_number(),
Field_id = field,
Field_name = field,
Week = current_week,
Year = current_year
) %>%
select(field_idx, Field_id, Field_name, Week, Year) # Include field_idx first
select(field_idx, Field_id, Field_name, Week, Year)
# Join all KPI results (now field_idx matches on both sides)
# ============================================
# GROUP 1: FIELD UNIFORMITY (KPI 1)
# ============================================
result <- result %>%
left_join(
all_kpis$uniformity %>%
select(field_idx, CV = cv_value, Uniformity_Score = uniformity_score,
Morans_I = morans_i, Uniformity_Interpretation = interpretation),
select(field_idx, CV = cv_value,
Uniformity_Category = uniformity_category),
by = "field_idx"
) %>%
)
# ============================================
# GROUP 2: GROWTH & TREND ANALYSIS (KPI 2 + KPI 4)
# ============================================
# KPI 2: Area Change
result <- result %>%
left_join(
all_kpis$area_change %>%
select(field_idx, Weekly_CI_Change = mean_ci_pct_change,
Area_Change_Interpretation = interpretation),
by = "field_idx"
) %>%
left_join(
all_kpis$tch_forecasted %>%
select(field_idx, TCH_Forecasted = tch_forecasted),
by = "field_idx"
) %>%
)
# KPI 4: Growth Decline
result <- result %>%
left_join(
all_kpis$growth_decline %>%
select(field_idx, Four_Week_Trend = four_week_trend,
Trend_Interpretation = trend_interpretation,
Decline_Severity = decline_severity),
by = "field_idx"
) %>%
)
# ============================================
# GROUP 3: FIELD HETEROGENEITY/PATCHINESS (KPI 5 + Spatial Clustering)
# ============================================
# KPI 5: Field Patchiness
result <- result %>%
left_join(
all_kpis$weed_presence %>%
select(field_idx, Fragmentation_Index = fragmentation_index,
Weed_Pressure_Risk = weed_pressure_risk),
all_kpis$patchiness %>%
select(field_idx, Gini_Coefficient = gini_coefficient, Mean_CI = mean_ci,
Patchiness_Interpretation = patchiness_interpretation,
Patchiness_Risk = patchiness_risk),
by = "field_idx"
)
# Moran's I (spatial clustering - used in patchiness calculation)
if (!is.null(all_kpis$spatial_clustering) && nrow(all_kpis$spatial_clustering) > 0) {
result <- result %>%
left_join(
all_kpis$spatial_clustering %>%
select(field_idx, Morans_I = morans_i),
by = "field_idx"
)
}
# ============================================
# GROUP 4: YIELD FORECAST (KPI 3)
# ============================================
result <- result %>%
left_join(
all_kpis$tch_forecasted %>%
select(field_idx, TCH_Forecasted = tch_forecasted),
by = "field_idx"
)
# ============================================
# GROUP 5: DATA QUALITY / GAP FILLING (KPI 6)
# ============================================
# Add gap filling if available
if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) {
result <- result %>%
@ -548,7 +666,7 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee
)
}
# Remove field_idx from final output (it was only needed for joining)
# Remove field_idx from final output
result <- result %>%
select(-field_idx)
@ -795,9 +913,16 @@ calculate_all_field_analysis_agronomic_support <- function(
ci_pixels_by_field
)
message("Calculating KPI 5: Weed Presence...")
message("Calculating KPI 5: Field Patchiness...")
weed_kpi <- calculate_weed_presence_kpi(ci_pixels_by_field)
# Convert weed metrics to patchiness metrics (Gini-like + risk combination)
mean_ci_values <- current_stats$Mean_CI
if (is.null(mean_ci_values) || length(mean_ci_values) != nrow(field_boundaries_sf)) {
mean_ci_values <- rep(NA_real_, nrow(field_boundaries_sf))
}
patchiness_kpi <- calculate_patchiness_kpi(weed_kpi, ci_pixels_by_field, mean_ci_values)
message("Calculating KPI 6: Gap Filling...")
# Build list of per-field files for this week
per_field_files <- c()
@ -842,10 +967,31 @@ calculate_all_field_analysis_agronomic_support <- function(
area_change = area_change_kpi,
tch_forecasted = tch_kpi,
growth_decline = growth_decline_kpi,
weed_presence = weed_kpi,
patchiness = patchiness_kpi,
spatial_clustering = uniformity_kpi %>% select(field_idx, morans_i),
gap_filling = gap_filling_kpi
)
# Deduplicate KPI dataframes to ensure one row per field_idx
# (sometimes joins or calculations can create duplicate rows)
message("Deduplicating KPI results (keeping first occurrence per field)...")
all_kpis$uniformity <- all_kpis$uniformity %>%
distinct(field_idx, .keep_all = TRUE)
all_kpis$area_change <- all_kpis$area_change %>%
distinct(field_idx, .keep_all = TRUE)
all_kpis$tch_forecasted <- all_kpis$tch_forecasted %>%
distinct(field_idx, .keep_all = TRUE)
all_kpis$growth_decline <- all_kpis$growth_decline %>%
distinct(field_idx, .keep_all = TRUE)
all_kpis$patchiness <- all_kpis$patchiness %>%
distinct(field_idx, .keep_all = TRUE)
if (!is.null(all_kpis$spatial_clustering)) {
all_kpis$spatial_clustering <- all_kpis$spatial_clustering %>%
distinct(field_idx, .keep_all = TRUE)
}
all_kpis$gap_filling <- all_kpis$gap_filling %>%
distinct(field_idx, .keep_all = TRUE)
# Built single-sheet field detail table with all KPIs
message("\nBuilding comprehensive field detail table...")
field_detail_df <- create_field_detail_table(

View file

@ -218,10 +218,10 @@ if (dir.exists(kpi_data_dir)) {
if (is.null(summary_tables)) {
summary_tables <- list()
# 1. Uniformity summary - GROUP BY Uniformity_Interpretation and COUNT
if ("Uniformity_Interpretation" %in% names(field_details_table)) {
# 1. Uniformity summary - GROUP BY Uniformity_Category and COUNT
if ("Uniformity_Category" %in% names(field_details_table)) {
summary_tables$uniformity <- field_details_table %>%
group_by(interpretation = Uniformity_Interpretation) %>%
group_by(interpretation = Uniformity_Category) %>%
summarise(field_count = n(), .groups = 'drop')
safe_log(" ✓ Created uniformity summary")
}
@ -242,28 +242,62 @@ if (dir.exists(kpi_data_dir)) {
safe_log(" ✓ Created growth_decline summary")
}
# 4. Weed pressure summary - GROUP BY Weed_Pressure_Risk and COUNT
if ("Weed_Pressure_Risk" %in% names(field_details_table)) {
summary_tables$weed_pressure <- field_details_table %>%
group_by(weed_pressure_risk = Weed_Pressure_Risk) %>%
summarise(field_count = n(), .groups = 'drop')
safe_log(" ✓ Created weed_pressure summary")
}
# 5. TCH forecast summary - bin into categories and COUNT
if ("TCH_Forecasted" %in% names(field_details_table)) {
summary_tables$tch_forecast <- field_details_table %>%
filter(!is.na(TCH_Forecasted)) %>%
# 4. Patchiness summary - GROUP BY Patchiness_Risk + Gini ranges
if ("Patchiness_Risk" %in% names(field_details_table) && "Gini_Coefficient" %in% names(field_details_table)) {
summary_tables$patchiness <- field_details_table %>%
mutate(
tch_category = case_when(
TCH_Forecasted >= quantile(TCH_Forecasted, 0.75, na.rm = TRUE) ~ "Top 25%",
TCH_Forecasted >= quantile(TCH_Forecasted, 0.25, na.rm = TRUE) ~ "Average",
TRUE ~ "Lowest 25%"
gini_category = case_when(
Gini_Coefficient < 0.2 ~ "Uniform (Gini<0.2)",
Gini_Coefficient < 0.4 ~ "Moderate (Gini 0.2-0.4)",
TRUE ~ "High (Gini≥0.4)"
)
) %>%
group_by(tch_category) %>%
group_by(gini_category, patchiness_risk = Patchiness_Risk) %>%
summarise(field_count = n(), .groups = 'drop')
safe_log(" ✓ Created tch_forecast summary")
safe_log(" ✓ Created patchiness summary")
}
# 5. TCH forecast summary - show actual value ranges (quartiles) instead of counts
if ("TCH_Forecasted" %in% names(field_details_table)) {
tch_values <- field_details_table %>%
filter(!is.na(TCH_Forecasted)) %>%
pull(TCH_Forecasted)
if (length(tch_values) > 0) {
# Defensive check: if all TCH values are identical, handle as special case
if (length(unique(tch_values)) == 1) {
# All values are identical
identical_value <- tch_values[1]
summary_tables$tch_forecast <- tibble::tibble(
tch_category = "All equal",
range = paste0(round(identical_value, 1), " t/ha"),
field_count = length(tch_values)
)
safe_log(" ✓ Created tch_forecast summary (all TCH values identical)")
} else {
# Multiple distinct values - use three-quartile approach
q25 <- quantile(tch_values, 0.25, na.rm = TRUE)
q50 <- quantile(tch_values, 0.50, na.rm = TRUE)
q75 <- quantile(tch_values, 0.75, na.rm = TRUE)
min_val <- min(tch_values, na.rm = TRUE)
max_val <- max(tch_values, na.rm = TRUE)
summary_tables$tch_forecast <- tibble::tibble(
tch_category = c("Top 25%", "Middle 50%", "Bottom 25%"),
range = c(
paste0(round(q75, 1), "-", round(max_val, 1), " t/ha"),
paste0(round(q25, 1), "-", round(q75, 1), " t/ha"),
paste0(round(min_val, 1), "-", round(q25, 1), " t/ha")
),
field_count = c(
nrow(field_details_table %>% filter(TCH_Forecasted >= q75, !is.na(TCH_Forecasted))),
nrow(field_details_table %>% filter(TCH_Forecasted >= q25 & TCH_Forecasted < q75, !is.na(TCH_Forecasted))),
nrow(field_details_table %>% filter(TCH_Forecasted < q25, !is.na(TCH_Forecasted)))
)
)
safe_log(" ✓ Created tch_forecast summary with value ranges")
}
}
}
# 6. Gap filling summary - GROUP BY Gap_Level and COUNT
@ -544,22 +578,29 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table
kpi_display_order <- list(
uniformity = list(display = "Field Uniformity", level_col = "interpretation", count_col = "field_count"),
area_change = list(display = "Area Change", level_col = "interpretation", count_col = "field_count"),
tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", count_col = "field_count"),
growth_decline = list(display = "Growth Decline", level_col = "trend_interpretation", count_col = "field_count"),
weed_pressure = list(display = "Weed Presence", level_col = "weed_pressure_risk", count_col = "field_count"),
growth_decline = list(display = "Growth Decline (4-Week Trend)", level_col = "trend_interpretation", count_col = "field_count"),
patchiness = list(display = "Field Patchiness", level_col = "gini_category", count_col = "field_count", detail_col = "patchiness_risk"),
tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", detail_col = "range", count_col = "field_count"),
gap_filling = list(display = "Gap Filling", level_col = "gap_level", count_col = "field_count")
)
standardize_kpi <- function(df, level_col, count_col) {
standardize_kpi <- function(df, level_col, count_col, detail_col = NULL) {
if (is.null(level_col) || !(level_col %in% names(df)) || is.null(count_col) || !(count_col %in% names(df))) {
return(NULL)
}
total <- sum(df[[count_col]], na.rm = TRUE)
total <- ifelse(total == 0, NA_real_, total)
# If detail_col is specified, use it as the Level instead
if (!is.null(detail_col) && detail_col %in% names(df)) {
display_level <- df[[detail_col]]
} else {
display_level <- df[[level_col]]
}
df %>%
dplyr::transmute(
Level = as.character(.data[[level_col]]),
Level = as.character(display_level),
Count = as.integer(round(as.numeric(.data[[count_col]]))),
Percent = if (is.na(total)) {
NA_real_
@ -578,17 +619,9 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table
kpi_df <- summary_tables[[kpi_key]]
if (is.null(kpi_df) || !is.data.frame(kpi_df) || nrow(kpi_df) == 0) next
kpi_rows <- standardize_kpi(kpi_df, config$level_col, config$count_col)
if (is.null(kpi_rows) && kpi_key %in% c("tch_forecast", "gap_filling")) {
numeric_cols <- names(kpi_df)[vapply(kpi_df, is.numeric, logical(1))]
if (length(numeric_cols) > 0) {
kpi_rows <- tibble::tibble(
Level = numeric_cols,
Count = round(as.numeric(kpi_df[1, numeric_cols]), 2),
Percent = NA_real_
)
}
}
# Pass detail_col if it exists in config
detail_col <- if (!is.null(config$detail_col)) config$detail_col else NULL
kpi_rows <- standardize_kpi(kpi_df, config$level_col, config$count_col, detail_col)
if (!is.null(kpi_rows)) {
kpi_rows$KPI <- config$display