Refactor gap filling score calculations to use 1σ method; ensure all fields are included in output
This commit is contained in:
parent
e0bfbccf0e
commit
83951efca2
|
|
@ -80,7 +80,8 @@ calculate_field_acreages <- function(field_boundaries_sf, unit = AREA_UNIT_PREFE
|
|||
|
||||
result_df <- data.frame(
|
||||
field = field_names,
|
||||
warning(paste("Could not calculate areas from geometries -", e$message)) stringsAsFactors = FALSE
|
||||
area = areas,
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
colnames(result_df) <- c("field", col_name)
|
||||
|
||||
|
|
@ -213,14 +214,19 @@ calculate_status_alert <- function(imminent_prob, age_week, mean_ci,
|
|||
|
||||
|
||||
|
||||
#' Build complete per-field KPI dataframe with all 22 columns
|
||||
#' @param current_stats data.frame with current week statistics from load_or_calculate_weekly_stats
|
||||
#' Build complete per-field KPI dataframe - ALWAYS ALL FIELDS from GeoJSON
|
||||
#'
|
||||
#' CRITICAL: Always outputs all fields from field_boundaries_sf, regardless of data availability.
|
||||
#' Fields without satellite data this week have all KPI columns set to NA.
|
||||
#' This is dynamic - output row count equals nrow(field_boundaries_sf), not hardcoded.
|
||||
#'
|
||||
#' @param current_stats data.frame with current week statistics (may have fewer rows if data is incomplete)
|
||||
#' @param planting_dates data.frame with field_id and planting_date columns
|
||||
#' @param imminent_prob_data data.frame with Field_id and Imminent_prob_actual columns (or NULL)
|
||||
#' @param gap_scores_df data.frame with Field_id and gap_score columns (or NULL)
|
||||
#' @param field_boundaries_sf sf object with field geometries
|
||||
#' @param gap_scores_df data.frame with Field_id and gap_score columns (guaranteed to have all fields)
|
||||
#' @param field_boundaries_sf sf object with field geometries (authoritative source of all field IDs)
|
||||
#' @param end_date Date object for current report date
|
||||
#' @return data.frame with all 22 KPI columns
|
||||
#' @return data.frame with 22 columns and nrow(field_boundaries_sf) rows (all fields, NAs for missing data)
|
||||
calculate_all_field_kpis <- function(current_stats,
|
||||
planting_dates,
|
||||
imminent_prob_data,
|
||||
|
|
@ -230,6 +236,14 @@ calculate_all_field_kpis <- function(current_stats,
|
|||
|
||||
message("\nBuilding final field analysis output...")
|
||||
|
||||
# CRITICAL: Start with ALL fields from field_boundaries_sf (dynamic, based on GeoJSON)
|
||||
# This ensures output always includes all fields, even if they lack satellite data
|
||||
all_fields <- field_boundaries_sf %>%
|
||||
sf::st_drop_geometry() %>%
|
||||
mutate(Field_id = field) %>%
|
||||
select(Field_id) %>%
|
||||
distinct()
|
||||
|
||||
# Pre-calculate areas using unified function
|
||||
acreage_lookup <- calculate_field_acreages(field_boundaries_sf, unit = AREA_UNIT_PREFERENCE)
|
||||
|
||||
|
|
@ -237,7 +251,10 @@ calculate_all_field_kpis <- function(current_stats,
|
|||
unit_label <- get_area_unit_label(AREA_UNIT_PREFERENCE)
|
||||
area_col_name <- paste0("Area_", unit_label)
|
||||
|
||||
field_analysis_df <- current_stats %>%
|
||||
# Left-join current_stats to all_fields to preserve all field IDs
|
||||
# Fields without data in current_stats will have NA values
|
||||
field_analysis_df <- all_fields %>%
|
||||
left_join(current_stats, by = "Field_id") %>%
|
||||
mutate(
|
||||
# Column 2: Farm_Section (user fills manually)
|
||||
Farm_Section = NA_character_,
|
||||
|
|
@ -258,7 +275,7 @@ calculate_all_field_kpis <- function(current_stats,
|
|||
|
||||
# Column 9: Age_week (calculated)
|
||||
Age_week = {
|
||||
sapply(seq_len(nrow(current_stats)), function(idx) {
|
||||
sapply(seq_len(n()), function(idx) {
|
||||
calculate_age_week(Last_harvest_or_planting_date[idx], end_date)
|
||||
})
|
||||
},
|
||||
|
|
@ -274,13 +291,13 @@ calculate_all_field_kpis <- function(current_stats,
|
|||
if (!is.null(imminent_prob_data)) {
|
||||
as.numeric(imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)])
|
||||
} else {
|
||||
rep(NA_real_, nrow(current_stats))
|
||||
rep(NA_real_, n())
|
||||
}
|
||||
},
|
||||
|
||||
# Column 14: Status_Alert (multi-priority logic for harvest/milling workflow)
|
||||
Status_Alert = {
|
||||
sapply(seq_len(nrow(current_stats)), function(idx) {
|
||||
sapply(seq_len(n()), function(idx) {
|
||||
calculate_status_alert(
|
||||
imminent_prob = Imminent_prob[idx],
|
||||
age_week = Age_week[idx],
|
||||
|
|
@ -293,17 +310,17 @@ calculate_all_field_kpis <- function(current_stats,
|
|||
},
|
||||
|
||||
# Column 19b: CV_Trend_Long_Term_Category (categorical slope)
|
||||
CV_Trend_Long_Term_Category = sapply(current_stats$CV_Trend_Long_Term, categorize_cv_trend_long_term),
|
||||
CV_Trend_Long_Term_Category = sapply(CV_Trend_Long_Term, categorize_cv_trend_long_term),
|
||||
|
||||
# Column 21: Cloud_pct_clear (binned into intervals)
|
||||
Cloud_pct_clear = sapply(Cloud_pct_clear, bin_percentage),
|
||||
|
||||
# Column 22: Gap_score (2σ method)
|
||||
# Column 22: Gap_score (1σ method); gap_scores_df now has all fields
|
||||
Gap_score = {
|
||||
if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
|
||||
gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)]
|
||||
gap_scores_df$gap_score[match(Field_id, gap_scores_df$Field_id)]
|
||||
} else {
|
||||
rep(NA_real_, nrow(current_stats))
|
||||
rep(NA_real_, n())
|
||||
}
|
||||
}
|
||||
) %>%
|
||||
|
|
@ -316,7 +333,7 @@ calculate_all_field_kpis <- function(current_stats,
|
|||
"Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score"))
|
||||
)
|
||||
|
||||
message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns"))
|
||||
message(paste("✓ Built final output with", nrow(field_analysis_df), "fields (from GeoJSON) and 22 columns"))
|
||||
|
||||
return(field_analysis_df)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -465,7 +465,7 @@ calculate_regression_slope <- function(values, decimal_places = 2) {
|
|||
})
|
||||
}
|
||||
|
||||
#' Calculate Gap Filling Score KPI (2σ method)
|
||||
#' Calculate Gap Filling Score KPI (1σ method)
|
||||
#' @param ci_raster Current week CI raster (single band)
|
||||
#' @param field_boundaries Field boundaries (sf or SpatVector)
|
||||
#' @return List with summary data frame and field-level results data frame
|
||||
|
|
@ -490,7 +490,7 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
|||
valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)]
|
||||
|
||||
if (length(valid_values) > 1) {
|
||||
# Gap score using 2σ below median to detect outliers
|
||||
# Gap score using 1σ below median to detect outliers
|
||||
median_ci <- median(valid_values)
|
||||
sd_ci <- sd(valid_values)
|
||||
outlier_threshold <- median_ci - (1 * sd_ci)
|
||||
|
|
@ -544,8 +544,17 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
|||
#' @param field_boundaries_sf sf object with field geometries
|
||||
#' @return data.frame with Field_id and gap_score columns
|
||||
calculate_gap_scores <- function(per_field_files, field_boundaries_sf) {
|
||||
message("\nCalculating gap filling scores (2σ method)...")
|
||||
message(paste(" Using per-field mosaics for", length(per_field_files), "fields"))
|
||||
message("\nCalculating gap filling scores (1σ method)...")
|
||||
message(paste(" Using per-field mosaics for", nrow(field_boundaries_sf), "fields"))
|
||||
|
||||
# Initialize results with ALL field IDs (1185 for angata), defaulting to NA
|
||||
# This ensures that even fields without satellite data this week are included in output
|
||||
all_field_ids <- field_boundaries_sf$field
|
||||
gap_scores_df <- data.frame(
|
||||
Field_id = all_field_ids,
|
||||
gap_score = NA_real_,
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
|
||||
field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field)
|
||||
|
||||
|
|
@ -583,9 +592,11 @@ calculate_gap_scores <- function(per_field_files, field_boundaries_sf) {
|
|||
})
|
||||
}
|
||||
|
||||
# Process fields sequentially with progress bar
|
||||
message(" Processing gap scores for ", length(per_field_files), " fields...")
|
||||
pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50)
|
||||
# Process only fields that have per-field mosaic files
|
||||
n_files <- length(per_field_files)
|
||||
if (n_files > 0) {
|
||||
message(" Processing gap scores for", n_files, "available per-field mosaics...")
|
||||
pb <- utils::txtProgressBar(min = 0, max = n_files, style = 3, width = 50)
|
||||
|
||||
results_list <- lapply(seq_along(per_field_files), function(idx) {
|
||||
result <- process_gap_for_field(per_field_files[[idx]])
|
||||
|
|
@ -594,26 +605,30 @@ calculate_gap_scores <- function(per_field_files, field_boundaries_sf) {
|
|||
})
|
||||
close(pb)
|
||||
|
||||
gap_scores_df <- dplyr::bind_rows(results_list)
|
||||
|
||||
if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
|
||||
gap_scores_df <- gap_scores_df %>%
|
||||
# Merge calculated results into the complete field list
|
||||
results_df <- dplyr::bind_rows(results_list) %>%
|
||||
dplyr::group_by(Field_id) %>%
|
||||
dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop")
|
||||
|
||||
message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields"))
|
||||
# Update gap_scores_df with calculated values (overwrite NAs where we have data)
|
||||
gap_scores_df <- gap_scores_df %>%
|
||||
left_join(results_df, by = "Field_id") %>%
|
||||
mutate(gap_score = coalesce(gap_score.y, gap_score.x)) %>%
|
||||
select(Field_id, gap_score)
|
||||
} else {
|
||||
message(" No per-field mosaic files available - all gap scores will be NA")
|
||||
}
|
||||
|
||||
# Always count how many fields got actual calculations (not NA)
|
||||
n_calculated <- sum(!is.na(gap_scores_df$gap_score))
|
||||
message(paste(" ✓ Gap scores for", nrow(gap_scores_df), "fields (", n_calculated, "calculated,",
|
||||
nrow(gap_scores_df) - n_calculated, "no data)"))
|
||||
|
||||
# Guard against all-NA values which would produce Inf/-Inf warnings
|
||||
if (any(is.finite(gap_scores_df$gap_score))) {
|
||||
min_score <- round(min(gap_scores_df$gap_score, na.rm = TRUE), 2)
|
||||
max_score <- round(max(gap_scores_df$gap_score, na.rm = TRUE), 2)
|
||||
message(paste(" Gap score range:", min_score, "-", max_score, "%"))
|
||||
} else {
|
||||
message(" Gap score range: All values are NA (no valid gap scores)")
|
||||
}
|
||||
} else {
|
||||
message(" WARNING: No gap scores calculated from per-field mosaics")
|
||||
gap_scores_df <- NULL
|
||||
}
|
||||
|
||||
return(gap_scores_df)
|
||||
|
|
|
|||
|
|
@ -382,7 +382,7 @@
|
|||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R [END_DATE] [PROJECT] [OFFSET]
|
||||
#
|
||||
# Example (2026-02-09, angata, 7-day lookback):
|
||||
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-02-09 angata 7
|
||||
#
|
||||
#
|
||||
# EXPECTED OUTPUT:
|
||||
# File: {PROJECT}_field_analysis_week{WW}_{YYYY}.xlsx
|
||||
|
|
@ -456,8 +456,8 @@ rmarkdown::render(
|
|||
# rmarkdown::render(
|
||||
rmarkdown::render(
|
||||
"r_app/91_CI_report_with_kpis_cane_supply.Rmd",
|
||||
params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
|
||||
output_file = "SmartCane_Report_cane_supply_angata_2026-02-04.docx",
|
||||
params = list(data_dir = "angata", report_date = as.Date("2026-02-23")),
|
||||
output_file = "SmartCane_Report_cane_supply_angata_2026-02-23_en.docx",
|
||||
output_dir = "laravel_app/storage/app/angata/reports"
|
||||
)
|
||||
#
|
||||
|
|
|
|||
Loading…
Reference in a new issue