Add gap score calculation to common utils

This commit is contained in:
DimitraVeropoulou 2026-02-16 14:46:16 +01:00
parent 8e2d796b41
commit 35e474cf5c

View file

@ -355,11 +355,10 @@ calculate_cv_trend_long_term <- function(cv_values) {
}
#' Calculate Gap Filling Score KPI (2σ method)
#' @param ci_raster Current week CI raster
#' @param field_boundaries Field boundaries
#' @return Data frame with field-level gap filling scores
#' @param ci_raster Current week CI raster (single band)
#' @param field_boundaries Field boundaries (sf or SpatVector)
#' @return List with summary data frame and field-level results data frame
calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
safe_log("Calculating Gap Filling Score KPI (placeholder)")
# Handle both sf and SpatVector inputs
if (!inherits(field_boundaries, "SpatVector")) {
@ -368,19 +367,11 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
field_boundaries_vect <- field_boundaries
}
# Ensure field_boundaries_vect is valid and matches field_boundaries dimensions
n_fields_vect <- length(field_boundaries_vect)
n_fields_sf <- nrow(field_boundaries)
if (n_fields_sf != n_fields_vect) {
warning(paste("Field boundary mismatch: nrow(field_boundaries)=", n_fields_sf, "vs length(field_boundaries_vect)=", n_fields_vect, ". Using actual SpatVector length."))
}
field_results <- data.frame()
for (i in seq_len(nrow(field_boundaries))) {
field_name <- field_boundaries$field[i]
sub_field_name <- field_boundaries$sub_field[i]
field_name <- if ("field" %in% names(field_boundaries)) field_boundaries$field[i] else NA_character_
sub_field_name <- if ("sub_field" %in% names(field_boundaries)) field_boundaries$sub_field[i] else NA_character_
field_vect <- field_boundaries_vect[i]
# Extract CI values using helper function
@ -394,7 +385,7 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
outlier_threshold <- median_ci - (2 * sd_ci)
low_ci_pixels <- sum(valid_values < outlier_threshold)
total_pixels <- length(valid_values)
gap_score <- (low_ci_pixels / total_pixels) * 100
gap_score <- round((low_ci_pixels / total_pixels) * 100, 2)
# Classify gap severity
gap_level <- dplyr::case_when(
@ -412,7 +403,6 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
outlier_threshold = outlier_threshold
))
} else {
# Not enough valid data, fill with NA row
field_results <- rbind(field_results, data.frame(
field = field_name,
sub_field = sub_field_name,
@ -423,9 +413,92 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
))
}
}
# Summarize results
gap_summary <- field_results %>%
dplyr::group_by(gap_level) %>%
dplyr::summarise(field_count = n(), .groups = 'drop') %>%
dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1))
return(list(summary = gap_summary, field_results = field_results))
}
#' Calculate gap filling scores for all per-field mosaics (wrapper)
#'
#' This wrapper handles per-field mosaic structure by iterating over
#' individual field files and calling the basic KPI function
#'
#' @param per_field_files Character vector of paths to per-field mosaic TIFFs
#' @param field_boundaries_sf sf object with field geometries
#' @return data.frame with Field_id and gap_score columns
calculate_gap_scores <- function(per_field_files, field_boundaries_sf) {
message("\nCalculating gap filling scores (2σ method)...")
message(paste(" Using per-field mosaics for", length(per_field_files), "fields"))
field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field)
process_gap_for_field <- function(field_file) {
field_id <- basename(dirname(field_file))
field_bounds <- field_boundaries_by_id[[field_id]]
if (is.null(field_bounds) || nrow(field_bounds) == 0) {
return(data.frame(Field_id = field_id, gap_score = NA_real_))
}
tryCatch({
field_raster <- terra::rast(field_file)
ci_band_name <- "CI"
if (!(ci_band_name %in% names(field_raster))) {
return(data.frame(Field_id = field_id, gap_score = NA_real_))
}
field_ci_band <- field_raster[[ci_band_name]]
names(field_ci_band) <- "CI"
gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds)
if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) {
return(data.frame(Field_id = field_id, gap_score = NA_real_))
}
gap_scores <- gap_result$field_results
gap_scores$Field_id <- gap_scores$field
gap_scores <- gap_scores[, c("Field_id", "gap_score")]
stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE))
}, error = function(e) {
message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message))
data.frame(Field_id = field_id, gap_score = NA_real_)
})
}
# Process fields sequentially with progress bar
message(" Processing gap scores for ", length(per_field_files), " fields...")
pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50)
results_list <- lapply(seq_along(per_field_files), function(idx) {
result <- process_gap_for_field(per_field_files[[idx]])
utils::setTxtProgressBar(pb, idx)
result
})
close(pb)
gap_scores_df <- dplyr::bind_rows(results_list)
if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
gap_scores_df <- gap_scores_df %>%
dplyr::group_by(Field_id) %>%
dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop")
message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields"))
message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-",
round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%"))
} else {
message(" WARNING: No gap scores calculated from per-field mosaics")
gap_scores_df <- NULL
}
return(gap_scores_df)
}
# ============================================================================
# HELPER FUNCTIONS