feat: Integrate 2σ gap filling KPI into weekly field analysis
- Changed gap calculation from Q25 to 2σ below median method (kpi_utils.R) - Integrated gap filling into script 80 with tile-based processing - Added Gap_score column to field analysis output (Excel/CSV/RDS) - Fixed memory issues by processing 25 tiles individually instead of merging - Fixed Field_id matching to ensure gap scores populate correctly Gap scores now calculate for all 1185 fields with range 0-11.25% Works with tile-based mosaics (Angata 5x5 grid) without memory errors
This commit is contained in:
parent
19986706b9
commit
f94a6317bd
|
|
@ -134,6 +134,12 @@ tryCatch({
|
||||||
stop("Error loading 80_report_building_utils.R: ", e$message)
|
stop("Error loading 80_report_building_utils.R: ", e$message)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
tryCatch({
|
||||||
|
source(here("r_app", "kpi_utils.R"))
|
||||||
|
}, error = function(e) {
|
||||||
|
stop("Error loading kpi_utils.R: ", e$message)
|
||||||
|
})
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# PHASE AND STATUS TRIGGER DEFINITIONS
|
# PHASE AND STATUS TRIGGER DEFINITIONS
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
@ -426,7 +432,115 @@ main <- function() {
|
||||||
})
|
})
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# Build final output dataframe with all 21 columns
|
# CALCULATE GAP FILLING KPI (2σ method from kpi_utils.R)
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
message("\nCalculating gap filling scores (2σ method)...")
|
||||||
|
|
||||||
|
# Try single merged mosaic first, then fall back to merging tiles
|
||||||
|
week_mosaic_file <- file.path(mosaic_dir, sprintf("week_%02d_%d.tif", current_week, year))
|
||||||
|
|
||||||
|
gap_scores_df <- NULL
|
||||||
|
|
||||||
|
if (file.exists(week_mosaic_file)) {
|
||||||
|
# Single merged mosaic exists - use it directly
|
||||||
|
tryCatch({
|
||||||
|
current_week_raster <- terra::rast(week_mosaic_file)
|
||||||
|
current_ci_band <- current_week_raster[[5]] # CI is the 5th band
|
||||||
|
names(current_ci_band) <- "CI"
|
||||||
|
|
||||||
|
message(paste(" Loaded single mosaic:", week_mosaic_file))
|
||||||
|
|
||||||
|
# Calculate gap scores for all fields
|
||||||
|
gap_result <- calculate_gap_filling_kpi(current_ci_band, field_boundaries_sf)
|
||||||
|
|
||||||
|
# Extract field-level results (use field column directly to match current_stats Field_id)
|
||||||
|
gap_scores_df <- gap_result$field_results %>%
|
||||||
|
mutate(Field_id = field) %>%
|
||||||
|
select(Field_id, gap_score)
|
||||||
|
|
||||||
|
message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields"))
|
||||||
|
message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%"))
|
||||||
|
|
||||||
|
}, error = function(e) {
|
||||||
|
message(paste(" WARNING: Could not calculate gap scores from single mosaic:", e$message))
|
||||||
|
message(" Gap scores will be set to NA")
|
||||||
|
gap_scores_df <- NULL
|
||||||
|
})
|
||||||
|
|
||||||
|
} else {
|
||||||
|
# Single mosaic doesn't exist - check for tiles and process per-tile
|
||||||
|
message(" Single mosaic not found. Checking for tiles...")
|
||||||
|
|
||||||
|
# List all tiles for this week (e.g., week_04_2026_01.tif through week_04_2026_25.tif)
|
||||||
|
tile_pattern <- sprintf("week_%02d_%d_\\d{2}\\.tif$", current_week, year)
|
||||||
|
tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
|
||||||
|
|
||||||
|
if (length(tile_files) == 0) {
|
||||||
|
message(sprintf(" WARNING: No tiles found matching pattern: %s in %s", tile_pattern, mosaic_dir))
|
||||||
|
message(" Gap scores will be set to NA")
|
||||||
|
|
||||||
|
} else {
|
||||||
|
tryCatch({
|
||||||
|
message(sprintf(" Found %d tiles. Processing per-tile (memory efficient)...", length(tile_files)))
|
||||||
|
|
||||||
|
# Process each tile separately and accumulate results
|
||||||
|
all_tile_results <- list()
|
||||||
|
|
||||||
|
for (i in seq_along(tile_files)) {
|
||||||
|
tile_file <- tile_files[i]
|
||||||
|
|
||||||
|
# Load tile raster
|
||||||
|
tile_raster <- terra::rast(tile_file)
|
||||||
|
tile_ci_band <- tile_raster[[5]]
|
||||||
|
names(tile_ci_band) <- "CI"
|
||||||
|
|
||||||
|
# Calculate gap scores for fields in this tile
|
||||||
|
tile_gap_result <- calculate_gap_filling_kpi(tile_ci_band, field_boundaries_sf)
|
||||||
|
|
||||||
|
# Store results (only keep fields with non-NA scores, use field directly to match current_stats)
|
||||||
|
if (!is.null(tile_gap_result$field_results) && nrow(tile_gap_result$field_results) > 0) {
|
||||||
|
tile_results_clean <- tile_gap_result$field_results %>%
|
||||||
|
mutate(Field_id = field) %>%
|
||||||
|
select(Field_id, gap_score) %>%
|
||||||
|
filter(!is.na(gap_score))
|
||||||
|
|
||||||
|
if (nrow(tile_results_clean) > 0) {
|
||||||
|
all_tile_results[[i]] <- tile_results_clean
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Clear memory
|
||||||
|
rm(tile_raster, tile_ci_band, tile_gap_result)
|
||||||
|
gc(verbose = FALSE)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Combine all tile results
|
||||||
|
if (length(all_tile_results) > 0) {
|
||||||
|
gap_scores_df <- bind_rows(all_tile_results)
|
||||||
|
|
||||||
|
# If a field appears in multiple tiles, take the maximum gap score
|
||||||
|
gap_scores_df <- gap_scores_df %>%
|
||||||
|
group_by(Field_id) %>%
|
||||||
|
summarise(gap_score = max(gap_score, na.rm = TRUE), .groups = "drop")
|
||||||
|
|
||||||
|
message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields across", length(all_tile_results), "tiles"))
|
||||||
|
message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%"))
|
||||||
|
} else {
|
||||||
|
message(" WARNING: No gap scores calculated from any tiles")
|
||||||
|
gap_scores_df <- NULL
|
||||||
|
}
|
||||||
|
|
||||||
|
}, error = function(e) {
|
||||||
|
message(paste(" WARNING: Could not process tiles or calculate gap scores:", e$message))
|
||||||
|
message(" Gap scores will be set to NA")
|
||||||
|
gap_scores_df <- NULL
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Build final output dataframe with all 22 columns (including Gap_score)
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
message("\nBuilding final field analysis output...")
|
message("\nBuilding final field analysis output...")
|
||||||
|
|
@ -591,6 +705,23 @@ main <- function() {
|
||||||
else if (pct >= 10) return("10-20%")
|
else if (pct >= 10) return("10-20%")
|
||||||
else return("0-10%")
|
else return("0-10%")
|
||||||
}),
|
}),
|
||||||
|
# Column 22: Gap_score (2σ below median - from kpi_utils.R)
|
||||||
|
Gap_score = {
|
||||||
|
if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
|
||||||
|
# Debug: Print first few gap scores
|
||||||
|
message(sprintf(" Joining %d gap scores to field_analysis (first 3: %s)",
|
||||||
|
nrow(gap_scores_df),
|
||||||
|
paste(head(gap_scores_df$gap_score, 3), collapse=", ")))
|
||||||
|
message(sprintf(" First 3 Field_ids in gap_scores_df: %s",
|
||||||
|
paste(head(gap_scores_df$Field_id, 3), collapse=", ")))
|
||||||
|
message(sprintf(" First 3 Field_ids in current_stats: %s",
|
||||||
|
paste(head(current_stats$Field_id, 3), collapse=", ")))
|
||||||
|
|
||||||
|
gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)]
|
||||||
|
} else {
|
||||||
|
rep(NA_real_, nrow(current_stats))
|
||||||
|
}
|
||||||
|
}
|
||||||
) %>%
|
) %>%
|
||||||
select(
|
select(
|
||||||
all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert",
|
all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert",
|
||||||
|
|
@ -598,10 +729,10 @@ main <- function() {
|
||||||
"Germination_progress",
|
"Germination_progress",
|
||||||
"Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles",
|
"Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles",
|
||||||
"CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
|
"CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
|
||||||
"Imminent_prob", "Cloud_pct_clear", "Cloud_category"))
|
"Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score"))
|
||||||
)
|
)
|
||||||
|
|
||||||
message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 21 columns"))
|
message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns (including Gap_score)"))
|
||||||
|
|
||||||
export_paths <- export_field_analysis_excel(
|
export_paths <- export_field_analysis_excel(
|
||||||
field_analysis_df,
|
field_analysis_df,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue