SmartCane/r_app/experiments/harvest_prediction/investigate_khwc_2024.R
2026-01-06 14:17:37 +01:00

113 lines
4.4 KiB
R

# ============================================================================
# INVESTIGATE FIELD KHWC 2024 HARVEST
# Recorded: Aug 16, 2024
# Satellite shows empty: Aug 8, 2024
# Check what our model predicted
# ============================================================================
library(dplyr)
library(lubridate)
library(here)
# Load the validation results from the best performing system
results_file <- here("r_app/experiments/harvest_prediction/two_stage_validation_results.rds")
results <- readRDS(results_file)
all_results <- results$all_results
cat("============================================================================\n")
cat("FIELD KHWC - 2024 HARVEST INVESTIGATION\n")
cat("============================================================================\n\n")
cat("Recorded harvest date: Aug 16, 2024 (week", isoweek(as.Date("2024-08-16")), ")\n")
cat("Satellite shows empty: Aug 8, 2024 (week", isoweek(as.Date("2024-08-08")), ")\n")
cat("Difference: 8 days EARLY in satellite vs recorded\n\n")
# Get all KHWC data for 2024
khwc_2024 <- all_results %>%
filter(field == "KHWC",
year(harvest_date) == 2023) %>%
arrange(test_date)
if (nrow(khwc_2024) > 0) {
actual_harvest <- unique(khwc_2024$harvest_date)[1]
cat("Actual recorded harvest date:", format(actual_harvest, "%Y-%m-%d"), "\n\n")
# Find when Stage 1 first triggered
stage1_alerts <- khwc_2024 %>%
filter(stage1_alert == TRUE) %>%
arrange(test_date)
if (nrow(stage1_alerts) > 0) {
first_alert <- stage1_alerts[1,]
cat("============================================================================\n")
cat("STAGE 1 - FIRST ALERT\n")
cat("============================================================================\n\n")
cat("First alert date:", format(first_alert$test_date, "%Y-%m-%d"), "\n")
cat("Days before recorded harvest:", first_alert$days_from_harvest, "\n")
cat("Alert level:", first_alert$stage1_level, "\n\n")
# Calculate days from Aug 8 (satellite empty date)
satellite_empty_date <- as.Date("2024-08-08")
days_from_satellite <- as.numeric(first_alert$test_date - satellite_empty_date)
cat("Days from satellite empty date (Aug 8):", days_from_satellite, "\n")
if (days_from_satellite >= -7 && days_from_satellite <= 7) {
cat("✓✓✓ MODEL PREDICTION ALIGNS WITH SATELLITE IMAGE! ✓✓✓\n\n")
} else if (days_from_satellite < 0) {
cat("Model alerted", abs(days_from_satellite), "days BEFORE satellite showed empty\n\n")
} else {
cat("Model alerted", days_from_satellite, "days AFTER satellite showed empty\n\n")
}
} else {
cat("No Stage 1 alerts found\n\n")
}
# Show day-by-day around Aug 8
cat("============================================================================\n")
cat("DAY-BY-DAY ALERTS AROUND SATELLITE EMPTY DATE (AUG 8)\n")
cat("============================================================================\n\n")
around_aug8 <- khwc_2024 %>%
filter(test_date >= as.Date("2024-07-25"),
test_date <= as.Date("2024-08-25")) %>%
mutate(
days_from_aug8 = as.numeric(test_date - as.Date("2024-08-08")),
stage1_status = ifelse(stage1_alert, paste0("ALERT ", stage1_level), "no"),
stage2_status = ifelse(stage2_alert, paste0("ALERT ", stage2_level), "no")
) %>%
select(
Date = test_date,
Days_from_Aug8 = days_from_aug8,
Days_from_Recorded = days_from_harvest,
Stage1 = stage1_status,
Stage2 = stage2_status
)
print(as.data.frame(around_aug8), row.names = FALSE)
cat("\n============================================================================\n")
cat("INTERPRETATION\n")
cat("============================================================================\n\n")
cat("If the satellite image showed the field empty on Aug 8, 2024,\n")
cat("then the ACTUAL harvest date is likely Aug 8, NOT Aug 16.\n\n")
cat("This means:\n")
cat(" - The 'recorded' date (Aug 16) is 8 days LATE\n")
cat(" - Our model predictions 'early' by 8 days are actually CORRECT\n")
cat(" - We should validate recorded dates against satellite imagery\n\n")
cat("Recommendation: Check other 'early' predictions against satellite images\n")
cat("to see if recorded dates are consistently delayed\n\n")
} else {
cat("No data found for KHWC in 2024\n")
}
cat("============================================================================\n")