# ============================================================================ # INVESTIGATE FIELD KHWC 2024 HARVEST # Recorded: Aug 16, 2024 # Satellite shows empty: Aug 8, 2024 # Check what our model predicted # ============================================================================ library(dplyr) library(lubridate) library(here) # Load the validation results from the best performing system results_file <- here("r_app/experiments/harvest_prediction/two_stage_validation_results.rds") results <- readRDS(results_file) all_results <- results$all_results cat("============================================================================\n") cat("FIELD KHWC - 2024 HARVEST INVESTIGATION\n") cat("============================================================================\n\n") cat("Recorded harvest date: Aug 16, 2024 (week", isoweek(as.Date("2024-08-16")), ")\n") cat("Satellite shows empty: Aug 8, 2024 (week", isoweek(as.Date("2024-08-08")), ")\n") cat("Difference: 8 days EARLY in satellite vs recorded\n\n") # Get all KHWC data for 2024 khwc_2024 <- all_results %>% filter(field == "KHWC", year(harvest_date) == 2023) %>% arrange(test_date) if (nrow(khwc_2024) > 0) { actual_harvest <- unique(khwc_2024$harvest_date)[1] cat("Actual recorded harvest date:", format(actual_harvest, "%Y-%m-%d"), "\n\n") # Find when Stage 1 first triggered stage1_alerts <- khwc_2024 %>% filter(stage1_alert == TRUE) %>% arrange(test_date) if (nrow(stage1_alerts) > 0) { first_alert <- stage1_alerts[1,] cat("============================================================================\n") cat("STAGE 1 - FIRST ALERT\n") cat("============================================================================\n\n") cat("First alert date:", format(first_alert$test_date, "%Y-%m-%d"), "\n") cat("Days before recorded harvest:", first_alert$days_from_harvest, "\n") cat("Alert level:", first_alert$stage1_level, "\n\n") # Calculate days from Aug 8 (satellite empty date) satellite_empty_date <- as.Date("2024-08-08") days_from_satellite <- as.numeric(first_alert$test_date - satellite_empty_date) cat("Days from satellite empty date (Aug 8):", days_from_satellite, "\n") if (days_from_satellite >= -7 && days_from_satellite <= 7) { cat("✓✓✓ MODEL PREDICTION ALIGNS WITH SATELLITE IMAGE! ✓✓✓\n\n") } else if (days_from_satellite < 0) { cat("Model alerted", abs(days_from_satellite), "days BEFORE satellite showed empty\n\n") } else { cat("Model alerted", days_from_satellite, "days AFTER satellite showed empty\n\n") } } else { cat("No Stage 1 alerts found\n\n") } # Show day-by-day around Aug 8 cat("============================================================================\n") cat("DAY-BY-DAY ALERTS AROUND SATELLITE EMPTY DATE (AUG 8)\n") cat("============================================================================\n\n") around_aug8 <- khwc_2024 %>% filter(test_date >= as.Date("2024-07-25"), test_date <= as.Date("2024-08-25")) %>% mutate( days_from_aug8 = as.numeric(test_date - as.Date("2024-08-08")), stage1_status = ifelse(stage1_alert, paste0("ALERT ", stage1_level), "no"), stage2_status = ifelse(stage2_alert, paste0("ALERT ", stage2_level), "no") ) %>% select( Date = test_date, Days_from_Aug8 = days_from_aug8, Days_from_Recorded = days_from_harvest, Stage1 = stage1_status, Stage2 = stage2_status ) print(as.data.frame(around_aug8), row.names = FALSE) cat("\n============================================================================\n") cat("INTERPRETATION\n") cat("============================================================================\n\n") cat("If the satellite image showed the field empty on Aug 8, 2024,\n") cat("then the ACTUAL harvest date is likely Aug 8, NOT Aug 16.\n\n") cat("This means:\n") cat(" - The 'recorded' date (Aug 16) is 8 days LATE\n") cat(" - Our model predictions 'early' by 8 days are actually CORRECT\n") cat(" - We should validate recorded dates against satellite imagery\n\n") cat("Recommendation: Check other 'early' predictions against satellite images\n") cat("to see if recorded dates are consistently delayed\n\n") } else { cat("No data found for KHWC in 2024\n") } cat("============================================================================\n")