# Analyze specific MISSED harvests to understand why detection failed
#
# For a hand-picked list of fields whose harvests were not detected, this
# script prints diagnostics of the chlorophyll index (CI) time series in a
# +/-30 day window around each reported harvest date (season_end):
#   * min / max / mean CI in the window and CI closest to the harvest date
#   * number of observations below CI thresholds 1.5, 2.0 and 2.5
#   * the longest consecutive low-CI run per threshold
#   * drops of the smoothed CI relative to 7 observations earlier
#   * the raw series from day -7 to day +21
# Output goes to the console only; nothing is written to disk.

suppressPackageStartupMessages({
  library(readxl)
  library(dplyr)
  library(tidyr)
  library(lubridate)
  library(terra)
  library(sf)
  library(here)
})

# Set project directory
project_dir <- "esa"
# Also publish it in the global environment; presumably
# parameters_project.R looks it up there -- TODO confirm.
assign("project_dir", project_dir, envir = .GlobalEnv)

# How much of the missed list to inspect per run.
max_fields <- 5 # Analyze first 5 fields
max_harvests <- 3 # First 3 harvests per field

# Source required files
cat("Loading project configuration...\n")
source(here("r_app", "parameters_project.R"))

# field_boundaries_sf is not defined in this script; it is expected to be
# created by the sourced configuration. Fail early with a clear message.
if (!exists("field_boundaries_sf")) {
  stop(
    "field_boundaries_sf was not created by parameters_project.R",
    call. = FALSE
  )
}

# Centered rolling mean that tolerates short input.
# zoo::rollmean() needs at least `k` observations; for shorter vectors the
# result is all NA, which matches what `fill = NA` yields at the edges.
smooth_ci <- function(x, k = 7) {
  if (length(x) < k) {
    return(rep(NA_real_, length(x)))
  }
  zoo::rollmean(x, k = k, fill = NA, align = "center")
}

# Read pre-extracted CI data
ci_rds_file <- here(
  "laravel_app/storage/app", project_dir,
  "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds"
)
ci_data_raw <- readRDS(ci_rds_file) %>%
  ungroup()

time_series <- ci_data_raw %>%
  mutate(
    date = as.Date(Date),
    week = isoweek(date),
    year = isoyear(date)
  ) %>%
  select(
    field_id = field,
    date,
    week,
    year,
    mean_ci = FitData
  ) %>%
  filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>%
  arrange(field_id, date)

# Read actual harvest data
# Resolved through here() and project_dir, like the CI file above, so the
# script does not depend on the current working directory.
harvest_file <- here("laravel_app/storage/app", project_dir, "Data/harvest.xlsx")
harvest_actual_all <- read_excel(harvest_file) %>%
  mutate(
    season_start = as.Date(season_start),
    season_end = as.Date(season_end)
  ) %>%
  filter(!is.na(season_end))

# Keep only harvest records for fields present in the field boundaries.
fields_with_data <- unique(field_boundaries_sf$field)

harvest_actual <- harvest_actual_all %>%
  filter(field %in% fields_with_data)

cat("=== ANALYZING MISSED HARVESTS ===\n\n")

# Fields that were missed in detection results (from previous output)
missed_cases <- c(
  "00302", "00F25", "00F28", "00P81", "00P82", "00P83", "00P84",
  "KHWA", "KHWB", "KHWC", "LOMDA"
)

# Analyze each missed field's harvests
for (field_name in head(missed_cases, max_fields)) {
  field_harvests <- harvest_actual %>%
    filter(field == field_name) %>%
    arrange(season_end)

  if (nrow(field_harvests) == 0) next

  cat("\n========================================\n")
  cat("FIELD:", field_name, "\n")
  cat("Total harvests:", nrow(field_harvests), "\n")
  cat("========================================\n\n")

  # Analyze each harvest for this field
  for (i in seq_len(min(max_harvests, nrow(field_harvests)))) {
    harvest_date <- field_harvests$season_end[i]
    harvest_week <- isoweek(harvest_date)
    harvest_year <- isoyear(harvest_date)

    cat("\n--- Harvest", i, "---\n")
    cat("Date:", as.character(harvest_date),
        "(Week", harvest_week, harvest_year, ")\n\n")

    # Get CI values around this harvest
    window_obs <- time_series %>%
      filter(
        field_id == field_name,
        date >= (harvest_date - 30),
        date <= (harvest_date + 30)
      )

    # Guard BEFORE deriving columns: the rolling mean below cannot be
    # computed on an empty window.
    if (nrow(window_obs) == 0) {
      cat(" NO DATA available for this harvest period\n")
      next
    }

    # time_series is sorted by field and date, so rows are in date order.
    # NOTE(review): lag(., 7) shifts by 7 rows, which equals 7 days only if
    # the series has one observation per day -- confirm.
    harvest_window <- window_obs %>%
      mutate(
        days_from_harvest = as.numeric(date - harvest_date),
        ci_smooth = smooth_ci(mean_ci, k = 7),
        ci_lag7 = lag(ci_smooth, 7),
        ci_drop = ci_lag7 - ci_smooth,
        is_low_1.5 = mean_ci < 1.5,
        is_low_2.0 = mean_ci < 2.0,
        is_low_2.5 = mean_ci < 2.5
      )

    # Summary statistics
    cat("CI Summary (±30 days):\n")
    cat(" Min CI:", round(min(harvest_window$mean_ci, na.rm = TRUE), 2), "\n")
    cat(" Max CI:", round(max(harvest_window$mean_ci, na.rm = TRUE), 2), "\n")
    cat(" Mean CI:", round(mean(harvest_window$mean_ci, na.rm = TRUE), 2), "\n")

    # CI at/near harvest date: the observation closest to day 0 within 3 days
    near_harvest <- harvest_window %>%
      filter(abs(days_from_harvest) <= 3) %>%
      arrange(abs(days_from_harvest))

    if (nrow(near_harvest) > 0) {
      cat(" CI at harvest date (±3 days):",
          round(near_harvest$mean_ci[1], 2), "\n")
    }

    # Find minimum CI and when it occurred (first occurrence on ties)
    min_ci_row <- harvest_window %>%
      slice(which.min(mean_ci))
    cat(" Minimum CI:", round(min_ci_row$mean_ci, 2),
        "at day", min_ci_row$days_from_harvest, "\n\n")

    # Count days below different thresholds
    cat("Days with low CI:\n")
    cat(" CI < 1.5:", sum(harvest_window$is_low_1.5, na.rm = TRUE), "days\n")
    cat(" CI < 2.0:", sum(harvest_window$is_low_2.0, na.rm = TRUE), "days\n")
    cat(" CI < 2.5:", sum(harvest_window$is_low_2.5, na.rm = TRUE), "days\n\n")

    # Find longest consecutive period below threshold
    for (threshold in c(1.5, 2.0, 2.5)) {
      # A new run starts at the first row, after a gap of more than 3 days,
      # or on any row that is not low; cumsum() turns those flags into run
      # ids, so every low row in an unbroken stretch shares one id.
      consecutive <- harvest_window %>%
        arrange(date) %>%
        mutate(
          is_low = mean_ci < threshold,
          day_diff = as.numeric(date - lag(date)),
          new_period = is.na(day_diff) | day_diff > 3 | !is_low,
          period_id = cumsum(new_period)
        ) %>%
        filter(is_low) %>%
        group_by(period_id) %>%
        summarise(
          start_day = min(days_from_harvest),
          end_day = max(days_from_harvest),
          # Number of observations in the run; reported as "days" below.
          duration = n(),
          mean_ci_period = mean(mean_ci),
          .groups = "drop"
        ) %>%
        arrange(desc(duration))

      if (nrow(consecutive) > 0) {
        longest <- consecutive[1, ]
        cat("Longest consecutive period (CI <", threshold, "):\n")
        cat(" Duration:", longest$duration, "days\n")
        cat(" Start day:", longest$start_day,
            ", End day:", longest$end_day, "\n")
        cat(" Mean CI:", round(longest$mean_ci_period, 2), "\n\n")
      }
    }

    # Show when significant drops occurred
    drops <- harvest_window %>%
      filter(!is.na(ci_drop), ci_drop > 0.3) %>%
      arrange(days_from_harvest)

    if (nrow(drops) > 0) {
      cat("Significant CI drops (>0.3) detected:\n")
      cat(" First drop at day:", drops$days_from_harvest[1],
          "(drop:", round(drops$ci_drop[1], 2), ")\n")
      if (nrow(drops) > 1) {
        cat(" Total drops detected:", nrow(drops), "\n")
      }
      cat("\n")
    } else {
      cat("No significant CI drops (>0.3) detected in this period\n\n")
    }

    # Show daily data around harvest
    cat("Daily CI values (days -7 to +21):\n")
    daily_view <- harvest_window %>%
      filter(days_from_harvest >= -7, days_from_harvest <= 21) %>%
      select(days_from_harvest, date, mean_ci, is_low_2.0) %>%
      arrange(days_from_harvest)

    print(daily_view, n = 100)
  }
}

cat("\n\n=== SUMMARY ===\n")
cat("Key observations:\n")
cat("1. Check if CI actually drops below 2.0 around harvest dates\n")
cat("2. Check when the minimum CI occurs (before, during, or after harvest)\n")
cat("3. Check duration of low CI periods\n")
cat("4. Identify timing offset between reported harvest date and actual low CI period\n")