# Examine CI patterns for fields where detection failed suppressPackageStartupMessages({ library(readxl) library(dplyr) library(tidyr) library(lubridate) library(here) library(ggplot2) }) project_dir <- "esa" assign("project_dir", project_dir, envir = .GlobalEnv) source(here("r_app", "parameters_project.R")) # Load daily CI data ci_data_raw <- readRDS(here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>% ungroup() time_series <- ci_data_raw %>% mutate( date = as.Date(Date), week = isoweek(date), year = isoyear(date) ) %>% select(field_id = field, date, week, year, mean_ci = FitData) %>% filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>% arrange(field_id, date) # Load harvest data harvest_actual <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>% mutate( season_start = as.Date(season_start), season_end = as.Date(season_end) ) %>% filter(field %in% unique(field_boundaries_sf$field)) %>% filter(!is.na(season_end)) cat("=== EXAMINING MISSED HARVESTS ===\n\n") # Focus on specific fields mentioned: 00302, 00F28 test_fields <- c("00302", "00F28") for (field_name in test_fields) { cat("\n\n===============================================\n") cat("FIELD:", field_name, "\n") cat("===============================================\n\n") # Get harvest dates for this field field_harvests <- harvest_actual %>% filter(field == field_name) %>% arrange(season_end) %>% mutate(harvest_week = isoweek(season_end)) cat("Harvest dates:\n") print(field_harvests %>% select(year, season_end, harvest_week)) # Get time series for this field field_ts <- time_series %>% filter(field_id == field_name) %>% arrange(date) %>% mutate( # Add rolling average ci_smooth = zoo::rollmean(mean_ci, k = 7, fill = NA, align = "center"), # Add week-over-week change ci_lag7 = lag(ci_smooth, 7), ci_drop = ci_lag7 - ci_smooth ) # For each harvest, show ±30 days of data for (i in 1:nrow(field_harvests)) { h_date <- field_harvests$season_end[i] h_year <- field_harvests$year[i] cat("\n--- Harvest", i, ":", as.character(h_date), "(Year:", h_year, ") ---\n") window_data <- field_ts %>% filter(date >= (h_date - 30), date <= (h_date + 30)) %>% mutate( days_from_harvest = as.numeric(date - h_date), is_harvest_week = abs(days_from_harvest) <= 3 ) if (nrow(window_data) > 0) { cat("\nCI values around harvest (±30 days):\n") # Summary by week relative to harvest weekly_summary <- window_data %>% mutate(week_offset = floor(days_from_harvest / 7)) %>% group_by(week_offset) %>% summarise( n_days = n(), mean_ci = mean(mean_ci, na.rm = TRUE), min_ci = min(mean_ci, na.rm = TRUE), max_ci = max(mean_ci, na.rm = TRUE), mean_drop = mean(ci_drop, na.rm = TRUE), max_drop = max(ci_drop, na.rm = TRUE), .groups = "drop" ) %>% arrange(week_offset) print(weekly_summary) # Check detection conditions cat("\nDetection analysis:\n") # Count days meeting different thresholds low_ci_1.5 <- sum(window_data$mean_ci < 1.5, na.rm = TRUE) low_ci_2.0 <- sum(window_data$mean_ci < 2.0, na.rm = TRUE) low_ci_2.5 <- sum(window_data$mean_ci < 2.5, na.rm = TRUE) drop_0.3 <- sum(window_data$ci_drop > 0.3, na.rm = TRUE) drop_0.5 <- sum(window_data$ci_drop > 0.5, na.rm = TRUE) drop_0.8 <- sum(window_data$ci_drop > 0.8, na.rm = TRUE) cat(" Days with CI < 1.5:", low_ci_1.5, "\n") cat(" Days with CI < 2.0:", low_ci_2.0, "\n") cat(" Days with CI < 2.5:", low_ci_2.5, "\n") cat(" Days with drop > 0.3:", drop_0.3, "\n") cat(" Days with drop > 0.5:", drop_0.5, "\n") cat(" Days with drop > 0.8:", drop_0.8, "\n") # Check if there's a sustained low period consecutive_low <- window_data %>% mutate(is_low = mean_ci < 2.0) %>% filter(is_low) %>% mutate( date_diff = as.numeric(date - lag(date)), new_group = is.na(date_diff) | date_diff > 3 ) %>% mutate(group_id = cumsum(new_group)) %>% group_by(group_id) %>% summarise( start = min(date), end = max(date), duration = n(), mean_ci = mean(mean_ci), .groups = "drop" ) %>% arrange(desc(duration)) if (nrow(consecutive_low) > 0) { cat("\nLongest consecutive low CI periods (CI < 2.0):\n") print(head(consecutive_low, 3)) } else { cat("\nNo sustained low CI periods found (CI < 2.0)\n") } } else { cat("No data available in this window\n") } } } cat("\n\n=== SUMMARY ===\n") cat("This analysis shows why harvests were missed.\n") cat("If CI doesn't drop low enough OR doesn't stay low for enough consecutive days,\n") cat("the detection algorithm won't trigger.\n")