# Debug: check why harvest dates aren't matching with the time series.
#
# For a single test field this script:
#   1. loads the cumulative CI time series (RDS) and the recorded harvest
#      dates (Excel workbook) for the project,
#   2. prints the date coverage of the field's time series,
#   3. reports, for every recorded `season_end`, whether that exact date is
#      present in the time series and, if not, the nearest available date.
#
# Output is written to the console only; nothing is saved.

suppressPackageStartupMessages({
  library(readxl)
  library(dplyr)
  library(lubridate)
  library(here)
})

project_dir <- "esa"
# Published to the global environment before sourcing the parameters script.
# NOTE(review): presumably parameters_project.R reads `project_dir` - confirm.
assign("project_dir", project_dir, envir = .GlobalEnv)
source(here("r_app", "parameters_project.R"))

# Load data ----

ci_data_raw <- readRDS(
  here(
    "laravel_app/storage/app", project_dir,
    "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds"
  )
) %>%
  ungroup()

# One row per field/date with the fitted CI value; incomplete rows are dropped.
time_series <- ci_data_raw %>%
  mutate(
    date = as.Date(Date),
    week = isoweek(date),
    year = isoyear(date)
  ) %>%
  select(field_id = field, date, week, year, mean_ci = FitData) %>%
  filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>%
  arrange(field_id, date)

# The workbook path is built from `project_dir` via here(), like the RDS path
# above, so it no longer depends on the working directory or a hard-coded
# project name.
# `field_boundaries_sf` is not defined in this script.
# NOTE(review): presumably created by parameters_project.R - confirm.
harvest_actual <- read_excel(
  here("laravel_app/storage/app", project_dir, "Data/harvest.xlsx")
) %>%
  mutate(
    season_start = as.Date(season_start),
    season_end = as.Date(season_end)
  ) %>%
  filter(field %in% unique(field_boundaries_sf$field)) %>%
  filter(!is.na(season_end))

cat("=== DEBUGGING HARVEST DATE MATCHING ===\n\n")

# Pick one field to analyze ----

test_field <- "00302"

cat("Testing field:", test_field, "\n\n")

# Time series for this field ----

field_ts <- time_series %>%
  filter(field_id == test_field)

cat("Time series dates for", test_field, ":\n")
cat(" Total days:", nrow(field_ts), "\n")
if (nrow(field_ts) > 0) {
  cat(
    " Date range:", as.character(min(field_ts$date)),
    "to", as.character(max(field_ts$date)), "\n"
  )
} else {
  # min()/max() on an empty vector would only emit warnings.
  cat(" Date range: (no time series rows for this field)\n")
}
cat(" Sample dates:\n")
print(head(field_ts$date, 20))

# Harvest dates for this field ----

field_harvests <- harvest_actual %>%
  filter(field == test_field)

cat("\nActual harvest dates for", test_field, ":\n")
print(field_harvests %>% select(field, year, season_end))

# Check if exact harvest dates exist in the time series ----

cat("\nChecking if harvest dates exist in time series:\n")

# seq_len() yields an empty sequence when there are no harvest rows, whereas
# 1:nrow() would iterate over c(1, 0) and fail inside the loop.
for (i in seq_len(nrow(field_harvests))) {
  h_date <- field_harvests$season_end[i]
  date_found <- h_date %in% field_ts$date

  if (date_found) {
    ci_val <- field_ts %>%
      filter(date == h_date) %>%
      pull(mean_ci)
    cat(" ", as.character(h_date), "- EXISTS, CI =", round(ci_val, 2), "\n")
  } else if (nrow(field_ts) == 0) {
    # Nothing to compare against; avoid printing an empty "nearest" report.
    cat(" ", as.character(h_date), "- NOT FOUND (no time series rows)\n")
  } else {
    # Find nearest date
    nearest <- field_ts %>%
      mutate(diff = abs(as.numeric(date - h_date))) %>%
      arrange(diff) %>%
      head(1)
    cat(
      " ", as.character(h_date), "- NOT FOUND (nearest:",
      as.character(nearest$date), ", diff:", nearest$diff,
      "days, CI =", round(nearest$mean_ci, 2), ")\n"
    )
  }
}

cat("\n=== SOLUTION: Use nearest date matching instead of exact ===\n")
cat("The RDS file has interpolated/fitted data, not every calendar date.\n")
cat("We should match harvest dates to the nearest available date in time series.\n")