SmartCane/r_app/experiments/harvest_prediction/old/debug_harvest_dates.R
2026-01-06 14:17:37 +01:00

76 lines
2.6 KiB
R

# Debug: Check why harvest dates aren't matching with time series
suppressPackageStartupMessages({
library(readxl)
library(dplyr)
library(lubridate)
library(here)
})
# Select the project whose data is debugged. The value is bound locally and
# also written explicitly into the global environment before the shared
# parameters script is sourced.
# NOTE(review): presumably parameters_project.R reads `project_dir` from
# .GlobalEnv - confirm against that script.
project_dir <- "esa"
assign("project_dir", project_dir, envir = .GlobalEnv)
# NOTE(review): `field_boundaries_sf`, used later in this script, is not
# defined in this file; it is presumably created by the sourced script - confirm.
source(here("r_app", "parameters_project.R"))
# Read the cumulative CI table for the active project and drop any grouping
# that was stored with it.
ci_rds_path <- here(
  "laravel_app/storage/app",
  project_dir,
  "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds"
)
ci_data_raw <- ungroup(readRDS(ci_rds_path))

# Reduce to one tidy table: field id, calendar date, ISO week/year and the
# fitted CI value, keeping only complete rows, ordered by field then date.
time_series <- ci_data_raw %>%
  transmute(
    field_id = field,
    date = as.Date(Date),
    week = isoweek(date),
    year = isoyear(date),
    mean_ci = FitData
  ) %>%
  filter(!(is.na(mean_ci) | is.na(date) | is.na(field_id))) %>%
  arrange(field_id, date)
# Load the recorded harvest table for the active project.
# The path is built with here() and project_dir, the same way this script
# builds the CI .rds path, so it resolves from the project root regardless of
# the current working directory and follows project_dir instead of a second
# hard-coded project name.
harvest_actual <- read_excel(
  here("laravel_app/storage/app", project_dir, "Data/harvest.xlsx")
) %>%
  mutate(
    season_start = as.Date(season_start),
    season_end = as.Date(season_end)
  ) %>%
  # Keep only fields present in the boundary layer, and only seasons that
  # have a recorded end (harvest) date.
  filter(
    field %in% unique(field_boundaries_sf$field),
    !is.na(season_end)
  )
cat("=== DEBUGGING HARVEST DATE MATCHING ===\n\n")

# Field inspected by this debug run
test_field <- "00302"
cat("Testing field:", test_field, "\n\n")

# Time-series rows for the selected field
field_ts <- time_series %>% filter(field_id == test_field)
cat("Time series dates for", test_field, ":\n")
cat(" Total days:", nrow(field_ts), "\n")
if (nrow(field_ts) > 0) {
  cat(" Date range:", as.character(min(field_ts$date)), "to", as.character(max(field_ts$date)), "\n")
  cat(" Sample dates:\n")
  print(head(field_ts$date, 20))
} else {
  # min()/max() on an empty Date vector only produce warnings and a
  # meaningless range, so report the empty result explicitly instead.
  cat(" No time series rows found for this field; check the field_id values.\n")
}

# Recorded harvests (season end dates) for the selected field
field_harvests <- harvest_actual %>% filter(field == test_field)
cat("\nActual harvest dates for", test_field, ":\n")
print(field_harvests %>% select(field, year, season_end))
# For every recorded harvest date of the selected field, report whether that
# exact date is present in the time series; if not, report the closest
# available date, its distance in days and its CI value.
cat("\nChecking if harvest dates exist in time series:\n")
if (nrow(field_harvests) == 0) {
  cat("  (no harvest records to check for this field)\n")
}
# seq_len() gives an empty sequence for zero rows, whereas 1:nrow() would
# iterate over c(1, 0) and index non-existent rows.
for (i in seq_len(nrow(field_harvests))) {
  h_date <- field_harvests$season_end[i]
  # Named has_match rather than `exists` so base::exists() is not masked.
  has_match <- h_date %in% field_ts$date
  if (has_match) {
    ci_val <- field_ts$mean_ci[field_ts$date == h_date]
    cat(" ", as.character(h_date), "- EXISTS, CI =", round(ci_val, 2), "\n")
  } else if (nrow(field_ts) == 0) {
    # Nothing to compare against; avoid printing an empty "nearest" record.
    cat(" ", as.character(h_date), "- NOT FOUND (time series is empty)\n")
  } else {
    # Absolute distance in days from the harvest date to every available date;
    # which.min() picks the first closest one, matching a stable sort + head(1).
    day_gap <- abs(as.numeric(field_ts$date - h_date))
    nearest_idx <- which.min(day_gap)
    cat(" ", as.character(h_date), "- NOT FOUND (nearest:", as.character(field_ts$date[nearest_idx]),
        ", diff:", day_gap[nearest_idx], "days, CI =", round(field_ts$mean_ci[nearest_idx], 2), ")\n")
  }
}
# Print the conclusion of this debug run: match harvest dates to the nearest
# available time-series date rather than requiring an exact hit.
cat(
  "\n=== SOLUTION: Use nearest date matching instead of exact ===\n",
  "The RDS file has interpolated/fitted data, not every calendar date.\n",
  "We should match harvest dates to the nearest available date in time series.\n",
  sep = ""
)