# ============================================================================ # BFAST ROLLING WINDOW TEST - Single Season Analysis # ============================================================================ # Test bfast's ability to detect harvest breakpoints progressively # For each day from -20 to +20 days around harvest: # 1. Load CI data from previous harvest to current test day # 2. Run bfast to detect breaks # 3. Check if harvest date is detected as a breakpoint # ============================================================================ suppressPackageStartupMessages({ library(readxl) library(dplyr) library(tidyr) library(lubridate) library(here) library(bfast) library(zoo) library(ggplot2) }) # Set project directory project_dir <- "esa" assign("project_dir", project_dir, envir = .GlobalEnv) # Navigate to project root if in experiments folder if (basename(getwd()) == "harvest_prediction") { setwd("../../..") } source(here("r_app", "parameters_project.R")) # ============================================================================ # CONFIGURATION # ============================================================================ CONFIG <- list( test_field = "KHWC", # Field to test test_harvest_index = 2, # Which harvest event (1 = first, 2 = second, etc.) days_before = 20, # Days before harvest to start testing days_after = 20, # Days after harvest to end testing # bfast parameters h = 0.15, # Minimal segment size (15% of data) season = "none", # Seasonal model type ("dummy", "harmonic", "none") max_iter = 10, # Maximum iterations breaks = NULL # Let bfast determine number of breaks ) cat("============================================================================\n") cat("BFAST ROLLING WINDOW TEST - PROGRESSIVE HARVEST DETECTION\n") cat("============================================================================\n\n") cat("Test configuration:\n") cat(" Field:", CONFIG$test_field, "\n") cat(" Harvest event:", CONFIG$test_harvest_index, "\n") cat(" Test window:", CONFIG$days_before, "days before to", CONFIG$days_after, "days after\n") cat(" bfast h parameter:", CONFIG$h, "\n") cat(" Seasonal model:", CONFIG$season, "\n\n") # ============================================================================ # LOAD DATA # ============================================================================ cat("=== LOADING DATA ===\n\n") ci_rds_file <- here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds") ci_data_raw <- readRDS(ci_rds_file) %>% ungroup() time_series_daily <- ci_data_raw %>% mutate(date = as.Date(Date)) %>% select(field_id = field, date, mean_ci = FitData) %>% filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>% arrange(field_id, date) harvest_data <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>% mutate( season_start = as.Date(season_start), season_end = as.Date(season_end) ) %>% filter(!is.na(season_end)) # ============================================================================ # GET TEST FIELD AND HARVEST EVENT # ============================================================================ field_harvests <- harvest_data %>% filter(field == CONFIG$test_field) %>% arrange(season_end) if (nrow(field_harvests) < CONFIG$test_harvest_index) { stop("Field ", CONFIG$test_field, " does not have harvest event #", CONFIG$test_harvest_index) } test_harvest <- field_harvests[CONFIG$test_harvest_index, ] harvest_date <- test_harvest$season_end # Get previous harvest date to define season start if (CONFIG$test_harvest_index == 1) { season_start <- test_harvest$season_start if (is.na(season_start)) { # If no season_start, use earliest data for this field season_start <- min(time_series_daily$date[time_series_daily$field_id == CONFIG$test_field]) } } else { season_start <- field_harvests$season_end[CONFIG$test_harvest_index - 1] } cat("Test harvest event:\n") cat(" Field:", CONFIG$test_field, "\n") cat(" Season start:", format(season_start, "%Y-%m-%d"), "\n") cat(" Harvest date:", format(harvest_date, "%Y-%m-%d"), "\n") cat(" Season length:", as.numeric(harvest_date - season_start), "days\n\n") # ============================================================================ # PREPARE FIELD TIME SERIES # ============================================================================ field_ts <- time_series_daily %>% filter(field_id == CONFIG$test_field, date >= season_start, date <= harvest_date + CONFIG$days_after) %>% arrange(date) cat("CI data available:\n") cat(" Total observations:", nrow(field_ts), "\n") cat(" Date range:", format(min(field_ts$date), "%Y-%m-%d"), "to", format(max(field_ts$date), "%Y-%m-%d"), "\n\n") # ============================================================================ # BFAST WRAPPER FUNCTION # ============================================================================ run_bfast_on_window <- function(ts_data, end_date, harvest_date, config = CONFIG) { # Filter data up to end_date window_data <- ts_data %>% filter(date <= end_date) %>% arrange(date) if (nrow(window_data) < 50) { return(list( success = FALSE, reason = "insufficient_data", n_obs = nrow(window_data), breaks_detected = 0, harvest_detected = FALSE )) } # Create regular time series (fill gaps with NA for now, bfast can handle) date_seq <- seq.Date(min(window_data$date), max(window_data$date), by = "1 day") ts_regular <- data.frame(date = date_seq) %>% left_join(window_data, by = "date") # Interpolate missing values (linear interpolation) ts_regular$mean_ci_interp <- na.approx(ts_regular$mean_ci, rule = 2) # Convert to ts object (yearly frequency = 365 days) start_year <- as.numeric(format(min(ts_regular$date), "%Y")) start_doy <- as.numeric(format(min(ts_regular$date), "%j")) ts_obj <- ts(ts_regular$mean_ci_interp, start = c(start_year, start_doy), frequency = 365) # Run bfast result <- tryCatch({ bfast_result <- bfast(ts_obj, h = config$h, season = config$season, max.iter = config$max_iter, breaks = config$breaks) # Check if any breaks were detected in the trend component if (!is.null(bfast_result$output[[1]]$bp.Vt) && length(bfast_result$output[[1]]$bp.Vt$breakpoints) > 0) { # Get breakpoint dates bp_indices <- bfast_result$output[[1]]$bp.Vt$breakpoints bp_indices <- bp_indices[!is.na(bp_indices)] if (length(bp_indices) > 0) { bp_dates <- ts_regular$date[bp_indices] # Check if any breakpoint is close to harvest date (within 14 days) days_from_harvest <- as.numeric(bp_dates - harvest_date) harvest_detected <- any(abs(days_from_harvest) <= 14) closest_bp <- bp_dates[which.min(abs(days_from_harvest))] closest_bp_offset <- as.numeric(closest_bp - harvest_date) return(list( success = TRUE, n_obs = nrow(window_data), breaks_detected = length(bp_dates), breakpoint_dates = bp_dates, days_from_harvest = days_from_harvest, harvest_detected = harvest_detected, closest_breakpoint = closest_bp, closest_bp_offset = closest_bp_offset, bfast_object = bfast_result )) } } # No breaks detected list( success = TRUE, n_obs = nrow(window_data), breaks_detected = 0, harvest_detected = FALSE ) }, error = function(e) { list( success = FALSE, reason = paste0("bfast_error: ", e$message), n_obs = nrow(window_data), breaks_detected = 0, harvest_detected = FALSE ) }) return(result) } # ============================================================================ # ROLLING WINDOW TEST # ============================================================================ cat("============================================================================\n") cat("RUNNING ROLLING WINDOW TEST\n") cat("============================================================================\n\n") test_dates <- seq.Date( from = harvest_date - CONFIG$days_before, to = harvest_date + CONFIG$days_after, by = "1 day" ) cat("Testing", length(test_dates), "different end dates...\n\n") results_df <- data.frame() pb <- txtProgressBar(min = 0, max = length(test_dates), style = 3) for (i in 1:length(test_dates)) { test_date <- test_dates[i] days_from_harvest <- as.numeric(test_date - harvest_date) result <- run_bfast_on_window(field_ts, test_date, harvest_date, CONFIG) results_df <- bind_rows(results_df, data.frame( test_date = test_date, days_from_harvest = days_from_harvest, n_obs = result$n_obs, success = result$success, breaks_detected = result$breaks_detected, harvest_detected = ifelse(is.null(result$harvest_detected), FALSE, result$harvest_detected), closest_bp_offset = ifelse(is.null(result$closest_bp_offset), NA, result$closest_bp_offset), reason = ifelse(is.null(result$reason), "ok", result$reason) )) setTxtProgressBar(pb, i) } close(pb) # ============================================================================ # RESULTS SUMMARY # ============================================================================ cat("\n\n============================================================================\n") cat("RESULTS SUMMARY\n") cat("============================================================================\n\n") cat("Total tests run:", nrow(results_df), "\n") cat("Successful bfast runs:", sum(results_df$success), "\n") cat("Failed runs:", sum(!results_df$success), "\n\n") if (any(!results_df$success)) { cat("Failure reasons:\n") failure_summary <- results_df %>% filter(!success) %>% count(reason) %>% arrange(desc(n)) print(failure_summary, row.names = FALSE) cat("\n") } # When was harvest first detected? first_detection <- results_df %>% filter(harvest_detected == TRUE) %>% arrange(days_from_harvest) %>% slice(1) if (nrow(first_detection) > 0) { cat("🎯 FIRST HARVEST DETECTION:\n") cat(" Test date:", format(first_detection$test_date, "%Y-%m-%d"), "\n") cat(" Days from actual harvest:", first_detection$days_from_harvest, "\n") cat(" Observations used:", first_detection$n_obs, "\n") cat(" Total breaks detected:", first_detection$breaks_detected, "\n") cat(" Closest breakpoint offset:", first_detection$closest_bp_offset, "days\n\n") if (first_detection$days_from_harvest < 0) { cat(" ✓ Detected", abs(first_detection$days_from_harvest), "days BEFORE actual harvest\n") } else if (first_detection$days_from_harvest == 0) { cat(" ✓ Detected ON harvest day\n") } else { cat(" ℹ Detected", first_detection$days_from_harvest, "days AFTER actual harvest\n") } cat("\n") } else { cat("❌ Harvest NOT detected in any test window\n\n") } # Show detection pattern over time cat("============================================================================\n") cat("DETECTION PATTERN DAY-BY-DAY\n") cat("============================================================================\n\n") detection_summary <- results_df %>% mutate( status = case_when( !success ~ paste0("FAIL: ", reason), harvest_detected ~ paste0("✓ DETECTED (", breaks_detected, " breaks)"), breaks_detected > 0 ~ paste0("○ ", breaks_detected, " breaks (not harvest)"), TRUE ~ "○ No breaks" ) ) %>% select( Date = test_date, Days_from_harvest = days_from_harvest, N_obs = n_obs, Status = status, BP_offset = closest_bp_offset ) print(as.data.frame(detection_summary), row.names = FALSE) # ============================================================================ # VISUALIZATION # ============================================================================ cat("\n\n============================================================================\n") cat("GENERATING VISUALIZATION\n") cat("============================================================================\n\n") # Plot 1: CI time series with harvest date and detection windows p1 <- ggplot(field_ts, aes(x = date, y = mean_ci)) + geom_line(color = "darkgreen", linewidth = 1) + geom_vline(xintercept = harvest_date, color = "red", linetype = "dashed", linewidth = 1) + geom_vline(xintercept = harvest_date - CONFIG$days_before, color = "blue", linetype = "dotted") + geom_vline(xintercept = harvest_date + CONFIG$days_after, color = "blue", linetype = "dotted") + annotate("text", x = harvest_date, y = max(field_ts$mean_ci, na.rm = TRUE), label = "Actual Harvest", vjust = -0.5, color = "red") + labs( title = paste0("Field ", CONFIG$test_field, " - CI Time Series"), subtitle = paste0("Season: ", format(season_start, "%Y-%m-%d"), " to ", format(harvest_date + CONFIG$days_after, "%Y-%m-%d")), x = "Date", y = "CI Value" ) + theme_minimal() + theme(plot.title = element_text(face = "bold")) # Plot 2: Detection success over test window p2 <- ggplot(results_df %>% filter(success), aes(x = days_from_harvest, y = as.numeric(harvest_detected))) + geom_line(color = "blue", linewidth = 1) + geom_point(aes(color = harvest_detected), size = 2) + geom_vline(xintercept = 0, color = "red", linetype = "dashed") + scale_color_manual(values = c("FALSE" = "gray", "TRUE" = "green")) + labs( title = "bfast Harvest Detection Over Time", subtitle = paste0("Field ", CONFIG$test_field, " - Harvest ", CONFIG$test_harvest_index), x = "Days from Actual Harvest", y = "Harvest Detected", color = "Detection" ) + theme_minimal() + theme(plot.title = element_text(face = "bold")) # Plot 3: Number of breaks detected p3 <- ggplot(results_df %>% filter(success), aes(x = days_from_harvest, y = breaks_detected)) + geom_line(color = "purple", linewidth = 1) + geom_point(aes(color = harvest_detected), size = 2) + geom_vline(xintercept = 0, color = "red", linetype = "dashed") + scale_color_manual(values = c("FALSE" = "gray", "TRUE" = "green")) + labs( title = "Number of Breakpoints Detected", x = "Days from Actual Harvest", y = "Breaks Detected", color = "Harvest\nDetected" ) + theme_minimal() + theme(plot.title = element_text(face = "bold")) # Save plots output_dir <- here("r_app/experiments/harvest_prediction") ggsave(file.path(output_dir, "bfast_ci_timeseries.png"), p1, width = 10, height = 6, dpi = 300) ggsave(file.path(output_dir, "bfast_detection_pattern.png"), p2, width = 10, height = 6, dpi = 300) ggsave(file.path(output_dir, "bfast_breaks_count.png"), p3, width = 10, height = 6, dpi = 300) cat("Plots saved to:", output_dir, "\n") cat(" - bfast_ci_timeseries.png\n") cat(" - bfast_detection_pattern.png\n") cat(" - bfast_breaks_count.png\n\n") # ============================================================================ # SAVE RESULTS # ============================================================================ output_file <- file.path(output_dir, "bfast_rolling_results.rds") saveRDS(list( config = CONFIG, field = CONFIG$test_field, harvest_date = harvest_date, season_start = season_start, results = results_df, field_ts = field_ts ), output_file) cat("Results saved to:", output_file, "\n\n") cat("============================================================================\n") cat("ANALYSIS COMPLETE\n") cat("============================================================================\n")