# ============================================================================ # BFAST MONITOR TEST - Real-time Harvest Detection # ============================================================================ # Use bfastmonitor() which is designed for: # - A stable historical baseline period # - Monitoring recent period for breaks # - Real-time change detection # ============================================================================ suppressPackageStartupMessages({ library(readxl) library(dplyr) library(tidyr) library(lubridate) library(here) library(bfast) library(zoo) library(ggplot2) }) # Set project directory project_dir <- "esa" assign("project_dir", project_dir, envir = .GlobalEnv) if (basename(getwd()) == "harvest_prediction") { setwd("../../..") } source(here("r_app", "parameters_project.R")) # ============================================================================ # CONFIGURATION # ============================================================================ CONFIG <- list( # FIELD SELECTION - Change these to test different fields test_field = "00302", # Try: 00007, 00104, 00119, 00120, etc. test_harvest_index = 3, # Which harvest event (1 = first, 2 = second, etc.) # HISTORY/MONITORING SPLIT history_days = 300, # Fixed: use first 300 days as baseline history # Analysis extends to this many days after harvest days_after_harvest = 20, # SMOOTHING - Test different values smoothing_windows = c(1, 7, 14), # 1 = no smoothing, 7 = weekly, 14 = biweekly # BFASTMONITOR PARAMETERS (tweakable!) # 1. Formula: how to model the baseline # - response ~ trend : simple linear trend # - response ~ trend + harmon : add seasonal harmonics formula = response ~ trend, # 2. Order: if using harmon, how many harmonics (1-3 typical) order = 1, # 3. Type: type of monitoring process # - "OLS-MOSUM" (default): moving sum (good for gradual changes) # - "OLS-CUSUM": cumulative sum (good for abrupt changes) # - "RE": recursive estimates # - "ME": moving estimates type = "OLS-MOSUM", # 4. h: bandwidth for moving/recursive estimates (0.15-0.5 typical) # smaller = more sensitive to recent changes h = 0.25, # 5. level: significance level for break detection (0.01-0.1) # lower = stricter (fewer false positives) level = 0.05 ) cat("============================================================================\n") cat("BFAST MONITOR - REAL-TIME HARVEST DETECTION\n") cat("============================================================================\n\n") # ============================================================================ # LOAD DATA # ============================================================================ ci_rds_file <- here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds") ci_data_raw <- readRDS(ci_rds_file) %>% ungroup() time_series_daily <- ci_data_raw %>% mutate(date = as.Date(Date)) %>% select(field_id = field, date, mean_ci = FitData) %>% filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>% arrange(field_id, date) harvest_data <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>% mutate( season_start = as.Date(season_start), season_end = as.Date(season_end) ) %>% filter(!is.na(season_end)) # Check which fields have CI data fields_with_ci <- unique(time_series_daily$field_id) cat("Fields with CI data:", length(fields_with_ci), "\n") cat("Sample fields:", paste(head(fields_with_ci, 20), collapse = ", "), "\n\n") # Filter harvest data to only fields that have CI data harvest_data_with_ci <- harvest_data %>% filter(field %in% fields_with_ci) cat("Available fields with BOTH harvest data AND CI data:\n") field_summary <- harvest_data_with_ci %>% group_by(field) %>% summarise( n_harvests = n(), first_harvest = min(season_end), last_harvest = max(season_end), .groups = "drop" ) %>% arrange(field) print(field_summary, n = 50) cat("\n") # Get test field and harvest field_harvests <- harvest_data_with_ci %>% filter(field == CONFIG$test_field) %>% arrange(season_end) if (nrow(field_harvests) == 0) { stop("Field ", CONFIG$test_field, " does not have both CI data and harvest records. ", "Please choose from the fields listed above.") } if (nrow(field_harvests) < CONFIG$test_harvest_index) { stop("Field ", CONFIG$test_field, " only has ", nrow(field_harvests), " harvest events, but you requested harvest #", CONFIG$test_harvest_index) } test_harvest <- field_harvests[CONFIG$test_harvest_index, ] harvest_date <- test_harvest$season_end if (CONFIG$test_harvest_index == 1) { season_start <- test_harvest$season_start if (is.na(season_start)) { season_start <- min(time_series_daily$date[time_series_daily$field_id == CONFIG$test_field]) } } else { season_start <- field_harvests$season_end[CONFIG$test_harvest_index - 1] } # Prepare field time series end_date <- harvest_date + CONFIG$days_after_harvest field_ts <- time_series_daily %>% filter(field_id == CONFIG$test_field, date >= season_start, date <= end_date) %>% arrange(date) season_length <- as.numeric(harvest_date - season_start) history_days <- CONFIG$history_days # Use fixed 300 days history_end <- season_start + history_days cat("Field:", CONFIG$test_field, "\n") cat("Season start:", format(season_start, "%Y-%m-%d"), "\n") cat("Harvest date:", format(harvest_date, "%Y-%m-%d"), "\n") cat("Analysis end:", format(end_date, "%Y-%m-%d"), "\n") cat("Season length:", season_length, "days\n\n") cat("History period (baseline):", format(season_start, "%Y-%m-%d"), "to", format(history_end, "%Y-%m-%d"), "(", history_days, "days)\n") cat("Monitoring period:", format(history_end + 1, "%Y-%m-%d"), "to", format(end_date, "%Y-%m-%d"), "\n\n") # ============================================================================ # PREPARE TIME SERIES # ============================================================================ # Create regular time series date_seq <- seq.Date(min(field_ts$date), max(field_ts$date), by = "1 day") ts_regular <- data.frame(date = date_seq) %>% left_join(field_ts, by = "date") # Interpolate missing values ts_regular$mean_ci_interp <- na.approx(ts_regular$mean_ci, rule = 2) # ============================================================================ # TEST DIFFERENT SMOOTHING WINDOWS # ============================================================================ cat("============================================================================\n") cat("TESTING DIFFERENT SMOOTHING WINDOWS\n") cat("============================================================================\n\n") all_results <- list() for (smooth_window in CONFIG$smoothing_windows) { cat("----------------------------------------------------------------------------\n") cat("SMOOTHING WINDOW:", smooth_window, "days\n") cat("----------------------------------------------------------------------------\n\n") # Apply smoothing if (smooth_window == 1) { ts_regular$mean_ci_smooth <- ts_regular$mean_ci_interp smooth_label <- "No smoothing" } else { ts_regular$mean_ci_smooth <- rollmean(ts_regular$mean_ci_interp, k = smooth_window, fill = NA, align = "center") # Fill NAs at edges ts_regular$mean_ci_smooth <- na.approx(ts_regular$mean_ci_smooth, rule = 2) smooth_label <- paste0(smooth_window, "-day moving average") } # Convert to ts object start_year <- as.numeric(format(min(ts_regular$date), "%Y")) start_doy <- as.numeric(format(min(ts_regular$date), "%j")) ts_obj <- ts(ts_regular$mean_ci_smooth, start = c(start_year, start_doy), frequency = 365) # Calculate start point for monitoring period history_start_decimal <- as.numeric(start_year) + (start_doy - 1) / 365 history_end_decimal <- history_start_decimal + (history_days / 365) cat("Time series with", smooth_label, "\n") cat(" Length:", length(ts_obj), "observations\n\n") # ============================================================================ # RUN BFASTMONITOR # ============================================================================ tryCatch({ # Run bfastmonitor bfm_result <- bfastmonitor( data = ts_obj, start = history_end_decimal, formula = CONFIG$formula, order = CONFIG$order, type = CONFIG$type, h = CONFIG$h, level = CONFIG$level ) cat("bfastmonitor completed successfully\n\n") # Check if break was detected if (!is.na(bfm_result$breakpoint)) { # Convert breakpoint back to date bp_decimal <- bfm_result$breakpoint bp_year <- floor(bp_decimal) bp_doy <- round((bp_decimal - bp_year) * 365) + 1 bp_date <- as.Date(paste0(bp_year, "-01-01")) + bp_doy - 1 days_from_harvest <- as.numeric(bp_date - harvest_date) cat("✓ BREAKPOINT DETECTED\n") cat(" Break date:", format(bp_date, "%Y-%m-%d"), "\n") cat(" Days from harvest:", days_from_harvest, "\n") if (abs(days_from_harvest) <= 7) { cat(" >>> ✓✓✓ HARVEST DETECTED WITHIN 7 DAYS! ✓✓✓ <<<\n") } else if (abs(days_from_harvest) <= 14) { cat(" >>> ✓ HARVEST DETECTED WITHIN 14 DAYS <<<\n") } else if (days_from_harvest < 0) { cat(" Break occurred", abs(days_from_harvest), "days BEFORE harvest\n") } else { cat(" Break occurred", days_from_harvest, "days AFTER harvest\n") } cat(" Break magnitude:", round(bfm_result$magnitude, 3), "\n\n") # Store results all_results[[as.character(smooth_window)]] <- list( smooth_window = smooth_window, smooth_label = smooth_label, break_detected = TRUE, break_date = bp_date, days_from_harvest = days_from_harvest, magnitude = bfm_result$magnitude, bfm_result = bfm_result ) } else { cat("❌ No breakpoint detected in monitoring period\n\n") all_results[[as.character(smooth_window)]] <- list( smooth_window = smooth_window, smooth_label = smooth_label, break_detected = FALSE ) } }, error = function(e) { cat("ERROR:", e$message, "\n\n") all_results[[as.character(smooth_window)]] <- list( smooth_window = smooth_window, smooth_label = smooth_label, error = e$message ) }) cat("\n") } # ============================================================================ # SUMMARY OF ALL SMOOTHING TESTS # ============================================================================ cat("============================================================================\n") cat("SUMMARY - EFFECT OF SMOOTHING ON HARVEST DETECTION\n") cat("============================================================================\n\n") summary_df <- data.frame() for (sw in names(all_results)) { result <- all_results[[sw]] if (!is.null(result$error)) { summary_df <- rbind(summary_df, data.frame( smoothing_window = result$smooth_window, label = result$smooth_label, break_detected = "ERROR", break_date = NA, days_from_harvest = NA, magnitude = NA )) } else if (result$break_detected) { summary_df <- rbind(summary_df, data.frame( smoothing_window = result$smooth_window, label = result$smooth_label, break_detected = "YES", break_date = format(result$break_date, "%Y-%m-%d"), days_from_harvest = result$days_from_harvest, magnitude = round(result$magnitude, 3) )) } else { summary_df <- rbind(summary_df, data.frame( smoothing_window = result$smooth_window, label = result$smooth_label, break_detected = "NO", break_date = NA, days_from_harvest = NA, magnitude = NA )) } } print(summary_df, row.names = FALSE) cat("\n") # Find best result (closest to harvest) best_result <- NULL min_days <- Inf for (result in all_results) { if (!is.null(result$days_from_harvest) && !is.na(result$days_from_harvest)) { if (abs(result$days_from_harvest) < min_days) { min_days <- abs(result$days_from_harvest) best_result <- result } } } if (!is.null(best_result)) { cat("BEST RESULT:\n") cat(" Smoothing:", best_result$smooth_label, "\n") cat(" Break date:", format(best_result$break_date, "%Y-%m-%d"), "\n") cat(" Days from harvest:", best_result$days_from_harvest, "\n") cat(" Magnitude:", round(best_result$magnitude, 3), "\n\n") # Save plot for best result output_dir <- here("r_app/experiments/harvest_prediction") png(file.path(output_dir, "bfastmonitor_best_smoothing.png"), width = 12, height = 8, units = "in", res = 300) plot(best_result$bfm_result, main = paste0("bfastmonitor - Field ", CONFIG$test_field, " (", best_result$smooth_label, ")")) abline(v = decimal_date(harvest_date), col = "red", lty = 2, lwd = 2) legend("topleft", legend = c("Actual Harvest"), col = "red", lty = 2, lwd = 2) dev.off() cat("Saved best result plot: bfastmonitor_best_smoothing.png\n") } cat("\n============================================================================\n") cat("ANALYSIS COMPLETE\n") cat("============================================================================\n")