393 lines
13 KiB
R
393 lines
13 KiB
R
# ============================================================================
|
|
# BFAST MONITOR TEST - Real-time Harvest Detection
|
|
# ============================================================================
|
|
# Use bfastmonitor() which is designed for:
|
|
# - A stable historical baseline period
|
|
# - Monitoring recent period for breaks
|
|
# - Real-time change detection
|
|
# ============================================================================
|
|
|
|
suppressPackageStartupMessages({
|
|
library(readxl)
|
|
library(dplyr)
|
|
library(tidyr)
|
|
library(lubridate)
|
|
library(here)
|
|
library(bfast)
|
|
library(zoo)
|
|
library(ggplot2)
|
|
})
|
|
|
|
# Set project directory
|
|
project_dir <- "esa"
|
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
|
|
|
if (basename(getwd()) == "harvest_prediction") {
|
|
setwd("../../..")
|
|
}
|
|
|
|
source(here("r_app", "parameters_project.R"))
|
|
|
|
# ============================================================================
|
|
# CONFIGURATION
|
|
# ============================================================================
|
|
|
|
CONFIG <- list(
|
|
# FIELD SELECTION - Change these to test different fields
|
|
test_field = "00302", # Try: 00007, 00104, 00119, 00120, etc.
|
|
test_harvest_index = 3, # Which harvest event (1 = first, 2 = second, etc.)
|
|
|
|
# HISTORY/MONITORING SPLIT
|
|
history_days = 300, # Fixed: use first 300 days as baseline history
|
|
|
|
# Analysis extends to this many days after harvest
|
|
days_after_harvest = 20,
|
|
|
|
# SMOOTHING - Test different values
|
|
smoothing_windows = c(1, 7, 14), # 1 = no smoothing, 7 = weekly, 14 = biweekly
|
|
|
|
# BFASTMONITOR PARAMETERS (tweakable!)
|
|
# 1. Formula: how to model the baseline
|
|
# - response ~ trend : simple linear trend
|
|
# - response ~ trend + harmon : add seasonal harmonics
|
|
formula = response ~ trend,
|
|
|
|
# 2. Order: if using harmon, how many harmonics (1-3 typical)
|
|
order = 1,
|
|
|
|
# 3. Type: type of monitoring process
|
|
# - "OLS-MOSUM" (default): moving sum (good for gradual changes)
|
|
# - "OLS-CUSUM": cumulative sum (good for abrupt changes)
|
|
# - "RE": recursive estimates
|
|
# - "ME": moving estimates
|
|
type = "OLS-MOSUM",
|
|
|
|
# 4. h: bandwidth for moving/recursive estimates (0.15-0.5 typical)
|
|
# smaller = more sensitive to recent changes
|
|
h = 0.25,
|
|
|
|
# 5. level: significance level for break detection (0.01-0.1)
|
|
# lower = stricter (fewer false positives)
|
|
level = 0.05
|
|
)
|
|
|
|
cat("============================================================================\n")
|
|
cat("BFAST MONITOR - REAL-TIME HARVEST DETECTION\n")
|
|
cat("============================================================================\n\n")
|
|
|
|
# ============================================================================
|
|
# LOAD DATA
|
|
# ============================================================================
|
|
|
|
ci_rds_file <- here("laravel_app/storage/app", project_dir,
|
|
"Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds")
|
|
ci_data_raw <- readRDS(ci_rds_file) %>% ungroup()
|
|
|
|
time_series_daily <- ci_data_raw %>%
|
|
mutate(date = as.Date(Date)) %>%
|
|
select(field_id = field, date, mean_ci = FitData) %>%
|
|
filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>%
|
|
arrange(field_id, date)
|
|
|
|
harvest_data <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>%
|
|
mutate(
|
|
season_start = as.Date(season_start),
|
|
season_end = as.Date(season_end)
|
|
) %>%
|
|
filter(!is.na(season_end))
|
|
|
|
# Check which fields have CI data
|
|
fields_with_ci <- unique(time_series_daily$field_id)
|
|
|
|
cat("Fields with CI data:", length(fields_with_ci), "\n")
|
|
cat("Sample fields:", paste(head(fields_with_ci, 20), collapse = ", "), "\n\n")
|
|
|
|
# Filter harvest data to only fields that have CI data
|
|
harvest_data_with_ci <- harvest_data %>%
|
|
filter(field %in% fields_with_ci)
|
|
|
|
cat("Available fields with BOTH harvest data AND CI data:\n")
|
|
field_summary <- harvest_data_with_ci %>%
|
|
group_by(field) %>%
|
|
summarise(
|
|
n_harvests = n(),
|
|
first_harvest = min(season_end),
|
|
last_harvest = max(season_end),
|
|
.groups = "drop"
|
|
) %>%
|
|
arrange(field)
|
|
print(field_summary, n = 50)
|
|
cat("\n")
|
|
|
|
# Get test field and harvest
|
|
field_harvests <- harvest_data_with_ci %>%
|
|
filter(field == CONFIG$test_field) %>%
|
|
arrange(season_end)
|
|
|
|
if (nrow(field_harvests) == 0) {
|
|
stop("Field ", CONFIG$test_field, " does not have both CI data and harvest records. ",
|
|
"Please choose from the fields listed above.")
|
|
}
|
|
|
|
if (nrow(field_harvests) < CONFIG$test_harvest_index) {
|
|
stop("Field ", CONFIG$test_field, " only has ", nrow(field_harvests),
|
|
" harvest events, but you requested harvest #", CONFIG$test_harvest_index)
|
|
}
|
|
|
|
test_harvest <- field_harvests[CONFIG$test_harvest_index, ]
|
|
harvest_date <- test_harvest$season_end
|
|
|
|
if (CONFIG$test_harvest_index == 1) {
|
|
season_start <- test_harvest$season_start
|
|
if (is.na(season_start)) {
|
|
season_start <- min(time_series_daily$date[time_series_daily$field_id == CONFIG$test_field])
|
|
}
|
|
} else {
|
|
season_start <- field_harvests$season_end[CONFIG$test_harvest_index - 1]
|
|
}
|
|
|
|
# Prepare field time series
|
|
end_date <- harvest_date + CONFIG$days_after_harvest
|
|
|
|
field_ts <- time_series_daily %>%
|
|
filter(field_id == CONFIG$test_field,
|
|
date >= season_start,
|
|
date <= end_date) %>%
|
|
arrange(date)
|
|
|
|
season_length <- as.numeric(harvest_date - season_start)
|
|
history_days <- CONFIG$history_days # Use fixed 300 days
|
|
history_end <- season_start + history_days
|
|
|
|
cat("Field:", CONFIG$test_field, "\n")
|
|
cat("Season start:", format(season_start, "%Y-%m-%d"), "\n")
|
|
cat("Harvest date:", format(harvest_date, "%Y-%m-%d"), "\n")
|
|
cat("Analysis end:", format(end_date, "%Y-%m-%d"), "\n")
|
|
cat("Season length:", season_length, "days\n\n")
|
|
|
|
cat("History period (baseline):", format(season_start, "%Y-%m-%d"), "to",
|
|
format(history_end, "%Y-%m-%d"), "(", history_days, "days)\n")
|
|
cat("Monitoring period:", format(history_end + 1, "%Y-%m-%d"), "to",
|
|
format(end_date, "%Y-%m-%d"), "\n\n")
|
|
|
|
# ============================================================================
|
|
# PREPARE TIME SERIES
|
|
# ============================================================================
|
|
|
|
# Create regular time series
|
|
date_seq <- seq.Date(min(field_ts$date), max(field_ts$date), by = "1 day")
|
|
ts_regular <- data.frame(date = date_seq) %>%
|
|
left_join(field_ts, by = "date")
|
|
|
|
# Interpolate missing values
|
|
ts_regular$mean_ci_interp <- na.approx(ts_regular$mean_ci, rule = 2)
|
|
|
|
# ============================================================================
|
|
# TEST DIFFERENT SMOOTHING WINDOWS
|
|
# ============================================================================
|
|
|
|
cat("============================================================================\n")
|
|
cat("TESTING DIFFERENT SMOOTHING WINDOWS\n")
|
|
cat("============================================================================\n\n")
|
|
|
|
all_results <- list()
|
|
|
|
for (smooth_window in CONFIG$smoothing_windows) {
|
|
|
|
cat("----------------------------------------------------------------------------\n")
|
|
cat("SMOOTHING WINDOW:", smooth_window, "days\n")
|
|
cat("----------------------------------------------------------------------------\n\n")
|
|
|
|
# Apply smoothing
|
|
if (smooth_window == 1) {
|
|
ts_regular$mean_ci_smooth <- ts_regular$mean_ci_interp
|
|
smooth_label <- "No smoothing"
|
|
} else {
|
|
ts_regular$mean_ci_smooth <- rollmean(ts_regular$mean_ci_interp,
|
|
k = smooth_window,
|
|
fill = NA,
|
|
align = "center")
|
|
# Fill NAs at edges
|
|
ts_regular$mean_ci_smooth <- na.approx(ts_regular$mean_ci_smooth, rule = 2)
|
|
smooth_label <- paste0(smooth_window, "-day moving average")
|
|
}
|
|
|
|
# Convert to ts object
|
|
start_year <- as.numeric(format(min(ts_regular$date), "%Y"))
|
|
start_doy <- as.numeric(format(min(ts_regular$date), "%j"))
|
|
|
|
ts_obj <- ts(ts_regular$mean_ci_smooth,
|
|
start = c(start_year, start_doy),
|
|
frequency = 365)
|
|
|
|
# Calculate start point for monitoring period
|
|
history_start_decimal <- as.numeric(start_year) + (start_doy - 1) / 365
|
|
history_end_decimal <- history_start_decimal + (history_days / 365)
|
|
|
|
|
|
cat("Time series with", smooth_label, "\n")
|
|
cat(" Length:", length(ts_obj), "observations\n\n")
|
|
|
|
# ============================================================================
|
|
# RUN BFASTMONITOR
|
|
# ============================================================================
|
|
|
|
tryCatch({
|
|
# Run bfastmonitor
|
|
bfm_result <- bfastmonitor(
|
|
data = ts_obj,
|
|
start = history_end_decimal,
|
|
formula = CONFIG$formula,
|
|
order = CONFIG$order,
|
|
type = CONFIG$type,
|
|
h = CONFIG$h,
|
|
level = CONFIG$level
|
|
)
|
|
|
|
cat("bfastmonitor completed successfully\n\n")
|
|
|
|
# Check if break was detected
|
|
if (!is.na(bfm_result$breakpoint)) {
|
|
# Convert breakpoint back to date
|
|
bp_decimal <- bfm_result$breakpoint
|
|
bp_year <- floor(bp_decimal)
|
|
bp_doy <- round((bp_decimal - bp_year) * 365) + 1
|
|
bp_date <- as.Date(paste0(bp_year, "-01-01")) + bp_doy - 1
|
|
|
|
days_from_harvest <- as.numeric(bp_date - harvest_date)
|
|
|
|
cat("✓ BREAKPOINT DETECTED\n")
|
|
cat(" Break date:", format(bp_date, "%Y-%m-%d"), "\n")
|
|
cat(" Days from harvest:", days_from_harvest, "\n")
|
|
|
|
if (abs(days_from_harvest) <= 7) {
|
|
cat(" >>> ✓✓✓ HARVEST DETECTED WITHIN 7 DAYS! ✓✓✓ <<<\n")
|
|
} else if (abs(days_from_harvest) <= 14) {
|
|
cat(" >>> ✓ HARVEST DETECTED WITHIN 14 DAYS <<<\n")
|
|
} else if (days_from_harvest < 0) {
|
|
cat(" Break occurred", abs(days_from_harvest), "days BEFORE harvest\n")
|
|
} else {
|
|
cat(" Break occurred", days_from_harvest, "days AFTER harvest\n")
|
|
}
|
|
|
|
cat(" Break magnitude:", round(bfm_result$magnitude, 3), "\n\n")
|
|
|
|
# Store results
|
|
all_results[[as.character(smooth_window)]] <- list(
|
|
smooth_window = smooth_window,
|
|
smooth_label = smooth_label,
|
|
break_detected = TRUE,
|
|
break_date = bp_date,
|
|
days_from_harvest = days_from_harvest,
|
|
magnitude = bfm_result$magnitude,
|
|
bfm_result = bfm_result
|
|
)
|
|
|
|
} else {
|
|
cat("❌ No breakpoint detected in monitoring period\n\n")
|
|
|
|
all_results[[as.character(smooth_window)]] <- list(
|
|
smooth_window = smooth_window,
|
|
smooth_label = smooth_label,
|
|
break_detected = FALSE
|
|
)
|
|
}
|
|
|
|
}, error = function(e) {
|
|
cat("ERROR:", e$message, "\n\n")
|
|
all_results[[as.character(smooth_window)]] <- list(
|
|
smooth_window = smooth_window,
|
|
smooth_label = smooth_label,
|
|
error = e$message
|
|
)
|
|
})
|
|
|
|
cat("\n")
|
|
}
|
|
|
|
# ============================================================================
|
|
# SUMMARY OF ALL SMOOTHING TESTS
|
|
# ============================================================================
|
|
|
|
cat("============================================================================\n")
|
|
cat("SUMMARY - EFFECT OF SMOOTHING ON HARVEST DETECTION\n")
|
|
cat("============================================================================\n\n")
|
|
|
|
summary_df <- data.frame()
|
|
|
|
for (sw in names(all_results)) {
|
|
result <- all_results[[sw]]
|
|
|
|
if (!is.null(result$error)) {
|
|
summary_df <- rbind(summary_df, data.frame(
|
|
smoothing_window = result$smooth_window,
|
|
label = result$smooth_label,
|
|
break_detected = "ERROR",
|
|
break_date = NA,
|
|
days_from_harvest = NA,
|
|
magnitude = NA
|
|
))
|
|
} else if (result$break_detected) {
|
|
summary_df <- rbind(summary_df, data.frame(
|
|
smoothing_window = result$smooth_window,
|
|
label = result$smooth_label,
|
|
break_detected = "YES",
|
|
break_date = format(result$break_date, "%Y-%m-%d"),
|
|
days_from_harvest = result$days_from_harvest,
|
|
magnitude = round(result$magnitude, 3)
|
|
))
|
|
} else {
|
|
summary_df <- rbind(summary_df, data.frame(
|
|
smoothing_window = result$smooth_window,
|
|
label = result$smooth_label,
|
|
break_detected = "NO",
|
|
break_date = NA,
|
|
days_from_harvest = NA,
|
|
magnitude = NA
|
|
))
|
|
}
|
|
}
|
|
|
|
print(summary_df, row.names = FALSE)
|
|
|
|
cat("\n")
|
|
|
|
# Find best result (closest to harvest)
|
|
best_result <- NULL
|
|
min_days <- Inf
|
|
|
|
for (result in all_results) {
|
|
if (!is.null(result$days_from_harvest) && !is.na(result$days_from_harvest)) {
|
|
if (abs(result$days_from_harvest) < min_days) {
|
|
min_days <- abs(result$days_from_harvest)
|
|
best_result <- result
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!is.null(best_result)) {
|
|
cat("BEST RESULT:\n")
|
|
cat(" Smoothing:", best_result$smooth_label, "\n")
|
|
cat(" Break date:", format(best_result$break_date, "%Y-%m-%d"), "\n")
|
|
cat(" Days from harvest:", best_result$days_from_harvest, "\n")
|
|
cat(" Magnitude:", round(best_result$magnitude, 3), "\n\n")
|
|
|
|
# Save plot for best result
|
|
output_dir <- here("r_app/experiments/harvest_prediction")
|
|
|
|
png(file.path(output_dir, "bfastmonitor_best_smoothing.png"),
|
|
width = 12, height = 8, units = "in", res = 300)
|
|
plot(best_result$bfm_result,
|
|
main = paste0("bfastmonitor - Field ", CONFIG$test_field,
|
|
" (", best_result$smooth_label, ")"))
|
|
abline(v = decimal_date(harvest_date), col = "red", lty = 2, lwd = 2)
|
|
legend("topleft", legend = c("Actual Harvest"), col = "red", lty = 2, lwd = 2)
|
|
dev.off()
|
|
|
|
cat("Saved best result plot: bfastmonitor_best_smoothing.png\n")
|
|
}
|
|
|
|
cat("\n============================================================================\n")
|
|
cat("ANALYSIS COMPLETE\n")
|
|
cat("============================================================================\n")
|