SmartCane/r_app/experiments/harvest_prediction/visualize_bfast_decomposition.R
2026-01-06 14:17:37 +01:00

309 lines
11 KiB
R

# ============================================================================
# VISUALIZE BFAST DECOMPOSITION
# ============================================================================
# Create a visual plot showing:
# - Original CI time series
# - Trend component
# - Seasonal component (if fitted)
# - Detected breakpoints
# Similar to the bfast monitor plot
# ============================================================================
suppressPackageStartupMessages({
library(readxl)
library(dplyr)
library(tidyr)
library(lubridate)
library(here)
library(bfast)
library(zoo)
library(ggplot2)
})
# Set project directory
project_dir <- "esa"
assign("project_dir", project_dir, envir = .GlobalEnv)
if (basename(getwd()) == "harvest_prediction") {
setwd("../../..")
}
source(here("r_app", "parameters_project.R"))
# ============================================================================
# CONFIGURATION
# ============================================================================
CONFIG <- list(
test_field = "KHWC",
test_harvest_index = 2,
# Run bfast up to this many days after harvest
end_days_after_harvest = 20,
# bfast parameters - try different approaches
tests = list(
# Test 1: No seasonal model
list(h = 0.15, season = "none", name = "No Season"),
# Test 2: Harmonic seasonal (might work with less data)
list(h = 0.15, season = "harmonic", name = "Harmonic Season"),
# Test 3: Lower h for more sensitive detection
list(h = 0.10, season = "none", name = "Sensitive (h=0.10)")
)
)
cat("============================================================================\n")
cat("BFAST DECOMPOSITION VISUALIZATION\n")
cat("============================================================================\n\n")
# ============================================================================
# LOAD DATA
# ============================================================================
ci_rds_file <- here("laravel_app/storage/app", project_dir,
"Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds")
ci_data_raw <- readRDS(ci_rds_file) %>% ungroup()
time_series_daily <- ci_data_raw %>%
mutate(date = as.Date(Date)) %>%
select(field_id = field, date, mean_ci = FitData) %>%
filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>%
arrange(field_id, date)
harvest_data <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>%
mutate(
season_start = as.Date(season_start),
season_end = as.Date(season_end)
) %>%
filter(!is.na(season_end))
# Get test field and harvest
field_harvests <- harvest_data %>%
filter(field == CONFIG$test_field) %>%
arrange(season_end)
test_harvest <- field_harvests[CONFIG$test_harvest_index, ]
harvest_date <- test_harvest$season_end
if (CONFIG$test_harvest_index == 1) {
season_start <- test_harvest$season_start
if (is.na(season_start)) {
season_start <- min(time_series_daily$date[time_series_daily$field_id == CONFIG$test_field])
}
} else {
season_start <- field_harvests$season_end[CONFIG$test_harvest_index - 1]
}
# Prepare field time series (up to end_days_after_harvest)
end_date <- harvest_date + CONFIG$end_days_after_harvest
field_ts <- time_series_daily %>%
filter(field_id == CONFIG$test_field,
date >= season_start,
date <= end_date) %>%
arrange(date)
cat("Field:", CONFIG$test_field, "\n")
cat("Season:", format(season_start, "%Y-%m-%d"), "to", format(harvest_date, "%Y-%m-%d"), "\n")
cat("Harvest date:", format(harvest_date, "%Y-%m-%d"), "\n")
cat("Analysis end date:", format(end_date, "%Y-%m-%d"),
"(", CONFIG$end_days_after_harvest, "days after harvest)\n\n")
# ============================================================================
# PREPARE TIME SERIES
# ============================================================================
# Create regular time series
date_seq <- seq.Date(min(field_ts$date), max(field_ts$date), by = "1 day")
ts_regular <- data.frame(date = date_seq) %>%
left_join(field_ts, by = "date")
# Interpolate missing values
ts_regular$mean_ci_interp <- na.approx(ts_regular$mean_ci, rule = 2)
# Convert to ts object
start_year <- as.numeric(format(min(ts_regular$date), "%Y"))
start_doy <- as.numeric(format(min(ts_regular$date), "%j"))
ts_obj <- ts(ts_regular$mean_ci_interp,
start = c(start_year, start_doy),
frequency = 365)
# ============================================================================
# RUN BFAST WITH DIFFERENT CONFIGURATIONS
# ============================================================================
output_dir <- here("r_app/experiments/harvest_prediction")
for (test_config in CONFIG$tests) {
cat("============================================================================\n")
cat("TEST:", test_config$name, "\n")
cat(" h =", test_config$h, "\n")
cat(" season =", test_config$season, "\n")
cat("============================================================================\n\n")
tryCatch({
# Run bfast
bfast_result <- bfast(ts_obj,
h = test_config$h,
season = test_config$season,
max.iter = 10)
# Extract components
trend <- bfast_result$output[[1]]$Tt
if (test_config$season != "none") {
seasonal <- bfast_result$output[[1]]$St
} else {
seasonal <- NULL
}
remainder <- bfast_result$output[[1]]$et
# Get breakpoints
bp_obj <- bfast_result$output[[1]]$bp.Vt
if (!is.null(bp_obj) && length(bp_obj$breakpoints) > 0) {
bp_indices <- bp_obj$breakpoints
bp_indices <- bp_indices[!is.na(bp_indices)]
if (length(bp_indices) > 0) {
bp_dates <- ts_regular$date[bp_indices]
bp_values <- trend[bp_indices]
cat("Detected", length(bp_dates), "breakpoints:\n")
for (i in 1:length(bp_dates)) {
days_from_harvest <- as.numeric(bp_dates[i] - harvest_date)
cat(" ", format(bp_dates[i], "%Y-%m-%d"),
" (", days_from_harvest, "days from harvest)\n")
}
cat("\n")
} else {
bp_dates <- NULL
bp_values <- NULL
cat("No breakpoints detected\n\n")
}
} else {
bp_dates <- NULL
bp_values <- NULL
cat("No breakpoints detected\n\n")
}
# Create decomposition plot
plot_data <- data.frame(
date = ts_regular$date,
original = as.numeric(ts_obj),
trend = as.numeric(trend),
remainder = as.numeric(remainder)
)
if (!is.null(seasonal)) {
plot_data$seasonal <- as.numeric(seasonal)
}
# Plot 1: Original + Trend + Breakpoints
p1 <- ggplot(plot_data, aes(x = date)) +
geom_line(aes(y = original), color = "black", alpha = 0.5) +
geom_line(aes(y = trend), color = "blue", linewidth = 1) +
geom_vline(xintercept = harvest_date, color = "red", linetype = "dashed", linewidth = 1) +
labs(
title = paste0("bfast Decomposition: ", test_config$name),
subtitle = paste0("Field ", CONFIG$test_field, " - Black: Original, Blue: Trend"),
x = "Date",
y = "CI Value"
) +
theme_minimal() +
theme(plot.title = element_text(face = "bold"))
# Add breakpoints if detected
if (!is.null(bp_dates) && length(bp_dates) > 0) {
bp_df <- data.frame(date = bp_dates, y = as.numeric(bp_values))
p1 <- p1 +
geom_vline(xintercept = bp_dates, color = "darkgreen",
linetype = "dotted", linewidth = 0.8) +
geom_point(data = bp_df, aes(x = date, y = y),
color = "darkgreen", size = 3)
}
# Save plot
filename <- paste0("bfast_decomp_", gsub("[^a-zA-Z0-9]", "_", test_config$name), ".png")
ggsave(file.path(output_dir, filename), p1, width = 12, height = 6, dpi = 300)
cat("Saved:", filename, "\n")
# Plot 2: Seasonal component (if exists)
if (!is.null(seasonal)) {
p2 <- ggplot(plot_data, aes(x = date, y = seasonal)) +
geom_line(color = "purple", linewidth = 1) +
geom_vline(xintercept = harvest_date, color = "red", linetype = "dashed") +
labs(
title = paste0("Seasonal Component: ", test_config$name),
x = "Date",
y = "Seasonal Effect"
) +
theme_minimal()
filename_seasonal <- paste0("bfast_seasonal_", gsub("[^a-zA-Z0-9]", "_", test_config$name), ".png")
ggsave(file.path(output_dir, filename_seasonal), p2, width = 12, height = 4, dpi = 300)
cat("Saved:", filename_seasonal, "\n")
}
# Plot 3: Multi-panel decomposition
plot_list <- list()
# Panel 1: Original
plot_list[[1]] <- ggplot(plot_data, aes(x = date, y = original)) +
geom_line(color = "black") +
geom_vline(xintercept = harvest_date, color = "red", linetype = "dashed") +
labs(title = "Original CI", y = "CI") +
theme_minimal()
# Panel 2: Trend
p_trend <- ggplot(plot_data, aes(x = date, y = trend)) +
geom_line(color = "blue", linewidth = 1) +
geom_vline(xintercept = harvest_date, color = "red", linetype = "dashed") +
labs(title = "Trend", y = "Trend") +
theme_minimal()
if (!is.null(bp_dates) && length(bp_dates) > 0) {
bp_df <- data.frame(date = bp_dates, y = as.numeric(bp_values))
p_trend <- p_trend +
geom_vline(xintercept = bp_dates, color = "darkgreen", linetype = "dotted") +
geom_point(data = bp_df, aes(x = date, y = y),
color = "darkgreen", size = 2)
}
plot_list[[2]] <- p_trend
# Panel 3: Seasonal (if exists)
if (!is.null(seasonal)) {
plot_list[[3]] <- ggplot(plot_data, aes(x = date, y = seasonal)) +
geom_line(color = "purple") +
geom_vline(xintercept = harvest_date, color = "red", linetype = "dashed") +
labs(title = "Seasonal", y = "Seasonal") +
theme_minimal()
}
# Panel 4: Remainder
idx <- length(plot_list) + 1
plot_list[[idx]] <- ggplot(plot_data, aes(x = date, y = remainder)) +
geom_line(color = "gray") +
geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
geom_vline(xintercept = harvest_date, color = "red", linetype = "dashed") +
labs(title = "Remainder", y = "Remainder", x = "Date") +
theme_minimal()
# Combine panels
library(patchwork)
combined <- wrap_plots(plot_list, ncol = 1)
filename_multi <- paste0("bfast_multipanel_", gsub("[^a-zA-Z0-9]", "_", test_config$name), ".png")
ggsave(file.path(output_dir, filename_multi), combined, width = 12, height = 8, dpi = 300)
cat("Saved:", filename_multi, "\n\n")
}, error = function(e) {
cat("ERROR:", e$message, "\n\n")
})
}
cat("============================================================================\n")
cat("ANALYSIS COMPLETE\n")
cat("============================================================================\n")
cat("\nAll plots saved to:", output_dir, "\n")