SmartCane/r_app/experiments/harvest_prediction/visualize_bfast_decomposition.R

# ============================================================================
# VISUALIZE BFAST DECOMPOSITION
# ============================================================================
# Create a visual plot showing:
# - Original CI time series
# - Trend component
# - Seasonal component (if fitted)
# - Detected breakpoints
# Similar to the bfast monitor plot
# ============================================================================

suppressPackageStartupMessages({
  library(readxl)
  library(dplyr)
  library(tidyr)
  library(lubridate)
  library(here)
  library(bfast)
  library(zoo)
  library(ggplot2)
})

# Set project directory
project_dir <- "esa"
assign("project_dir", project_dir, envir = .GlobalEnv)

if (basename(getwd()) == "harvest_prediction") {
  setwd("../../..")
}

source(here("r_app", "parameters_project.R"))

# ============================================================================
# CONFIGURATION
# ============================================================================

CONFIG <- list(
  test_field = "KHWC",
  test_harvest_index = 2,

  # Run bfast up to this many days after harvest
  end_days_after_harvest = 20,

  # bfast parameters - try different approaches
  tests = list(
    # Test 1: No seasonal model
    list(h = 0.15, season = "none", name = "No Season"),
    # Test 2: Harmonic seasonal (might work with less data)
    list(h = 0.15, season = "harmonic", name = "Harmonic Season"),
    # Test 3: Lower h for more sensitive detection
    list(h = 0.10, season = "none", name = "Sensitive (h=0.10)")
  )
)

cat("============================================================================\n")
cat("BFAST DECOMPOSITION VISUALIZATION\n")
cat("============================================================================\n\n")

# ============================================================================
# LOAD DATA
# ============================================================================

ci_rds_file <- here("laravel_app/storage/app", project_dir,
                    "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds")
ci_data_raw <- readRDS(ci_rds_file) %>% ungroup()

time_series_daily <- ci_data_raw %>%
  mutate(date = as.Date(Date)) %>%
  select(field_id = field, date, mean_ci = FitData) %>%
  filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>%
  arrange(field_id, date)

harvest_data <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>%
  mutate(
    season_start = as.Date(season_start),
    season_end = as.Date(season_end)
  ) %>%
  filter(!is.na(season_end))

# Get test field and harvest
field_harvests <- harvest_data %>%
  filter(field == CONFIG$test_field) %>%
  arrange(season_end)

test_harvest <- field_harvests[CONFIG$test_harvest_index, ]
harvest_date <- test_harvest$season_end

if (CONFIG$test_harvest_index == 1) {
  season_start <- test_harvest$season_start
  if (is.na(season_start)) {
    season_start <- min(time_series_daily$date[time_series_daily$field_id == CONFIG$test_field])
  }
} else {
  season_start <- field_harvests$season_end[CONFIG$test_harvest_index - 1]
}

# Prepare field time series (up to end_days_after_harvest)
end_date <- harvest_date + CONFIG$end_days_after_harvest

field_ts <- time_series_daily %>%
  filter(field_id == CONFIG$test_field,
         date >= season_start,
         date <= end_date) %>%
  arrange(date)

cat("Field:", CONFIG$test_field, "\n")
cat("Season:", format(season_start, "%Y-%m-%d"), "to", format(harvest_date, "%Y-%m-%d"), "\n")
cat("Harvest date:", format(harvest_date, "%Y-%m-%d"), "\n")
cat("Analysis end date:", format(end_date, "%Y-%m-%d"),
    "(", CONFIG$end_days_after_harvest, "days after harvest)\n\n")

# ============================================================================
# PREPARE TIME SERIES
# ============================================================================

# Create regular time series
date_seq <- seq.Date(min(field_ts$date), max(field_ts$date), by = "1 day")
ts_regular <- data.frame(date = date_seq) %>%
  left_join(field_ts, by = "date")

# Interpolate missing values
ts_regular$mean_ci_interp <- na.approx(ts_regular$mean_ci, rule = 2)

# Convert to ts object
start_year <- as.numeric(format(min(ts_regular$date), "%Y"))
start_doy <- as.numeric(format(min(ts_regular$date), "%j"))

ts_obj <- ts(ts_regular$mean_ci_interp,
             start = c(start_year, start_doy),
             frequency = 365)

# ============================================================================
# RUN BFAST WITH DIFFERENT CONFIGURATIONS
# ============================================================================

output_dir <- here("r_app/experiments/harvest_prediction")

for (test_config in CONFIG$tests) {
  cat("============================================================================\n")
  cat("TEST:", test_config$name, "\n")
  cat("  h =", test_config$h, "\n")
  cat("  season =", test_config$season, "\n")
  cat("============================================================================\n\n")

  tryCatch({
    # Run bfast
    bfast_result <- bfast(ts_obj,
                          h = test_config$h,
                          season = test_config$season,
                          max.iter = 10)

    # Extract components
    trend <- bfast_result$output[[1]]$Tt
    if (test_config$season != "none") {
      seasonal <- bfast_result$output[[1]]$St
    } else {
      seasonal <- NULL
    }
    remainder <- bfast_result$output[[1]]$et

    # Get breakpoints
    bp_obj <- bfast_result$output[[1]]$bp.Vt

    if (!is.null(bp_obj) && length(bp_obj$breakpoints) > 0) {
      bp_indices <- bp_obj$breakpoints
      bp_indices <- bp_indices[!is.na(bp_indices)]

      if (length(bp_indices) > 0) {
        bp_dates <- ts_regular$date[bp_indices]
        bp_values <- trend[bp_indices]

        cat("Detected", length(bp_dates), "breakpoints:\n")
        for (i in 1:length(bp_dates)) {
          days_from_harvest <- as.numeric(bp_dates[i] - harvest_date)
          cat("  ", format(bp_dates[i], "%Y-%m-%d"),
              " (", days_from_harvest, "days from harvest)\n")
        }
        cat("\n")
      } else {
        bp_dates <- NULL
        bp_values <- NULL
        cat("No breakpoints detected\n\n")
      }
    } else {
      bp_dates <- NULL
      bp_values <- NULL
      cat("No breakpoints detected\n\n")
    }

    # Create decomposition plot
    plot_data <- data.frame(
      date = ts_regular$date,
      original = as.numeric(ts_obj),
      trend = as.numeric(trend),
      remainder = as.numeric(remainder)
    )

    if (!is.null(seasonal)) {
      plot_data$seasonal <- as.numeric(seasonal)
    }

    # Plot 1: Original + Trend + Breakpoints
    p1 <- ggplot(plot_data, aes(x = date)) +
      geom_line(aes(y = original), color = "black", alpha = 0.5) +
      geom_line(aes(y = trend), color = "blue", linewidth = 1) +
      geom_vline(xintercept = harvest_date, color = "red", linetype = "dashed", linewidth = 1) +
      labs(
        title = paste0("bfast Decomposition: ", test_config$name),
        subtitle = paste0("Field ", CONFIG$test_field, " - Black: Original, Blue: Trend"),
        x = "Date",
        y = "CI Value"
      ) +
      theme_minimal() +
      theme(plot.title = element_text(face = "bold"))

    # Add breakpoints if detected
    if (!is.null(bp_dates) && length(bp_dates) > 0) {
      bp_df <- data.frame(date = bp_dates, y = as.numeric(bp_values))
      p1 <- p1 +
        geom_vline(xintercept = bp_dates, color = "darkgreen",
                   linetype = "dotted", linewidth = 0.8) +
        geom_point(data = bp_df, aes(x = date, y = y),
                   color = "darkgreen", size = 3)
    }

    # Save plot
    filename <- paste0("bfast_decomp_", gsub("[^a-zA-Z0-9]", "_", test_config$name), ".png")
    ggsave(file.path(output_dir, filename), p1, width = 12, height = 6, dpi = 300)
    cat("Saved:", filename, "\n")

    # Plot 2: Seasonal component (if exists)
    if (!is.null(seasonal)) {
      p2 <- ggplot(plot_data, aes(x = date, y = seasonal)) +
        geom_line(color = "purple", linewidth = 1) +
        geom_vline(xintercept = harvest_date, color = "red", linetype = "dashed") +
        labs(
          title = paste0("Seasonal Component: ", test_config$name),
          x = "Date",
          y = "Seasonal Effect"
        ) +
        theme_minimal()

      filename_seasonal <- paste0("bfast_seasonal_", gsub("[^a-zA-Z0-9]", "_", test_config$name), ".png")
      ggsave(file.path(output_dir, filename_seasonal), p2, width = 12, height = 4, dpi = 300)
      cat("Saved:", filename_seasonal, "\n")
    }

    # Plot 3: Multi-panel decomposition
    plot_list <- list()

    # Panel 1: Original
    plot_list[[1]] <- ggplot(plot_data, aes(x = date, y = original)) +
      geom_line(color = "black") +
      geom_vline(xintercept = harvest_date, color = "red", linetype = "dashed") +
      labs(title = "Original CI", y = "CI") +
      theme_minimal()

    # Panel 2: Trend
    p_trend <- ggplot(plot_data, aes(x = date, y = trend)) +
      geom_line(color = "blue", linewidth = 1) +
      geom_vline(xintercept = harvest_date, color = "red", linetype = "dashed") +
      labs(title = "Trend", y = "Trend") +
      theme_minimal()

    if (!is.null(bp_dates) && length(bp_dates) > 0) {
      bp_df <- data.frame(date = bp_dates, y = as.numeric(bp_values))
      p_trend <- p_trend +
        geom_vline(xintercept = bp_dates, color = "darkgreen", linetype = "dotted") +
        geom_point(data = bp_df, aes(x = date, y = y),
                   color = "darkgreen", size = 2)
    }
    plot_list[[2]] <- p_trend

    # Panel 3: Seasonal (if exists)
    if (!is.null(seasonal)) {
      plot_list[[3]] <- ggplot(plot_data, aes(x = date, y = seasonal)) +
        geom_line(color = "purple") +
        geom_vline(xintercept = harvest_date, color = "red", linetype = "dashed") +
        labs(title = "Seasonal", y = "Seasonal") +
        theme_minimal()
    }

    # Panel 4: Remainder
    idx <- length(plot_list) + 1
    plot_list[[idx]] <- ggplot(plot_data, aes(x = date, y = remainder)) +
      geom_line(color = "gray") +
      geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
      geom_vline(xintercept = harvest_date, color = "red", linetype = "dashed") +
      labs(title = "Remainder", y = "Remainder", x = "Date") +
      theme_minimal()

    # Combine panels
    library(patchwork)
    combined <- wrap_plots(plot_list, ncol = 1)

    filename_multi <- paste0("bfast_multipanel_", gsub("[^a-zA-Z0-9]", "_", test_config$name), ".png")
    ggsave(file.path(output_dir, filename_multi), combined, width = 12, height = 8, dpi = 300)
    cat("Saved:", filename_multi, "\n\n")

  }, error = function(e) {
    cat("ERROR:", e$message, "\n\n")
  })
}

cat("============================================================================\n")
cat("ANALYSIS COMPLETE\n")
cat("============================================================================\n")
cat("\nAll plots saved to:", output_dir, "\n")