# SmartCane/r_app/experiments/harvest_prediction/old/visualize_harvest_ci.R
# 2026-01-06 14:17:37 +01:00
#
# 129 lines
# 4.1 KiB
# R

# Visualize CI time series with harvest dates to validate patterns
suppressPackageStartupMessages({
library(readxl)
library(dplyr)
library(tidyr)
library(lubridate)
library(terra)
library(sf)
library(here)
library(ggplot2)
})
# Project selection ----
# The project name is bound as a regular variable and also written explicitly
# into the global environment before the configuration script is sourced.
project_dir <- "esa"
assign(x = "project_dir", value = project_dir, envir = globalenv())

# Load the shared project configuration
cat("Loading project configuration...\n")
source(file = here("r_app", "parameters_project.R"))
# Load pre-extracted CI values ----
ci_rds_file <- here(
  "laravel_app/storage/app",
  project_dir,
  "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds"
)
ci_data_raw <- ungroup(readRDS(ci_rds_file))

# Build one row per field and date: derive the calendar date plus ISO week and
# ISO year, keep only the columns used for plotting (`field` is exposed as
# `field_id`, `FitData` as `mean_ci`), drop incomplete rows, and sort
# chronologically within each field.
time_series <- local({
  with_dates <- mutate(
    ci_data_raw,
    date = as.Date(Date),
    week = isoweek(date),
    year = isoyear(date)
  )
  plot_columns <- select(
    with_dates,
    field_id = field,
    date,
    week,
    year,
    mean_ci = FitData
  )
  complete_rows <- filter(
    plot_columns,
    !is.na(mean_ci),
    !is.na(date),
    !is.na(field_id)
  )
  arrange(complete_rows, field_id, date)
})
# Read actual harvest data ----
# The workbook path is built with here() and `project_dir`, the same way the
# CI data path is built, so the script no longer depends on the current
# working directory or on a second hard-coded copy of the project name.
harvest_file <- here("laravel_app/storage/app", project_dir, "Data/harvest.xlsx")
harvest_actual_all <- read_excel(harvest_file) %>%
  mutate(
    season_start = as.Date(season_start),
    season_end = as.Date(season_end)
  ) %>%
  # Rows without a season end carry no harvest date to plot.
  filter(!is.na(season_end))

# `field_boundaries_sf` is not defined in this script; presumably it is
# created by the sourced parameters_project.R -- confirm.
fields_with_data <- unique(field_boundaries_sf$field)

# Keep only harvest records for fields that have a boundary. Rows with a
# missing season_end were already removed above, so no second NA filter is
# needed here.
harvest_actual <- harvest_actual_all %>%
  filter(field %in% fields_with_data)
cat("Creating visualizations for each field...\n\n")

# Select fields to visualize (ones with missed harvests)
fields_to_plot <- c(
  "00302", "00F25", "00F28", "00P81", "00P82", "KHWA", "KHWB", "LOMDA"
)

# ggsave() does not reliably create a missing target directory (it errors or
# prompts, depending on ggplot2 version and session type), so make sure the
# directory exists before the first plot is written.
output_dir <- "output"
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

# For every selected field: draw the CI time series, mark each recorded
# harvest date, save the figure as a PNG and also print it to the active
# graphics device.
for (field_name in fields_to_plot) {
  field_ts <- time_series %>%
    filter(field_id == field_name)
  field_harvests <- harvest_actual %>%
    filter(field == field_name) %>%
    arrange(season_end)

  # Both a CI series and at least one harvest record are needed for a plot.
  if (nrow(field_ts) == 0 || nrow(field_harvests) == 0) {
    cat("Skipping field", field_name, "(no data)\n")
    next
  }

  cat("Plotting field:", field_name, "\n")
  cat(" Harvests:", nrow(field_harvests), "\n")
  cat(" CI observations:", nrow(field_ts), "\n")

  # Height for the harvest date labels: just below the top of the series.
  # Computed once per field and passed as a constant instead of being
  # re-evaluated lazily inside aes() when the plot is rendered.
  label_y <- max(field_ts$mean_ci, na.rm = TRUE) * 0.95

  # Create plot
  p <- ggplot() +
    # CI time series (`linewidth` replaces the deprecated `size` for lines;
    # requires ggplot2 >= 3.4.0)
    geom_line(
      data = field_ts, aes(x = date, y = mean_ci),
      color = "darkgreen", linewidth = 0.5
    ) +
    geom_point(
      data = field_ts, aes(x = date, y = mean_ci),
      color = "darkgreen", size = 0.8, alpha = 0.6
    ) +
    # Harvest dates as vertical lines
    geom_vline(
      data = field_harvests, aes(xintercept = season_end),
      color = "red", linetype = "dashed", linewidth = 0.8
    ) +
    # Add harvest labels
    geom_text(
      data = field_harvests,
      aes(x = season_end, label = format(season_end, "%Y-%m-%d")),
      y = label_y,
      angle = 90, vjust = -0.3, size = 3, color = "red"
    ) +
    # Horizontal reference lines at CI 2.0 and CI 2.5
    geom_hline(
      yintercept = 2.0, color = "blue", linetype = "dotted", alpha = 0.5
    ) +
    geom_hline(
      yintercept = 2.5, color = "orange", linetype = "dotted", alpha = 0.5
    ) +
    # Labels and theme
    labs(
      title = paste("Field", field_name, "- CI Time Series with Harvest Dates"),
      subtitle = paste(
        "Red dashed lines = harvest dates |",
        "Blue dotted = CI 2.0 |",
        "Orange dotted = CI 2.5"
      ),
      x = "Date",
      y = "Canopy Index (CI)"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(face = "bold", size = 14),
      plot.subtitle = element_text(size = 10),
      axis.text.x = element_text(angle = 45, hjust = 1)
    ) +
    scale_x_date(date_breaks = "3 months", date_labels = "%Y-%m")

  # Save plot
  output_file <- file.path(output_dir, paste0("harvest_ci_", field_name, ".png"))
  ggsave(output_file, p, width = 14, height = 6, dpi = 150)
  cat(" Saved:", output_file, "\n\n")

  # Also print plot to screen
  print(p)
}
# Closing summary: where the plots were written and which patterns to look
# for when inspecting them. Emitted as one cat() call; with sep = "" the
# output is the plain concatenation of the lines below.
cat(
  "\n=== SUMMARY ===\n",
  "Plots saved to output/ folder\n",
  "Look for patterns:\n",
  " 1. Does CI drop below 2.0-2.5 around harvest dates?\n",
  " 2. How long does CI stay low after harvest?\n",
  " 3. Are there other low CI periods NOT associated with harvest?\n",
  " 4. Is there a consistent time offset between harvest date and minimum CI?\n",
  sep = ""
)