SmartCane/r_app/experiments/harvest_prediction/old/examine_missed_harvests.R
2026-01-06 14:17:37 +01:00

157 lines
5.1 KiB
R

# Examine CI patterns for fields where detection failed
suppressPackageStartupMessages({
library(readxl)
library(dplyr)
library(tidyr)
library(lubridate)
library(here)
library(ggplot2)
})
project_dir <- "esa"
assign("project_dir", project_dir, envir = .GlobalEnv)
source(here("r_app", "parameters_project.R"))
# Load daily CI data
ci_data_raw <- readRDS(here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>% ungroup()
time_series <- ci_data_raw %>%
mutate(
date = as.Date(Date),
week = isoweek(date),
year = isoyear(date)
) %>%
select(field_id = field, date, week, year, mean_ci = FitData) %>%
filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>%
arrange(field_id, date)
# Load harvest data
harvest_actual <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>%
mutate(
season_start = as.Date(season_start),
season_end = as.Date(season_end)
) %>%
filter(field %in% unique(field_boundaries_sf$field)) %>%
filter(!is.na(season_end))
cat("=== EXAMINING MISSED HARVESTS ===\n\n")
# Focus on specific fields mentioned: 00302, 00F28
test_fields <- c("00302", "00F28")
for (field_name in test_fields) {
cat("\n\n===============================================\n")
cat("FIELD:", field_name, "\n")
cat("===============================================\n\n")
# Get harvest dates for this field
field_harvests <- harvest_actual %>%
filter(field == field_name) %>%
arrange(season_end) %>%
mutate(harvest_week = isoweek(season_end))
cat("Harvest dates:\n")
print(field_harvests %>% select(year, season_end, harvest_week))
# Get time series for this field
field_ts <- time_series %>%
filter(field_id == field_name) %>%
arrange(date) %>%
mutate(
# Add rolling average
ci_smooth = zoo::rollmean(mean_ci, k = 7, fill = NA, align = "center"),
# Add week-over-week change
ci_lag7 = lag(ci_smooth, 7),
ci_drop = ci_lag7 - ci_smooth
)
# For each harvest, show ±30 days of data
for (i in 1:nrow(field_harvests)) {
h_date <- field_harvests$season_end[i]
h_year <- field_harvests$year[i]
cat("\n--- Harvest", i, ":", as.character(h_date), "(Year:", h_year, ") ---\n")
window_data <- field_ts %>%
filter(date >= (h_date - 30), date <= (h_date + 30)) %>%
mutate(
days_from_harvest = as.numeric(date - h_date),
is_harvest_week = abs(days_from_harvest) <= 3
)
if (nrow(window_data) > 0) {
cat("\nCI values around harvest (±30 days):\n")
# Summary by week relative to harvest
weekly_summary <- window_data %>%
mutate(week_offset = floor(days_from_harvest / 7)) %>%
group_by(week_offset) %>%
summarise(
n_days = n(),
mean_ci = mean(mean_ci, na.rm = TRUE),
min_ci = min(mean_ci, na.rm = TRUE),
max_ci = max(mean_ci, na.rm = TRUE),
mean_drop = mean(ci_drop, na.rm = TRUE),
max_drop = max(ci_drop, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(week_offset)
print(weekly_summary)
# Check detection conditions
cat("\nDetection analysis:\n")
# Count days meeting different thresholds
low_ci_1.5 <- sum(window_data$mean_ci < 1.5, na.rm = TRUE)
low_ci_2.0 <- sum(window_data$mean_ci < 2.0, na.rm = TRUE)
low_ci_2.5 <- sum(window_data$mean_ci < 2.5, na.rm = TRUE)
drop_0.3 <- sum(window_data$ci_drop > 0.3, na.rm = TRUE)
drop_0.5 <- sum(window_data$ci_drop > 0.5, na.rm = TRUE)
drop_0.8 <- sum(window_data$ci_drop > 0.8, na.rm = TRUE)
cat(" Days with CI < 1.5:", low_ci_1.5, "\n")
cat(" Days with CI < 2.0:", low_ci_2.0, "\n")
cat(" Days with CI < 2.5:", low_ci_2.5, "\n")
cat(" Days with drop > 0.3:", drop_0.3, "\n")
cat(" Days with drop > 0.5:", drop_0.5, "\n")
cat(" Days with drop > 0.8:", drop_0.8, "\n")
# Check if there's a sustained low period
consecutive_low <- window_data %>%
mutate(is_low = mean_ci < 2.0) %>%
filter(is_low) %>%
mutate(
date_diff = as.numeric(date - lag(date)),
new_group = is.na(date_diff) | date_diff > 3
) %>%
mutate(group_id = cumsum(new_group)) %>%
group_by(group_id) %>%
summarise(
start = min(date),
end = max(date),
duration = n(),
mean_ci = mean(mean_ci),
.groups = "drop"
) %>%
arrange(desc(duration))
if (nrow(consecutive_low) > 0) {
cat("\nLongest consecutive low CI periods (CI < 2.0):\n")
print(head(consecutive_low, 3))
} else {
cat("\nNo sustained low CI periods found (CI < 2.0)\n")
}
} else {
cat("No data available in this window\n")
}
}
}
cat("\n\n=== SUMMARY ===\n")
cat("This analysis shows why harvests were missed.\n")
cat("If CI doesn't drop low enough OR doesn't stay low for enough consecutive days,\n")
cat("the detection algorithm won't trigger.\n")