SmartCane/r_app/experiments/harvest_prediction/old/operational_harvest_alerts.R
2026-01-06 14:17:37 +01:00

501 lines
17 KiB
R

# ============================================================================
# OPERATIONAL HARVEST ALERT SYSTEM
# Two-stage detection optimized for daily factory operations
# ============================================================================
# STAGE 1: Advance Warning (2-3 weeks ahead)
# - 7-day rolling avg CI < 2.5 for 5+ consecutive days
# - Alerts factory to monitor field closely
# - Escalates over time: WATCH → PREPARE → IMMINENT
#
# STAGE 2: Harvest Confirmation (day after harvest)
# - Sharp drop (≥1.0) within 3-7 days AND CI stays below 2.0
# - Confirms harvest occurred
# - Prioritizes Stage 1 alerted fields
# ============================================================================
suppressPackageStartupMessages({
library(readxl)
library(dplyr)
library(tidyr)
library(lubridate)
library(here)
library(zoo) # For rolling averages
})
# Set project directory
project_dir <- "esa"
assign("project_dir", project_dir, envir = .GlobalEnv)
if (basename(getwd()) == "harvest_prediction") {
setwd("../../..")
}
source(here("r_app", "parameters_project.R"))
# ============================================================================
# CONFIGURATION
# ============================================================================
CONFIG <- list(
# STAGE 1: Advance warning thresholds
rolling_window_days = 7, # Rolling average window
ci_threshold_rolling = 2.5, # 7-day avg below this
sustained_days = 5, # Consecutive days below threshold
min_field_age_days = 240, # 8 months minimum
# Alert escalation timing (days since first Stage 1 alert)
watch_days = 0, # 0-7 days: WATCH
prepare_days = 7, # 7-14 days: PREPARE
imminent_days = 14, # 14+ days: IMMINENT
# STAGE 2: Harvest confirmation thresholds
sharp_drop_threshold = 1.0, # CI drop within window
sharp_drop_window = 7, # Days to measure drop
post_harvest_ci = 2.0, # CI stays below this after harvest
confirmation_days = 2, # Days to confirm stable low CI
# Validation settings
test_window_days = 21
)
cat("============================================================================\n")
cat("OPERATIONAL HARVEST ALERT SYSTEM\n")
cat("Optimized for daily factory operations\n")
cat("============================================================================\n\n")
cat("STAGE 1 - ADVANCE WARNING:\n")
cat(" - 7-day rolling avg CI <", CONFIG$ci_threshold_rolling, "for", CONFIG$sustained_days, "consecutive days\n")
cat(" - Provides 2-3 weeks advance notice\n")
cat(" - Escalates: WATCH → PREPARE → IMMINENT\n\n")
cat("STAGE 2 - HARVEST CONFIRMATION:\n")
cat(" - Sharp drop (≥", CONFIG$sharp_drop_threshold, ") within", CONFIG$sharp_drop_window, "days\n")
cat(" - AND CI stays below", CONFIG$post_harvest_ci, "for", CONFIG$confirmation_days, "days\n")
cat(" - Detects day after harvest (better confidence)\n\n")
# ============================================================================
# LOAD DATA
# ============================================================================
cat("=== LOADING DATA ===\n\n")
ci_rds_file <- here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds")
ci_data_raw <- readRDS(ci_rds_file) %>% ungroup()
time_series_daily <- ci_data_raw %>%
mutate(date = as.Date(Date)) %>%
select(field_id = field, date, mean_ci = FitData) %>%
filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>%
arrange(field_id, date)
harvest_data <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>%
mutate(
season_start = as.Date(season_start),
season_end = as.Date(season_end)
) %>%
filter(!is.na(season_end))
fields_with_ci <- unique(time_series_daily$field_id)
harvest_data_filtered <- harvest_data %>%
filter(field %in% fields_with_ci) %>%
arrange(field, season_end)
cat("Fields:", length(fields_with_ci), "\n")
cat("Harvest events:", nrow(harvest_data_filtered), "\n\n")
# ============================================================================
# CALCULATE ROLLING AVERAGES
# ============================================================================
cat("=== CALCULATING 7-DAY ROLLING AVERAGES ===\n\n")
time_series_with_rolling <- time_series_daily %>%
group_by(field_id) %>%
arrange(date) %>%
mutate(
ci_rolling_7d = rollapply(mean_ci, width = CONFIG$rolling_window_days,
FUN = mean, align = "right", fill = NA, na.rm = TRUE)
) %>%
ungroup()
cat("Rolling averages calculated\n\n")
# ============================================================================
# STAGE 1: ADVANCE WARNING DETECTION
# ============================================================================
detect_stage1_alert <- function(field_ts, check_date, last_harvest_date,
first_alert_date = NULL, config = CONFIG) {
# Check field age
if (is.null(last_harvest_date) || is.na(last_harvest_date)) {
earliest_date <- min(field_ts$date, na.rm = TRUE)
field_age <- as.numeric(check_date - earliest_date)
} else {
field_age <- as.numeric(check_date - last_harvest_date)
}
if (field_age < config$min_field_age_days) {
return(list(
stage1_alert = FALSE,
stage1_level = "too_young",
consecutive_days = 0,
rolling_ci = NA,
first_alert_date = NA
))
}
# Get rolling average on check date
current_rolling <- field_ts %>%
filter(date == check_date) %>%
pull(ci_rolling_7d)
if (length(current_rolling) == 0 || is.na(current_rolling[1])) {
return(list(
stage1_alert = FALSE,
stage1_level = "no_data",
consecutive_days = 0,
rolling_ci = NA,
first_alert_date = NA
))
}
current_rolling <- current_rolling[1]
# Count consecutive days with rolling avg below threshold
recent_data <- field_ts %>%
filter(date <= check_date, date >= check_date - 30) %>%
arrange(desc(date))
consecutive_days <- 0
for (i in 1:nrow(recent_data)) {
if (!is.na(recent_data$ci_rolling_7d[i]) &&
recent_data$ci_rolling_7d[i] <= config$ci_threshold_rolling) {
consecutive_days <- consecutive_days + 1
} else {
break
}
}
# Determine alert status and level
stage1_alert <- FALSE
stage1_level <- "none"
new_first_alert_date <- first_alert_date
if (consecutive_days >= config$sustained_days) {
stage1_alert <- TRUE
# Track when alert first triggered
if (is.null(first_alert_date) || is.na(first_alert_date)) {
new_first_alert_date <- check_date
}
# Escalate alert level based on days since first alert
if (!is.null(new_first_alert_date) && !is.na(new_first_alert_date)) {
days_since_first_alert <- as.numeric(check_date - new_first_alert_date)
if (days_since_first_alert >= config$imminent_days) {
stage1_level <- "IMMINENT" # 14+ days: harvest very soon
} else if (days_since_first_alert >= config$prepare_days) {
stage1_level <- "PREPARE" # 7-14 days: get ready
} else {
stage1_level <- "WATCH" # 0-7 days: monitor closely
}
} else {
stage1_level <- "WATCH"
}
}
return(list(
stage1_alert = stage1_alert,
stage1_level = stage1_level,
consecutive_days = consecutive_days,
rolling_ci = current_rolling,
first_alert_date = new_first_alert_date
))
}
# ============================================================================
# STAGE 2: HARVEST CONFIRMATION DETECTION
# ============================================================================
detect_stage2_alert <- function(field_ts, check_date, config = CONFIG) {
# Get current CI
current_ci <- field_ts %>%
filter(date == check_date) %>%
pull(mean_ci)
if (length(current_ci) == 0 || is.na(current_ci[1])) {
return(list(
stage2_alert = FALSE,
stage2_level = "no_data",
ci_drop = NA,
current_ci = NA
))
}
current_ci <- current_ci[1]
# Get CI from 7 days ago
baseline_ci <- field_ts %>%
filter(date >= check_date - config$sharp_drop_window - 3,
date <= check_date - config$sharp_drop_window + 3) %>%
summarise(mean_ci = mean(mean_ci, na.rm = TRUE)) %>%
pull(mean_ci)
if (length(baseline_ci) == 0 || is.na(baseline_ci)) {
return(list(
stage2_alert = FALSE,
stage2_level = "no_baseline",
ci_drop = NA,
current_ci = current_ci
))
}
# Calculate drop
ci_drop <- baseline_ci - current_ci
# Check for sharp drop AND sustained low CI
stage2_alert <- FALSE
stage2_level <- "none"
if (ci_drop >= config$sharp_drop_threshold &&
current_ci <= config$post_harvest_ci) {
# Confirm CI stays low for multiple days
recent_low_days <- field_ts %>%
filter(date <= check_date, date >= check_date - config$confirmation_days) %>%
filter(mean_ci <= config$post_harvest_ci) %>%
nrow()
if (recent_low_days >= config$confirmation_days) {
stage2_alert <- TRUE
stage2_level <- "CONFIRMED"
} else {
stage2_alert <- TRUE
stage2_level <- "POSSIBLE"
}
}
return(list(
stage2_alert = stage2_alert,
stage2_level = stage2_level,
ci_drop = ci_drop,
current_ci = current_ci,
baseline_ci = baseline_ci
))
}
# ============================================================================
# VALIDATION FUNCTION
# ============================================================================
validate_operational_system <- function(field_id) {
field_ts <- time_series_with_rolling %>%
filter(field_id == !!field_id) %>%
arrange(date)
field_harvests <- harvest_data_filtered %>%
filter(field == field_id) %>%
arrange(season_end)
if (nrow(field_harvests) == 0) return(NULL)
all_results <- data.frame()
for (h in 1:nrow(field_harvests)) {
harvest_date <- field_harvests$season_end[h]
last_harvest <- if (h == 1) NA else field_harvests$season_end[h - 1]
test_dates_seq <- seq.Date(
from = harvest_date - CONFIG$test_window_days,
to = harvest_date + 14,
by = "1 day"
)
first_alert_date_tracked <- NA
for (i in 1:length(test_dates_seq)) {
test_date <- test_dates_seq[i]
days_from_harvest <- as.numeric(test_date - harvest_date)
# Stage 1 with alert escalation
stage1 <- detect_stage1_alert(field_ts, test_date, last_harvest,
first_alert_date_tracked, CONFIG)
# Update tracked first alert date
if (stage1$stage1_alert && !is.na(stage1$first_alert_date)) {
first_alert_date_tracked <- stage1$first_alert_date
}
# Stage 2
stage2 <- detect_stage2_alert(field_ts, test_date, CONFIG)
if (length(stage1$rolling_ci) > 0 && !is.na(stage1$rolling_ci)) {
all_results <- bind_rows(all_results, data.frame(
field = field_id,
harvest_event = h,
harvest_date = harvest_date,
test_date = test_date,
days_from_harvest = days_from_harvest,
stage1_alert = stage1$stage1_alert,
stage1_level = stage1$stage1_level,
stage2_alert = stage2$stage2_alert,
stage2_level = stage2$stage2_level,
rolling_ci = stage1$rolling_ci,
consecutive_days = stage1$consecutive_days,
ci_drop = ifelse(is.null(stage2$ci_drop), NA, stage2$ci_drop)
))
}
}
}
return(all_results)
}
# ============================================================================
# RUN FULL VALIDATION
# ============================================================================
cat("============================================================================\n")
cat("VALIDATING ON FULL DATASET\n")
cat("============================================================================\n\n")
all_results <- data.frame()
summary_stats <- data.frame()
fields_to_test <- unique(harvest_data_filtered$field)
total_fields <- length(fields_to_test)
cat("Testing", total_fields, "fields...\n\n")
pb <- txtProgressBar(min = 0, max = total_fields, style = 3)
for (f in 1:total_fields) {
field_id <- fields_to_test[f]
field_results <- validate_operational_system(field_id)
if (!is.null(field_results) && nrow(field_results) > 0) {
all_results <- bind_rows(all_results, field_results)
# Calculate success rates
field_harvests_count <- length(unique(field_results$harvest_event))
# Stage 1: Any alert in 7-21 days before harvest
stage1_success <- field_results %>%
filter(stage1_alert == TRUE,
days_from_harvest >= -21,
days_from_harvest <= -7) %>%
distinct(harvest_event) %>%
nrow()
# Stage 2: Detection within 1-3 days after harvest
stage2_success <- field_results %>%
filter(stage2_alert == TRUE,
stage2_level == "CONFIRMED",
days_from_harvest >= 0,
days_from_harvest <= 3) %>%
distinct(harvest_event) %>%
nrow()
summary_stats <- bind_rows(summary_stats, data.frame(
field = field_id,
total_harvests = field_harvests_count,
stage1_success = stage1_success,
stage2_success = stage2_success,
stage1_rate = round(100 * stage1_success / field_harvests_count, 1),
stage2_rate = round(100 * stage2_success / field_harvests_count, 1)
))
}
setTxtProgressBar(pb, f)
}
close(pb)
# ============================================================================
# RESULTS
# ============================================================================
cat("\n\n============================================================================\n")
cat("RESULTS BY FIELD\n")
cat("============================================================================\n\n")
print(summary_stats, row.names = FALSE)
cat("\n============================================================================\n")
cat("OVERALL PERFORMANCE\n")
cat("============================================================================\n\n")
total_harvests <- sum(summary_stats$total_harvests)
total_stage1 <- sum(summary_stats$stage1_success)
total_stage2 <- sum(summary_stats$stage2_success)
cat("Total harvest events:", total_harvests, "\n\n")
cat("STAGE 1 - ADVANCE WARNING (7-21 days ahead):\n")
cat(" Success:", total_stage1, "/", total_harvests,
"(", round(100 * total_stage1 / total_harvests, 1), "% )\n")
cat(" Fields with >50% success:", sum(summary_stats$stage1_rate > 50), "/", total_fields, "\n\n")
cat("STAGE 2 - HARVEST CONFIRMATION (0-3 days after):\n")
cat(" Success:", total_stage2, "/", total_harvests,
"(", round(100 * total_stage2 / total_harvests, 1), "% )\n")
cat(" Fields with >50% success:", sum(summary_stats$stage2_rate > 50), "/", total_fields, "\n\n")
# Alert escalation analysis
if (nrow(all_results) > 0) {
cat("STAGE 1 ALERT ESCALATION BREAKDOWN:\n")
escalation_breakdown <- all_results %>%
filter(stage1_alert == TRUE, days_from_harvest < 0) %>%
group_by(stage1_level) %>%
summarise(count = n()) %>%
arrange(match(stage1_level, c("WATCH", "PREPARE", "IMMINENT")))
print(escalation_breakdown, row.names = FALSE)
cat("\n")
}
cat("============================================================================\n")
cat("TOP PERFORMING FIELDS\n")
cat("============================================================================\n\n")
cat("STAGE 1 (Advance Warning):\n")
top_stage1 <- summary_stats %>% arrange(desc(stage1_rate)) %>% head(5)
print(top_stage1, row.names = FALSE)
cat("\n\nSTAGE 2 (Harvest Confirmation):\n")
top_stage2 <- summary_stats %>% arrange(desc(stage2_rate)) %>% head(5)
print(top_stage2, row.names = FALSE)
cat("\n============================================================================\n")
cat("OPERATIONAL IMPLEMENTATION\n")
cat("============================================================================\n\n")
cat("🏭 DAILY WORKFLOW:\n\n")
cat(" 1. Run this script each morning\n")
cat(" 2. Review ALL ACTIVE ALERTS (status report for all fields)\n\n")
cat(" STAGE 1 ESCALATION:\n")
cat(" - WATCH: Field entered harvest window, monitor closely\n")
cat(" - PREPARE: 1 week in alert, prepare logistics (7-14 days total)\n")
cat(" - IMMINENT: 2+ weeks in alert, harvest very soon (14+ days total)\n\n")
cat(" STAGE 2 CONFIRMATION:\n")
cat(" - POSSIBLE: Sharp CI drop detected, likely harvested\n")
cat(" - CONFIRMED: Sustained low CI for 2+ days, harvest confirmed\n\n")
cat(" Priority: Stage 1 alerted fields get Stage 2 monitoring\n")
cat(" Detection: Day after harvest (better satellite coverage = higher confidence)\n\n")
# Save results
output_file <- here("r_app/experiments/harvest_prediction/operational_validation_results.rds")
saveRDS(list(
all_results = all_results,
summary = summary_stats,
config = CONFIG
), output_file)
cat("============================================================================\n")
cat("Results saved to:", output_file, "\n")
cat("============================================================================\n")