# ============================================================================ # OPERATIONAL HARVEST ALERT SYSTEM # Two-stage detection optimized for daily factory operations # ============================================================================ # STAGE 1: Advance Warning (2-3 weeks ahead) # - 7-day rolling avg CI < 2.5 for 5+ consecutive days # - Alerts factory to monitor field closely # - Escalates over time: WATCH → PREPARE → IMMINENT # # STAGE 2: Harvest Confirmation (day after harvest) # - Sharp drop (≥1.0) within 3-7 days AND CI stays below 2.0 # - Confirms harvest occurred # - Prioritizes Stage 1 alerted fields # ============================================================================ suppressPackageStartupMessages({ library(readxl) library(dplyr) library(tidyr) library(lubridate) library(here) library(zoo) # For rolling averages }) # Set project directory project_dir <- "esa" assign("project_dir", project_dir, envir = .GlobalEnv) if (basename(getwd()) == "harvest_prediction") { setwd("../../..") } source(here("r_app", "parameters_project.R")) # ============================================================================ # CONFIGURATION # ============================================================================ CONFIG <- list( # STAGE 1: Advance warning thresholds rolling_window_days = 7, # Rolling average window ci_threshold_rolling = 2.5, # 7-day avg below this sustained_days = 5, # Consecutive days below threshold min_field_age_days = 240, # 8 months minimum # Alert escalation timing (days since first Stage 1 alert) watch_days = 0, # 0-7 days: WATCH prepare_days = 7, # 7-14 days: PREPARE imminent_days = 14, # 14+ days: IMMINENT # STAGE 2: Harvest confirmation thresholds sharp_drop_threshold = 1.0, # CI drop within window sharp_drop_window = 7, # Days to measure drop post_harvest_ci = 2.0, # CI stays below this after harvest confirmation_days = 2, # Days to confirm stable low CI # Validation settings test_window_days = 21 ) cat("============================================================================\n") cat("OPERATIONAL HARVEST ALERT SYSTEM\n") cat("Optimized for daily factory operations\n") cat("============================================================================\n\n") cat("STAGE 1 - ADVANCE WARNING:\n") cat(" - 7-day rolling avg CI <", CONFIG$ci_threshold_rolling, "for", CONFIG$sustained_days, "consecutive days\n") cat(" - Provides 2-3 weeks advance notice\n") cat(" - Escalates: WATCH → PREPARE → IMMINENT\n\n") cat("STAGE 2 - HARVEST CONFIRMATION:\n") cat(" - Sharp drop (≥", CONFIG$sharp_drop_threshold, ") within", CONFIG$sharp_drop_window, "days\n") cat(" - AND CI stays below", CONFIG$post_harvest_ci, "for", CONFIG$confirmation_days, "days\n") cat(" - Detects day after harvest (better confidence)\n\n") # ============================================================================ # LOAD DATA # ============================================================================ cat("=== LOADING DATA ===\n\n") ci_rds_file <- here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds") ci_data_raw <- readRDS(ci_rds_file) %>% ungroup() time_series_daily <- ci_data_raw %>% mutate(date = as.Date(Date)) %>% select(field_id = field, date, mean_ci = FitData) %>% filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>% arrange(field_id, date) harvest_data <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>% mutate( season_start = as.Date(season_start), season_end = as.Date(season_end) ) %>% filter(!is.na(season_end)) fields_with_ci <- unique(time_series_daily$field_id) harvest_data_filtered <- harvest_data %>% filter(field %in% fields_with_ci) %>% arrange(field, season_end) cat("Fields:", length(fields_with_ci), "\n") cat("Harvest events:", nrow(harvest_data_filtered), "\n\n") # ============================================================================ # CALCULATE ROLLING AVERAGES # ============================================================================ cat("=== CALCULATING 7-DAY ROLLING AVERAGES ===\n\n") time_series_with_rolling <- time_series_daily %>% group_by(field_id) %>% arrange(date) %>% mutate( ci_rolling_7d = rollapply(mean_ci, width = CONFIG$rolling_window_days, FUN = mean, align = "right", fill = NA, na.rm = TRUE) ) %>% ungroup() cat("Rolling averages calculated\n\n") # ============================================================================ # STAGE 1: ADVANCE WARNING DETECTION # ============================================================================ detect_stage1_alert <- function(field_ts, check_date, last_harvest_date, first_alert_date = NULL, config = CONFIG) { # Check field age if (is.null(last_harvest_date) || is.na(last_harvest_date)) { earliest_date <- min(field_ts$date, na.rm = TRUE) field_age <- as.numeric(check_date - earliest_date) } else { field_age <- as.numeric(check_date - last_harvest_date) } if (field_age < config$min_field_age_days) { return(list( stage1_alert = FALSE, stage1_level = "too_young", consecutive_days = 0, rolling_ci = NA, first_alert_date = NA )) } # Get rolling average on check date current_rolling <- field_ts %>% filter(date == check_date) %>% pull(ci_rolling_7d) if (length(current_rolling) == 0 || is.na(current_rolling[1])) { return(list( stage1_alert = FALSE, stage1_level = "no_data", consecutive_days = 0, rolling_ci = NA, first_alert_date = NA )) } current_rolling <- current_rolling[1] # Count consecutive days with rolling avg below threshold recent_data <- field_ts %>% filter(date <= check_date, date >= check_date - 30) %>% arrange(desc(date)) consecutive_days <- 0 for (i in 1:nrow(recent_data)) { if (!is.na(recent_data$ci_rolling_7d[i]) && recent_data$ci_rolling_7d[i] <= config$ci_threshold_rolling) { consecutive_days <- consecutive_days + 1 } else { break } } # Determine alert status and level stage1_alert <- FALSE stage1_level <- "none" new_first_alert_date <- first_alert_date if (consecutive_days >= config$sustained_days) { stage1_alert <- TRUE # Track when alert first triggered if (is.null(first_alert_date) || is.na(first_alert_date)) { new_first_alert_date <- check_date } # Escalate alert level based on days since first alert if (!is.null(new_first_alert_date) && !is.na(new_first_alert_date)) { days_since_first_alert <- as.numeric(check_date - new_first_alert_date) if (days_since_first_alert >= config$imminent_days) { stage1_level <- "IMMINENT" # 14+ days: harvest very soon } else if (days_since_first_alert >= config$prepare_days) { stage1_level <- "PREPARE" # 7-14 days: get ready } else { stage1_level <- "WATCH" # 0-7 days: monitor closely } } else { stage1_level <- "WATCH" } } return(list( stage1_alert = stage1_alert, stage1_level = stage1_level, consecutive_days = consecutive_days, rolling_ci = current_rolling, first_alert_date = new_first_alert_date )) } # ============================================================================ # STAGE 2: HARVEST CONFIRMATION DETECTION # ============================================================================ detect_stage2_alert <- function(field_ts, check_date, config = CONFIG) { # Get current CI current_ci <- field_ts %>% filter(date == check_date) %>% pull(mean_ci) if (length(current_ci) == 0 || is.na(current_ci[1])) { return(list( stage2_alert = FALSE, stage2_level = "no_data", ci_drop = NA, current_ci = NA )) } current_ci <- current_ci[1] # Get CI from 7 days ago baseline_ci <- field_ts %>% filter(date >= check_date - config$sharp_drop_window - 3, date <= check_date - config$sharp_drop_window + 3) %>% summarise(mean_ci = mean(mean_ci, na.rm = TRUE)) %>% pull(mean_ci) if (length(baseline_ci) == 0 || is.na(baseline_ci)) { return(list( stage2_alert = FALSE, stage2_level = "no_baseline", ci_drop = NA, current_ci = current_ci )) } # Calculate drop ci_drop <- baseline_ci - current_ci # Check for sharp drop AND sustained low CI stage2_alert <- FALSE stage2_level <- "none" if (ci_drop >= config$sharp_drop_threshold && current_ci <= config$post_harvest_ci) { # Confirm CI stays low for multiple days recent_low_days <- field_ts %>% filter(date <= check_date, date >= check_date - config$confirmation_days) %>% filter(mean_ci <= config$post_harvest_ci) %>% nrow() if (recent_low_days >= config$confirmation_days) { stage2_alert <- TRUE stage2_level <- "CONFIRMED" } else { stage2_alert <- TRUE stage2_level <- "POSSIBLE" } } return(list( stage2_alert = stage2_alert, stage2_level = stage2_level, ci_drop = ci_drop, current_ci = current_ci, baseline_ci = baseline_ci )) } # ============================================================================ # VALIDATION FUNCTION # ============================================================================ validate_operational_system <- function(field_id) { field_ts <- time_series_with_rolling %>% filter(field_id == !!field_id) %>% arrange(date) field_harvests <- harvest_data_filtered %>% filter(field == field_id) %>% arrange(season_end) if (nrow(field_harvests) == 0) return(NULL) all_results <- data.frame() for (h in 1:nrow(field_harvests)) { harvest_date <- field_harvests$season_end[h] last_harvest <- if (h == 1) NA else field_harvests$season_end[h - 1] test_dates_seq <- seq.Date( from = harvest_date - CONFIG$test_window_days, to = harvest_date + 14, by = "1 day" ) first_alert_date_tracked <- NA for (i in 1:length(test_dates_seq)) { test_date <- test_dates_seq[i] days_from_harvest <- as.numeric(test_date - harvest_date) # Stage 1 with alert escalation stage1 <- detect_stage1_alert(field_ts, test_date, last_harvest, first_alert_date_tracked, CONFIG) # Update tracked first alert date if (stage1$stage1_alert && !is.na(stage1$first_alert_date)) { first_alert_date_tracked <- stage1$first_alert_date } # Stage 2 stage2 <- detect_stage2_alert(field_ts, test_date, CONFIG) if (length(stage1$rolling_ci) > 0 && !is.na(stage1$rolling_ci)) { all_results <- bind_rows(all_results, data.frame( field = field_id, harvest_event = h, harvest_date = harvest_date, test_date = test_date, days_from_harvest = days_from_harvest, stage1_alert = stage1$stage1_alert, stage1_level = stage1$stage1_level, stage2_alert = stage2$stage2_alert, stage2_level = stage2$stage2_level, rolling_ci = stage1$rolling_ci, consecutive_days = stage1$consecutive_days, ci_drop = ifelse(is.null(stage2$ci_drop), NA, stage2$ci_drop) )) } } } return(all_results) } # ============================================================================ # RUN FULL VALIDATION # ============================================================================ cat("============================================================================\n") cat("VALIDATING ON FULL DATASET\n") cat("============================================================================\n\n") all_results <- data.frame() summary_stats <- data.frame() fields_to_test <- unique(harvest_data_filtered$field) total_fields <- length(fields_to_test) cat("Testing", total_fields, "fields...\n\n") pb <- txtProgressBar(min = 0, max = total_fields, style = 3) for (f in 1:total_fields) { field_id <- fields_to_test[f] field_results <- validate_operational_system(field_id) if (!is.null(field_results) && nrow(field_results) > 0) { all_results <- bind_rows(all_results, field_results) # Calculate success rates field_harvests_count <- length(unique(field_results$harvest_event)) # Stage 1: Any alert in 7-21 days before harvest stage1_success <- field_results %>% filter(stage1_alert == TRUE, days_from_harvest >= -21, days_from_harvest <= -7) %>% distinct(harvest_event) %>% nrow() # Stage 2: Detection within 1-3 days after harvest stage2_success <- field_results %>% filter(stage2_alert == TRUE, stage2_level == "CONFIRMED", days_from_harvest >= 0, days_from_harvest <= 3) %>% distinct(harvest_event) %>% nrow() summary_stats <- bind_rows(summary_stats, data.frame( field = field_id, total_harvests = field_harvests_count, stage1_success = stage1_success, stage2_success = stage2_success, stage1_rate = round(100 * stage1_success / field_harvests_count, 1), stage2_rate = round(100 * stage2_success / field_harvests_count, 1) )) } setTxtProgressBar(pb, f) } close(pb) # ============================================================================ # RESULTS # ============================================================================ cat("\n\n============================================================================\n") cat("RESULTS BY FIELD\n") cat("============================================================================\n\n") print(summary_stats, row.names = FALSE) cat("\n============================================================================\n") cat("OVERALL PERFORMANCE\n") cat("============================================================================\n\n") total_harvests <- sum(summary_stats$total_harvests) total_stage1 <- sum(summary_stats$stage1_success) total_stage2 <- sum(summary_stats$stage2_success) cat("Total harvest events:", total_harvests, "\n\n") cat("STAGE 1 - ADVANCE WARNING (7-21 days ahead):\n") cat(" Success:", total_stage1, "/", total_harvests, "(", round(100 * total_stage1 / total_harvests, 1), "% )\n") cat(" Fields with >50% success:", sum(summary_stats$stage1_rate > 50), "/", total_fields, "\n\n") cat("STAGE 2 - HARVEST CONFIRMATION (0-3 days after):\n") cat(" Success:", total_stage2, "/", total_harvests, "(", round(100 * total_stage2 / total_harvests, 1), "% )\n") cat(" Fields with >50% success:", sum(summary_stats$stage2_rate > 50), "/", total_fields, "\n\n") # Alert escalation analysis if (nrow(all_results) > 0) { cat("STAGE 1 ALERT ESCALATION BREAKDOWN:\n") escalation_breakdown <- all_results %>% filter(stage1_alert == TRUE, days_from_harvest < 0) %>% group_by(stage1_level) %>% summarise(count = n()) %>% arrange(match(stage1_level, c("WATCH", "PREPARE", "IMMINENT"))) print(escalation_breakdown, row.names = FALSE) cat("\n") } cat("============================================================================\n") cat("TOP PERFORMING FIELDS\n") cat("============================================================================\n\n") cat("STAGE 1 (Advance Warning):\n") top_stage1 <- summary_stats %>% arrange(desc(stage1_rate)) %>% head(5) print(top_stage1, row.names = FALSE) cat("\n\nSTAGE 2 (Harvest Confirmation):\n") top_stage2 <- summary_stats %>% arrange(desc(stage2_rate)) %>% head(5) print(top_stage2, row.names = FALSE) cat("\n============================================================================\n") cat("OPERATIONAL IMPLEMENTATION\n") cat("============================================================================\n\n") cat("🏭 DAILY WORKFLOW:\n\n") cat(" 1. Run this script each morning\n") cat(" 2. Review ALL ACTIVE ALERTS (status report for all fields)\n\n") cat(" STAGE 1 ESCALATION:\n") cat(" - WATCH: Field entered harvest window, monitor closely\n") cat(" - PREPARE: 1 week in alert, prepare logistics (7-14 days total)\n") cat(" - IMMINENT: 2+ weeks in alert, harvest very soon (14+ days total)\n\n") cat(" STAGE 2 CONFIRMATION:\n") cat(" - POSSIBLE: Sharp CI drop detected, likely harvested\n") cat(" - CONFIRMED: Sustained low CI for 2+ days, harvest confirmed\n\n") cat(" Priority: Stage 1 alerted fields get Stage 2 monitoring\n") cat(" Detection: Day after harvest (better satellite coverage = higher confidence)\n\n") # Save results output_file <- here("r_app/experiments/harvest_prediction/operational_validation_results.rds") saveRDS(list( all_results = all_results, summary = summary_stats, config = CONFIG ), output_file) cat("============================================================================\n") cat("Results saved to:", output_file, "\n") cat("============================================================================\n")