# ============================================================================ # COMPLETE HARVEST ALERT SYSTEM # Two-stage approach for factory logistics planning # ============================================================================ # STAGE 1: HARVEST WINDOW PREDICTION (7-21 days ahead) # Alert factory that harvest is coming soon # # STAGE 2: HARVEST EVENT DETECTION (0-7 days after) # Confirm that harvest has actually occurred # ============================================================================ suppressPackageStartupMessages({ library(readxl) library(dplyr) library(tidyr) library(lubridate) library(here) library(ggplot2) }) # Set project directory project_dir <- "esa" assign("project_dir", project_dir, envir = .GlobalEnv) # Navigate to project root if in experiments folder if (basename(getwd()) == "harvest_prediction") { setwd("../../..") } source(here("r_app", "parameters_project.R")) # ============================================================================ # CONFIGURATION # ============================================================================ CONFIG <- list( # STAGE 1: Prediction thresholds min_field_age_days = 240, ci_threshold_low = 2.5, ci_threshold_very_low = 1.5, sustained_low_days = 5, min_days_since_harvest = 200, # STAGE 2: Detection thresholds (independent of Stage 1) harvest_confirmed_ci = 1.5, # Sustained very low CI = harvest occurred confirmation_days = 3 # Consecutive days below threshold ) cat("============================================================================\n") cat("COMPLETE HARVEST ALERT SYSTEM - TWO STAGE APPROACH\n") cat("============================================================================\n\n") cat("STAGE 1: HARVEST WINDOW PREDICTION\n") cat(" - Alert when CI sustained low (crop mature)\n") cat(" - Provides 7-21 days advance warning\n") cat(" - Factory can plan logistics\n\n") cat("STAGE 2: HARVEST EVENT DETECTION\n") cat(" - Detect sustained very low CI (bare soil)\n") cat(" - CI < 1.5 for 3 consecutive days\n") cat(" - Independent of Stage 1\n") cat(" - Confirms harvest has occurred\n\n") cat("Configuration:\n") cat(" Min field age:", CONFIG$min_field_age_days, "days\n") cat(" Mature crop CI:", CONFIG$ci_threshold_low, "\n") cat(" Sustained low days:", CONFIG$sustained_low_days, "\n") cat(" Harvest confirmed CI:", CONFIG$harvest_confirmed_ci, "\n") cat(" Confirmation days:", CONFIG$confirmation_days, "\n\n") # ============================================================================ # LOAD DATA # ============================================================================ cat("=== LOADING DATA ===\n\n") ci_rds_file <- here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds") ci_data_raw <- readRDS(ci_rds_file) %>% ungroup() time_series_daily <- ci_data_raw %>% mutate(date = as.Date(Date)) %>% select(field_id = field, date, mean_ci = FitData) %>% filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>% arrange(field_id, date) harvest_data <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>% mutate( season_start = as.Date(season_start), season_end = as.Date(season_end) ) %>% filter(!is.na(season_end)) fields_with_ci <- unique(time_series_daily$field_id) harvest_data_filtered <- harvest_data %>% filter(field %in% fields_with_ci) %>% arrange(field, season_end) cat("Fields with CI data:", length(fields_with_ci), "\n") cat("Total harvest events:", nrow(harvest_data_filtered), "\n\n") # ============================================================================ # STAGE 1: HARVEST WINDOW PREDICTION # ============================================================================ predict_harvest_window <- function(field_ts, check_date, last_harvest_date, config = CONFIG) { current_ci <- field_ts %>% filter(date == check_date) %>% pull(mean_ci) if (length(current_ci) == 0 || is.na(current_ci[1])) { return(list(stage1_alert = FALSE, stage1_level = "no_data", consecutive_days = 0, current_ci = NA)) } # Take first value if multiple current_ci <- current_ci[1] # Calculate field age if (is.null(last_harvest_date) || is.na(last_harvest_date)) { earliest_date <- min(field_ts$date, na.rm = TRUE) field_age <- as.numeric(check_date - earliest_date) } else { field_age <- as.numeric(check_date - last_harvest_date) } if (field_age < config$min_field_age_days) { return(list(stage1_alert = FALSE, stage1_level = "too_young", consecutive_days = 0)) } # Count consecutive days with CI below threshold recent_data <- field_ts %>% filter(date <= check_date, date >= check_date - 30) %>% arrange(desc(date)) consecutive_days_low <- 0 for (i in 1:nrow(recent_data)) { if (recent_data$mean_ci[i] <= config$ci_threshold_low) { consecutive_days_low <- consecutive_days_low + 1 } else { break } } mean_ci_sustained <- if (consecutive_days_low > 0) { recent_data %>% slice(1:consecutive_days_low) %>% summarise(mean = mean(mean_ci, na.rm = TRUE)) %>% pull(mean) } else { NA } # Determine alert level stage1_alert <- FALSE stage1_level <- "none" if (consecutive_days_low >= config$sustained_low_days) { stage1_alert <- TRUE if (!is.na(mean_ci_sustained) && mean_ci_sustained <= config$ci_threshold_very_low) { stage1_level <- "imminent" # 7 days } else { stage1_level <- "likely" # 7-14 days } } else if (consecutive_days_low >= 3) { stage1_alert <- TRUE stage1_level <- "possible" # 14-21 days } return(list( stage1_alert = stage1_alert, stage1_level = stage1_level, consecutive_days = consecutive_days_low, current_ci = current_ci, mean_ci_sustained = mean_ci_sustained )) } # ============================================================================ # STAGE 2: HARVEST EVENT DETECTION # ============================================================================ detect_harvest_event <- function(field_ts, check_date, last_harvest_date, config = CONFIG) { current_ci <- field_ts %>% filter(date == check_date) %>% pull(mean_ci) if (length(current_ci) == 0 || is.na(current_ci[1])) { return(list(stage2_alert = FALSE, stage2_level = "no_data", current_ci = NA)) } # Take first value if multiple (shouldn't happen but safety) current_ci <- current_ci[1] # STAGE 2: Detect sustained very low CI (bare soil after harvest) # Independent of Stage 1 - works in parallel stage2_alert <- FALSE stage2_level <- "none" # Get recent days for consecutive low CI check recent_window <- field_ts %>% filter(date <= check_date, date >= check_date - config$confirmation_days + 1) %>% arrange(date) # Count consecutive days below harvest confirmation threshold if (nrow(recent_window) >= config$confirmation_days) { consecutive_low_days <- 0 for (i in nrow(recent_window):1) { if (!is.na(recent_window$mean_ci[i]) && recent_window$mean_ci[i] <= config$harvest_confirmed_ci) { consecutive_low_days <- consecutive_low_days + 1 } else { break } } # Sustained very low CI = harvest occurred if (consecutive_low_days >= config$confirmation_days) { stage2_alert <- TRUE stage2_level <- "confirmed" } } return(list( stage2_alert = stage2_alert, stage2_level = stage2_level, current_ci = current_ci, consecutive_low_days = if (exists("consecutive_low_days")) consecutive_low_days else 0 )) } # ============================================================================ # COMBINED VALIDATION # ============================================================================ validate_two_stage_system <- function(field_id) { field_ts <- time_series_daily %>% filter(field_id == !!field_id) %>% arrange(date) field_harvests <- harvest_data_filtered %>% filter(field == field_id) %>% arrange(season_end) if (nrow(field_harvests) == 0) return(NULL) all_results <- data.frame() for (h in 1:nrow(field_harvests)) { harvest_date <- field_harvests$season_end[h] last_harvest <- if (h == 1) NA else field_harvests$season_end[h - 1] # Test -21 to +14 days test_dates_seq <- seq.Date( from = harvest_date - 21, to = harvest_date + 14, by = "1 day" ) for (i in 1:length(test_dates_seq)) { test_date <- test_dates_seq[i] days_from_harvest <- as.numeric(test_date - harvest_date) stage1 <- predict_harvest_window(field_ts, test_date, last_harvest, CONFIG) stage2 <- detect_harvest_event(field_ts, test_date, last_harvest, CONFIG) # Only add row if we have valid data if (length(stage1$current_ci) > 0 && !is.null(stage1$current_ci)) { all_results <- bind_rows(all_results, data.frame( field = field_id, harvest_event = h, harvest_date = harvest_date, test_date = test_date, days_from_harvest = days_from_harvest, stage1_alert = stage1$stage1_alert, stage1_level = stage1$stage1_level, stage2_alert = stage2$stage2_alert, stage2_level = stage2$stage2_level, current_ci = stage1$current_ci, consecutive_days = stage1$consecutive_days )) } } } return(all_results) } # ============================================================================ # RUN FULL DATASET VALIDATION # ============================================================================ cat("============================================================================\n") cat("VALIDATING TWO-STAGE SYSTEM ON FULL DATASET\n") cat("============================================================================\n\n") all_fields_results <- data.frame() summary_by_field <- data.frame() fields_to_test <- unique(harvest_data_filtered$field) total_fields <- length(fields_to_test) cat("Testing", total_fields, "fields...\n\n") pb <- txtProgressBar(min = 0, max = total_fields, style = 3) for (f in 1:total_fields) { field_id <- fields_to_test[f] field_results <- validate_two_stage_system(field_id) if (!is.null(field_results)) { all_fields_results <- bind_rows(all_fields_results, field_results) # Calculate summary for this field field_harvests_count <- length(unique(field_results$harvest_event)) # Stage 1: First prediction in optimal window (7-21 days ahead) stage1_optimal <- field_results %>% filter(stage1_alert == TRUE, days_from_harvest >= -21, days_from_harvest <= -7) %>% group_by(harvest_event) %>% slice(1) %>% ungroup() # Stage 2: Detection within 7 days after harvest stage2_detections <- field_results %>% filter(stage2_alert == TRUE, days_from_harvest >= 0, days_from_harvest <= 7) %>% group_by(harvest_event) %>% slice(1) %>% ungroup() summary_by_field <- bind_rows(summary_by_field, data.frame( field = field_id, total_harvests = field_harvests_count, stage1_optimal = nrow(stage1_optimal), stage2_detected = nrow(stage2_detections), stage1_rate = round(100 * nrow(stage1_optimal) / field_harvests_count, 1), stage2_rate = round(100 * nrow(stage2_detections) / field_harvests_count, 1) )) } setTxtProgressBar(pb, f) } close(pb) cat("\n\n============================================================================\n") cat("RESULTS BY FIELD\n") cat("============================================================================\n\n") print(summary_by_field, row.names = FALSE) # ============================================================================ # OVERALL SUMMARY # ============================================================================ cat("\n============================================================================\n") cat("OVERALL SUMMARY ACROSS ALL FIELDS\n") cat("============================================================================\n\n") total_harvests <- sum(summary_by_field$total_harvests) total_stage1_optimal <- sum(summary_by_field$stage1_optimal) total_stage2_detected <- sum(summary_by_field$stage2_detected) cat("Total harvest events tested:", total_harvests, "\n\n") cat("STAGE 1 - HARVEST WINDOW PREDICTION:\n") cat(" Predictions in optimal window (7-21 days ahead):", total_stage1_optimal, "/", total_harvests, "\n") cat(" Success rate:", round(100 * total_stage1_optimal / total_harvests, 1), "%\n\n") cat("STAGE 2 - HARVEST EVENT DETECTION:\n") cat(" Detections within 7 days after harvest:", total_stage2_detected, "/", total_harvests, "\n") cat(" Success rate:", round(100 * total_stage2_detected / total_harvests, 1), "%\n\n") cat("COMBINED SYSTEM PERFORMANCE:\n") cat(" Fields with >50% Stage 1 success:", sum(summary_by_field$stage1_rate > 50), "/", total_fields, "\n") cat(" Fields with >50% Stage 2 success:", sum(summary_by_field$stage2_rate > 50), "/", total_fields, "\n\n") # Find best and worst performing fields cat("BEST PERFORMING FIELDS (Stage 1):\n") top_fields <- summary_by_field %>% arrange(desc(stage1_rate)) %>% head(5) print(top_fields, row.names = FALSE) cat("\n\nWORST PERFORMING FIELDS (Stage 1):\n") bottom_fields <- summary_by_field %>% arrange(stage1_rate) %>% head(5) print(bottom_fields, row.names = FALSE) cat("\n============================================================================\n") cat("FACTORY CLIENT INTERPRETATION\n") cat("============================================================================\n\n") cat("🏭 TWO-STAGE ALERT SYSTEM:\n\n") cat(" STAGE 1: ADVANCE WARNING (7-21 days ahead)\n") cat(" - Factory receives prediction when crop is mature\n") cat(" - Allows planning of processing capacity\n") cat(" - Coordinate transport and labor\n") cat(" - Success rate:", round(100 * total_stage1_optimal / total_harvests, 1), "%\n\n") cat(" STAGE 2: HARVEST CONFIRMATION (0-7 days after)\n") cat(" - Confirms harvest has actually occurred\n") cat(" - Detects bare soil signature (CI < 1.0)\n") cat(" - Triggers processing logistics\n") cat(" - Success rate:", round(100 * total_stage2_detected / total_harvests, 1), "%\n\n") cat("📊 OPERATIONAL WORKFLOW:\n") cat(" 1. Field shows sustained low CI → Stage 1 alert\n") cat(" 2. Factory prepares for harvest in 1-3 weeks\n") cat(" 3. CI drops to bare soil → Stage 2 alert\n") cat(" 4. Factory confirms harvest and processes cane\n\n") cat("============================================================================\n") cat("ANALYSIS COMPLETE\n") cat("============================================================================\n") # Save detailed results output_file <- here("r_app/experiments/harvest_prediction/two_stage_validation_results.rds") saveRDS(list( all_results = all_fields_results, summary = summary_by_field, config = CONFIG ), output_file) cat("\nDetailed results saved to:", output_file, "\n")