# ============================================================================ # SIMPLIFIED TWO-STAGE HARVEST ALERTS # Based on stateful logic but adapted for daily operations # ============================================================================ # STAGE 1: "Harvest will happen soon" (not "in exactly 14 days") # - Field was recently mature (CI > 3.5) # - CI dropped below 2.5 for 14+ consecutive days # - Alert: "Harvest expected soon - monitor this field" # # STAGE 2: "Harvest has occurred" # - CI stabilized at very low level (< 2.0) # - Low variability for 5+ days (stable bare soil) # - Alert: "Harvest completed in recent days" # ============================================================================ suppressPackageStartupMessages({ library(readxl) library(dplyr) library(tidyr) library(lubridate) library(here) library(zoo) }) project_dir <- "esa" assign("project_dir", project_dir, envir = .GlobalEnv) if (basename(getwd()) == "harvest_prediction") { setwd("../../..") } source(here("r_app", "parameters_project.R")) # ============================================================================ # CONFIGURATION # ============================================================================ CONFIG <- list( # STAGE 1: Harvest approaching mature_ci = 3.5, # Field must have been mature (CI > this) harvest_ci = 2.5, # Alert when below this consecutive_days = 14, # Days below threshold to trigger lookback_mature = 60, # Days to check for previous mature state min_field_age = 240, # 8 months minimum # STAGE 2: Harvest completed completed_ci = 2.0, # Very low CI threshold stable_days = 5, # Days of stable low CI max_variability = 0.3, # Max SD for "stable" # Validation test_window_days = 21 ) cat("============================================================================\n") cat("SIMPLIFIED HARVEST ALERT SYSTEM\n") cat("============================================================================\n\n") cat("STAGE 1 - HARVEST APPROACHING:\n") cat(" - Field was mature (CI >", CONFIG$mature_ci, ") in last", CONFIG$lookback_mature, "days\n") cat(" - CI drops below", CONFIG$harvest_ci, "for", CONFIG$consecutive_days, "consecutive days\n") cat(" - Alert: 'Harvest expected soon - prepare logistics'\n\n") cat("STAGE 2 - HARVEST COMPLETED:\n") cat(" - CI below", CONFIG$completed_ci, "with low variability\n") cat(" - Stable for", CONFIG$stable_days, "days\n") cat(" - Alert: 'Harvest completed in recent days'\n\n") # ============================================================================ # LOAD DATA # ============================================================================ cat("=== LOADING DATA ===\n\n") ci_rds_file <- here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds") ci_data_raw <- readRDS(ci_rds_file) %>% ungroup() time_series_daily <- ci_data_raw %>% mutate(date = as.Date(Date)) %>% select(field_id = field, date, mean_ci = FitData) %>% filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>% arrange(field_id, date) harvest_data <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>% mutate( season_start = as.Date(season_start), season_end = as.Date(season_end) ) %>% filter(!is.na(season_end)) fields_with_ci <- unique(time_series_daily$field_id) harvest_data_filtered <- harvest_data %>% filter(field %in% fields_with_ci) %>% arrange(field, season_end) cat("Fields:", length(fields_with_ci), "\n") cat("Harvest events:", nrow(harvest_data_filtered), "\n\n") # ============================================================================ # STAGE 1: HARVEST APPROACHING # ============================================================================ detect_approaching <- function(field_ts, check_date, last_harvest_date, config = CONFIG) { # Check field age if (is.null(last_harvest_date) || is.na(last_harvest_date)) { earliest_date <- min(field_ts$date, na.rm = TRUE) field_age <- as.numeric(check_date - earliest_date) } else { field_age <- as.numeric(check_date - last_harvest_date) } if (field_age < config$min_field_age) { return(list(alert = FALSE, reason = "too_young", consecutive_days = 0)) } # Check if field was recently mature recent_data <- field_ts %>% filter(date >= check_date - config$lookback_mature, date < check_date) was_mature <- any(recent_data$mean_ci > config$mature_ci, na.rm = TRUE) if (!was_mature) { return(list(alert = FALSE, reason = "never_mature", consecutive_days = 0)) } # Count consecutive days below harvest threshold recent_ci <- field_ts %>% filter(date <= check_date, date >= check_date - 30) %>% arrange(desc(date)) consecutive_days <- 0 for (i in 1:nrow(recent_ci)) { if (!is.na(recent_ci$mean_ci[i]) && recent_ci$mean_ci[i] <= config$harvest_ci) { consecutive_days <- consecutive_days + 1 } else { break } } alert <- consecutive_days >= config$consecutive_days return(list( alert = alert, reason = ifelse(alert, "APPROACHING", "not_sustained"), consecutive_days = consecutive_days )) } # ============================================================================ # STAGE 2: HARVEST COMPLETED # ============================================================================ detect_completed <- function(field_ts, check_date, config = CONFIG) { # Get recent CI data recent_data <- field_ts %>% filter(date <= check_date, date >= check_date - config$stable_days) %>% arrange(date) if (nrow(recent_data) < config$stable_days) { return(list(alert = FALSE, reason = "insufficient_data")) } # Check if all recent days are below completed_ci threshold all_low <- all(recent_data$mean_ci <= config$completed_ci, na.rm = TRUE) if (!all_low) { return(list(alert = FALSE, reason = "not_low_enough")) } # Check variability (stable signal) ci_sd <- sd(recent_data$mean_ci, na.rm = TRUE) if (ci_sd > config$max_variability) { return(list(alert = FALSE, reason = "too_variable")) } return(list( alert = TRUE, reason = "COMPLETED", mean_ci = mean(recent_data$mean_ci, na.rm = TRUE), sd_ci = ci_sd )) } # ============================================================================ # VALIDATION # ============================================================================ validate_simplified_system <- function(field_id) { field_ts <- time_series_daily %>% filter(field_id == !!field_id) %>% arrange(date) field_harvests <- harvest_data_filtered %>% filter(field == field_id) %>% arrange(season_end) if (nrow(field_harvests) == 0) return(NULL) all_results <- data.frame() for (h in 1:nrow(field_harvests)) { harvest_date <- field_harvests$season_end[h] last_harvest <- if (h == 1) NA else field_harvests$season_end[h - 1] test_dates_seq <- seq.Date( from = harvest_date - CONFIG$test_window_days, to = harvest_date + 14, by = "1 day" ) for (i in 1:length(test_dates_seq)) { test_date <- test_dates_seq[i] days_from_harvest <- as.numeric(test_date - harvest_date) stage1 <- detect_approaching(field_ts, test_date, last_harvest, CONFIG) stage2 <- detect_completed(field_ts, test_date, CONFIG) all_results <- bind_rows(all_results, data.frame( field = field_id, harvest_event = h, harvest_date = harvest_date, test_date = test_date, days_from_harvest = days_from_harvest, stage1_alert = stage1$alert, stage1_reason = stage1$reason, stage2_alert = stage2$alert, stage2_reason = stage2$reason, consecutive_days = stage1$consecutive_days )) } } return(all_results) } # ============================================================================ # RUN VALIDATION # ============================================================================ cat("============================================================================\n") cat("VALIDATING ON FULL DATASET\n") cat("============================================================================\n\n") all_results <- data.frame() summary_stats <- data.frame() fields_to_test <- unique(harvest_data_filtered$field) total_fields <- length(fields_to_test) cat("Testing", total_fields, "fields...\n\n") pb <- txtProgressBar(min = 0, max = total_fields, style = 3) for (f in 1:total_fields) { field_id <- fields_to_test[f] field_results <- validate_simplified_system(field_id) if (!is.null(field_results) && nrow(field_results) > 0) { all_results <- bind_rows(all_results, field_results) field_harvests_count <- length(unique(field_results$harvest_event)) # Stage 1: Any alert before harvest stage1_success <- field_results %>% filter(stage1_alert == TRUE, days_from_harvest < 0) %>% distinct(harvest_event) %>% nrow() # Stage 2: Detection within 1-7 days after stage2_success <- field_results %>% filter(stage2_alert == TRUE, days_from_harvest >= 1, days_from_harvest <= 7) %>% distinct(harvest_event) %>% nrow() summary_stats <- bind_rows(summary_stats, data.frame( field = field_id, total_harvests = field_harvests_count, stage1_success = stage1_success, stage2_success = stage2_success, stage1_rate = round(100 * stage1_success / field_harvests_count, 1), stage2_rate = round(100 * stage2_success / field_harvests_count, 1) )) } setTxtProgressBar(pb, f) } close(pb) # ============================================================================ # RESULTS # ============================================================================ cat("\n\n============================================================================\n") cat("RESULTS BY FIELD\n") cat("============================================================================\n\n") print(summary_stats, row.names = FALSE) cat("\n============================================================================\n") cat("OVERALL PERFORMANCE\n") cat("============================================================================\n\n") total_harvests <- sum(summary_stats$total_harvests) total_stage1 <- sum(summary_stats$stage1_success) total_stage2 <- sum(summary_stats$stage2_success) cat("Total harvest events:", total_harvests, "\n\n") cat("STAGE 1 - HARVEST APPROACHING (any time before harvest):\n") cat(" Success:", total_stage1, "/", total_harvests, "(", round(100 * total_stage1 / total_harvests, 1), "% )\n") cat(" Fields with >50% success:", sum(summary_stats$stage1_rate > 50), "/", total_fields, "\n\n") cat("STAGE 2 - HARVEST COMPLETED (1-7 days after):\n") cat(" Success:", total_stage2, "/", total_harvests, "(", round(100 * total_stage2 / total_harvests, 1), "% )\n") cat(" Fields with >50% success:", sum(summary_stats$stage2_rate > 50), "/", total_fields, "\n\n") cat("============================================================================\n") cat("KEY INSIGHT\n") cat("============================================================================\n\n") cat("This approach doesn't predict 'harvest in X days' - it says:\n") cat(" STAGE 1: 'Harvest will happen soon' (field mature → declining)\n") cat(" STAGE 2: 'Harvest occurred recently' (bare soil detected)\n\n") cat("No exact timing - just actionable binary alerts for factory planning\n\n") # Save results output_file <- here("r_app/experiments/harvest_prediction/simplified_validation_results.rds") saveRDS(list( all_results = all_results, summary = summary_stats, config = CONFIG ), output_file) cat("============================================================================\n") cat("Results saved to:", output_file, "\n") cat("============================================================================\n")