SmartCane/r_app/experiments/harvest_prediction/harvest_alert_system.R
2026-01-06 14:17:37 +01:00

423 lines
15 KiB
R

# ============================================================================
# COMPLETE HARVEST ALERT SYSTEM
# Two-stage approach for factory logistics planning
# ============================================================================
# STAGE 1: HARVEST WINDOW PREDICTION (7-21 days ahead)
# Alert factory that harvest is coming soon
#
# STAGE 2: HARVEST EVENT DETECTION (0-7 days after)
# Confirm that harvest has actually occurred
# ============================================================================
suppressPackageStartupMessages({
library(readxl)
library(dplyr)
library(tidyr)
library(lubridate)
library(here)
library(ggplot2)
})
# Set project directory
project_dir <- "esa"
assign("project_dir", project_dir, envir = .GlobalEnv)
# Navigate to project root if in experiments folder
if (basename(getwd()) == "harvest_prediction") {
setwd("../../..")
}
source(here("r_app", "parameters_project.R"))
# ============================================================================
# CONFIGURATION
# ============================================================================
CONFIG <- list(
# STAGE 1: Prediction thresholds
min_field_age_days = 240,
ci_threshold_low = 2.5,
ci_threshold_very_low = 1.5,
sustained_low_days = 5,
min_days_since_harvest = 200,
# STAGE 2: Detection thresholds (independent of Stage 1)
harvest_confirmed_ci = 1.5, # Sustained very low CI = harvest occurred
confirmation_days = 3 # Consecutive days below threshold
)
cat("============================================================================\n")
cat("COMPLETE HARVEST ALERT SYSTEM - TWO STAGE APPROACH\n")
cat("============================================================================\n\n")
cat("STAGE 1: HARVEST WINDOW PREDICTION\n")
cat(" - Alert when CI sustained low (crop mature)\n")
cat(" - Provides 7-21 days advance warning\n")
cat(" - Factory can plan logistics\n\n")
cat("STAGE 2: HARVEST EVENT DETECTION\n")
cat(" - Detect sustained very low CI (bare soil)\n")
cat(" - CI < 1.5 for 3 consecutive days\n")
cat(" - Independent of Stage 1\n")
cat(" - Confirms harvest has occurred\n\n")
cat("Configuration:\n")
cat(" Min field age:", CONFIG$min_field_age_days, "days\n")
cat(" Mature crop CI:", CONFIG$ci_threshold_low, "\n")
cat(" Sustained low days:", CONFIG$sustained_low_days, "\n")
cat(" Harvest confirmed CI:", CONFIG$harvest_confirmed_ci, "\n")
cat(" Confirmation days:", CONFIG$confirmation_days, "\n\n")
# ============================================================================
# LOAD DATA
# ============================================================================
cat("=== LOADING DATA ===\n\n")
ci_rds_file <- here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds")
ci_data_raw <- readRDS(ci_rds_file) %>% ungroup()
time_series_daily <- ci_data_raw %>%
mutate(date = as.Date(Date)) %>%
select(field_id = field, date, mean_ci = FitData) %>%
filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>%
arrange(field_id, date)
harvest_data <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>%
mutate(
season_start = as.Date(season_start),
season_end = as.Date(season_end)
) %>%
filter(!is.na(season_end))
fields_with_ci <- unique(time_series_daily$field_id)
harvest_data_filtered <- harvest_data %>%
filter(field %in% fields_with_ci) %>%
arrange(field, season_end)
cat("Fields with CI data:", length(fields_with_ci), "\n")
cat("Total harvest events:", nrow(harvest_data_filtered), "\n\n")
# ============================================================================
# STAGE 1: HARVEST WINDOW PREDICTION
# ============================================================================
predict_harvest_window <- function(field_ts, check_date, last_harvest_date, config = CONFIG) {
current_ci <- field_ts %>%
filter(date == check_date) %>%
pull(mean_ci)
if (length(current_ci) == 0 || is.na(current_ci[1])) {
return(list(stage1_alert = FALSE, stage1_level = "no_data", consecutive_days = 0, current_ci = NA))
}
# Take first value if multiple
current_ci <- current_ci[1]
# Calculate field age
if (is.null(last_harvest_date) || is.na(last_harvest_date)) {
earliest_date <- min(field_ts$date, na.rm = TRUE)
field_age <- as.numeric(check_date - earliest_date)
} else {
field_age <- as.numeric(check_date - last_harvest_date)
}
if (field_age < config$min_field_age_days) {
return(list(stage1_alert = FALSE, stage1_level = "too_young", consecutive_days = 0))
}
# Count consecutive days with CI below threshold
recent_data <- field_ts %>%
filter(date <= check_date, date >= check_date - 30) %>%
arrange(desc(date))
consecutive_days_low <- 0
for (i in 1:nrow(recent_data)) {
if (recent_data$mean_ci[i] <= config$ci_threshold_low) {
consecutive_days_low <- consecutive_days_low + 1
} else {
break
}
}
mean_ci_sustained <- if (consecutive_days_low > 0) {
recent_data %>% slice(1:consecutive_days_low) %>%
summarise(mean = mean(mean_ci, na.rm = TRUE)) %>% pull(mean)
} else {
NA
}
# Determine alert level
stage1_alert <- FALSE
stage1_level <- "none"
if (consecutive_days_low >= config$sustained_low_days) {
stage1_alert <- TRUE
if (!is.na(mean_ci_sustained) && mean_ci_sustained <= config$ci_threshold_very_low) {
stage1_level <- "imminent" # 7 days
} else {
stage1_level <- "likely" # 7-14 days
}
} else if (consecutive_days_low >= 3) {
stage1_alert <- TRUE
stage1_level <- "possible" # 14-21 days
}
return(list(
stage1_alert = stage1_alert,
stage1_level = stage1_level,
consecutive_days = consecutive_days_low,
current_ci = current_ci,
mean_ci_sustained = mean_ci_sustained
))
}
# ============================================================================
# STAGE 2: HARVEST EVENT DETECTION
# ============================================================================
detect_harvest_event <- function(field_ts, check_date, last_harvest_date, config = CONFIG) {
current_ci <- field_ts %>%
filter(date == check_date) %>%
pull(mean_ci)
if (length(current_ci) == 0 || is.na(current_ci[1])) {
return(list(stage2_alert = FALSE, stage2_level = "no_data", current_ci = NA))
}
# Take first value if multiple (shouldn't happen but safety)
current_ci <- current_ci[1]
# STAGE 2: Detect sustained very low CI (bare soil after harvest)
# Independent of Stage 1 - works in parallel
stage2_alert <- FALSE
stage2_level <- "none"
# Get recent days for consecutive low CI check
recent_window <- field_ts %>%
filter(date <= check_date,
date >= check_date - config$confirmation_days + 1) %>%
arrange(date)
# Count consecutive days below harvest confirmation threshold
if (nrow(recent_window) >= config$confirmation_days) {
consecutive_low_days <- 0
for (i in nrow(recent_window):1) {
if (!is.na(recent_window$mean_ci[i]) &&
recent_window$mean_ci[i] <= config$harvest_confirmed_ci) {
consecutive_low_days <- consecutive_low_days + 1
} else {
break
}
}
# Sustained very low CI = harvest occurred
if (consecutive_low_days >= config$confirmation_days) {
stage2_alert <- TRUE
stage2_level <- "confirmed"
}
}
return(list(
stage2_alert = stage2_alert,
stage2_level = stage2_level,
current_ci = current_ci,
consecutive_low_days = if (exists("consecutive_low_days")) consecutive_low_days else 0
))
}
# ============================================================================
# COMBINED VALIDATION
# ============================================================================
validate_two_stage_system <- function(field_id) {
field_ts <- time_series_daily %>%
filter(field_id == !!field_id) %>%
arrange(date)
field_harvests <- harvest_data_filtered %>%
filter(field == field_id) %>%
arrange(season_end)
if (nrow(field_harvests) == 0) return(NULL)
all_results <- data.frame()
for (h in 1:nrow(field_harvests)) {
harvest_date <- field_harvests$season_end[h]
last_harvest <- if (h == 1) NA else field_harvests$season_end[h - 1]
# Test -21 to +14 days
test_dates_seq <- seq.Date(
from = harvest_date - 21,
to = harvest_date + 14,
by = "1 day"
)
for (i in 1:length(test_dates_seq)) {
test_date <- test_dates_seq[i]
days_from_harvest <- as.numeric(test_date - harvest_date)
stage1 <- predict_harvest_window(field_ts, test_date, last_harvest, CONFIG)
stage2 <- detect_harvest_event(field_ts, test_date, last_harvest, CONFIG)
# Only add row if we have valid data
if (length(stage1$current_ci) > 0 && !is.null(stage1$current_ci)) {
all_results <- bind_rows(all_results, data.frame(
field = field_id,
harvest_event = h,
harvest_date = harvest_date,
test_date = test_date,
days_from_harvest = days_from_harvest,
stage1_alert = stage1$stage1_alert,
stage1_level = stage1$stage1_level,
stage2_alert = stage2$stage2_alert,
stage2_level = stage2$stage2_level,
current_ci = stage1$current_ci,
consecutive_days = stage1$consecutive_days
))
}
}
}
return(all_results)
}
# ============================================================================
# RUN FULL DATASET VALIDATION
# ============================================================================
cat("============================================================================\n")
cat("VALIDATING TWO-STAGE SYSTEM ON FULL DATASET\n")
cat("============================================================================\n\n")
all_fields_results <- data.frame()
summary_by_field <- data.frame()
fields_to_test <- unique(harvest_data_filtered$field)
total_fields <- length(fields_to_test)
cat("Testing", total_fields, "fields...\n\n")
pb <- txtProgressBar(min = 0, max = total_fields, style = 3)
for (f in 1:total_fields) {
field_id <- fields_to_test[f]
field_results <- validate_two_stage_system(field_id)
if (!is.null(field_results)) {
all_fields_results <- bind_rows(all_fields_results, field_results)
# Calculate summary for this field
field_harvests_count <- length(unique(field_results$harvest_event))
# Stage 1: First prediction in optimal window (7-21 days ahead)
stage1_optimal <- field_results %>%
filter(stage1_alert == TRUE, days_from_harvest >= -21, days_from_harvest <= -7) %>%
group_by(harvest_event) %>%
slice(1) %>%
ungroup()
# Stage 2: Detection within 7 days after harvest
stage2_detections <- field_results %>%
filter(stage2_alert == TRUE, days_from_harvest >= 0, days_from_harvest <= 7) %>%
group_by(harvest_event) %>%
slice(1) %>%
ungroup()
summary_by_field <- bind_rows(summary_by_field, data.frame(
field = field_id,
total_harvests = field_harvests_count,
stage1_optimal = nrow(stage1_optimal),
stage2_detected = nrow(stage2_detections),
stage1_rate = round(100 * nrow(stage1_optimal) / field_harvests_count, 1),
stage2_rate = round(100 * nrow(stage2_detections) / field_harvests_count, 1)
))
}
setTxtProgressBar(pb, f)
}
close(pb)
cat("\n\n============================================================================\n")
cat("RESULTS BY FIELD\n")
cat("============================================================================\n\n")
print(summary_by_field, row.names = FALSE)
# ============================================================================
# OVERALL SUMMARY
# ============================================================================
cat("\n============================================================================\n")
cat("OVERALL SUMMARY ACROSS ALL FIELDS\n")
cat("============================================================================\n\n")
total_harvests <- sum(summary_by_field$total_harvests)
total_stage1_optimal <- sum(summary_by_field$stage1_optimal)
total_stage2_detected <- sum(summary_by_field$stage2_detected)
cat("Total harvest events tested:", total_harvests, "\n\n")
cat("STAGE 1 - HARVEST WINDOW PREDICTION:\n")
cat(" Predictions in optimal window (7-21 days ahead):", total_stage1_optimal, "/", total_harvests, "\n")
cat(" Success rate:", round(100 * total_stage1_optimal / total_harvests, 1), "%\n\n")
cat("STAGE 2 - HARVEST EVENT DETECTION:\n")
cat(" Detections within 7 days after harvest:", total_stage2_detected, "/", total_harvests, "\n")
cat(" Success rate:", round(100 * total_stage2_detected / total_harvests, 1), "%\n\n")
cat("COMBINED SYSTEM PERFORMANCE:\n")
cat(" Fields with >50% Stage 1 success:", sum(summary_by_field$stage1_rate > 50), "/", total_fields, "\n")
cat(" Fields with >50% Stage 2 success:", sum(summary_by_field$stage2_rate > 50), "/", total_fields, "\n\n")
# Find best and worst performing fields
cat("BEST PERFORMING FIELDS (Stage 1):\n")
top_fields <- summary_by_field %>% arrange(desc(stage1_rate)) %>% head(5)
print(top_fields, row.names = FALSE)
cat("\n\nWORST PERFORMING FIELDS (Stage 1):\n")
bottom_fields <- summary_by_field %>% arrange(stage1_rate) %>% head(5)
print(bottom_fields, row.names = FALSE)
cat("\n============================================================================\n")
cat("FACTORY CLIENT INTERPRETATION\n")
cat("============================================================================\n\n")
cat("🏭 TWO-STAGE ALERT SYSTEM:\n\n")
cat(" STAGE 1: ADVANCE WARNING (7-21 days ahead)\n")
cat(" - Factory receives prediction when crop is mature\n")
cat(" - Allows planning of processing capacity\n")
cat(" - Coordinate transport and labor\n")
cat(" - Success rate:", round(100 * total_stage1_optimal / total_harvests, 1), "%\n\n")
cat(" STAGE 2: HARVEST CONFIRMATION (0-7 days after)\n")
cat(" - Confirms harvest has actually occurred\n")
cat(" - Detects bare soil signature (CI < 1.0)\n")
cat(" - Triggers processing logistics\n")
cat(" - Success rate:", round(100 * total_stage2_detected / total_harvests, 1), "%\n\n")
cat("📊 OPERATIONAL WORKFLOW:\n")
cat(" 1. Field shows sustained low CI → Stage 1 alert\n")
cat(" 2. Factory prepares for harvest in 1-3 weeks\n")
cat(" 3. CI drops to bare soil → Stage 2 alert\n")
cat(" 4. Factory confirms harvest and processes cane\n\n")
cat("============================================================================\n")
cat("ANALYSIS COMPLETE\n")
cat("============================================================================\n")
# Save detailed results
output_file <- here("r_app/experiments/harvest_prediction/two_stage_validation_results.rds")
saveRDS(list(
all_results = all_fields_results,
summary = summary_by_field,
config = CONFIG
), output_file)
cat("\nDetailed results saved to:", output_file, "\n")