423 lines
15 KiB
R
423 lines
15 KiB
R
# ============================================================================
|
|
# COMPLETE HARVEST ALERT SYSTEM
|
|
# Two-stage approach for factory logistics planning
|
|
# ============================================================================
|
|
# STAGE 1: HARVEST WINDOW PREDICTION (7-21 days ahead)
|
|
# Alert factory that harvest is coming soon
|
|
#
|
|
# STAGE 2: HARVEST EVENT DETECTION (0-7 days after)
|
|
# Confirm that harvest has actually occurred
|
|
# ============================================================================
|
|
|
|
suppressPackageStartupMessages({
|
|
library(readxl)
|
|
library(dplyr)
|
|
library(tidyr)
|
|
library(lubridate)
|
|
library(here)
|
|
library(ggplot2)
|
|
})
|
|
|
|
# Set project directory
|
|
project_dir <- "esa"
|
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
|
|
|
# Navigate to project root if in experiments folder
|
|
if (basename(getwd()) == "harvest_prediction") {
|
|
setwd("../../..")
|
|
}
|
|
|
|
source(here("r_app", "parameters_project.R"))
|
|
|
|
# ============================================================================
|
|
# CONFIGURATION
|
|
# ============================================================================
|
|
|
|
CONFIG <- list(
|
|
# STAGE 1: Prediction thresholds
|
|
min_field_age_days = 240,
|
|
ci_threshold_low = 2.5,
|
|
ci_threshold_very_low = 1.5,
|
|
sustained_low_days = 5,
|
|
min_days_since_harvest = 200,
|
|
|
|
# STAGE 2: Detection thresholds (independent of Stage 1)
|
|
harvest_confirmed_ci = 1.5, # Sustained very low CI = harvest occurred
|
|
confirmation_days = 3 # Consecutive days below threshold
|
|
)
|
|
|
|
cat("============================================================================\n")
|
|
cat("COMPLETE HARVEST ALERT SYSTEM - TWO STAGE APPROACH\n")
|
|
cat("============================================================================\n\n")
|
|
|
|
cat("STAGE 1: HARVEST WINDOW PREDICTION\n")
|
|
cat(" - Alert when CI sustained low (crop mature)\n")
|
|
cat(" - Provides 7-21 days advance warning\n")
|
|
cat(" - Factory can plan logistics\n\n")
|
|
|
|
cat("STAGE 2: HARVEST EVENT DETECTION\n")
|
|
cat(" - Detect sustained very low CI (bare soil)\n")
|
|
cat(" - CI < 1.5 for 3 consecutive days\n")
|
|
cat(" - Independent of Stage 1\n")
|
|
cat(" - Confirms harvest has occurred\n\n")
|
|
|
|
cat("Configuration:\n")
|
|
cat(" Min field age:", CONFIG$min_field_age_days, "days\n")
|
|
cat(" Mature crop CI:", CONFIG$ci_threshold_low, "\n")
|
|
cat(" Sustained low days:", CONFIG$sustained_low_days, "\n")
|
|
cat(" Harvest confirmed CI:", CONFIG$harvest_confirmed_ci, "\n")
|
|
cat(" Confirmation days:", CONFIG$confirmation_days, "\n\n")
|
|
|
|
# ============================================================================
|
|
# LOAD DATA
|
|
# ============================================================================
|
|
|
|
cat("=== LOADING DATA ===\n\n")
|
|
|
|
ci_rds_file <- here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds")
|
|
ci_data_raw <- readRDS(ci_rds_file) %>% ungroup()
|
|
|
|
time_series_daily <- ci_data_raw %>%
|
|
mutate(date = as.Date(Date)) %>%
|
|
select(field_id = field, date, mean_ci = FitData) %>%
|
|
filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>%
|
|
arrange(field_id, date)
|
|
|
|
harvest_data <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>%
|
|
mutate(
|
|
season_start = as.Date(season_start),
|
|
season_end = as.Date(season_end)
|
|
) %>%
|
|
filter(!is.na(season_end))
|
|
|
|
fields_with_ci <- unique(time_series_daily$field_id)
|
|
harvest_data_filtered <- harvest_data %>%
|
|
filter(field %in% fields_with_ci) %>%
|
|
arrange(field, season_end)
|
|
|
|
cat("Fields with CI data:", length(fields_with_ci), "\n")
|
|
cat("Total harvest events:", nrow(harvest_data_filtered), "\n\n")
|
|
|
|
# ============================================================================
|
|
# STAGE 1: HARVEST WINDOW PREDICTION
|
|
# ============================================================================
|
|
|
|
predict_harvest_window <- function(field_ts, check_date, last_harvest_date, config = CONFIG) {
|
|
current_ci <- field_ts %>%
|
|
filter(date == check_date) %>%
|
|
pull(mean_ci)
|
|
|
|
if (length(current_ci) == 0 || is.na(current_ci[1])) {
|
|
return(list(stage1_alert = FALSE, stage1_level = "no_data", consecutive_days = 0, current_ci = NA))
|
|
}
|
|
|
|
# Take first value if multiple
|
|
current_ci <- current_ci[1]
|
|
|
|
# Calculate field age
|
|
if (is.null(last_harvest_date) || is.na(last_harvest_date)) {
|
|
earliest_date <- min(field_ts$date, na.rm = TRUE)
|
|
field_age <- as.numeric(check_date - earliest_date)
|
|
} else {
|
|
field_age <- as.numeric(check_date - last_harvest_date)
|
|
}
|
|
|
|
if (field_age < config$min_field_age_days) {
|
|
return(list(stage1_alert = FALSE, stage1_level = "too_young", consecutive_days = 0))
|
|
}
|
|
|
|
# Count consecutive days with CI below threshold
|
|
recent_data <- field_ts %>%
|
|
filter(date <= check_date, date >= check_date - 30) %>%
|
|
arrange(desc(date))
|
|
|
|
consecutive_days_low <- 0
|
|
for (i in 1:nrow(recent_data)) {
|
|
if (recent_data$mean_ci[i] <= config$ci_threshold_low) {
|
|
consecutive_days_low <- consecutive_days_low + 1
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
|
|
mean_ci_sustained <- if (consecutive_days_low > 0) {
|
|
recent_data %>% slice(1:consecutive_days_low) %>%
|
|
summarise(mean = mean(mean_ci, na.rm = TRUE)) %>% pull(mean)
|
|
} else {
|
|
NA
|
|
}
|
|
|
|
# Determine alert level
|
|
stage1_alert <- FALSE
|
|
stage1_level <- "none"
|
|
|
|
if (consecutive_days_low >= config$sustained_low_days) {
|
|
stage1_alert <- TRUE
|
|
if (!is.na(mean_ci_sustained) && mean_ci_sustained <= config$ci_threshold_very_low) {
|
|
stage1_level <- "imminent" # 7 days
|
|
} else {
|
|
stage1_level <- "likely" # 7-14 days
|
|
}
|
|
} else if (consecutive_days_low >= 3) {
|
|
stage1_alert <- TRUE
|
|
stage1_level <- "possible" # 14-21 days
|
|
}
|
|
|
|
return(list(
|
|
stage1_alert = stage1_alert,
|
|
stage1_level = stage1_level,
|
|
consecutive_days = consecutive_days_low,
|
|
current_ci = current_ci,
|
|
mean_ci_sustained = mean_ci_sustained
|
|
))
|
|
}
|
|
|
|
# ============================================================================
|
|
# STAGE 2: HARVEST EVENT DETECTION
|
|
# ============================================================================
|
|
|
|
detect_harvest_event <- function(field_ts, check_date, last_harvest_date, config = CONFIG) {
|
|
current_ci <- field_ts %>%
|
|
filter(date == check_date) %>%
|
|
pull(mean_ci)
|
|
|
|
if (length(current_ci) == 0 || is.na(current_ci[1])) {
|
|
return(list(stage2_alert = FALSE, stage2_level = "no_data", current_ci = NA))
|
|
}
|
|
|
|
# Take first value if multiple (shouldn't happen but safety)
|
|
current_ci <- current_ci[1]
|
|
|
|
# STAGE 2: Detect sustained very low CI (bare soil after harvest)
|
|
# Independent of Stage 1 - works in parallel
|
|
stage2_alert <- FALSE
|
|
stage2_level <- "none"
|
|
|
|
# Get recent days for consecutive low CI check
|
|
recent_window <- field_ts %>%
|
|
filter(date <= check_date,
|
|
date >= check_date - config$confirmation_days + 1) %>%
|
|
arrange(date)
|
|
|
|
# Count consecutive days below harvest confirmation threshold
|
|
if (nrow(recent_window) >= config$confirmation_days) {
|
|
consecutive_low_days <- 0
|
|
|
|
for (i in nrow(recent_window):1) {
|
|
if (!is.na(recent_window$mean_ci[i]) &&
|
|
recent_window$mean_ci[i] <= config$harvest_confirmed_ci) {
|
|
consecutive_low_days <- consecutive_low_days + 1
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
|
|
# Sustained very low CI = harvest occurred
|
|
if (consecutive_low_days >= config$confirmation_days) {
|
|
stage2_alert <- TRUE
|
|
stage2_level <- "confirmed"
|
|
}
|
|
}
|
|
|
|
return(list(
|
|
stage2_alert = stage2_alert,
|
|
stage2_level = stage2_level,
|
|
current_ci = current_ci,
|
|
consecutive_low_days = if (exists("consecutive_low_days")) consecutive_low_days else 0
|
|
))
|
|
}
|
|
|
|
# ============================================================================
|
|
# COMBINED VALIDATION
|
|
# ============================================================================
|
|
|
|
validate_two_stage_system <- function(field_id) {
|
|
field_ts <- time_series_daily %>%
|
|
filter(field_id == !!field_id) %>%
|
|
arrange(date)
|
|
|
|
field_harvests <- harvest_data_filtered %>%
|
|
filter(field == field_id) %>%
|
|
arrange(season_end)
|
|
|
|
if (nrow(field_harvests) == 0) return(NULL)
|
|
|
|
all_results <- data.frame()
|
|
|
|
for (h in 1:nrow(field_harvests)) {
|
|
harvest_date <- field_harvests$season_end[h]
|
|
last_harvest <- if (h == 1) NA else field_harvests$season_end[h - 1]
|
|
|
|
# Test -21 to +14 days
|
|
test_dates_seq <- seq.Date(
|
|
from = harvest_date - 21,
|
|
to = harvest_date + 14,
|
|
by = "1 day"
|
|
)
|
|
|
|
for (i in 1:length(test_dates_seq)) {
|
|
test_date <- test_dates_seq[i]
|
|
days_from_harvest <- as.numeric(test_date - harvest_date)
|
|
|
|
stage1 <- predict_harvest_window(field_ts, test_date, last_harvest, CONFIG)
|
|
stage2 <- detect_harvest_event(field_ts, test_date, last_harvest, CONFIG)
|
|
|
|
# Only add row if we have valid data
|
|
if (length(stage1$current_ci) > 0 && !is.null(stage1$current_ci)) {
|
|
all_results <- bind_rows(all_results, data.frame(
|
|
field = field_id,
|
|
harvest_event = h,
|
|
harvest_date = harvest_date,
|
|
test_date = test_date,
|
|
days_from_harvest = days_from_harvest,
|
|
stage1_alert = stage1$stage1_alert,
|
|
stage1_level = stage1$stage1_level,
|
|
stage2_alert = stage2$stage2_alert,
|
|
stage2_level = stage2$stage2_level,
|
|
current_ci = stage1$current_ci,
|
|
consecutive_days = stage1$consecutive_days
|
|
))
|
|
}
|
|
}
|
|
}
|
|
|
|
return(all_results)
|
|
}
|
|
|
|
# ============================================================================
|
|
# RUN FULL DATASET VALIDATION
|
|
# ============================================================================
|
|
|
|
cat("============================================================================\n")
|
|
cat("VALIDATING TWO-STAGE SYSTEM ON FULL DATASET\n")
|
|
cat("============================================================================\n\n")
|
|
|
|
all_fields_results <- data.frame()
|
|
summary_by_field <- data.frame()
|
|
|
|
fields_to_test <- unique(harvest_data_filtered$field)
|
|
total_fields <- length(fields_to_test)
|
|
|
|
cat("Testing", total_fields, "fields...\n\n")
|
|
|
|
pb <- txtProgressBar(min = 0, max = total_fields, style = 3)
|
|
|
|
for (f in 1:total_fields) {
|
|
field_id <- fields_to_test[f]
|
|
|
|
field_results <- validate_two_stage_system(field_id)
|
|
|
|
if (!is.null(field_results)) {
|
|
all_fields_results <- bind_rows(all_fields_results, field_results)
|
|
|
|
# Calculate summary for this field
|
|
field_harvests_count <- length(unique(field_results$harvest_event))
|
|
|
|
# Stage 1: First prediction in optimal window (7-21 days ahead)
|
|
stage1_optimal <- field_results %>%
|
|
filter(stage1_alert == TRUE, days_from_harvest >= -21, days_from_harvest <= -7) %>%
|
|
group_by(harvest_event) %>%
|
|
slice(1) %>%
|
|
ungroup()
|
|
|
|
# Stage 2: Detection within 7 days after harvest
|
|
stage2_detections <- field_results %>%
|
|
filter(stage2_alert == TRUE, days_from_harvest >= 0, days_from_harvest <= 7) %>%
|
|
group_by(harvest_event) %>%
|
|
slice(1) %>%
|
|
ungroup()
|
|
|
|
summary_by_field <- bind_rows(summary_by_field, data.frame(
|
|
field = field_id,
|
|
total_harvests = field_harvests_count,
|
|
stage1_optimal = nrow(stage1_optimal),
|
|
stage2_detected = nrow(stage2_detections),
|
|
stage1_rate = round(100 * nrow(stage1_optimal) / field_harvests_count, 1),
|
|
stage2_rate = round(100 * nrow(stage2_detections) / field_harvests_count, 1)
|
|
))
|
|
}
|
|
|
|
setTxtProgressBar(pb, f)
|
|
}
|
|
|
|
close(pb)
|
|
|
|
cat("\n\n============================================================================\n")
|
|
cat("RESULTS BY FIELD\n")
|
|
cat("============================================================================\n\n")
|
|
|
|
print(summary_by_field, row.names = FALSE)
|
|
|
|
# ============================================================================
|
|
# OVERALL SUMMARY
|
|
# ============================================================================
|
|
|
|
cat("\n============================================================================\n")
|
|
cat("OVERALL SUMMARY ACROSS ALL FIELDS\n")
|
|
cat("============================================================================\n\n")
|
|
|
|
total_harvests <- sum(summary_by_field$total_harvests)
|
|
total_stage1_optimal <- sum(summary_by_field$stage1_optimal)
|
|
total_stage2_detected <- sum(summary_by_field$stage2_detected)
|
|
|
|
cat("Total harvest events tested:", total_harvests, "\n\n")
|
|
|
|
cat("STAGE 1 - HARVEST WINDOW PREDICTION:\n")
|
|
cat(" Predictions in optimal window (7-21 days ahead):", total_stage1_optimal, "/", total_harvests, "\n")
|
|
cat(" Success rate:", round(100 * total_stage1_optimal / total_harvests, 1), "%\n\n")
|
|
|
|
cat("STAGE 2 - HARVEST EVENT DETECTION:\n")
|
|
cat(" Detections within 7 days after harvest:", total_stage2_detected, "/", total_harvests, "\n")
|
|
cat(" Success rate:", round(100 * total_stage2_detected / total_harvests, 1), "%\n\n")
|
|
|
|
cat("COMBINED SYSTEM PERFORMANCE:\n")
|
|
cat(" Fields with >50% Stage 1 success:", sum(summary_by_field$stage1_rate > 50), "/", total_fields, "\n")
|
|
cat(" Fields with >50% Stage 2 success:", sum(summary_by_field$stage2_rate > 50), "/", total_fields, "\n\n")
|
|
|
|
# Find best and worst performing fields
|
|
cat("BEST PERFORMING FIELDS (Stage 1):\n")
|
|
top_fields <- summary_by_field %>% arrange(desc(stage1_rate)) %>% head(5)
|
|
print(top_fields, row.names = FALSE)
|
|
|
|
cat("\n\nWORST PERFORMING FIELDS (Stage 1):\n")
|
|
bottom_fields <- summary_by_field %>% arrange(stage1_rate) %>% head(5)
|
|
print(bottom_fields, row.names = FALSE)
|
|
|
|
cat("\n============================================================================\n")
|
|
cat("FACTORY CLIENT INTERPRETATION\n")
|
|
cat("============================================================================\n\n")
|
|
|
|
cat("🏭 TWO-STAGE ALERT SYSTEM:\n\n")
|
|
|
|
cat(" STAGE 1: ADVANCE WARNING (7-21 days ahead)\n")
|
|
cat(" - Factory receives prediction when crop is mature\n")
|
|
cat(" - Allows planning of processing capacity\n")
|
|
cat(" - Coordinate transport and labor\n")
|
|
cat(" - Success rate:", round(100 * total_stage1_optimal / total_harvests, 1), "%\n\n")
|
|
|
|
cat(" STAGE 2: HARVEST CONFIRMATION (0-7 days after)\n")
|
|
cat(" - Confirms harvest has actually occurred\n")
|
|
cat(" - Detects bare soil signature (CI < 1.0)\n")
|
|
cat(" - Triggers processing logistics\n")
|
|
cat(" - Success rate:", round(100 * total_stage2_detected / total_harvests, 1), "%\n\n")
|
|
|
|
cat("📊 OPERATIONAL WORKFLOW:\n")
|
|
cat(" 1. Field shows sustained low CI → Stage 1 alert\n")
|
|
cat(" 2. Factory prepares for harvest in 1-3 weeks\n")
|
|
cat(" 3. CI drops to bare soil → Stage 2 alert\n")
|
|
cat(" 4. Factory confirms harvest and processes cane\n\n")
|
|
|
|
cat("============================================================================\n")
|
|
cat("ANALYSIS COMPLETE\n")
|
|
cat("============================================================================\n")
|
|
|
|
# Save detailed results
|
|
output_file <- here("r_app/experiments/harvest_prediction/two_stage_validation_results.rds")
|
|
saveRDS(list(
|
|
all_results = all_fields_results,
|
|
summary = summary_by_field,
|
|
config = CONFIG
|
|
), output_file)
|
|
|
|
cat("\nDetailed results saved to:", output_file, "\n")
|