SmartCane/r_app/experiments/harvest_prediction/old/simplified_harvest_alerts.R
2026-01-06 14:17:37 +01:00

342 lines
12 KiB
R

# ============================================================================
# SIMPLIFIED TWO-STAGE HARVEST ALERTS
# Based on stateful logic but adapted for daily operations
# ============================================================================
# STAGE 1: "Harvest will happen soon" (not "in exactly 14 days")
# - Field was recently mature (CI > 3.5)
# - CI dropped below 2.5 for 14+ consecutive days
# - Alert: "Harvest expected soon - monitor this field"
#
# STAGE 2: "Harvest has occurred"
# - CI stabilized at very low level (< 2.0)
# - Low variability for 5+ days (stable bare soil)
# - Alert: "Harvest completed in recent days"
# ============================================================================
suppressPackageStartupMessages({
library(readxl)
library(dplyr)
library(tidyr)
library(lubridate)
library(here)
library(zoo)
})
project_dir <- "esa"
assign("project_dir", project_dir, envir = .GlobalEnv)
if (basename(getwd()) == "harvest_prediction") {
setwd("../../..")
}
source(here("r_app", "parameters_project.R"))
# ============================================================================
# CONFIGURATION
# ============================================================================
CONFIG <- list(
# STAGE 1: Harvest approaching
mature_ci = 3.5, # Field must have been mature (CI > this)
harvest_ci = 2.5, # Alert when below this
consecutive_days = 14, # Days below threshold to trigger
lookback_mature = 60, # Days to check for previous mature state
min_field_age = 240, # 8 months minimum
# STAGE 2: Harvest completed
completed_ci = 2.0, # Very low CI threshold
stable_days = 5, # Days of stable low CI
max_variability = 0.3, # Max SD for "stable"
# Validation
test_window_days = 21
)
cat("============================================================================\n")
cat("SIMPLIFIED HARVEST ALERT SYSTEM\n")
cat("============================================================================\n\n")
cat("STAGE 1 - HARVEST APPROACHING:\n")
cat(" - Field was mature (CI >", CONFIG$mature_ci, ") in last", CONFIG$lookback_mature, "days\n")
cat(" - CI drops below", CONFIG$harvest_ci, "for", CONFIG$consecutive_days, "consecutive days\n")
cat(" - Alert: 'Harvest expected soon - prepare logistics'\n\n")
cat("STAGE 2 - HARVEST COMPLETED:\n")
cat(" - CI below", CONFIG$completed_ci, "with low variability\n")
cat(" - Stable for", CONFIG$stable_days, "days\n")
cat(" - Alert: 'Harvest completed in recent days'\n\n")
# ============================================================================
# LOAD DATA
# ============================================================================
cat("=== LOADING DATA ===\n\n")
ci_rds_file <- here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds")
ci_data_raw <- readRDS(ci_rds_file) %>% ungroup()
time_series_daily <- ci_data_raw %>%
mutate(date = as.Date(Date)) %>%
select(field_id = field, date, mean_ci = FitData) %>%
filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>%
arrange(field_id, date)
harvest_data <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>%
mutate(
season_start = as.Date(season_start),
season_end = as.Date(season_end)
) %>%
filter(!is.na(season_end))
fields_with_ci <- unique(time_series_daily$field_id)
harvest_data_filtered <- harvest_data %>%
filter(field %in% fields_with_ci) %>%
arrange(field, season_end)
cat("Fields:", length(fields_with_ci), "\n")
cat("Harvest events:", nrow(harvest_data_filtered), "\n\n")
# ============================================================================
# STAGE 1: HARVEST APPROACHING
# ============================================================================
detect_approaching <- function(field_ts, check_date, last_harvest_date, config = CONFIG) {
# Check field age
if (is.null(last_harvest_date) || is.na(last_harvest_date)) {
earliest_date <- min(field_ts$date, na.rm = TRUE)
field_age <- as.numeric(check_date - earliest_date)
} else {
field_age <- as.numeric(check_date - last_harvest_date)
}
if (field_age < config$min_field_age) {
return(list(alert = FALSE, reason = "too_young", consecutive_days = 0))
}
# Check if field was recently mature
recent_data <- field_ts %>%
filter(date >= check_date - config$lookback_mature, date < check_date)
was_mature <- any(recent_data$mean_ci > config$mature_ci, na.rm = TRUE)
if (!was_mature) {
return(list(alert = FALSE, reason = "never_mature", consecutive_days = 0))
}
# Count consecutive days below harvest threshold
recent_ci <- field_ts %>%
filter(date <= check_date, date >= check_date - 30) %>%
arrange(desc(date))
consecutive_days <- 0
for (i in 1:nrow(recent_ci)) {
if (!is.na(recent_ci$mean_ci[i]) && recent_ci$mean_ci[i] <= config$harvest_ci) {
consecutive_days <- consecutive_days + 1
} else {
break
}
}
alert <- consecutive_days >= config$consecutive_days
return(list(
alert = alert,
reason = ifelse(alert, "APPROACHING", "not_sustained"),
consecutive_days = consecutive_days
))
}
# ============================================================================
# STAGE 2: HARVEST COMPLETED
# ============================================================================
detect_completed <- function(field_ts, check_date, config = CONFIG) {
# Get recent CI data
recent_data <- field_ts %>%
filter(date <= check_date, date >= check_date - config$stable_days) %>%
arrange(date)
if (nrow(recent_data) < config$stable_days) {
return(list(alert = FALSE, reason = "insufficient_data"))
}
# Check if all recent days are below completed_ci threshold
all_low <- all(recent_data$mean_ci <= config$completed_ci, na.rm = TRUE)
if (!all_low) {
return(list(alert = FALSE, reason = "not_low_enough"))
}
# Check variability (stable signal)
ci_sd <- sd(recent_data$mean_ci, na.rm = TRUE)
if (ci_sd > config$max_variability) {
return(list(alert = FALSE, reason = "too_variable"))
}
return(list(
alert = TRUE,
reason = "COMPLETED",
mean_ci = mean(recent_data$mean_ci, na.rm = TRUE),
sd_ci = ci_sd
))
}
# ============================================================================
# VALIDATION
# ============================================================================
validate_simplified_system <- function(field_id) {
field_ts <- time_series_daily %>%
filter(field_id == !!field_id) %>%
arrange(date)
field_harvests <- harvest_data_filtered %>%
filter(field == field_id) %>%
arrange(season_end)
if (nrow(field_harvests) == 0) return(NULL)
all_results <- data.frame()
for (h in 1:nrow(field_harvests)) {
harvest_date <- field_harvests$season_end[h]
last_harvest <- if (h == 1) NA else field_harvests$season_end[h - 1]
test_dates_seq <- seq.Date(
from = harvest_date - CONFIG$test_window_days,
to = harvest_date + 14,
by = "1 day"
)
for (i in 1:length(test_dates_seq)) {
test_date <- test_dates_seq[i]
days_from_harvest <- as.numeric(test_date - harvest_date)
stage1 <- detect_approaching(field_ts, test_date, last_harvest, CONFIG)
stage2 <- detect_completed(field_ts, test_date, CONFIG)
all_results <- bind_rows(all_results, data.frame(
field = field_id,
harvest_event = h,
harvest_date = harvest_date,
test_date = test_date,
days_from_harvest = days_from_harvest,
stage1_alert = stage1$alert,
stage1_reason = stage1$reason,
stage2_alert = stage2$alert,
stage2_reason = stage2$reason,
consecutive_days = stage1$consecutive_days
))
}
}
return(all_results)
}
# ============================================================================
# RUN VALIDATION
# ============================================================================
cat("============================================================================\n")
cat("VALIDATING ON FULL DATASET\n")
cat("============================================================================\n\n")
all_results <- data.frame()
summary_stats <- data.frame()
fields_to_test <- unique(harvest_data_filtered$field)
total_fields <- length(fields_to_test)
cat("Testing", total_fields, "fields...\n\n")
pb <- txtProgressBar(min = 0, max = total_fields, style = 3)
for (f in 1:total_fields) {
field_id <- fields_to_test[f]
field_results <- validate_simplified_system(field_id)
if (!is.null(field_results) && nrow(field_results) > 0) {
all_results <- bind_rows(all_results, field_results)
field_harvests_count <- length(unique(field_results$harvest_event))
# Stage 1: Any alert before harvest
stage1_success <- field_results %>%
filter(stage1_alert == TRUE, days_from_harvest < 0) %>%
distinct(harvest_event) %>%
nrow()
# Stage 2: Detection within 1-7 days after
stage2_success <- field_results %>%
filter(stage2_alert == TRUE, days_from_harvest >= 1, days_from_harvest <= 7) %>%
distinct(harvest_event) %>%
nrow()
summary_stats <- bind_rows(summary_stats, data.frame(
field = field_id,
total_harvests = field_harvests_count,
stage1_success = stage1_success,
stage2_success = stage2_success,
stage1_rate = round(100 * stage1_success / field_harvests_count, 1),
stage2_rate = round(100 * stage2_success / field_harvests_count, 1)
))
}
setTxtProgressBar(pb, f)
}
close(pb)
# ============================================================================
# RESULTS
# ============================================================================
cat("\n\n============================================================================\n")
cat("RESULTS BY FIELD\n")
cat("============================================================================\n\n")
print(summary_stats, row.names = FALSE)
cat("\n============================================================================\n")
cat("OVERALL PERFORMANCE\n")
cat("============================================================================\n\n")
total_harvests <- sum(summary_stats$total_harvests)
total_stage1 <- sum(summary_stats$stage1_success)
total_stage2 <- sum(summary_stats$stage2_success)
cat("Total harvest events:", total_harvests, "\n\n")
cat("STAGE 1 - HARVEST APPROACHING (any time before harvest):\n")
cat(" Success:", total_stage1, "/", total_harvests,
"(", round(100 * total_stage1 / total_harvests, 1), "% )\n")
cat(" Fields with >50% success:", sum(summary_stats$stage1_rate > 50), "/", total_fields, "\n\n")
cat("STAGE 2 - HARVEST COMPLETED (1-7 days after):\n")
cat(" Success:", total_stage2, "/", total_harvests,
"(", round(100 * total_stage2 / total_harvests, 1), "% )\n")
cat(" Fields with >50% success:", sum(summary_stats$stage2_rate > 50), "/", total_fields, "\n\n")
cat("============================================================================\n")
cat("KEY INSIGHT\n")
cat("============================================================================\n\n")
cat("This approach doesn't predict 'harvest in X days' - it says:\n")
cat(" STAGE 1: 'Harvest will happen soon' (field mature → declining)\n")
cat(" STAGE 2: 'Harvest occurred recently' (bare soil detected)\n\n")
cat("No exact timing - just actionable binary alerts for factory planning\n\n")
# Save results
output_file <- here("r_app/experiments/harvest_prediction/simplified_validation_results.rds")
saveRDS(list(
all_results = all_results,
summary = summary_stats,
config = CONFIG
), output_file)
cat("============================================================================\n")
cat("Results saved to:", output_file, "\n")
cat("============================================================================\n")