423 lines
15 KiB
R
423 lines
15 KiB
R
# ============================================================================
|
|
# HARVEST WINDOW PREDICTION - FORWARD-LOOKING SYSTEM
|
|
# Predict harvest 7-14 days AHEAD for factory logistics planning
|
|
# ============================================================================
|
|
# Use case: Factory needs advance warning when harvest is imminent
|
|
# Strategy: Detect when field enters "harvest-ready window" based on
|
|
# sustained low CI indicating crop maturation complete
|
|
# ============================================================================
|
|
|
|
suppressPackageStartupMessages({
|
|
library(readxl)
|
|
library(dplyr)
|
|
library(tidyr)
|
|
library(lubridate)
|
|
library(here)
|
|
library(ggplot2)
|
|
})
|
|
|
|
# Set project directory
|
|
project_dir <- "esa"
|
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
|
|
|
# Navigate to project root if in experiments folder
|
|
if (basename(getwd()) == "harvest_prediction") {
|
|
setwd("../../..")
|
|
}
|
|
|
|
source(here("r_app", "parameters_project.R"))
|
|
|
|
# ============================================================================
|
|
# CONFIGURATION
|
|
# ============================================================================
|
|
|
|
CONFIG <- list(
|
|
# Minimum field age before harvest is possible (8 months)
|
|
min_field_age_days = 240,
|
|
|
|
# CI thresholds for maturity assessment
|
|
ci_threshold_low = 2.5, # Below this = mature crop
|
|
ci_threshold_very_low = 1.5, # Below this = very mature/bare patches
|
|
|
|
# Sustained low CI indicates "harvest window"
|
|
sustained_low_days = 5, # CI below threshold for N consecutive days
|
|
|
|
# Advanced warning levels
|
|
warning_early = 14, # Days ahead for early warning
|
|
warning_imminent = 7, # Days ahead for imminent warning
|
|
|
|
# Minimum days since last harvest (ratoon cycle)
|
|
min_days_since_harvest = 200,
|
|
|
|
# Validation window
|
|
test_window_days = 21 # Test ±21 days around actual harvest
|
|
)
|
|
|
|
cat("=== HARVEST WINDOW PREDICTION CONFIGURATION ===\n\n")
|
|
cat("Goal: Predict harvest 7-14 days AHEAD for factory planning\n\n")
|
|
cat("Minimum field age:", CONFIG$min_field_age_days, "days (", round(CONFIG$min_field_age_days/30, 1), "months )\n")
|
|
cat("CI thresholds: Low =", CONFIG$ci_threshold_low, "| Very Low =", CONFIG$ci_threshold_very_low, "\n")
|
|
cat("Sustained low CI requirement:", CONFIG$sustained_low_days, "consecutive days\n")
|
|
cat("Warning levels: Early =", CONFIG$warning_early, "days | Imminent =", CONFIG$warning_imminent, "days\n\n")
|
|
|
|
# ============================================================================
|
|
# LOAD DATA
|
|
# ============================================================================
|
|
|
|
cat("=== LOADING DATA ===\n\n")
|
|
|
|
# Load CI time series
|
|
ci_rds_file <- here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds")
|
|
ci_data_raw <- readRDS(ci_rds_file) %>% ungroup()
|
|
|
|
time_series_daily <- ci_data_raw %>%
|
|
mutate(date = as.Date(Date)) %>%
|
|
select(field_id = field, date, mean_ci = FitData) %>%
|
|
filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>%
|
|
arrange(field_id, date)
|
|
|
|
# Load harvest data
|
|
harvest_data <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>%
|
|
mutate(
|
|
season_start = as.Date(season_start),
|
|
season_end = as.Date(season_end)
|
|
) %>%
|
|
filter(!is.na(season_end))
|
|
|
|
# Get fields with both CI and harvest data
|
|
fields_with_ci <- unique(time_series_daily$field_id)
|
|
harvest_data_filtered <- harvest_data %>%
|
|
filter(field %in% fields_with_ci) %>%
|
|
arrange(field, season_end)
|
|
|
|
cat("Fields with CI data:", length(fields_with_ci), "\n")
|
|
cat("Fields with harvest records:", length(unique(harvest_data_filtered$field)), "\n")
|
|
cat("Total harvest events:", nrow(harvest_data_filtered), "\n\n")
|
|
|
|
# ============================================================================
|
|
# PREDICTION FUNCTION
|
|
# ============================================================================
|
|
|
|
predict_harvest_window <- function(field_ts, check_date, last_harvest_date, config = CONFIG) {
|
|
# Predict if harvest is likely in next 7-14 days based on sustained low CI
|
|
#
|
|
# Logic:
|
|
# 1. Check field age (must be ≥ 240 days)
|
|
# 2. Check CI has been below threshold for N consecutive days
|
|
# 3. Assess severity (low vs very low CI)
|
|
# 4. Return prediction confidence and expected harvest window
|
|
|
|
# Get current CI
|
|
current_ci <- field_ts %>%
|
|
filter(date == check_date) %>%
|
|
pull(mean_ci)
|
|
|
|
if (length(current_ci) == 0) {
|
|
return(list(
|
|
predicted = FALSE,
|
|
confidence = "no_data",
|
|
current_ci = NA,
|
|
consecutive_days_low = 0,
|
|
field_age = NA,
|
|
harvest_window = "unknown"
|
|
))
|
|
}
|
|
|
|
# Calculate field age
|
|
if (is.null(last_harvest_date) || is.na(last_harvest_date)) {
|
|
# First harvest - use earliest CI date as planting proxy
|
|
earliest_date <- min(field_ts$date, na.rm = TRUE)
|
|
field_age <- as.numeric(check_date - earliest_date)
|
|
} else {
|
|
field_age <- as.numeric(check_date - last_harvest_date)
|
|
}
|
|
|
|
# Check minimum age requirement
|
|
if (field_age < config$min_field_age_days) {
|
|
return(list(
|
|
predicted = FALSE,
|
|
confidence = "too_young",
|
|
current_ci = current_ci,
|
|
consecutive_days_low = 0,
|
|
field_age = field_age,
|
|
harvest_window = "not_ready"
|
|
))
|
|
}
|
|
|
|
# Count consecutive days with CI below threshold (looking backward from check_date)
|
|
recent_data <- field_ts %>%
|
|
filter(date <= check_date, date >= check_date - 30) %>%
|
|
arrange(desc(date))
|
|
|
|
consecutive_days_low <- 0
|
|
for (i in 1:nrow(recent_data)) {
|
|
if (recent_data$mean_ci[i] <= config$ci_threshold_low) {
|
|
consecutive_days_low <- consecutive_days_low + 1
|
|
} else {
|
|
break # Stop at first day above threshold
|
|
}
|
|
}
|
|
|
|
# Calculate mean CI over sustained period
|
|
mean_ci_sustained <- if (consecutive_days_low > 0) {
|
|
recent_data %>%
|
|
slice(1:consecutive_days_low) %>%
|
|
summarise(mean = mean(mean_ci, na.rm = TRUE)) %>%
|
|
pull(mean)
|
|
} else {
|
|
NA
|
|
}
|
|
|
|
# Determine prediction confidence and harvest window
|
|
predicted <- FALSE
|
|
confidence <- "none"
|
|
harvest_window <- "not_ready"
|
|
|
|
if (consecutive_days_low >= config$sustained_low_days) {
|
|
predicted <- TRUE
|
|
|
|
# Assess severity based on mean CI during sustained period
|
|
if (!is.na(mean_ci_sustained) && mean_ci_sustained <= config$ci_threshold_very_low) {
|
|
confidence <- "imminent" # Very low CI = harvest within 7 days
|
|
harvest_window <- "7_days"
|
|
} else {
|
|
confidence <- "likely" # Low CI = harvest within 7-14 days
|
|
harvest_window <- "7_14_days"
|
|
}
|
|
} else if (consecutive_days_low >= 2) {
|
|
# Starting to show maturity signals
|
|
predicted <- TRUE
|
|
confidence <- "possible"
|
|
harvest_window <- "14_21_days"
|
|
}
|
|
|
|
return(list(
|
|
predicted = predicted,
|
|
confidence = confidence,
|
|
current_ci = current_ci,
|
|
mean_ci_sustained = mean_ci_sustained,
|
|
consecutive_days_low = consecutive_days_low,
|
|
field_age = field_age,
|
|
harvest_window = harvest_window
|
|
))
|
|
}
|
|
|
|
# ============================================================================
|
|
# VALIDATION FUNCTION
|
|
# ============================================================================
|
|
|
|
validate_harvest_prediction <- function(field_id, test_field = NULL) {
|
|
# Test prediction accuracy by checking ±21 days around actual harvest dates
|
|
|
|
# Get field data
|
|
field_ts <- time_series_daily %>%
|
|
filter(field_id == !!field_id) %>%
|
|
arrange(date)
|
|
|
|
field_harvests <- harvest_data_filtered %>%
|
|
filter(field == field_id) %>%
|
|
arrange(season_end)
|
|
|
|
if (nrow(field_harvests) == 0) {
|
|
cat("No harvest records for field", field_id, "\n")
|
|
return(NULL)
|
|
}
|
|
|
|
cat("\n", rep("=", 80), "\n", sep = "")
|
|
cat("Testing field:", field_id, "\n")
|
|
cat("Field has", nrow(field_harvests), "recorded harvest events\n")
|
|
cat(rep("=", 80), "\n\n", sep = "")
|
|
|
|
all_results <- list()
|
|
detection_timing <- data.frame()
|
|
|
|
# Test each harvest event
|
|
for (h in 1:nrow(field_harvests)) {
|
|
harvest_date <- field_harvests$season_end[h]
|
|
|
|
# Get previous harvest for field age calculation
|
|
if (h == 1) {
|
|
last_harvest <- NA
|
|
} else {
|
|
last_harvest <- field_harvests$season_end[h - 1]
|
|
}
|
|
|
|
# Test dates from -21 to +14 days around harvest
|
|
test_dates_seq <- seq.Date(
|
|
from = harvest_date - CONFIG$test_window_days,
|
|
to = harvest_date + 14,
|
|
by = "1 day"
|
|
)
|
|
|
|
# Run prediction for each test date
|
|
event_results <- data.frame()
|
|
first_detection <- NULL
|
|
|
|
for (i in 1:length(test_dates_seq)) {
|
|
test_date <- test_dates_seq[i]
|
|
days_from_harvest <- as.numeric(test_date - harvest_date)
|
|
|
|
result <- predict_harvest_window(field_ts, test_date, last_harvest, CONFIG)
|
|
|
|
# Track first detection
|
|
if (result$predicted && is.null(first_detection)) {
|
|
first_detection <- list(
|
|
date = test_date,
|
|
days_before = -days_from_harvest,
|
|
confidence = result$confidence,
|
|
consecutive_days = result$consecutive_days_low,
|
|
mean_ci = result$mean_ci_sustained,
|
|
harvest_window = result$harvest_window
|
|
)
|
|
}
|
|
|
|
event_results <- bind_rows(event_results, data.frame(
|
|
harvest_event = h,
|
|
harvest_date = harvest_date,
|
|
test_date = test_date,
|
|
days_from_harvest = days_from_harvest,
|
|
predicted = result$predicted,
|
|
confidence = result$confidence,
|
|
current_ci = result$current_ci,
|
|
consecutive_days_low = result$consecutive_days_low,
|
|
field_age = result$field_age,
|
|
harvest_window = result$harvest_window
|
|
))
|
|
}
|
|
|
|
all_results[[h]] <- event_results
|
|
|
|
# Print harvest event summary
|
|
cat("--- Harvest Event", h, ":", format(harvest_date, "%Y-%m-%d"), "---\n")
|
|
|
|
if (!is.null(first_detection)) {
|
|
cat("✓ First prediction:", format(first_detection$date, "%Y-%m-%d"),
|
|
"(", first_detection$days_before, "days before harvest )\n")
|
|
cat(" Confidence:", first_detection$confidence, "\n")
|
|
cat(" Harvest window:", first_detection$harvest_window, "\n")
|
|
cat(" Consecutive days low CI:", first_detection$consecutive_days, "\n")
|
|
cat(" Mean CI during period:", round(first_detection$mean_ci, 2), "\n")
|
|
|
|
# Categorize detection timing
|
|
if (first_detection$days_before >= 7 && first_detection$days_before <= 21) {
|
|
cat(" ✓ GOOD: Detected in optimal window (7-21 days ahead)\n")
|
|
} else if (first_detection$days_before > 21) {
|
|
cat(" ⚠️ EARLY: Detected >21 days ahead\n")
|
|
} else if (first_detection$days_before >= 0) {
|
|
cat(" ⚠️ LATE: Detected <7 days ahead\n")
|
|
} else {
|
|
cat(" ✗ MISSED: Detected after harvest\n")
|
|
}
|
|
} else {
|
|
cat("✗ No prediction detected\n")
|
|
}
|
|
|
|
cat("\n")
|
|
|
|
# Build detection timing matrix
|
|
timing_row <- data.frame(harvest_event = h)
|
|
for (offset in c(-21, -14, -7, -3, -1, 0, 1, 3, 7, 14)) {
|
|
detected_on_day <- event_results %>%
|
|
filter(days_from_harvest == offset) %>%
|
|
pull(predicted)
|
|
|
|
timing_row[[paste0("d", ifelse(offset >= 0, "_plus_", "_minus_"), abs(offset))]] <-
|
|
ifelse(length(detected_on_day) > 0 && detected_on_day, "YES", "NO")
|
|
}
|
|
detection_timing <- bind_rows(detection_timing, timing_row)
|
|
}
|
|
|
|
# Print detection timing table
|
|
cat("\n", rep("=", 80), "\n", sep = "")
|
|
cat("PREDICTION TIMING TABLE\n")
|
|
cat("Columns: Days relative to harvest date\n")
|
|
cat(rep("=", 80), "\n\n", sep = "")
|
|
print(detection_timing, row.names = FALSE)
|
|
|
|
# Calculate summary statistics
|
|
all_results_df <- bind_rows(all_results)
|
|
|
|
# Find optimal prediction window (7-21 days before)
|
|
optimal_detections <- all_results_df %>%
|
|
filter(predicted == TRUE, days_from_harvest >= -21, days_from_harvest <= -7) %>%
|
|
group_by(harvest_event) %>%
|
|
slice(1) %>% # First detection in optimal window
|
|
ungroup()
|
|
|
|
early_detections <- all_results_df %>%
|
|
filter(predicted == TRUE, days_from_harvest < -21) %>%
|
|
group_by(harvest_event) %>%
|
|
slice(1) %>%
|
|
ungroup()
|
|
|
|
late_detections <- all_results_df %>%
|
|
filter(predicted == TRUE, days_from_harvest > -7) %>%
|
|
group_by(harvest_event) %>%
|
|
slice(1) %>%
|
|
ungroup()
|
|
|
|
total_harvests <- nrow(field_harvests)
|
|
|
|
cat("\n", rep("=", 80), "\n", sep = "")
|
|
cat("VALIDATION SUMMARY\n")
|
|
cat(rep("=", 80), "\n\n", sep = "")
|
|
cat("Total harvest events tested:", total_harvests, "\n\n")
|
|
cat("Predictions in OPTIMAL window (7-21 days ahead):", nrow(optimal_detections), "/", total_harvests,
|
|
"(", round(100 * nrow(optimal_detections) / total_harvests, 1), "% )\n")
|
|
cat("Predictions TOO EARLY (>21 days ahead):", nrow(early_detections), "\n")
|
|
cat("Predictions TOO LATE (<7 days ahead):", nrow(late_detections), "\n")
|
|
cat("Missed harvests:", total_harvests - nrow(optimal_detections) - nrow(early_detections) - nrow(late_detections), "\n\n")
|
|
|
|
# Overall detection rate
|
|
detected_total <- all_results_df %>%
|
|
filter(predicted == TRUE, days_from_harvest <= 0) %>%
|
|
distinct(harvest_event) %>%
|
|
nrow()
|
|
|
|
cat("Overall detection rate (any time before harvest):", detected_total, "/", total_harvests,
|
|
"(", round(100 * detected_total / total_harvests, 1), "% )\n\n")
|
|
|
|
# Return detailed results
|
|
invisible(list(
|
|
all_results = all_results_df,
|
|
detection_timing = detection_timing,
|
|
optimal_detections = optimal_detections,
|
|
summary = data.frame(
|
|
field = field_id,
|
|
total_harvests = total_harvests,
|
|
optimal_window = nrow(optimal_detections),
|
|
too_early = nrow(early_detections),
|
|
too_late = nrow(late_detections),
|
|
missed = total_harvests - detected_total,
|
|
detection_rate = round(100 * detected_total / total_harvests, 1)
|
|
)
|
|
))
|
|
}
|
|
|
|
# ============================================================================
|
|
# RUN VALIDATION
|
|
# ============================================================================
|
|
|
|
# Test on Field 00110 (from your graphs)
|
|
test_field <- "00110"
|
|
results <- validate_harvest_prediction(test_field)
|
|
|
|
cat("\n", rep("=", 80), "\n", sep = "")
|
|
cat("INTERPRETATION FOR FACTORY CLIENT\n")
|
|
cat(rep("=", 80), "\n\n", sep = "")
|
|
cat("This system provides ADVANCE WARNING when harvest is likely imminent:\n\n")
|
|
cat(" 📊 HARVEST WINDOW PREDICTIONS:\n")
|
|
cat(" - '7_days': Harvest expected within 7 days (IMMINENT)\n")
|
|
cat(" - '7_14_days': Harvest expected in 7-14 days (LIKELY)\n")
|
|
cat(" - '14_21_days': Harvest possible in 14-21 days (WATCH)\n\n")
|
|
cat(" ⚙️ DETECTION LOGIC:\n")
|
|
cat(" - CI below 2.5 for", CONFIG$sustained_low_days, "consecutive days = crop mature\n")
|
|
cat(" - Very low CI (<1.5) = harvest imminent (7 days)\n")
|
|
cat(" - Low CI (1.5-2.5) = harvest likely (7-14 days)\n\n")
|
|
cat(" 🏭 FACTORY USE CASE:\n")
|
|
cat(" - Factory gets 7-21 days advance notice to plan logistics\n")
|
|
cat(" - Can schedule processing capacity and transport\n")
|
|
cat(" - Avoids surprise harvest deliveries\n\n")
|
|
|
|
cat("=== ANALYSIS COMPLETE ===\n")
|