273 lines
10 KiB
R
273 lines
10 KiB
R
# State-based harvest detection considering crop lifecycle
|
|
# Detects: GROWING → MATURING → DECLINING → HARVEST → RECOVERING
|
|
suppressPackageStartupMessages({
|
|
library(readxl)
|
|
library(dplyr)
|
|
library(tidyr)
|
|
library(lubridate)
|
|
library(terra)
|
|
library(sf)
|
|
library(here)
|
|
})
|
|
|
|
# Set project directory
|
|
project_dir <- "esa"
|
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
|
|
|
source(here("r_app", "parameters_project.R"))
|
|
|
|
# Read pre-extracted CI data
|
|
ci_rds_file <- here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds")
|
|
ci_data_raw <- readRDS(ci_rds_file) %>% ungroup()
|
|
|
|
time_series_daily <- ci_data_raw %>%
|
|
mutate(
|
|
date = as.Date(Date),
|
|
week = isoweek(date),
|
|
year = isoyear(date)
|
|
) %>%
|
|
select(
|
|
field_id = field,
|
|
date,
|
|
week,
|
|
year,
|
|
mean_ci = FitData
|
|
) %>%
|
|
filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>%
|
|
arrange(field_id, date)
|
|
|
|
cat("Loaded", nrow(time_series_daily), "daily observations\n\n")
|
|
|
|
# ==============================================================================
|
|
# STATE-BASED HARVEST DETECTION
|
|
# ==============================================================================
|
|
|
|
detect_harvest_stateful <- function(daily_ts, field_name,
|
|
mature_ci = 3.5, # CI > this = mature crop
|
|
harvest_ci = 2.5, # CI < this = harvest phase
|
|
mature_window = 30, # Days to confirm mature state
|
|
decline_rate = -0.02, # CI/day decline rate to detect pre-harvest
|
|
harvest_min_days = 14, # Minimum days below harvest_ci (increased to delay detection)
|
|
recovery_threshold = 3.0) { # CI rising above this = recovery
|
|
|
|
field_ts <- daily_ts %>%
|
|
filter(field_id == field_name) %>%
|
|
arrange(date) %>%
|
|
mutate(
|
|
# Smoothing: 7-day rolling median to reduce noise
|
|
ci_smooth = zoo::rollmedian(mean_ci, k = 7, fill = NA, align = "center"),
|
|
ci_smooth = ifelse(is.na(ci_smooth), mean_ci, ci_smooth),
|
|
|
|
# Trend: 14-day rolling slope (CI change rate)
|
|
ci_trend = (ci_smooth - lag(ci_smooth, 14)) / 14,
|
|
|
|
# Rolling statistics for context
|
|
ci_mean_60d = zoo::rollmean(ci_smooth, k = 60, fill = NA, align = "right"),
|
|
ci_max_60d = zoo::rollmax(ci_smooth, k = 60, fill = NA, align = "right")
|
|
)
|
|
|
|
if (nrow(field_ts) < 100) {
|
|
return(tibble(
|
|
field_id = character(),
|
|
harvest_date = as.Date(character()),
|
|
harvest_week = numeric(),
|
|
harvest_year = numeric(),
|
|
state = character(),
|
|
ci_at_harvest = numeric()
|
|
))
|
|
}
|
|
|
|
# State machine: track crop lifecycle states
|
|
field_ts <- field_ts %>%
|
|
mutate(
|
|
# Define states based on CI level and trend
|
|
is_mature = ci_smooth > mature_ci & ci_mean_60d > mature_ci,
|
|
is_declining = ci_trend < decline_rate & !is.na(ci_trend),
|
|
is_harvest = ci_smooth < harvest_ci,
|
|
is_recovering = ci_smooth > recovery_threshold & ci_trend > 0.01
|
|
)
|
|
|
|
# Detect harvest events: MATURE phase → CI drops below threshold → declare harvest
|
|
harvests <- tibble()
|
|
i <- mature_window + 1
|
|
last_harvest_date <- as.Date("1900-01-01")
|
|
consecutive_low_days <- 0
|
|
potential_harvest_start <- NA
|
|
|
|
while (i <= nrow(field_ts)) {
|
|
current_date <- field_ts$date[i]
|
|
days_since_last_harvest <- as.numeric(current_date - last_harvest_date)
|
|
|
|
# Only look for new harvest if enough time has passed (min 6 months)
|
|
if (days_since_last_harvest > 180) {
|
|
|
|
# Check if currently in low CI period
|
|
if (field_ts$is_harvest[i]) {
|
|
if (consecutive_low_days == 0) {
|
|
# Start of new low period - check if came from mature state
|
|
recent_was_mature <- any(field_ts$is_mature[(max(1,i-60)):(i-1)], na.rm = TRUE)
|
|
|
|
if (recent_was_mature) {
|
|
potential_harvest_start <- current_date
|
|
consecutive_low_days <- 1
|
|
}
|
|
} else {
|
|
consecutive_low_days <- consecutive_low_days + 1
|
|
}
|
|
|
|
# Declare harvest after consecutive low days threshold met
|
|
if (consecutive_low_days == harvest_min_days) {
|
|
harvests <- bind_rows(harvests, tibble(
|
|
field_id = field_name,
|
|
harvest_date = potential_harvest_start,
|
|
harvest_week = isoweek(potential_harvest_start),
|
|
harvest_year = isoyear(potential_harvest_start),
|
|
state = "APPROACHING", # Stage 1: CI declining, harvest approaching
|
|
alert_message = "⚠️ Field CI declining - harvest expected in 2-4 weeks",
|
|
ci_at_harvest = field_ts$ci_smooth[field_ts$date == potential_harvest_start],
|
|
low_days = consecutive_low_days
|
|
))
|
|
|
|
last_harvest_date <- potential_harvest_start
|
|
}
|
|
} else {
|
|
# CI rose above threshold - reset counter
|
|
consecutive_low_days <- 0
|
|
potential_harvest_start <- NA
|
|
}
|
|
}
|
|
|
|
i <- i + 1
|
|
}
|
|
|
|
# ============================================================================
|
|
# STAGE 2: Detect harvest completion (CI stabilized at low level)
|
|
# ============================================================================
|
|
|
|
# For each detected "APPROACHING" harvest, check if we can upgrade to "COMPLETED"
|
|
if (nrow(harvests) > 0) {
|
|
for (h in 1:nrow(harvests)) {
|
|
if (harvests$state[h] == "APPROACHING") {
|
|
approach_date <- harvests$harvest_date[h]
|
|
|
|
# Look 7-21 days after approach detection for stabilization
|
|
stable_window <- field_ts %>%
|
|
filter(date >= approach_date + 7, date <= approach_date + 21)
|
|
|
|
if (nrow(stable_window) >= 7) {
|
|
# Calculate stability: low CI with low variability
|
|
stable_window <- stable_window %>%
|
|
mutate(
|
|
ci_sd_7d = zoo::rollapply(ci_smooth, width = 7, FUN = sd, fill = NA, align = "center")
|
|
)
|
|
|
|
# Check if CI is stable (SD < 0.3) and low (< 2.0) for at least 7 days
|
|
stable_days <- stable_window %>%
|
|
filter(!is.na(ci_sd_7d), ci_sd_7d < 0.3, ci_smooth < 2.0) %>%
|
|
nrow()
|
|
|
|
if (stable_days >= 7) {
|
|
# Upgrade to COMPLETED
|
|
harvests$state[h] <- "COMPLETED"
|
|
harvests$alert_message[h] <- "✓ Harvest likely completed in recent days - CI stable at low level"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
# Remove the low_days column before returning to match expected schema
|
|
harvests <- harvests %>% select(-low_days, -alert_message)
|
|
|
|
return(harvests)
|
|
}
|
|
|
|
cat("Running state-based harvest detection...\n")
|
|
all_harvests <- lapply(unique(time_series_daily$field_id), function(field_name) {
|
|
detect_harvest_stateful(daily_ts = time_series_daily, field_name)
|
|
}) %>% bind_rows()
|
|
|
|
cat("Detected", nrow(all_harvests), "harvest events\n")
|
|
cat(" APPROACHING (CI declining):", sum(all_harvests$state == "APPROACHING"), "\n")
|
|
cat(" COMPLETED (CI stable low):", sum(all_harvests$state == "COMPLETED"), "\n\n")
|
|
|
|
# ==============================================================================
|
|
# COMPARE WITH ACTUAL HARVEST DATA
|
|
# ==============================================================================
|
|
|
|
harvest_actual_all <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>%
|
|
mutate(
|
|
season_start = as.Date(season_start),
|
|
season_end = as.Date(season_end)
|
|
) %>%
|
|
filter(!is.na(season_end))
|
|
|
|
fields_with_data <- unique(field_boundaries_sf$field)
|
|
|
|
harvest_actual <- harvest_actual_all %>%
|
|
filter(field %in% fields_with_data) %>%
|
|
filter(!is.na(season_end)) %>%
|
|
mutate(
|
|
actual_harvest_week = isoweek(season_end),
|
|
actual_harvest_year = isoyear(season_end)
|
|
)
|
|
|
|
cat("=== COMPARISON: STATE-BASED DETECTION vs ACTUAL ===\n\n")
|
|
|
|
harvest_actual2 <- harvest_actual %>%
|
|
select(field, actual_week = actual_harvest_week, actual_year = actual_harvest_year)
|
|
|
|
harvest_detected2 <- all_harvests %>%
|
|
select(field_id, detected_week = harvest_week, detected_year = harvest_year,
|
|
state, ci_at_harvest)
|
|
|
|
comparison <- harvest_actual2 %>%
|
|
full_join(
|
|
harvest_detected2,
|
|
by = c("field" = "field_id", "actual_year" = "detected_year")
|
|
) %>%
|
|
mutate(
|
|
week_difference_signed = ifelse(!is.na(actual_week) & !is.na(detected_week),
|
|
detected_week - actual_week, NA), # Negative = detected early
|
|
week_difference = abs(week_difference_signed),
|
|
status = case_when(
|
|
!is.na(actual_week) & !is.na(detected_week) & week_difference <= 2 ~ "✓ MATCHED",
|
|
!is.na(actual_week) & !is.na(detected_week) & week_difference > 2 ~ paste0("⚠ MISMATCH (", ifelse(week_difference_signed < 0, week_difference_signed, paste0("+", week_difference_signed)), "w)"),
|
|
is.na(actual_week) & !is.na(detected_week) ~ "⚠ FALSE POSITIVE",
|
|
!is.na(actual_week) & is.na(detected_week) ~ "✗ MISSED",
|
|
TRUE ~ "Unknown"
|
|
)
|
|
) %>%
|
|
select(field, actual_year, actual_week, detected_week, week_diff = week_difference_signed,
|
|
status, state, ci_at_harvest) %>%
|
|
filter(!is.na(actual_week)) %>% # Only compare against actual recorded harvests
|
|
arrange(field, actual_year)
|
|
|
|
cat("Filtered to only fields with recorded harvest dates\n")
|
|
cat("(Removed rows where actual_week = NA)\n\n")
|
|
print(comparison, n = 100)
|
|
|
|
cat("\n\n=== SUMMARY STATISTICS (FILTERED DATA ONLY) ===\n")
|
|
matched <- sum(comparison$status == "✓ MATCHED", na.rm = TRUE)
|
|
false_pos <- sum(comparison$status == "⚠ FALSE POSITIVE", na.rm = TRUE)
|
|
missed <- sum(comparison$status == "✗ MISSED", na.rm = TRUE)
|
|
mismatch <- sum(grepl("MISMATCH", comparison$status), na.rm = TRUE)
|
|
|
|
cat("Total actual harvest events (with records):", nrow(harvest_actual), "\n")
|
|
cat("Total rows in filtered comparison:", nrow(comparison), "\n\n")
|
|
|
|
cat("✓ MATCHED (±2 weeks):", matched, "\n")
|
|
cat("⚠ WEEK MISMATCH (>2 weeks):", mismatch, "\n")
|
|
cat("⚠ FALSE POSITIVES:", false_pos, "\n")
|
|
cat("✗ MISSED:", missed, "\n\n")
|
|
|
|
if (nrow(harvest_actual) > 0) {
|
|
cat("Detection rate:", round(100 * (matched + mismatch) / nrow(harvest_actual), 1), "%\n")
|
|
cat("Accuracy (within 2 weeks):", round(100 * matched / nrow(harvest_actual), 1), "%\n")
|
|
}
|
|
|
|
cat("\n\nDetection approach: STATE-BASED\n")
|
|
cat("States: MATURE (CI>3.5) → DECLINING (slope<-0.02) → HARVEST (CI<2.5) → RECOVERY (CI rising)\n")
|
|
cat("Natural duplicate prevention: Must be 6+ months since last harvest to enter new cycle\n")
|
|
cat("Confirmation: Only counts as harvest if followed by recovery (CI rising)\n")
|