325 lines
11 KiB
R
325 lines
11 KiB
R
# Deep analysis of CI patterns ±30 days around actual harvest dates
|
|
# Goal: Find the exact signature of harvest - decline, bottom, stabilization
|
|
|
|
suppressPackageStartupMessages({
|
|
library(readxl)
|
|
library(dplyr)
|
|
library(tidyr)
|
|
library(lubridate)
|
|
library(here)
|
|
library(ggplot2)
|
|
})
|
|
|
|
project_dir <- "esa"
|
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
|
source(here("r_app", "parameters_project.R"))
|
|
|
|
# Read daily CI data
|
|
ci_rds_file <- here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds")
|
|
ci_data_raw <- readRDS(ci_rds_file) %>% ungroup()
|
|
|
|
daily_ci <- ci_data_raw %>%
|
|
mutate(date = as.Date(Date)) %>%
|
|
select(field_id = field, date, ci = FitData) %>%
|
|
arrange(field_id, date)
|
|
|
|
# Read actual harvest data
|
|
harvest_actual <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>%
|
|
mutate(
|
|
season_start = as.Date(season_start),
|
|
season_end = as.Date(season_end)
|
|
) %>%
|
|
filter(!is.na(season_end))
|
|
|
|
cat("=== ANALYZING CI PATTERNS AROUND ACTUAL HARVEST DATES ===\n\n")
|
|
|
|
# For each harvest, get ±30 days of data
|
|
harvest_windows <- list()
|
|
|
|
for (i in 1:nrow(harvest_actual)) {
|
|
harvest <- harvest_actual[i, ]
|
|
field <- harvest$field
|
|
harvest_date <- harvest$season_end
|
|
|
|
# Get CI data ±30 days
|
|
window_data <- daily_ci %>%
|
|
filter(field_id == field,
|
|
date >= (harvest_date - 30),
|
|
date <= (harvest_date + 30)) %>%
|
|
arrange(date) %>%
|
|
mutate(
|
|
day_offset = as.numeric(date - harvest_date), # Negative = before, positive = after
|
|
|
|
# Calculate daily changes
|
|
ci_change = ci - lag(ci),
|
|
ci_change_3d = ci - lag(ci, 3),
|
|
ci_change_7d = ci - lag(ci, 7),
|
|
|
|
# Calculate acceleration (rate of change of change)
|
|
ci_acceleration = ci_change - lag(ci_change),
|
|
|
|
# Rolling statistics
|
|
ci_mean_7d = zoo::rollmean(ci, k = 7, fill = NA, align = "center"),
|
|
ci_sd_7d = zoo::rollapply(ci, width = 7, FUN = sd, fill = NA, align = "center"),
|
|
ci_min_7d = zoo::rollapply(ci, width = 7, FUN = min, fill = NA, align = "center"),
|
|
|
|
# Detect stable periods (low variability)
|
|
is_stable = !is.na(ci_sd_7d) & ci_sd_7d < 0.3 & ci < 2.5
|
|
)
|
|
|
|
if (nrow(window_data) > 0) {
|
|
window_data$field_id <- field
|
|
window_data$harvest_date <- harvest_date
|
|
window_data$harvest_id <- i
|
|
harvest_windows[[i]] <- window_data
|
|
}
|
|
}
|
|
|
|
all_windows <- bind_rows(harvest_windows)
|
|
|
|
cat(sprintf("Analyzed %d harvest events with ±30 day windows\n\n", length(harvest_windows)))
|
|
|
|
# ============================================================================
|
|
# ANALYSIS 1: What happens at the exact harvest date?
|
|
# ============================================================================
|
|
|
|
cat("=== ANALYSIS 1: CI AT HARVEST DATE (day 0) ===\n")
|
|
|
|
harvest_day_stats <- all_windows %>%
|
|
filter(day_offset == 0) %>%
|
|
summarise(
|
|
count = n(),
|
|
mean_ci = mean(ci, na.rm = TRUE),
|
|
median_ci = median(ci, na.rm = TRUE),
|
|
sd_ci = sd(ci, na.rm = TRUE),
|
|
min_ci = min(ci, na.rm = TRUE),
|
|
max_ci = max(ci, na.rm = TRUE),
|
|
q25 = quantile(ci, 0.25, na.rm = TRUE),
|
|
q75 = quantile(ci, 0.75, na.rm = TRUE)
|
|
)
|
|
|
|
print(harvest_day_stats)
|
|
|
|
# ============================================================================
|
|
# ANALYSIS 2: When is the absolute minimum CI?
|
|
# ============================================================================
|
|
|
|
cat("\n=== ANALYSIS 2: WHEN DOES CI REACH MINIMUM? ===\n")
|
|
|
|
min_ci_timing <- all_windows %>%
|
|
group_by(harvest_id, field_id, harvest_date) %>%
|
|
summarise(
|
|
min_ci_value = min(ci, na.rm = TRUE),
|
|
min_ci_day = day_offset[which.min(ci)],
|
|
.groups = "drop"
|
|
)
|
|
|
|
cat(sprintf("\nWhen does MINIMUM CI occur relative to harvest date:\n"))
|
|
cat(sprintf(" Mean offset: %.1f days (%.1f = before harvest, + = after)\n",
|
|
mean(min_ci_timing$min_ci_day, na.rm = TRUE),
|
|
mean(min_ci_timing$min_ci_day, na.rm = TRUE)))
|
|
cat(sprintf(" Median offset: %.1f days\n", median(min_ci_timing$min_ci_day, na.rm = TRUE)))
|
|
cat(sprintf(" Range: %.0f to %.0f days\n",
|
|
min(min_ci_timing$min_ci_day, na.rm = TRUE),
|
|
max(min_ci_timing$min_ci_day, na.rm = TRUE)))
|
|
|
|
timing_distribution <- min_ci_timing %>%
|
|
mutate(timing_category = case_when(
|
|
min_ci_day < -7 ~ "Before harvest (>7d early)",
|
|
min_ci_day >= -7 & min_ci_day < 0 ~ "Just before harvest (0-7d early)",
|
|
min_ci_day == 0 ~ "On harvest date",
|
|
min_ci_day > 0 & min_ci_day <= 7 ~ "Just after harvest (0-7d late)",
|
|
min_ci_day > 7 ~ "After harvest (>7d late)"
|
|
)) %>%
|
|
count(timing_category)
|
|
|
|
print(timing_distribution)
|
|
|
|
# ============================================================================
|
|
# ANALYSIS 3: Decline rate before harvest
|
|
# ============================================================================
|
|
|
|
cat("\n=== ANALYSIS 3: DECLINE PATTERN BEFORE HARVEST ===\n")
|
|
|
|
decline_stats <- all_windows %>%
|
|
filter(day_offset >= -30 & day_offset < 0) %>%
|
|
group_by(week_before = ceiling(abs(day_offset) / 7)) %>%
|
|
summarise(
|
|
mean_ci = mean(ci, na.rm = TRUE),
|
|
mean_daily_change = mean(ci_change, na.rm = TRUE),
|
|
mean_7d_change = mean(ci_change_7d, na.rm = TRUE),
|
|
count = n(),
|
|
.groups = "drop"
|
|
) %>%
|
|
arrange(desc(week_before))
|
|
|
|
cat("\nCI decline by week before harvest:\n")
|
|
print(decline_stats)
|
|
|
|
# ============================================================================
|
|
# ANALYSIS 4: Stabilization after harvest
|
|
# ============================================================================
|
|
|
|
cat("\n=== ANALYSIS 4: WHEN DOES CI STABILIZE (stop declining)? ===\n")
|
|
|
|
stabilization <- all_windows %>%
|
|
filter(day_offset >= 0 & day_offset <= 30) %>%
|
|
group_by(day_offset) %>%
|
|
summarise(
|
|
mean_ci = mean(ci, na.rm = TRUE),
|
|
sd_ci = sd(ci, na.rm = TRUE),
|
|
pct_stable = 100 * mean(is_stable, na.rm = TRUE),
|
|
mean_daily_change = mean(ci_change, na.rm = TRUE),
|
|
.groups = "drop"
|
|
)
|
|
|
|
cat("\nPost-harvest stabilization (first 14 days):\n")
|
|
print(stabilization %>% filter(day_offset <= 14))
|
|
|
|
# Find first day where >50% of fields show stable CI
|
|
first_stable_day <- stabilization %>%
|
|
filter(pct_stable > 50) %>%
|
|
summarise(first_day = min(day_offset, na.rm = TRUE))
|
|
|
|
cat(sprintf("\n>50%% of fields show stable CI by day +%.0f after harvest\n",
|
|
first_stable_day$first_day))
|
|
|
|
# ============================================================================
|
|
# ANALYSIS 5: Threshold crossings
|
|
# ============================================================================
|
|
|
|
cat("\n=== ANALYSIS 5: THRESHOLD CROSSINGS BEFORE HARVEST ===\n")
|
|
|
|
thresholds <- c(3.0, 2.5, 2.0, 1.8, 1.5)
|
|
|
|
threshold_stats <- lapply(thresholds, function(thresh) {
|
|
crossings <- all_windows %>%
|
|
filter(day_offset < 0) %>%
|
|
group_by(harvest_id) %>%
|
|
summarise(
|
|
first_below = min(day_offset[ci < thresh], na.rm = TRUE),
|
|
.groups = "drop"
|
|
) %>%
|
|
filter(is.finite(first_below))
|
|
|
|
if (nrow(crossings) > 0) {
|
|
data.frame(
|
|
threshold = thresh,
|
|
n_crossed = nrow(crossings),
|
|
mean_days_before = mean(abs(crossings$first_below)),
|
|
median_days_before = median(abs(crossings$first_below)),
|
|
pct_crossed = 100 * nrow(crossings) / length(unique(all_windows$harvest_id))
|
|
)
|
|
} else {
|
|
data.frame(threshold = thresh, n_crossed = 0, mean_days_before = NA,
|
|
median_days_before = NA, pct_crossed = 0)
|
|
}
|
|
}) %>% bind_rows()
|
|
|
|
print(threshold_stats)
|
|
|
|
# ============================================================================
|
|
# VISUALIZATION
|
|
# ============================================================================
|
|
|
|
cat("\n=== CREATING VISUALIZATION ===\n")
|
|
|
|
# Average CI pattern across all harvests
|
|
avg_pattern <- all_windows %>%
|
|
group_by(day_offset) %>%
|
|
summarise(
|
|
mean_ci = mean(ci, na.rm = TRUE),
|
|
median_ci = median(ci, na.rm = TRUE),
|
|
q25_ci = quantile(ci, 0.25, na.rm = TRUE),
|
|
q75_ci = quantile(ci, 0.75, na.rm = TRUE),
|
|
sd_ci = sd(ci, na.rm = TRUE),
|
|
.groups = "drop"
|
|
)
|
|
|
|
png("harvest_ci_pattern_analysis.png", width = 1400, height = 900, res = 120)
|
|
|
|
par(mfrow = c(2, 2))
|
|
|
|
# Plot 1: Average CI pattern
|
|
plot(avg_pattern$day_offset, avg_pattern$mean_ci, type = "l", lwd = 2,
|
|
xlab = "Days from harvest", ylab = "CI",
|
|
main = "Average CI Pattern Around Harvest",
|
|
ylim = c(0, max(avg_pattern$q75_ci, na.rm = TRUE)))
|
|
polygon(c(avg_pattern$day_offset, rev(avg_pattern$day_offset)),
|
|
c(avg_pattern$q25_ci, rev(avg_pattern$q75_ci)),
|
|
col = rgb(0, 0, 1, 0.2), border = NA)
|
|
abline(v = 0, col = "red", lty = 2, lwd = 2)
|
|
abline(h = c(1.5, 2.0, 2.5), col = c("blue", "orange", "green"), lty = 3)
|
|
legend("topright", legend = c("Mean CI", "Q25-Q75", "Harvest date", "Thresholds 1.5, 2.0, 2.5"),
|
|
lwd = c(2, 8, 2, 1), col = c("black", rgb(0,0,1,0.2), "red", "blue"))
|
|
|
|
# Plot 2: Daily change rate
|
|
avg_change <- all_windows %>%
|
|
filter(!is.na(ci_change)) %>%
|
|
group_by(day_offset) %>%
|
|
summarise(mean_change = mean(ci_change, na.rm = TRUE), .groups = "drop")
|
|
|
|
plot(avg_change$day_offset, avg_change$mean_change, type = "l", lwd = 2,
|
|
xlab = "Days from harvest", ylab = "Daily CI change",
|
|
main = "Rate of CI Change")
|
|
abline(v = 0, col = "red", lty = 2)
|
|
abline(h = 0, col = "gray", lty = 3)
|
|
|
|
# Plot 3: Minimum CI timing distribution
|
|
hist(min_ci_timing$min_ci_day, breaks = 20,
|
|
xlab = "Day offset when minimum CI occurs",
|
|
main = "When Does CI Reach Minimum?",
|
|
col = "lightblue")
|
|
abline(v = 0, col = "red", lwd = 2, lty = 2)
|
|
abline(v = median(min_ci_timing$min_ci_day, na.rm = TRUE), col = "blue", lwd = 2)
|
|
|
|
# Plot 4: Threshold crossing timing
|
|
barplot(threshold_stats$median_days_before,
|
|
names.arg = threshold_stats$threshold,
|
|
xlab = "CI Threshold",
|
|
ylab = "Median days before harvest",
|
|
main = "When Are Thresholds Crossed?",
|
|
col = "lightgreen")
|
|
|
|
dev.off()
|
|
|
|
cat("\nPlot saved: harvest_ci_pattern_analysis.png\n")
|
|
|
|
# ============================================================================
|
|
# RECOMMENDATIONS
|
|
# ============================================================================
|
|
|
|
cat("\n=== RECOMMENDATIONS FOR HARVEST DETECTION ===\n\n")
|
|
|
|
# Find best threshold based on timing
|
|
best_threshold <- threshold_stats %>%
|
|
filter(median_days_before >= 7 & median_days_before <= 14) %>%
|
|
arrange(desc(pct_crossed))
|
|
|
|
if (nrow(best_threshold) > 0) {
|
|
cat(sprintf("BEST EARLY WARNING THRESHOLD: CI < %.1f\n", best_threshold$threshold[1]))
|
|
cat(sprintf(" - Crossed %.0f%% of the time\n", best_threshold$pct_crossed[1]))
|
|
cat(sprintf(" - Median %.1f days before harvest\n", best_threshold$median_days_before[1]))
|
|
cat(sprintf(" - MESSAGE: 'Harvest expected within 7-14 days'\n\n"))
|
|
}
|
|
|
|
cat("HARVEST COMPLETION SIGNAL:\n")
|
|
cat(sprintf(" - Look for stabilization: SD < 0.3 for 7 days\n"))
|
|
cat(sprintf(" - Typically occurs around day +%.0f after reported harvest\n", first_stable_day$first_day))
|
|
cat(sprintf(" - MESSAGE: 'Harvest likely completed in recent days'\n\n"))
|
|
|
|
cat("SHARP DECLINE DETECTION:\n")
|
|
sharp_decline_threshold <- -0.5 # CI dropping >0.5 per day
|
|
sharp_declines <- all_windows %>%
|
|
filter(!is.na(ci_change) & ci_change < sharp_decline_threshold) %>%
|
|
group_by(day_offset) %>%
|
|
summarise(count = n(), .groups = "drop") %>%
|
|
filter(day_offset < 0) %>%
|
|
arrange(desc(count))
|
|
|
|
if (nrow(sharp_declines) > 0) {
|
|
cat(sprintf(" - Sharp drops (>0.5/day) most common at day %.0f before harvest\n",
|
|
sharp_declines$day_offset[1]))
|
|
cat(sprintf(" - Can trigger immediate alert: 'Sharp decline detected - investigate field'\n"))
|
|
}
|