157 lines
5.1 KiB
R
157 lines
5.1 KiB
R
# Examine CI patterns for fields where detection failed
|
|
suppressPackageStartupMessages({
|
|
library(readxl)
|
|
library(dplyr)
|
|
library(tidyr)
|
|
library(lubridate)
|
|
library(here)
|
|
library(ggplot2)
|
|
})
|
|
|
|
project_dir <- "esa"
|
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
|
source(here("r_app", "parameters_project.R"))
|
|
|
|
# Load daily CI data
|
|
ci_data_raw <- readRDS(here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>% ungroup()
|
|
|
|
time_series <- ci_data_raw %>%
|
|
mutate(
|
|
date = as.Date(Date),
|
|
week = isoweek(date),
|
|
year = isoyear(date)
|
|
) %>%
|
|
select(field_id = field, date, week, year, mean_ci = FitData) %>%
|
|
filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>%
|
|
arrange(field_id, date)
|
|
|
|
# Load harvest data
|
|
harvest_actual <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>%
|
|
mutate(
|
|
season_start = as.Date(season_start),
|
|
season_end = as.Date(season_end)
|
|
) %>%
|
|
filter(field %in% unique(field_boundaries_sf$field)) %>%
|
|
filter(!is.na(season_end))
|
|
|
|
cat("=== EXAMINING MISSED HARVESTS ===\n\n")
|
|
|
|
# Focus on specific fields mentioned: 00302, 00F28
|
|
test_fields <- c("00302", "00F28")
|
|
|
|
for (field_name in test_fields) {
|
|
cat("\n\n===============================================\n")
|
|
cat("FIELD:", field_name, "\n")
|
|
cat("===============================================\n\n")
|
|
|
|
# Get harvest dates for this field
|
|
field_harvests <- harvest_actual %>%
|
|
filter(field == field_name) %>%
|
|
arrange(season_end) %>%
|
|
mutate(harvest_week = isoweek(season_end))
|
|
|
|
cat("Harvest dates:\n")
|
|
print(field_harvests %>% select(year, season_end, harvest_week))
|
|
|
|
# Get time series for this field
|
|
field_ts <- time_series %>%
|
|
filter(field_id == field_name) %>%
|
|
arrange(date) %>%
|
|
mutate(
|
|
# Add rolling average
|
|
ci_smooth = zoo::rollmean(mean_ci, k = 7, fill = NA, align = "center"),
|
|
# Add week-over-week change
|
|
ci_lag7 = lag(ci_smooth, 7),
|
|
ci_drop = ci_lag7 - ci_smooth
|
|
)
|
|
|
|
# For each harvest, show ±30 days of data
|
|
for (i in 1:nrow(field_harvests)) {
|
|
h_date <- field_harvests$season_end[i]
|
|
h_year <- field_harvests$year[i]
|
|
|
|
cat("\n--- Harvest", i, ":", as.character(h_date), "(Year:", h_year, ") ---\n")
|
|
|
|
window_data <- field_ts %>%
|
|
filter(date >= (h_date - 30), date <= (h_date + 30)) %>%
|
|
mutate(
|
|
days_from_harvest = as.numeric(date - h_date),
|
|
is_harvest_week = abs(days_from_harvest) <= 3
|
|
)
|
|
|
|
if (nrow(window_data) > 0) {
|
|
cat("\nCI values around harvest (±30 days):\n")
|
|
|
|
# Summary by week relative to harvest
|
|
weekly_summary <- window_data %>%
|
|
mutate(week_offset = floor(days_from_harvest / 7)) %>%
|
|
group_by(week_offset) %>%
|
|
summarise(
|
|
n_days = n(),
|
|
mean_ci = mean(mean_ci, na.rm = TRUE),
|
|
min_ci = min(mean_ci, na.rm = TRUE),
|
|
max_ci = max(mean_ci, na.rm = TRUE),
|
|
mean_drop = mean(ci_drop, na.rm = TRUE),
|
|
max_drop = max(ci_drop, na.rm = TRUE),
|
|
.groups = "drop"
|
|
) %>%
|
|
arrange(week_offset)
|
|
|
|
print(weekly_summary)
|
|
|
|
# Check detection conditions
|
|
cat("\nDetection analysis:\n")
|
|
|
|
# Count days meeting different thresholds
|
|
low_ci_1.5 <- sum(window_data$mean_ci < 1.5, na.rm = TRUE)
|
|
low_ci_2.0 <- sum(window_data$mean_ci < 2.0, na.rm = TRUE)
|
|
low_ci_2.5 <- sum(window_data$mean_ci < 2.5, na.rm = TRUE)
|
|
|
|
drop_0.3 <- sum(window_data$ci_drop > 0.3, na.rm = TRUE)
|
|
drop_0.5 <- sum(window_data$ci_drop > 0.5, na.rm = TRUE)
|
|
drop_0.8 <- sum(window_data$ci_drop > 0.8, na.rm = TRUE)
|
|
|
|
cat(" Days with CI < 1.5:", low_ci_1.5, "\n")
|
|
cat(" Days with CI < 2.0:", low_ci_2.0, "\n")
|
|
cat(" Days with CI < 2.5:", low_ci_2.5, "\n")
|
|
cat(" Days with drop > 0.3:", drop_0.3, "\n")
|
|
cat(" Days with drop > 0.5:", drop_0.5, "\n")
|
|
cat(" Days with drop > 0.8:", drop_0.8, "\n")
|
|
|
|
# Check if there's a sustained low period
|
|
consecutive_low <- window_data %>%
|
|
mutate(is_low = mean_ci < 2.0) %>%
|
|
filter(is_low) %>%
|
|
mutate(
|
|
date_diff = as.numeric(date - lag(date)),
|
|
new_group = is.na(date_diff) | date_diff > 3
|
|
) %>%
|
|
mutate(group_id = cumsum(new_group)) %>%
|
|
group_by(group_id) %>%
|
|
summarise(
|
|
start = min(date),
|
|
end = max(date),
|
|
duration = n(),
|
|
mean_ci = mean(mean_ci),
|
|
.groups = "drop"
|
|
) %>%
|
|
arrange(desc(duration))
|
|
|
|
if (nrow(consecutive_low) > 0) {
|
|
cat("\nLongest consecutive low CI periods (CI < 2.0):\n")
|
|
print(head(consecutive_low, 3))
|
|
} else {
|
|
cat("\nNo sustained low CI periods found (CI < 2.0)\n")
|
|
}
|
|
|
|
} else {
|
|
cat("No data available in this window\n")
|
|
}
|
|
}
|
|
}
|
|
|
|
cat("\n\n=== SUMMARY ===\n")
|
|
cat("This analysis shows why harvests were missed.\n")
|
|
cat("If CI doesn't drop low enough OR doesn't stay low for enough consecutive days,\n")
|
|
cat("the detection algorithm won't trigger.\n")
|