197 lines
6.5 KiB
R
197 lines
6.5 KiB
R
# Analyze specific MISSED harvests to understand why detection failed
|
|
suppressPackageStartupMessages({
|
|
library(readxl)
|
|
library(dplyr)
|
|
library(tidyr)
|
|
library(lubridate)
|
|
library(terra)
|
|
library(sf)
|
|
library(here)
|
|
})
|
|
|
|
# Set project directory
|
|
project_dir <- "esa"
|
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
|
|
|
# Source required files
|
|
cat("Loading project configuration...\n")
|
|
source(here("r_app", "parameters_project.R"))
|
|
|
|
# Read pre-extracted CI data
|
|
ci_rds_file <- here("laravel_app/storage/app", project_dir, "Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds")
|
|
ci_data_raw <- readRDS(ci_rds_file) %>% ungroup()
|
|
|
|
time_series <- ci_data_raw %>%
|
|
mutate(
|
|
date = as.Date(Date),
|
|
week = isoweek(date),
|
|
year = isoyear(date)
|
|
) %>%
|
|
select(
|
|
field_id = field,
|
|
date,
|
|
week,
|
|
year,
|
|
mean_ci = FitData
|
|
) %>%
|
|
filter(!is.na(mean_ci), !is.na(date), !is.na(field_id)) %>%
|
|
arrange(field_id, date)
|
|
|
|
# Read actual harvest data
|
|
harvest_actual_all <- read_excel('laravel_app/storage/app/esa/Data/harvest.xlsx') %>%
|
|
mutate(
|
|
season_start = as.Date(season_start),
|
|
season_end = as.Date(season_end)
|
|
) %>%
|
|
filter(!is.na(season_end))
|
|
|
|
fields_with_data <- unique(field_boundaries_sf$field)
|
|
|
|
harvest_actual <- harvest_actual_all %>%
|
|
filter(field %in% fields_with_data) %>%
|
|
filter(!is.na(season_end))
|
|
|
|
cat("=== ANALYZING MISSED HARVESTS ===\n\n")
|
|
|
|
# Fields that were missed in detection results (from previous output)
|
|
missed_cases <- c("00302", "00F25", "00F28", "00P81", "00P82", "00P83", "00P84", "KHWA", "KHWB", "KHWC", "LOMDA")
|
|
|
|
# Analyze each missed field's harvests
|
|
for (field_name in missed_cases[1:5]) { # Analyze first 5 fields
|
|
|
|
field_harvests <- harvest_actual %>%
|
|
filter(field == field_name) %>%
|
|
arrange(season_end)
|
|
|
|
if (nrow(field_harvests) == 0) next
|
|
|
|
cat("\n========================================\n")
|
|
cat("FIELD:", field_name, "\n")
|
|
cat("Total harvests:", nrow(field_harvests), "\n")
|
|
cat("========================================\n\n")
|
|
|
|
# Analyze each harvest for this field
|
|
for (i in 1:min(3, nrow(field_harvests))) { # First 3 harvests
|
|
harvest_date <- field_harvests$season_end[i]
|
|
harvest_week <- isoweek(harvest_date)
|
|
harvest_year <- isoyear(harvest_date)
|
|
|
|
cat("\n--- Harvest", i, "---\n")
|
|
cat("Date:", as.character(harvest_date), "(Week", harvest_week, harvest_year, ")\n\n")
|
|
|
|
# Get CI values around this harvest
|
|
harvest_window <- time_series %>%
|
|
filter(
|
|
field_id == field_name,
|
|
date >= (harvest_date - 30),
|
|
date <= (harvest_date + 30)
|
|
) %>%
|
|
mutate(
|
|
days_from_harvest = as.numeric(date - harvest_date),
|
|
ci_smooth = zoo::rollmean(mean_ci, k = 7, fill = NA, align = "center"),
|
|
ci_lag7 = lag(ci_smooth, 7),
|
|
ci_drop = ci_lag7 - ci_smooth,
|
|
is_low_1.5 = mean_ci < 1.5,
|
|
is_low_2.0 = mean_ci < 2.0,
|
|
is_low_2.5 = mean_ci < 2.5,
|
|
is_drop_0.3 = ci_drop > 0.3,
|
|
is_drop_0.5 = ci_drop > 0.5
|
|
)
|
|
|
|
if (nrow(harvest_window) == 0) {
|
|
cat(" NO DATA available for this harvest period\n")
|
|
next
|
|
}
|
|
|
|
# Summary statistics
|
|
cat("CI Summary (±30 days):\n")
|
|
cat(" Min CI:", round(min(harvest_window$mean_ci, na.rm = TRUE), 2), "\n")
|
|
cat(" Max CI:", round(max(harvest_window$mean_ci, na.rm = TRUE), 2), "\n")
|
|
cat(" Mean CI:", round(mean(harvest_window$mean_ci, na.rm = TRUE), 2), "\n")
|
|
|
|
# CI at/near harvest date
|
|
near_harvest <- harvest_window %>%
|
|
filter(abs(days_from_harvest) <= 3) %>%
|
|
arrange(abs(days_from_harvest))
|
|
|
|
if (nrow(near_harvest) > 0) {
|
|
cat(" CI at harvest date (±3 days):", round(near_harvest$mean_ci[1], 2), "\n")
|
|
}
|
|
|
|
# Find minimum CI and when it occurred
|
|
min_ci_row <- harvest_window %>%
|
|
filter(mean_ci == min(mean_ci, na.rm = TRUE)) %>%
|
|
head(1)
|
|
|
|
cat(" Minimum CI:", round(min_ci_row$mean_ci, 2), "at day", min_ci_row$days_from_harvest, "\n\n")
|
|
|
|
# Count days below different thresholds
|
|
cat("Days with low CI:\n")
|
|
cat(" CI < 1.5:", sum(harvest_window$is_low_1.5, na.rm = TRUE), "days\n")
|
|
cat(" CI < 2.0:", sum(harvest_window$is_low_2.0, na.rm = TRUE), "days\n")
|
|
cat(" CI < 2.5:", sum(harvest_window$is_low_2.5, na.rm = TRUE), "days\n\n")
|
|
|
|
# Find longest consecutive period below threshold
|
|
for (threshold in c(1.5, 2.0, 2.5)) {
|
|
consecutive <- harvest_window %>%
|
|
arrange(date) %>%
|
|
mutate(
|
|
is_low = mean_ci < threshold,
|
|
day_diff = as.numeric(date - lag(date)),
|
|
new_period = is.na(day_diff) | day_diff > 3 | !is_low,
|
|
period_id = cumsum(new_period)
|
|
) %>%
|
|
filter(is_low) %>%
|
|
group_by(period_id) %>%
|
|
summarise(
|
|
start_day = min(days_from_harvest),
|
|
end_day = max(days_from_harvest),
|
|
duration = n(),
|
|
mean_ci_period = mean(mean_ci),
|
|
.groups = "drop"
|
|
) %>%
|
|
arrange(desc(duration))
|
|
|
|
if (nrow(consecutive) > 0) {
|
|
longest <- consecutive[1, ]
|
|
cat("Longest consecutive period (CI <", threshold, "):\n")
|
|
cat(" Duration:", longest$duration, "days\n")
|
|
cat(" Start day:", longest$start_day, ", End day:", longest$end_day, "\n")
|
|
cat(" Mean CI:", round(longest$mean_ci_period, 2), "\n\n")
|
|
}
|
|
}
|
|
|
|
# Show when significant drops occurred
|
|
drops <- harvest_window %>%
|
|
filter(!is.na(ci_drop), ci_drop > 0.3) %>%
|
|
arrange(days_from_harvest)
|
|
|
|
if (nrow(drops) > 0) {
|
|
cat("Significant CI drops (>0.3) detected:\n")
|
|
cat(" First drop at day:", drops$days_from_harvest[1], "(drop:", round(drops$ci_drop[1], 2), ")\n")
|
|
if (nrow(drops) > 1) {
|
|
cat(" Total drops detected:", nrow(drops), "\n")
|
|
}
|
|
cat("\n")
|
|
} else {
|
|
cat("No significant CI drops (>0.3) detected in this period\n\n")
|
|
}
|
|
|
|
# Show daily data around harvest
|
|
cat("Daily CI values (days -7 to +21):\n")
|
|
daily_view <- harvest_window %>%
|
|
filter(days_from_harvest >= -7, days_from_harvest <= 21) %>%
|
|
select(days_from_harvest, date, mean_ci, is_low_2.0) %>%
|
|
arrange(days_from_harvest)
|
|
|
|
print(daily_view, n = 100)
|
|
}
|
|
}
|
|
|
|
cat("\n\n=== SUMMARY ===\n")
|
|
cat("Key observations:\n")
|
|
cat("1. Check if CI actually drops below 2.0 around harvest dates\n")
|
|
cat("2. Check when the minimum CI occurs (before, during, or after harvest)\n")
|
|
cat("3. Check duration of low CI periods\n")
|
|
cat("4. Identify timing offset between reported harvest date and actual low CI period\n")
|