8 week data working now

This commit is contained in:
Timon 2026-01-18 07:39:30 +01:00
parent 3e4430b3be
commit 7975f8ad06

View file

@ -72,7 +72,7 @@
# [ ] Confirm Four_week_trend calculates from 1-4 weeks (graceful degradation)
# [ ] Confirm CV_Trend_Long_Term uses full 8-week regression (when available)
# [ ] Load script 31 output and validate imminent_prob population
#
# [ ] maybe even look into aut calculating the mosaic if mosaic is missing in last 8 weeks
# ============================================================================
# ============================================================================
@ -140,7 +140,7 @@
# ============================================================================
# TEST MODE (for development with limited historical data)
TEST_MODE <- TRUE
TEST_MODE <- FALSE
TEST_MODE_NUM_WEEKS <- 2
# GERMINATION PROGRESS THRESHOLD
@ -618,10 +618,12 @@ get_status_trigger <- function(ci_values, ci_change, age_weeks) {
return(NA_character_)
}
load_historical_field_data <- function(project_dir, current_week, reports_dir, num_weeks = 4) {
load_historical_field_data <- function(project_dir, current_week, reports_dir, num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL) {
historical_data <- list()
loaded_weeks <- c()
missing_weeks <- c()
# First pass: try to load existing weeks
for (lookback in 0:(num_weeks - 1)) {
target_week <- current_week - lookback
if (target_week < 1) target_week <- target_week + 52
@ -639,16 +641,159 @@ load_historical_field_data <- function(project_dir, current_week, reports_dir, n
loaded_weeks <- c(loaded_weeks, target_week)
}, error = function(e) {
message(paste(" Warning: Could not load week", target_week, ":", e$message))
missing_weeks <<- c(missing_weeks, target_week)
})
} else {
missing_weeks <- c(missing_weeks, target_week)
}
}
# If weeks are missing and auto_generate=TRUE, calculate stats from ALL available mosaics
if (length(missing_weeks) > 0 && auto_generate) {
message(paste("⚠ Missing weeks:", paste(missing_weeks, collapse = ", ")))
message("Scanning for ALL available weekly mosaics and calculating stats...\n")
# Use field_boundaries_sf passed in (loaded in main)
if (is.null(field_boundaries_sf)) {
message(" Error: field_boundaries_sf not provided - cannot auto-generate")
return(historical_data)
}
if (!exists("weekly_tile_max")) {
message(" ✗ weekly_tile_max path not defined")
return(historical_data)
}
# Find the mosaic directory (with or without 5x5 subdirectory)
check_paths <- c(file.path(weekly_tile_max, "5x5"), weekly_tile_max)
mosaic_scan_dir <- NA
for (check_path in check_paths) {
if (dir.exists(check_path)) {
tif_files <- list.files(check_path, pattern = "week_.*\\.tif$", full.names = TRUE)
if (length(tif_files) > 0) {
mosaic_scan_dir <- check_path
break
}
}
}
if (is.na(mosaic_scan_dir)) {
message(" ✗ No mosaic files found in weekly_tile_max")
return(historical_data)
}
# Calculate actual date range for last 8 weeks
# Don't guess weeks - derive them from actual dates
weeks_to_load <- 8
today <- Sys.Date()
target_dates <- today - (0:(weeks_to_load - 1)) * 7
# For each date, calculate what week/year it falls in
expected_weeks <- data.frame(
date = target_dates,
week = as.numeric(format(target_dates, "%V")),
year = as.numeric(format(target_dates, "%Y")),
stringsAsFactors = FALSE
)
expected_weeks <- unique(expected_weeks)
message(paste(" Expected weeks (last 8 from", format(today, "%Y-%m-%d"), "):"))
for (i in seq_len(nrow(expected_weeks))) {
message(paste(" Week", sprintf("%02d", expected_weeks$week[i]), expected_weeks$year[i]))
}
message("")
# Parse all week_YY_YYYY_NN.tif files to find unique (week, year) combinations
tif_files <- list.files(mosaic_scan_dir, pattern = "week_([0-9]{2})_([0-9]{4})_[0-9]{2}\\.tif$",
full.names = FALSE)
# Extract week and year from filenames
available_weeks <- data.frame()
for (filename in tif_files) {
# Parse: week_02_2026_03.tif
matches <- regmatches(filename, gregexpr("week_([0-9]{2})_([0-9]{4})", filename))[[1]]
if (length(matches) > 0) {
week_year <- strsplit(matches[1], "_")[[1]]
if (length(week_year) == 3) {
week_num <- as.numeric(week_year[2])
year_num <- as.numeric(week_year[3])
# Only keep weeks that are in expected_weeks
if (week_num %in% expected_weeks$week && year_num %in% expected_weeks$year) {
available_weeks <- rbind(available_weeks,
data.frame(week = week_num, year = year_num))
}
}
}
}
# Remove duplicates and sort by date (descending - most recent first)
available_weeks <- unique(available_weeks)
# Merge with dates to sort properly
available_weeks <- merge(available_weeks, expected_weeks[, c("week", "year", "date")], by = c("week", "year"))
available_weeks <- available_weeks[order(available_weeks$date, decreasing = TRUE), ]
if (nrow(available_weeks) == 0) {
message(" ✗ No matching mosaic files found")
message(paste(" Scanned directory:", mosaic_scan_dir))
return(historical_data)
}
message(paste(" Found", nrow(available_weeks), "week(s) with available mosaics:"))
# Calculate stats for each available week
for (i in seq_len(nrow(available_weeks))) {
week_to_calc <- available_weeks$week[i]
year_to_calc <- available_weeks$year[i]
date_to_calc <- available_weeks$date[i]
# Find all tiles for this week/year combination
tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_to_calc, year_to_calc)
tile_files <- list.files(mosaic_scan_dir, pattern = tile_pattern, full.names = TRUE)
if (length(tile_files) == 0) {
message(paste(" ✗ Week", sprintf("%02d", week_to_calc), year_to_calc, "- no tiles found"))
next
}
message(paste(" ✓ Week", sprintf("%02d", week_to_calc), year_to_calc, "-", length(tile_files), "mosaics"))
tryCatch({
# Calculate stats for this week/year
week_stats <- load_or_calculate_weekly_stats(
week_num = week_to_calc,
year = year_to_calc,
project_dir = project_dir,
field_boundaries_sf = field_boundaries_sf,
mosaic_dir = mosaic_scan_dir,
reports_dir = reports_dir,
report_date = date_to_calc # Use actual date for this week
)
if (!is.null(week_stats) && nrow(week_stats) > 0) {
message(paste(" ✓ Calculated stats for", nrow(week_stats), "fields"))
# Add to historical data (use unique key: week_year combo)
historical_data[[length(historical_data) + 1]] <- list(
week = week_to_calc,
year = year_to_calc,
data = week_stats
)
loaded_weeks <- c(loaded_weeks, paste0(week_to_calc, "_", year_to_calc))
}
}, error = function(e) {
message(paste(" ✗ Error:", e$message))
})
}
}
if (length(historical_data) == 0) {
message(paste("Warning: No historical field data found for trend calculations"))
message(paste("Error: No historical field data found and could not auto-generate weeks"))
return(NULL)
}
message(paste("Loaded", length(historical_data), "weeks of historical data:",
message(paste("Loaded", length(historical_data), "weeks of historical data:",
paste(loaded_weeks, collapse = ", ")))
return(historical_data)
@ -1766,8 +1911,12 @@ main <- function() {
args <- commandArgs(trailingOnly = TRUE)
# end_date (arg 1)
# Priority: 1) Command-line arg, 2) Global end_date variable (for recursive calls), 3) Global end_date_str, 4) Sys.Date()
end_date <- if (length(args) >= 1 && !is.na(args[1])) {
as.Date(args[1])
} else if (exists("end_date", envir = .GlobalEnv)) {
# For recursive calls, use the end_date that was set in the global environment
get("end_date", envir = .GlobalEnv)
} else if (exists("end_date_str", envir = .GlobalEnv)) {
as.Date(get("end_date_str", envir = .GlobalEnv))
} else {
@ -1871,11 +2020,18 @@ main <- function() {
})
message("Loading historical field data for trend calculations...")
num_weeks_to_load <- if (TEST_MODE) TEST_MODE_NUM_WEEKS else max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG)
if (TEST_MODE) {
message(paste(" TEST MODE: Loading only", num_weeks_to_load, "weeks"))
}
historical_data <- load_historical_field_data(project_dir, current_week, reports_dir, num_weeks = num_weeks_to_load)
# Load up to 8 weeks (max of 4-week and 8-week trend requirements)
# Function gracefully handles missing weeks and loads whatever exists
num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG) # Always 8
message(paste(" Attempting to load up to", num_weeks_to_load, "weeks of historical data..."))
# Only auto-generate on first call (not in recursive calls from within load_historical_field_data)
allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv)
historical_data <- load_historical_field_data(project_dir, current_week, reports_dir,
num_weeks = num_weeks_to_load,
auto_generate = allow_auto_gen,
field_boundaries_sf = field_boundaries_sf)
planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)