# 80_WEEKLY_STATS_UTILS.R # ============================================================================ # UTILITY FUNCTIONS FOR WEEKLY STATISTICS CALCULATION # # This file contains reusable functions for: # - Tile grid management # - Tile loading and merging # - Field-level statistics calculation from CI rasters # - Weekly stats caching (RDS/CSV export/import) # - KPI trend calculations # - Historical data loading and auto-generation from mosaics # # Used by: 80_calculate_kpis.R, run_full_pipeline.R, other reporting scripts # ============================================================================ # ============================================================================ # WEEK/YEAR CALCULATION HELPERS (Consistent across all scripts) # ============================================================================ #' Calculate week and year for a given lookback offset #' This function handles ISO 8601 week numbering with proper year wrapping #' when crossing year boundaries (e.g., week 01/2026 -> week 52/2025) #' #' @param current_week ISO week number (1-53) #' @param current_year ISO week year (from format(..., "%G")) #' @param offset_weeks Number of weeks to go back (0 = current week, 1 = previous week, etc.) #' #' @return List with: week (ISO week number), year (ISO week year) #' #' @details #' This is the authoritative week/year calculation function. #' Used by: #' - load_historical_field_data() - to find RDS/CSV files for 4-week lookback #' - Script 80 main - to calculate previous week with year wrapping #' - Any other script needing to walk backwards through weeks #' #' Example: Week 01/2026, offset=1 -> returns list(week=52, year=2025) calculate_target_week_and_year <- function(current_week, current_year, offset_weeks = 0) { target_week <- current_week - offset_weeks target_year <- current_year # Handle wrapping: when going back from week 1, wrap to week 52 of previous year while (target_week < 1) { target_week <- target_week + 52 target_year <- target_year - 1 } return(list(week = target_week, year = target_year)) } # ============================================================================ # TILE-AWARE HELPER FUNCTIONS # ============================================================================ get_tile_ids_for_field <- function(field_geom, tile_grid, field_id = NULL) { if (inherits(field_geom, "sf")) { field_bbox <- sf::st_bbox(field_geom) field_xmin <- field_bbox["xmin"] field_xmax <- field_bbox["xmax"] field_ymin <- field_bbox["ymin"] field_ymax <- field_bbox["ymax"] } else if (inherits(field_geom, "SpatVector")) { field_bbox <- terra::ext(field_geom) field_xmin <- field_bbox$xmin field_xmax <- field_bbox$xmax field_ymin <- field_bbox$ymin field_ymax <- field_bbox$ymax } else { stop("field_geom must be sf or terra::vect object") } intersecting_tiles <- tile_grid$id[ !(tile_grid$xmax < field_xmin | tile_grid$xmin > field_xmax | tile_grid$ymax < field_ymin | tile_grid$ymin > field_ymax) ] return(as.numeric(intersecting_tiles)) } load_tiles_for_field <- function(field_geom, tile_ids, week_num, year, mosaic_dir) { if (length(tile_ids) == 0) { return(NULL) } tiles_list <- list() for (tile_id in sort(tile_ids)) { tile_filename <- sprintf("week_%02d_%d_%02d.tif", week_num, year, tile_id) tile_path <- file.path(mosaic_dir, tile_filename) if (file.exists(tile_path)) { tryCatch({ tile_rast <- terra::rast(tile_path) ci_band <- terra::subset(tile_rast, 5) tiles_list[[length(tiles_list) + 1]] <- ci_band }, error = function(e) { message(paste(" Warning: Could not load tile", tile_id, ":", e$message)) }) } } if (length(tiles_list) == 0) { return(NULL) } if (length(tiles_list) == 1) { return(tiles_list[[1]]) } else { tryCatch({ rsrc <- terra::sprc(tiles_list) merged <- terra::mosaic(rsrc, fun = "max") return(merged) }, error = function(e) { message(paste(" Warning: Could not merge tiles:", e$message)) return(tiles_list[[1]]) }) } } build_tile_grid <- function(mosaic_dir, week_num, year) { # Handle grid-size subdirectories (e.g., weekly_tile_max/5x5/) detected_grid_size <- NA if (dir.exists(mosaic_dir)) { subfolders <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE) grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) if (length(grid_patterns) > 0) { detected_grid_size <- grid_patterns[1] mosaic_dir <- file.path(mosaic_dir, detected_grid_size) message(paste(" Using grid-size subdirectory:", detected_grid_size)) } } tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE) if (length(tile_files) == 0) { stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir)) } tile_grid <- data.frame( id = integer(), xmin = numeric(), xmax = numeric(), ymin = numeric(), ymax = numeric(), stringsAsFactors = FALSE ) for (tile_file in tile_files) { tryCatch({ matches <- regmatches(basename(tile_file), regexpr("_([0-9]{2})\\.tif$", basename(tile_file))) if (length(matches) > 0) { tile_id <- as.integer(sub("_|\\.tif", "", matches[1])) tile_rast <- terra::rast(tile_file) tile_ext <- terra::ext(tile_rast) tile_grid <- rbind(tile_grid, data.frame( id = tile_id, xmin = tile_ext$xmin, xmax = tile_ext$xmax, ymin = tile_ext$ymin, ymax = tile_ext$ymax, stringsAsFactors = FALSE )) } }, error = function(e) { message(paste(" Warning: Could not process tile", basename(tile_file), ":", e$message)) }) } if (nrow(tile_grid) == 0) { stop("Could not extract extents from any tile files") } return(list( tile_grid = tile_grid, mosaic_dir = mosaic_dir, grid_size = detected_grid_size )) } # ============================================================================ # STATISTICAL CATEGORIZATION FUNCTIONS # ============================================================================ categorize_four_week_trend <- function(ci_values_list) { if (is.null(ci_values_list) || length(ci_values_list) < 2) { return(NA_character_) } ci_values_list <- ci_values_list[!is.na(ci_values_list)] if (length(ci_values_list) < 2) { return(NA_character_) } weekly_changes <- diff(ci_values_list) avg_weekly_change <- mean(weekly_changes, na.rm = TRUE) if (avg_weekly_change >= FOUR_WEEK_TREND_STRONG_GROWTH_MIN) { return("strong growth") } else if (avg_weekly_change >= FOUR_WEEK_TREND_GROWTH_MIN && avg_weekly_change < FOUR_WEEK_TREND_GROWTH_MAX) { return("growth") } else if (abs(avg_weekly_change) <= FOUR_WEEK_TREND_NO_GROWTH_RANGE) { return("no growth") } else if (avg_weekly_change <= FOUR_WEEK_TREND_DECLINE_MIN && avg_weekly_change > FOUR_WEEK_TREND_STRONG_DECLINE_MAX) { return("decline") } else if (avg_weekly_change < FOUR_WEEK_TREND_STRONG_DECLINE_MAX) { return("strong decline") } else { return("no growth") } } round_cloud_to_intervals <- function(cloud_pct_clear) { if (is.na(cloud_pct_clear)) { return(NA_character_) } if (cloud_pct_clear < 10) return("0-10%") if (cloud_pct_clear < 20) return("10-20%") if (cloud_pct_clear < 30) return("20-30%") if (cloud_pct_clear < 40) return("30-40%") if (cloud_pct_clear < 50) return("40-50%") if (cloud_pct_clear < 60) return("50-60%") if (cloud_pct_clear < 70) return("60-70%") if (cloud_pct_clear < 80) return("70-80%") if (cloud_pct_clear < 90) return("80-90%") if (cloud_pct_clear < 95) return("90-95%") return("95-100%") } get_ci_percentiles <- function(ci_values) { if (is.null(ci_values) || length(ci_values) == 0) { return(NA_character_) } ci_values <- ci_values[!is.na(ci_values)] if (length(ci_values) == 0) { return(NA_character_) } p10 <- quantile(ci_values, CI_PERCENTILE_LOW, na.rm = TRUE) p90 <- quantile(ci_values, CI_PERCENTILE_HIGH, na.rm = TRUE) return(sprintf("%.1f-%.1f", p10, p90)) } calculate_cv_trend <- function(cv_current, cv_previous) { if (is.na(cv_current) || is.na(cv_previous)) { return(NA_real_) } return(round(cv_current - cv_previous, 4)) } calculate_four_week_trend <- function(mean_ci_values) { #' Calculate four-week CI trend from available weeks #' Uses whatever weeks are available (1-4 weeks) to estimate trend if (is.null(mean_ci_values) || length(mean_ci_values) == 0) { return(NA_real_) } ci_clean <- mean_ci_values[!is.na(mean_ci_values)] if (length(ci_clean) < 2) { return(NA_real_) } trend <- ci_clean[length(ci_clean)] - ci_clean[1] return(round(trend, 2)) } categorize_cv_slope <- function(slope) { #' Categorize CV slope (8-week regression) into field uniformity interpretation if (is.na(slope)) { return(NA_character_) } if (slope <= CV_SLOPE_IMPROVEMENT_MIN) { return("Excellent uniformity") } else if (slope < CV_SLOPE_HOMOGENOUS_MIN) { return("Homogenous growth") } else if (slope <= CV_SLOPE_HOMOGENOUS_MAX) { return("Homogenous growth") } else if (slope <= CV_SLOPE_PATCHINESS_MAX) { return("Minor patchiness") } else { return("Severe fragmentation") } } calculate_cv_trend_long_term <- function(cv_values) { #' Calculate 8-week CV trend via linear regression slope if (is.null(cv_values) || length(cv_values) == 0) { return(NA_real_) } cv_clean <- cv_values[!is.na(cv_values)] if (length(cv_clean) < 2) { return(NA_real_) } weeks <- seq_along(cv_clean) tryCatch({ lm_fit <- lm(cv_clean ~ weeks) slope <- coef(lm_fit)["weeks"] return(round(as.numeric(slope), 4)) }, error = function(e) { return(NA_real_) }) } # ============================================================================ # HELPER FUNCTIONS # ============================================================================ get_phase_by_age <- function(age_weeks) { if (is.na(age_weeks)) return(NA_character_) for (i in seq_len(nrow(PHASE_DEFINITIONS))) { if (age_weeks >= PHASE_DEFINITIONS$age_start[i] && age_weeks <= PHASE_DEFINITIONS$age_end[i]) { return(PHASE_DEFINITIONS$phase[i]) } } return("Unknown") } get_status_trigger <- function(ci_values, ci_change, age_weeks) { if (is.na(age_weeks) || length(ci_values) == 0) return(NA_character_) ci_values <- ci_values[!is.na(ci_values)] if (length(ci_values) == 0) return(NA_character_) pct_above_2 <- sum(ci_values > 2) / length(ci_values) * 100 pct_at_or_above_2 <- sum(ci_values >= 2) / length(ci_values) * 100 ci_cv <- if (mean(ci_values, na.rm = TRUE) > 0) sd(ci_values) / mean(ci_values, na.rm = TRUE) else 0 mean_ci <- mean(ci_values, na.rm = TRUE) if (age_weeks >= 0 && age_weeks <= 6) { if (pct_at_or_above_2 >= 70) { return("germination_complete") } else if (pct_above_2 > 10) { return("germination_started") } } if (age_weeks >= 45) { return("harvest_ready") } if (age_weeks > 6 && !is.na(ci_change) && ci_change < -1.5 && ci_cv < 0.25) { return("stress_detected_whole_field") } if (age_weeks > 6 && !is.na(ci_change) && ci_change > 1.5) { return("strong_recovery") } if (age_weeks >= 4 && age_weeks < 39 && !is.na(ci_change) && ci_change > 0.2) { return("growth_on_track") } if (age_weeks >= 39 && age_weeks < 45 && mean_ci > 3.5) { return("maturation_progressing") } return(NA_character_) } extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) { # Extract planting dates from harvest.xlsx (season_start column) # Returns: data.frame with columns (field_id, planting_date) if (is.null(harvesting_data) || nrow(harvesting_data) == 0) { message("Warning: No harvesting data available - planting dates will be NA.") if (!is.null(field_boundaries_sf)) { return(data.frame( field_id = field_boundaries_sf$field, planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)), stringsAsFactors = FALSE )) } return(NULL) } tryCatch({ planting_dates <- harvesting_data %>% arrange(field, desc(season_start)) %>% distinct(field, .keep_all = TRUE) %>% select(field, season_start) %>% rename(field_id = field, planting_date = season_start) %>% filter(!is.na(planting_date)) %>% as.data.frame() message(paste("Extracted planting dates for", nrow(planting_dates), "fields from harvest.xlsx")) return(planting_dates) }, error = function(e) { message(paste("Error extracting planting dates:", e$message)) return(NULL) }) } # ============================================================================ # MODULAR STATISTICS CALCULATION # ============================================================================ calculate_field_statistics <- function(field_boundaries_sf, week_num, year, mosaic_dir, report_date = Sys.Date()) { message(paste("Calculating statistics for all fields - Week", week_num, year)) # Support both tile-based and single-file mosaics tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) single_file_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year) # Try tile-based first tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE) # If no tiles, try single-file if (length(tile_files) == 0) { single_file <- list.files(mosaic_dir, pattern = single_file_pattern, full.names = TRUE) if (length(single_file) > 0) { message(paste(" Using single-file mosaic for week", week_num)) tile_files <- single_file[1] # Use first match as single "tile" } else { stop(paste("No mosaic files found for week", week_num, year, "in", mosaic_dir)) } } message(paste(" Found", length(tile_files), "mosaic file(s) for week", week_num)) results_list <- list() fields_processed <- 0 for (tile_idx in seq_along(tile_files)) { tile_file <- tile_files[tile_idx] tryCatch({ current_rast <- terra::rast(tile_file) ci_band <- current_rast[["CI"]] if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) { message(paste(" [SKIP] Tile", basename(tile_file), "- CI band not found")) return(NULL) } extracted <- terra::extract(ci_band, field_boundaries_sf, na.rm = FALSE) unique_field_ids <- unique(extracted$ID[!is.na(extracted$ID)]) for (field_poly_idx in unique_field_ids) { field_id <- field_boundaries_sf$field[field_poly_idx] ci_vals <- extracted$CI[extracted$ID == field_poly_idx] ci_vals <- ci_vals[!is.na(ci_vals)] if (length(ci_vals) == 0) { next } mean_ci <- mean(ci_vals, na.rm = TRUE) ci_std <- sd(ci_vals, na.rm = TRUE) cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_ range_min <- min(ci_vals, na.rm = TRUE) range_max <- max(ci_vals, na.rm = TRUE) range_str <- sprintf("%.1f-%.1f", range_min, range_max) ci_percentiles_str <- get_ci_percentiles(ci_vals) # Count pixels with CI >= 2 (germination threshold) GERMINATION_CI_THRESHOLD <- 2.0 num_pixels_gte_2 <- sum(ci_vals >= GERMINATION_CI_THRESHOLD, na.rm = TRUE) num_pixels_total <- length(ci_vals) pct_pixels_gte_2 <- if (num_pixels_total > 0) round((num_pixels_gte_2 / num_pixels_total) * 100, 1) else 0 field_rows <- extracted[extracted$ID == field_poly_idx, ] num_total <- nrow(field_rows) num_data <- sum(!is.na(field_rows$CI)) pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0 cloud_cat <- if (num_data == 0) "No image available" else if (pct_clear >= 95) "Clear view" else "Partial coverage" # Age_week and Phase are now calculated in main script using actual planting dates # Germination_progress is calculated in main script after Age_week is known existing_idx <- which(sapply(results_list, function(x) x$Field_id) == field_id) if (length(existing_idx) > 0) { next } results_list[[length(results_list) + 1]] <- data.frame( Field_id = field_id, Mean_CI = round(mean_ci, 2), CV = round(cv * 100, 2), CI_range = range_str, CI_Percentiles = ci_percentiles_str, Pct_pixels_CI_gte_2 = pct_pixels_gte_2, Cloud_pct_clear = pct_clear, Cloud_category = cloud_cat, stringsAsFactors = FALSE ) fields_processed <- fields_processed + 1 } message(paste(" Tile", tile_idx, "of", length(tile_files), "processed")) }, error = function(e) { message(paste(" [ERROR] Tile", basename(tile_file), ":", e$message)) }) } if (length(results_list) == 0) { stop(paste("No fields processed successfully for week", week_num)) } stats_df <- dplyr::bind_rows(results_list) message(paste(" ✓ Successfully calculated statistics for", nrow(stats_df), "fields")) return(stats_df) } # ============================================================================ # CALCULATE KPI TRENDS # ============================================================================ calculate_kpi_trends <- function(current_stats, prev_stats = NULL, project_dir = NULL, reports_dir = NULL, current_week = NULL, year = NULL) { message("Calculating KPI trends from current and previous week data") current_stats$Weekly_ci_change <- NA_real_ current_stats$CV_Trend_Short_Term <- NA_real_ current_stats$Four_week_trend <- NA_real_ current_stats$CV_Trend_Long_Term <- NA_real_ current_stats$nmr_of_weeks_analysed <- 1L if (is.null(prev_stats) || nrow(prev_stats) == 0) { message(" No previous week data available - using defaults") return(current_stats) } message(paste(" prev_stats has", nrow(prev_stats), "rows and", ncol(prev_stats), "columns")) prev_lookup <- setNames(seq_len(nrow(prev_stats)), prev_stats$Field_id) prev_field_analysis <- NULL tryCatch({ analysis_dir <- file.path(reports_dir, "kpis", "field_analysis") if (dir.exists(analysis_dir)) { analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE) if (length(analysis_files) > 0) { recent_file <- analysis_files[which.max(file.info(analysis_files)$mtime)] prev_field_analysis <- readr::read_csv(recent_file, show_col_types = FALSE, col_select = c(Field_id, nmr_of_weeks_analysed, Phase)) } } }, error = function(e) { message(paste(" Note: Could not load previous field_analysis for nmr_weeks tracking:", e$message)) }) if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) { message(paste(" Using previous field_analysis to track nmr_of_weeks_analysed")) } historical_4weeks <- list() historical_8weeks <- list() if (!is.null(project_dir) && !is.null(reports_dir) && !is.null(current_week)) { message(" Loading historical field_stats for 4-week and 8-week trends...") for (lookback in 1:4) { target_week <- current_week - lookback target_year <- year if (target_week < 1) { target_week <- target_week + 52 target_year <- target_year - 1 } rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year) rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) if (file.exists(rds_path)) { tryCatch({ stats_data <- readRDS(rds_path) historical_4weeks[[length(historical_4weeks) + 1]] <- list( week = target_week, stats = stats_data ) }, error = function(e) { message(paste(" Warning: Could not load week", target_week, ":", e$message)) }) } } for (lookback in 1:8) { target_week <- current_week - lookback target_year <- year if (target_week < 1) { target_week <- target_week + 52 target_year <- target_year - 1 } rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year) rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) if (file.exists(rds_path)) { tryCatch({ stats_data <- readRDS(rds_path) historical_8weeks[[length(historical_8weeks) + 1]] <- list( week = target_week, stats = stats_data ) }, error = function(e) { # Silently skip }) } } if (length(historical_4weeks) > 0) { message(paste(" Loaded", length(historical_4weeks), "weeks for 4-week trend")) } if (length(historical_8weeks) > 0) { message(paste(" Loaded", length(historical_8weeks), "weeks for 8-week CV trend")) } } cv_trends_calculated <- 0 four_week_trends_calculated <- 0 cv_long_term_calculated <- 0 for (i in seq_len(nrow(current_stats))) { field_id <- current_stats$Field_id[i] prev_idx <- prev_lookup[field_id] if (!is.na(prev_idx) && prev_idx > 0 && prev_idx <= nrow(prev_stats)) { prev_row <- prev_stats[prev_idx, , drop = FALSE] prev_ci <- prev_row$Mean_CI[1] if (!is.na(prev_ci) && !is.na(current_stats$Mean_CI[i])) { current_stats$Weekly_ci_change[i] <- round(current_stats$Mean_CI[i] - prev_ci, 2) } prev_cv <- prev_row$CV[1] if (!is.na(prev_cv) && !is.na(current_stats$CV[i])) { current_stats$CV_Trend_Short_Term[i] <- calculate_cv_trend(current_stats$CV[i], prev_cv) cv_trends_calculated <- cv_trends_calculated + 1 } if (length(historical_4weeks) > 0) { ci_values_4week <- numeric() for (hist_idx in rev(seq_along(historical_4weeks))) { hist_data <- historical_4weeks[[hist_idx]]$stats hist_field <- which(hist_data$Field_id == field_id) if (length(hist_field) > 0 && !is.na(hist_data$Mean_CI[hist_field[1]])) { ci_values_4week <- c(ci_values_4week, hist_data$Mean_CI[hist_field[1]]) } } ci_values_4week <- c(ci_values_4week, current_stats$Mean_CI[i]) if (length(ci_values_4week) >= 2) { current_stats$Four_week_trend[i] <- calculate_four_week_trend(ci_values_4week) four_week_trends_calculated <- four_week_trends_calculated + 1 } } if (length(historical_8weeks) > 0) { cv_values_8week <- numeric() for (hist_idx in rev(seq_along(historical_8weeks))) { hist_data <- historical_8weeks[[hist_idx]]$stats hist_field <- which(hist_data$Field_id == field_id) if (length(hist_field) > 0 && !is.na(hist_data$CV[hist_field[1]])) { cv_values_8week <- c(cv_values_8week, hist_data$CV[hist_field[1]]) } } cv_values_8week <- c(cv_values_8week, current_stats$CV[i]) if (length(cv_values_8week) >= 2) { slope <- calculate_cv_trend_long_term(cv_values_8week) current_stats$CV_Trend_Long_Term[i] <- slope cv_long_term_calculated <- cv_long_term_calculated + 1 } } if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) { prev_analysis_row <- prev_field_analysis %>% dplyr::filter(Field_id == field_id) if (nrow(prev_analysis_row) > 0) { prev_nmr_weeks_analysis <- prev_analysis_row$nmr_of_weeks_analysed[1] # Only increment nmr_of_weeks_analysed if we have previous data if (!is.na(prev_nmr_weeks_analysis)) { current_stats$nmr_of_weeks_analysed[i] <- prev_nmr_weeks_analysis + 1L } else { current_stats$nmr_of_weeks_analysed[i] <- 1L } } } } } message(paste(" ✓ Calculated CV_Trend_Short_Term:", cv_trends_calculated, "fields")) message(paste(" ✓ Calculated Four_week_trend:", four_week_trends_calculated, "fields")) message(paste(" ✓ Calculated CV_Trend_Long_Term:", cv_long_term_calculated, "fields")) return(current_stats) } # ============================================================================ # LOAD OR CALCULATE WEEKLY STATISTICS # ============================================================================ load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_boundaries_sf, mosaic_dir, reports_dir, report_date = Sys.Date()) { rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, week_num, year) rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) if (file.exists(rds_path)) { message(paste("Loading cached statistics from:", basename(rds_path))) return(readRDS(rds_path)) } message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num)) stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year, mosaic_dir, report_date) output_dir <- file.path(reports_dir, "kpis", "field_stats") if (!dir.exists(output_dir)) { dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) } saveRDS(stats_df, rds_path) message(paste("Saved weekly statistics RDS:", basename(rds_path))) csv_filename <- sprintf("%s_field_stats_week%02d_%d.csv", project_dir, week_num, year) csv_path <- file.path(output_dir, csv_filename) readr::write_csv(stats_df, csv_path) message(paste("Saved weekly statistics CSV:", basename(csv_path))) return(stats_df) } load_historical_field_data <- function(project_dir, current_week, current_year, reports_dir, num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL) { historical_data <- list() loaded_weeks <- c() missing_weeks <- c() for (lookback in 0:(num_weeks - 1)) { # Calculate target week and year using authoritative helper (handles year boundaries) target <- calculate_target_week_and_year(current_week, current_year, offset_weeks = lookback) target_week <- target$week target_year <- target$year # Construct filename with BOTH week and year (proper ISO format) csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", target_week, target_year), ".csv") csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename) if (file.exists(csv_path)) { tryCatch({ data <- read_csv(csv_path, show_col_types = FALSE) historical_data[[lookback + 1]] <- list( week = target_week, year = target_year, data = data ) loaded_weeks <- c(loaded_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) }, error = function(e) { message(paste(" Warning: Could not load week", target_week, "/", target_year, ":", e$message)) missing_weeks <<- c(missing_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) }) } else { missing_weeks <- c(missing_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) } } if (length(missing_weeks) > 0 && auto_generate) { message(paste("⚠ Missing weeks:", paste(missing_weeks, collapse = ", "))) message("Scanning for ALL available weekly mosaics and calculating stats...\n") if (is.null(field_boundaries_sf)) { message(" Error: field_boundaries_sf not provided - cannot auto-generate") return(historical_data) } if (!exists("weekly_tile_max")) { message(" ✗ weekly_tile_max path not defined") return(historical_data) } check_paths <- c(file.path(weekly_tile_max, "5x5"), weekly_tile_max) mosaic_scan_dir <- NA for (check_path in check_paths) { if (dir.exists(check_path)) { tif_files <- list.files(check_path, pattern = "week_.*\\.tif$", full.names = TRUE) if (length(tif_files) > 0) { mosaic_scan_dir <- check_path break } } } if (is.na(mosaic_scan_dir)) { message(" ✗ No mosaic files found in weekly_tile_max") return(historical_data) } weeks_to_load <- 8 today <- Sys.Date() target_dates <- today - (0:(weeks_to_load - 1)) * 7 expected_weeks <- data.frame( date = target_dates, week = as.numeric(format(target_dates, "%V")), year = as.numeric(format(target_dates, "%G")), stringsAsFactors = FALSE ) expected_weeks <- unique(expected_weeks) message(paste(" Expected weeks (last 8 from", format(today, "%Y-%m-%d"), "):")) for (i in seq_len(nrow(expected_weeks))) { message(paste(" Week", sprintf("%02d", expected_weeks$week[i]), expected_weeks$year[i])) } message("") tif_files <- list.files(mosaic_scan_dir, pattern = "week_([0-9]{2})_([0-9]{4})_[0-9]{2}\\.tif$", full.names = FALSE) available_weeks <- data.frame() for (filename in tif_files) { matches <- regmatches(filename, gregexpr("week_([0-9]{2})_([0-9]{4})", filename))[[1]] if (length(matches) > 0) { week_year <- strsplit(matches[1], "_")[[1]] if (length(week_year) == 3) { week_num <- as.numeric(week_year[2]) year_num <- as.numeric(week_year[3]) if (week_num %in% expected_weeks$week && year_num %in% expected_weeks$year) { available_weeks <- rbind(available_weeks, data.frame(week = week_num, year = year_num)) } } } } available_weeks <- unique(available_weeks) available_weeks <- merge(available_weeks, expected_weeks[, c("week", "year", "date")], by = c("week", "year")) available_weeks <- available_weeks[order(available_weeks$date, decreasing = TRUE), ] if (nrow(available_weeks) == 0) { message(" ✗ No matching mosaic files found") message(paste(" Scanned directory:", mosaic_scan_dir)) return(historical_data) } message(paste(" Found", nrow(available_weeks), "week(s) with available mosaics:")) for (i in seq_len(nrow(available_weeks))) { week_to_calc <- available_weeks$week[i] year_to_calc <- available_weeks$year[i] date_to_calc <- available_weeks$date[i] tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_to_calc, year_to_calc) tile_files <- list.files(mosaic_scan_dir, pattern = tile_pattern, full.names = TRUE) if (length(tile_files) == 0) { message(paste(" ✗ Week", sprintf("%02d", week_to_calc), year_to_calc, "- no tiles found")) next } message(paste(" ✓ Week", sprintf("%02d", week_to_calc), year_to_calc, "-", length(tile_files), "mosaics")) tryCatch({ week_stats <- load_or_calculate_weekly_stats( week_num = week_to_calc, year = year_to_calc, project_dir = project_dir, field_boundaries_sf = field_boundaries_sf, mosaic_dir = mosaic_scan_dir, reports_dir = reports_dir, report_date = date_to_calc ) if (!is.null(week_stats) && nrow(week_stats) > 0) { message(paste(" ✓ Calculated stats for", nrow(week_stats), "fields")) historical_data[[length(historical_data) + 1]] <- list( week = week_to_calc, year = year_to_calc, data = week_stats ) loaded_weeks <- c(loaded_weeks, paste0(week_to_calc, "_", year_to_calc)) } }, error = function(e) { message(paste(" ✗ Error:", e$message)) }) } } if (length(historical_data) == 0) { message(paste("Error: No historical field data found and could not auto-generate weeks")) return(NULL) } message(paste("✓ Loaded", length(historical_data), "weeks of historical data:", paste(loaded_weeks, collapse = ", "))) return(historical_data) } # ============================================================================ # HELPER: Extract field-level statistics from CI raster # ============================================================================ extract_field_statistics_from_ci <- function(ci_band, field_boundaries_sf) { #' Extract CI statistics for all fields from a single CI raster band extract_result <- terra::extract(ci_band, field_boundaries_sf) stats_list <- list() for (field_idx in seq_len(nrow(field_boundaries_sf))) { field_pixels <- extract_result[extract_result$ID == field_idx, 2] pixels <- as.numeric(field_pixels[!is.na(field_pixels)]) if (length(pixels) == 0) { stats_list[[field_idx]] <- data.frame( field_idx = field_idx, mean_ci = NA_real_, cv = NA_real_, p10 = NA_real_, p90 = NA_real_, min_ci = NA_real_, max_ci = NA_real_, pixel_count_valid = 0, pixel_count_total = 0, stringsAsFactors = FALSE ) next } mean_val <- mean(pixels, na.rm = TRUE) cv_val <- if (mean_val > 0) sd(pixels, na.rm = TRUE) / mean_val else NA_real_ p10_val <- quantile(pixels, probs = CI_PERCENTILE_LOW, na.rm = TRUE)[[1]] p90_val <- quantile(pixels, probs = CI_PERCENTILE_HIGH, na.rm = TRUE)[[1]] min_val <- min(pixels, na.rm = TRUE) max_val <- max(pixels, na.rm = TRUE) stats_list[[field_idx]] <- data.frame( field_idx = field_idx, mean_ci = mean_val, cv = cv_val, p10 = p10_val, p90 = p90_val, min_ci = min_val, max_ci = max_val, pixel_count_valid = length(pixels), pixel_count_total = nrow(extract_result[extract_result$ID == field_idx, ]), stringsAsFactors = FALSE ) } return(dplyr::bind_rows(stats_list)) } # ============================================================================ # COMMENTED OUT / UNUSED FUNCTIONS (kept for future use) # ============================================================================ # analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week_num, year, # mosaic_dir, historical_data = NULL, planting_dates = NULL, # report_date = Sys.Date(), harvest_imminence_data = NULL, # harvesting_data = NULL) { # # [Function kept as reference for parallel field analysis] # # Currently replaced by calculate_field_statistics() for efficiency # }