# 80_UTILS_COMMON.R # ============================================================================ # SHARED UTILITY FUNCTIONS FOR ALL CLIENT TYPES (SCRIPT 80) # # Contains helper and infrastructure functions used by both AURA and ANGATA workflows: # - Statistical categorization and calculations # - Tile operations and data loading # - Field statistics extraction # - Week/year calculations for consistent date handling # - Excel/CSV/RDS export utilities # # Used by: 80_calculate_kpis.R, all client-specific utils files # ============================================================================ # ============================================================================ # CONSTANTS (from 80_calculate_kpis.R) # ============================================================================ # Four-week trend thresholds FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5 FOUR_WEEK_TREND_GROWTH_MIN <- 0.1 FOUR_WEEK_TREND_GROWTH_MAX <- 0.5 FOUR_WEEK_TREND_NO_GROWTH_RANGE <- 0.1 FOUR_WEEK_TREND_DECLINE_MAX <- -0.1 FOUR_WEEK_TREND_DECLINE_MIN <- -0.5 FOUR_WEEK_TREND_STRONG_DECLINE_MAX <- -0.5 # CV Trend thresholds (8-week slope interpretation) CV_SLOPE_STRONG_IMPROVEMENT_MIN <- -0.03 CV_SLOPE_IMPROVEMENT_MIN <- -0.02 CV_SLOPE_IMPROVEMENT_MAX <- -0.01 CV_SLOPE_HOMOGENOUS_MIN <- -0.01 CV_SLOPE_HOMOGENOUS_MAX <- 0.01 CV_SLOPE_PATCHINESS_MIN <- 0.01 CV_SLOPE_PATCHINESS_MAX <- 0.02 CV_SLOPE_SEVERE_MIN <- 0.02 # Percentile calculations CI_PERCENTILE_LOW <- 0.10 CI_PERCENTILE_HIGH <- 0.90 # Phase definitions (used by get_phase_by_age) PHASE_DEFINITIONS <- data.frame( phase = c("Germination", "Tillering", "Grand Growth", "Maturation"), age_start = c(0, 4, 17, 39), age_end = c(6, 16, 39, 200), stringsAsFactors = FALSE ) # ============================================================================ # WEEK/YEAR CALCULATION HELPERS (Consistent across all scripts) # ============================================================================ #' Calculate week and year for a given lookback offset #' This function handles ISO 8601 week numbering with proper year wrapping #' when crossing year boundaries (e.g., week 01/2026 -> week 52/2025) #' #' @param current_week ISO week number (1-53) #' @param current_year ISO week year (from format(..., "%G")) #' @param offset_weeks Number of weeks to go back (0 = current week, 1 = previous week, etc.) #' #' @return List with: week (ISO week number), year (ISO week year) #' #' @details #' This is the authoritative week/year calculation function. #' Used by: #' - load_historical_field_data() - to find RDS/CSV files for 4-week lookback #' - Script 80 main - to calculate previous week with year wrapping #' - Any other script needing to walk backwards through weeks #' #' Example: Week 01/2026, offset=1 -> returns list(week=52, year=2025) calculate_target_week_and_year <- function(current_week, current_year, offset_weeks = 0) { target_week <- current_week - offset_weeks target_year <- current_year # Handle wrapping: when going back from week 1, wrap to week 52 of previous year while (target_week < 1) { target_week <- target_week + 52 target_year <- target_year - 1 } return(list(week = target_week, year = target_year)) } # ============================================================================ # TILE-AWARE HELPER FUNCTIONS # ============================================================================ #' Get tile IDs that intersect with a field geometry get_tile_ids_for_field <- function(field_geom, tile_grid, field_id = NULL) { if (inherits(field_geom, "sf")) { field_bbox <- sf::st_bbox(field_geom) field_xmin <- field_bbox["xmin"] field_xmax <- field_bbox["xmax"] field_ymin <- field_bbox["ymin"] field_ymax <- field_bbox["ymax"] } else if (inherits(field_geom, "SpatVector")) { field_bbox <- terra::ext(field_geom) field_xmin <- field_bbox$xmin field_xmax <- field_bbox$xmax field_ymin <- field_bbox$ymin field_ymax <- field_bbox$ymax } else { stop("field_geom must be sf or terra::vect object") } intersecting_tiles <- tile_grid$id[ !(tile_grid$xmax < field_xmin | tile_grid$xmin > field_xmax | tile_grid$ymax < field_ymin | tile_grid$ymin > field_ymax) ] return(as.numeric(intersecting_tiles)) } #' Load and merge tiles for a specific field load_tiles_for_field <- function(field_geom, tile_ids, week_num, year, mosaic_dir) { if (length(tile_ids) == 0) { return(NULL) } tiles_list <- list() for (tile_id in sort(tile_ids)) { tile_filename <- sprintf("week_%02d_%d_%02d.tif", week_num, year, tile_id) tile_path <- file.path(mosaic_dir, tile_filename) if (file.exists(tile_path)) { tryCatch({ tile_rast <- terra::rast(tile_path) ci_band <- terra::subset(tile_rast, 5) tiles_list[[length(tiles_list) + 1]] <- ci_band }, error = function(e) { message(paste(" Warning: Could not load tile", tile_id, ":", e$message)) }) } } if (length(tiles_list) == 0) { return(NULL) } if (length(tiles_list) == 1) { return(tiles_list[[1]]) } else { tryCatch({ rsrc <- terra::sprc(tiles_list) merged <- terra::mosaic(rsrc, fun = "max") return(merged) }, error = function(e) { message(paste(" Warning: Could not merge tiles:", e$message)) return(tiles_list[[1]]) }) } } #' Build tile grid from available tile files build_tile_grid <- function(mosaic_dir, week_num, year) { # Handle grid-size subdirectories (e.g., weekly_tile_max/5x5/) detected_grid_size <- NA if (dir.exists(mosaic_dir)) { subfolders <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE) grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) if (length(grid_patterns) > 0) { detected_grid_size <- grid_patterns[1] mosaic_dir <- file.path(mosaic_dir, detected_grid_size) message(paste(" Using grid-size subdirectory:", detected_grid_size)) } } tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE) if (length(tile_files) == 0) { stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir)) } tile_grid <- data.frame( id = integer(), xmin = numeric(), xmax = numeric(), ymin = numeric(), ymax = numeric(), stringsAsFactors = FALSE ) for (tile_file in tile_files) { tryCatch({ matches <- regmatches(basename(tile_file), regexpr("_([0-9]{2})\\.tif$", basename(tile_file))) if (length(matches) > 0) { tile_id <- as.integer(sub("_|\\.tif", "", matches[1])) tile_rast <- terra::rast(tile_file) tile_ext <- terra::ext(tile_rast) tile_grid <- rbind(tile_grid, data.frame( id = tile_id, xmin = tile_ext$xmin, xmax = tile_ext$xmax, ymin = tile_ext$ymin, ymax = tile_ext$ymax, stringsAsFactors = FALSE )) } }, error = function(e) { message(paste(" Warning: Could not process tile", basename(tile_file), ":", e$message)) }) } if (nrow(tile_grid) == 0) { stop("Could not extract extents from any tile files") } return(list( tile_grid = tile_grid, mosaic_dir = mosaic_dir, grid_size = detected_grid_size )) } # ============================================================================ # STATISTICAL CATEGORIZATION FUNCTIONS # ============================================================================ #' Categorize four-week CI trend categorize_four_week_trend <- function(ci_values_list) { if (is.null(ci_values_list) || length(ci_values_list) < 2) { return(NA_character_) } ci_values_list <- ci_values_list[!is.na(ci_values_list)] if (length(ci_values_list) < 2) { return(NA_character_) } weekly_changes <- diff(ci_values_list) avg_weekly_change <- mean(weekly_changes, na.rm = TRUE) if (avg_weekly_change >= FOUR_WEEK_TREND_STRONG_GROWTH_MIN) { return("strong growth") } else if (avg_weekly_change >= FOUR_WEEK_TREND_GROWTH_MIN && avg_weekly_change < FOUR_WEEK_TREND_GROWTH_MAX) { return("growth") } else if (abs(avg_weekly_change) <= FOUR_WEEK_TREND_NO_GROWTH_RANGE) { return("no growth") } else if (avg_weekly_change <= FOUR_WEEK_TREND_DECLINE_MIN && avg_weekly_change > FOUR_WEEK_TREND_STRONG_DECLINE_MAX) { return("decline") } else if (avg_weekly_change < FOUR_WEEK_TREND_STRONG_DECLINE_MAX) { return("strong decline") } else { return("no growth") } } #' Round cloud coverage to interval categories round_cloud_to_intervals <- function(cloud_pct_clear) { if (is.na(cloud_pct_clear)) { return(NA_character_) } if (cloud_pct_clear < 10) return("0-10%") if (cloud_pct_clear < 20) return("10-20%") if (cloud_pct_clear < 30) return("20-30%") if (cloud_pct_clear < 40) return("30-40%") if (cloud_pct_clear < 50) return("40-50%") if (cloud_pct_clear < 60) return("50-60%") if (cloud_pct_clear < 70) return("60-70%") if (cloud_pct_clear < 80) return("70-80%") if (cloud_pct_clear < 90) return("80-90%") if (cloud_pct_clear < 95) return("90-95%") return("95-100%") } #' Get CI percentile range (10th to 90th) get_ci_percentiles <- function(ci_values) { if (is.null(ci_values) || length(ci_values) == 0) { return(NA_character_) } ci_values <- ci_values[!is.na(ci_values)] if (length(ci_values) == 0) { return(NA_character_) } p10 <- quantile(ci_values, CI_PERCENTILE_LOW, na.rm = TRUE) p90 <- quantile(ci_values, CI_PERCENTILE_HIGH, na.rm = TRUE) return(sprintf("%.1f-%.1f", p10, p90)) } #' Calculate short-term CV trend (current week vs previous week) calculate_cv_trend <- function(cv_current, cv_previous) { if (is.na(cv_current) || is.na(cv_previous)) { return(NA_real_) } return(round(cv_current - cv_previous, 4)) } #' Calculate four-week CI trend calculate_four_week_trend <- function(mean_ci_values) { if (is.null(mean_ci_values) || length(mean_ci_values) == 0) { return(NA_real_) } ci_clean <- mean_ci_values[!is.na(mean_ci_values)] if (length(ci_clean) < 2) { return(NA_real_) } trend <- ci_clean[length(ci_clean)] - ci_clean[1] return(round(trend, 2)) } #' Categorize CV slope (8-week regression) into field uniformity interpretation categorize_cv_slope <- function(slope) { if (is.na(slope)) { return(NA_character_) } if (slope <= CV_SLOPE_IMPROVEMENT_MIN) { return("Excellent uniformity") } else if (slope < CV_SLOPE_HOMOGENOUS_MIN) { return("Homogenous growth") } else if (slope <= CV_SLOPE_HOMOGENOUS_MAX) { return("Homogenous growth") } else if (slope <= CV_SLOPE_PATCHINESS_MAX) { return("Minor patchiness") } else { return("Severe fragmentation") } } #' Calculate 8-week CV trend via linear regression slope calculate_cv_trend_long_term <- function(cv_values) { if (is.null(cv_values) || length(cv_values) == 0) { return(NA_real_) } cv_clean <- cv_values[!is.na(cv_values)] if (length(cv_clean) < 2) { return(NA_real_) } weeks <- seq_along(cv_clean) tryCatch({ lm_fit <- lm(cv_clean ~ weeks) slope <- coef(lm_fit)["weeks"] return(round(as.numeric(slope), 4)) }, error = function(e) { return(NA_real_) }) } # ============================================================================ # HELPER FUNCTIONS # ============================================================================ #' Get crop phase by age in weeks get_phase_by_age <- function(age_weeks) { if (is.na(age_weeks)) return(NA_character_) for (i in seq_len(nrow(PHASE_DEFINITIONS))) { if (age_weeks >= PHASE_DEFINITIONS$age_start[i] && age_weeks <= PHASE_DEFINITIONS$age_end[i]) { return(PHASE_DEFINITIONS$phase[i]) } } return("Unknown") } #' Get status trigger based on CI values and field age get_status_trigger <- function(ci_values, ci_change, age_weeks) { if (is.na(age_weeks) || length(ci_values) == 0) return(NA_character_) ci_values <- ci_values[!is.na(ci_values)] if (length(ci_values) == 0) return(NA_character_) pct_above_2 <- sum(ci_values > 2) / length(ci_values) * 100 pct_at_or_above_2 <- sum(ci_values >= 2) / length(ci_values) * 100 ci_cv <- if (mean(ci_values, na.rm = TRUE) > 0) sd(ci_values) / mean(ci_values, na.rm = TRUE) else 0 mean_ci <- mean(ci_values, na.rm = TRUE) if (age_weeks >= 0 && age_weeks <= 6) { if (pct_at_or_above_2 >= 70) { return("germination_complete") } else if (pct_above_2 > 10) { return("germination_started") } } if (age_weeks >= 45) { return("harvest_ready") } if (age_weeks > 6 && !is.na(ci_change) && ci_change < -1.5 && ci_cv < 0.25) { return("stress_detected_whole_field") } if (age_weeks > 6 && !is.na(ci_change) && ci_change > 1.5) { return("strong_recovery") } if (age_weeks >= 4 && age_weeks < 39 && !is.na(ci_change) && ci_change > 0.2) { return("growth_on_track") } if (age_weeks >= 39 && age_weeks < 45 && mean_ci > 3.5) { return("maturation_progressing") } return(NA_character_) } #' Extract planting dates from harvesting data extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) { if (is.null(harvesting_data) || nrow(harvesting_data) == 0) { message("Warning: No harvesting data available - planting dates will be NA.") if (!is.null(field_boundaries_sf)) { return(data.frame( field_id = field_boundaries_sf$field, planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)), stringsAsFactors = FALSE )) } return(NULL) } tryCatch({ planting_dates <- harvesting_data %>% arrange(field, desc(season_start)) %>% distinct(field, .keep_all = TRUE) %>% select(field, season_start) %>% rename(field_id = field, planting_date = season_start) %>% filter(!is.na(planting_date)) %>% as.data.frame() message(paste("Extracted planting dates for", nrow(planting_dates), "fields from harvest.xlsx")) return(planting_dates) }, error = function(e) { message(paste("Error extracting planting dates:", e$message)) return(NULL) }) } # ============================================================================ # FIELD STATISTICS EXTRACTION # ============================================================================ #' Extract CI statistics for all fields from a single CI raster band extract_field_statistics_from_ci <- function(ci_band, field_boundaries_sf) { extract_result <- terra::extract(ci_band, field_boundaries_sf) stats_list <- list() for (field_idx in seq_len(nrow(field_boundaries_sf))) { field_pixels <- extract_result[extract_result$ID == field_idx, 2] pixels <- as.numeric(field_pixels[!is.na(field_pixels)]) if (length(pixels) == 0) { stats_list[[field_idx]] <- data.frame( field_idx = field_idx, mean_ci = NA_real_, cv = NA_real_, p10 = NA_real_, p90 = NA_real_, min_ci = NA_real_, max_ci = NA_real_, pixel_count_valid = 0, pixel_count_total = 0, stringsAsFactors = FALSE ) next } mean_val <- mean(pixels, na.rm = TRUE) cv_val <- if (mean_val > 0) sd(pixels, na.rm = TRUE) / mean_val else NA_real_ p10_val <- quantile(pixels, probs = CI_PERCENTILE_LOW, na.rm = TRUE)[[1]] p90_val <- quantile(pixels, probs = CI_PERCENTILE_HIGH, na.rm = TRUE)[[1]] min_val <- min(pixels, na.rm = TRUE) max_val <- max(pixels, na.rm = TRUE) stats_list[[field_idx]] <- data.frame( field_idx = field_idx, mean_ci = mean_val, cv = cv_val, p10 = p10_val, p90 = p90_val, min_ci = min_val, max_ci = max_val, pixel_count_valid = length(pixels), pixel_count_total = nrow(extract_result[extract_result$ID == field_idx, ]), stringsAsFactors = FALSE ) } return(dplyr::bind_rows(stats_list)) } # ============================================================================ # EXPORT FUNCTIONS (USED BY ALL CLIENTS) # ============================================================================ #' Generate summary statistics from field analysis data generate_field_analysis_summary <- function(field_df) { message("Generating summary statistics...") total_acreage <- sum(field_df$Acreage, na.rm = TRUE) germination_acreage <- sum(field_df$Acreage[field_df$Phase == "Germination"], na.rm = TRUE) tillering_acreage <- sum(field_df$Acreage[field_df$Phase == "Tillering"], na.rm = TRUE) grand_growth_acreage <- sum(field_df$Acreage[field_df$Phase == "Grand Growth"], na.rm = TRUE) maturation_acreage <- sum(field_df$Acreage[field_df$Phase == "Maturation"], na.rm = TRUE) unknown_phase_acreage <- sum(field_df$Acreage[field_df$Phase == "Unknown"], na.rm = TRUE) harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE) stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE) recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE) growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE) germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE) germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE) no_trigger_acreage <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE) clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE) partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE) no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE) total_fields <- nrow(field_df) clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE) partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE) no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE) summary_df <- data.frame( Category = c( "--- PHASE DISTRIBUTION ---", "Germination", "Tillering", "Grand Growth", "Maturation", "Unknown phase", "--- STATUS TRIGGERS ---", "Harvest ready", "Stress detected", "Strong recovery", "Growth on track", "Germination complete", "Germination started", "No trigger", "--- CLOUD COVERAGE (FIELDS) ---", "Clear view", "Partial coverage", "No image available", "--- CLOUD COVERAGE (ACREAGE) ---", "Clear view", "Partial coverage", "No image available", "--- TOTAL ---", "Total Acreage" ), Acreage = c( NA, round(germination_acreage, 2), round(tillering_acreage, 2), round(grand_growth_acreage, 2), round(maturation_acreage, 2), round(unknown_phase_acreage, 2), NA, round(harvest_ready_acreage, 2), round(stress_acreage, 2), round(recovery_acreage, 2), round(growth_on_track_acreage, 2), round(germination_complete_acreage, 2), round(germination_started_acreage, 2), round(no_trigger_acreage, 2), NA, paste0(clear_fields, " fields"), paste0(partial_fields, " fields"), paste0(no_image_fields, " fields"), NA, round(clear_acreage, 2), round(partial_acreage, 2), round(no_image_acreage, 2), NA, round(total_acreage, 2) ), stringsAsFactors = FALSE ) return(summary_df) } #' Export field analysis to Excel, CSV, and RDS formats export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, year, reports_dir) { message("Exporting per-field analysis to Excel, CSV, and RDS...") field_df_rounded <- field_df %>% mutate(across(where(is.numeric), ~ round(., 2))) # Handle NULL summary_df summary_df_rounded <- if (!is.null(summary_df)) { summary_df %>% mutate(across(where(is.numeric), ~ round(., 2))) } else { NULL } output_subdir <- file.path(reports_dir, "field_analysis") if (!dir.exists(output_subdir)) { dir.create(output_subdir, recursive = TRUE) } excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".xlsx") excel_path <- file.path(output_subdir, excel_filename) excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE) # Build sheets list dynamically sheets <- list( "Field Data" = field_df_rounded ) if (!is.null(summary_df_rounded)) { sheets[["Summary"]] <- summary_df_rounded } write_xlsx(sheets, excel_path) message(paste("✓ Field analysis Excel exported to:", excel_path)) kpi_data <- list( field_analysis = field_df_rounded, field_analysis_summary = summary_df_rounded, metadata = list( current_week = current_week, year = year, project = project_dir, created_at = Sys.time() ) ) rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds") rds_path <- file.path(reports_dir, rds_filename) saveRDS(kpi_data, rds_path) message(paste("✓ Field analysis RDS exported to:", rds_path)) csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".csv") csv_path <- file.path(output_subdir, csv_filename) write_csv(field_df_rounded, csv_path) message(paste("✓ Field analysis CSV exported to:", csv_path)) return(list(excel = excel_path, rds = rds_path, csv = csv_path)) } # ============================================================================ # ADDITIONAL CRITICAL FUNCTIONS FROM 80_weekly_stats_utils.R (REQUIRED BY 80_calculate_kpis.R) # ============================================================================ #' Calculate statistics for all fields from weekly mosaics calculate_field_statistics <- function(field_boundaries_sf, week_num, year, mosaic_dir, report_date = Sys.Date()) { message(paste("Calculating statistics for all fields - Week", week_num, year)) # Per-field mode: look in per-field subdirectories single_file_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year) # Find all field subdirectories with mosaics for this week field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE) field_dirs <- field_dirs[field_dirs != ""] per_field_files <- list() for (field in field_dirs) { field_mosaic_dir <- file.path(mosaic_dir, field) files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE) if (length(files) > 0) { per_field_files[[field]] <- files[1] # Take first match for this field } } if (length(per_field_files) == 0) { stop(paste("No per-field mosaic files found for week", week_num, year, "in", mosaic_dir)) } message(paste(" Found", length(per_field_files), "per-field mosaic file(s) for week", week_num)) results_list <- list() # Initialize progress bar pb <- progress::progress_bar$new( format = " [:bar] :percent | Field :current/:total", total = length(per_field_files), width = 60 ) # Process each field's mosaic for (field_idx in seq_along(per_field_files)) { pb$tick() # Update progress bar field_name <- names(per_field_files)[field_idx] field_file <- per_field_files[[field_name]] tryCatch({ current_rast <- terra::rast(field_file) ci_band <- current_rast[["CI"]] if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) { message(paste(" [SKIP] Field", field_name, "- CI band not found")) next } # Extract CI values for this field field_boundary <- field_boundaries_sf[field_boundaries_sf$field == field_name, ] if (nrow(field_boundary) == 0) { message(paste(" [SKIP] Field", field_name, "- not in field boundaries")) next } extracted <- terra::extract(ci_band, field_boundary, na.rm = FALSE) if (nrow(extracted) == 0 || all(is.na(extracted$CI))) { message(paste(" [SKIP] Field", field_name, "- no CI values found")) next } ci_vals <- extracted$CI[!is.na(extracted$CI)] if (length(ci_vals) == 0) { next } # Calculate statistics mean_ci <- mean(ci_vals, na.rm = TRUE) ci_std <- sd(ci_vals, na.rm = TRUE) cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_ range_min <- min(ci_vals, na.rm = TRUE) range_max <- max(ci_vals, na.rm = TRUE) range_str <- sprintf("%.1f-%.1f", range_min, range_max) ci_percentiles_str <- get_ci_percentiles(ci_vals) num_pixels_total <- length(ci_vals) num_pixels_gte_2 <- sum(ci_vals >= 2) pct_pixels_gte_2 <- if (num_pixels_total > 0) round((num_pixels_gte_2 / num_pixels_total) * 100, 1) else 0 num_total <- nrow(extracted) num_data <- sum(!is.na(extracted$CI)) pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0 cloud_cat <- if (num_data == 0) "No image available" else if (pct_clear >= 95) "Clear view" else "Partial coverage" # Add to results results_list[[length(results_list) + 1]] <- data.frame( Field_id = field_name, Mean_CI = round(mean_ci, 2), CV = round(cv * 100, 2), CI_range = range_str, CI_Percentiles = ci_percentiles_str, Pct_pixels_CI_gte_2 = pct_pixels_gte_2, Cloud_pct_clear = pct_clear, Cloud_category = cloud_cat, stringsAsFactors = FALSE ) }, error = function(e) { message(paste(" [ERROR] Field", field_name, ":", e$message)) }) } if (length(results_list) == 0) { stop(paste("No fields processed successfully for week", week_num)) } stats_df <- dplyr::bind_rows(results_list) message(paste(" ✓ Successfully calculated statistics for", nrow(stats_df), "fields")) return(stats_df) } #' Load or calculate weekly statistics (with RDS caching) load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_boundaries_sf, mosaic_dir, reports_dir, report_date = Sys.Date()) { rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, week_num, year) rds_path <- file.path(reports_dir, "field_stats", rds_filename) if (file.exists(rds_path)) { message(paste("Loading cached statistics from:", basename(rds_path))) return(readRDS(rds_path)) } message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num)) stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year, mosaic_dir, report_date) output_dir <- file.path(reports_dir, "field_stats") if (!dir.exists(output_dir)) { dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) } saveRDS(stats_df, rds_path) message(paste("Saved weekly statistics RDS:", basename(rds_path))) csv_filename <- sprintf("%s_field_stats_week%02d_%d.csv", project_dir, week_num, year) csv_path <- file.path(output_dir, csv_filename) readr::write_csv(stats_df, csv_path) message(paste("Saved weekly statistics CSV:", basename(csv_path))) return(stats_df) } #' Load historical field data from CSV (4-week lookback) load_historical_field_data <- function(project_dir, current_week, current_year, reports_dir, num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL, daily_vals_dir = NULL) { historical_data <- list() loaded_weeks <- c() missing_weeks <- c() for (lookback in 0:(num_weeks - 1)) { target <- calculate_target_week_and_year(current_week, current_year, offset_weeks = lookback) target_week <- target$week target_year <- target$year csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", target_week, target_year), ".csv") csv_path <- file.path(reports_dir, "field_analysis", csv_filename) if (file.exists(csv_path)) { tryCatch({ data <- readr::read_csv(csv_path, show_col_types = FALSE) historical_data[[lookback + 1]] <- list( week = target_week, year = target_year, data = data ) loaded_weeks <- c(loaded_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) }, error = function(e) { message(paste(" Warning: Could not load week", target_week, "/", target_year, ":", e$message)) missing_weeks <<- c(missing_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) }) } else { missing_weeks <- c(missing_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year))) } } if (length(historical_data) == 0) { message(paste("Error: No historical field data found")) return(NULL) } message(paste("✓ Loaded", length(historical_data), "weeks of historical data:", paste(loaded_weeks, collapse = ", "))) return(historical_data) } #' Calculate KPI trends (CI change, CV trends, 4-week and 8-week trends) calculate_kpi_trends <- function(current_stats, prev_stats = NULL, project_dir = NULL, reports_dir = NULL, current_week = NULL, year = NULL) { message("Calculating KPI trends from current and previous week data") current_stats$Weekly_ci_change <- NA_real_ current_stats$CV_Trend_Short_Term <- NA_real_ current_stats$Four_week_trend <- NA_real_ current_stats$CV_Trend_Long_Term <- NA_real_ current_stats$nmr_of_weeks_analysed <- 1L if (is.null(prev_stats) || nrow(prev_stats) == 0) { message(" No previous week data available - using defaults") return(current_stats) } message(paste(" prev_stats has", nrow(prev_stats), "rows and", ncol(prev_stats), "columns")) prev_lookup <- setNames(seq_len(nrow(prev_stats)), prev_stats$Field_id) prev_field_analysis <- NULL tryCatch({ analysis_dir <- file.path(reports_dir, "field_analysis") if (dir.exists(analysis_dir)) { analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE) if (length(analysis_files) > 0) { recent_file <- analysis_files[which.max(file.info(analysis_files)$mtime)] prev_field_analysis <- readr::read_csv(recent_file, show_col_types = FALSE, col_select = c(Field_id, nmr_of_weeks_analysed, Phase)) } } }, error = function(e) { message(paste(" Note: Could not load previous field_analysis for nmr_weeks tracking:", e$message)) }) if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) { message(paste(" Using previous field_analysis to track nmr_of_weeks_analysed")) } historical_4weeks <- list() historical_8weeks <- list() if (!is.null(project_dir) && !is.null(reports_dir) && !is.null(current_week)) { message(" Loading historical field_stats for 4-week and 8-week trends...") for (lookback in 1:4) { target_week <- current_week - lookback target_year <- year if (target_week < 1) { target_week <- target_week + 52 target_year <- target_year - 1 } rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year) rds_path <- file.path(reports_dir, "field_stats", rds_filename) if (file.exists(rds_path)) { tryCatch({ stats_data <- readRDS(rds_path) historical_4weeks[[length(historical_4weeks) + 1]] <- list(week = target_week, stats = stats_data) }, error = function(e) { message(paste(" Warning: Could not load week", target_week, ":", e$message)) }) } } for (lookback in 1:8) { target_week <- current_week - lookback target_year <- year if (target_week < 1) { target_week <- target_week + 52 target_year <- target_year - 1 } rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year) rds_path <- file.path(reports_dir, "field_stats", rds_filename) if (file.exists(rds_path)) { tryCatch({ stats_data <- readRDS(rds_path) historical_8weeks[[length(historical_8weeks) + 1]] <- list(week = target_week, stats = stats_data) }, error = function(e) { # Silently skip }) } } if (length(historical_4weeks) > 0) { message(paste(" Loaded", length(historical_4weeks), "weeks for 4-week trend")) } if (length(historical_8weeks) > 0) { message(paste(" Loaded", length(historical_8weeks), "weeks for 8-week CV trend")) } } cv_trends_calculated <- 0 four_week_trends_calculated <- 0 cv_long_term_calculated <- 0 for (i in seq_len(nrow(current_stats))) { field_id <- current_stats$Field_id[i] prev_idx <- prev_lookup[field_id] if (!is.na(prev_idx) && prev_idx > 0 && prev_idx <= nrow(prev_stats)) { prev_row <- prev_stats[prev_idx, , drop = FALSE] prev_ci <- prev_row$Mean_CI[1] if (!is.na(prev_ci) && !is.na(current_stats$Mean_CI[i])) { current_stats$Weekly_ci_change[i] <- round(current_stats$Mean_CI[i] - prev_ci, 2) } prev_cv <- prev_row$CV[1] if (!is.na(prev_cv) && !is.na(current_stats$CV[i])) { current_stats$CV_Trend_Short_Term[i] <- calculate_cv_trend(current_stats$CV[i], prev_cv) cv_trends_calculated <- cv_trends_calculated + 1 } if (length(historical_4weeks) > 0) { ci_values_4week <- numeric() for (hist_idx in rev(seq_along(historical_4weeks))) { hist_data <- historical_4weeks[[hist_idx]]$stats hist_field <- which(hist_data$Field_id == field_id) if (length(hist_field) > 0 && !is.na(hist_data$Mean_CI[hist_field[1]])) { ci_values_4week <- c(ci_values_4week, hist_data$Mean_CI[hist_field[1]]) } } ci_values_4week <- c(ci_values_4week, current_stats$Mean_CI[i]) if (length(ci_values_4week) >= 2) { current_stats$Four_week_trend[i] <- calculate_four_week_trend(ci_values_4week) four_week_trends_calculated <- four_week_trends_calculated + 1 } } if (length(historical_8weeks) > 0) { cv_values_8week <- numeric() for (hist_idx in rev(seq_along(historical_8weeks))) { hist_data <- historical_8weeks[[hist_idx]]$stats hist_field <- which(hist_data$Field_id == field_id) if (length(hist_field) > 0 && !is.na(hist_data$CV[hist_field[1]])) { cv_values_8week <- c(cv_values_8week, hist_data$CV[hist_field[1]]) } } cv_values_8week <- c(cv_values_8week, current_stats$CV[i]) if (length(cv_values_8week) >= 2) { slope <- calculate_cv_trend_long_term(cv_values_8week) current_stats$CV_Trend_Long_Term[i] <- slope cv_long_term_calculated <- cv_long_term_calculated + 1 } } if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) { prev_analysis_row <- prev_field_analysis %>% dplyr::filter(Field_id == field_id) if (nrow(prev_analysis_row) > 0) { prev_nmr_weeks_analysis <- prev_analysis_row$nmr_of_weeks_analysed[1] if (!is.na(prev_nmr_weeks_analysis)) { current_stats$nmr_of_weeks_analysed[i] <- prev_nmr_weeks_analysis + 1L } else { current_stats$nmr_of_weeks_analysed[i] <- 1L } } } } } message(paste(" ✓ Calculated CV_Trend_Short_Term:", cv_trends_calculated, "fields")) message(paste(" ✓ Calculated Four_week_trend:", four_week_trends_calculated, "fields")) message(paste(" ✓ Calculated CV_Trend_Long_Term:", cv_long_term_calculated, "fields")) return(current_stats) } # ============================================================================ # INTERNAL HELPER FUNCTIONS (from 80_kpi_utils.R) - DO NOT DELETE # ============================================================================ # Spatial autocorrelation thresholds for field pattern analysis MORAN_THRESHOLD_HIGH <- 0.95 # Very strong clustering (problematic patterns) MORAN_THRESHOLD_MODERATE <- 0.85 # Moderate clustering MORAN_THRESHOLD_LOW <- 0.7 # Normal field continuity #' Calculate coefficient of variation for uniformity assessment calculate_cv <- function(values) { values <- values[!is.na(values) & is.finite(values)] if (length(values) < 2) return(NA) cv <- sd(values) / mean(values) return(cv) } #' Calculate percentage of field with positive vs negative change calculate_change_percentages <- function(current_values, previous_values) { if (length(current_values) != length(previous_values)) { return(list(positive_pct = NA, negative_pct = NA, stable_pct = NA)) } change_values <- current_values - previous_values valid_changes <- change_values[!is.na(change_values) & is.finite(change_values)] if (length(valid_changes) < 2) { return(list(positive_pct = NA, negative_pct = NA, stable_pct = NA)) } positive_pct <- sum(valid_changes > 0) / length(valid_changes) * 100 negative_pct <- sum(valid_changes < 0) / length(valid_changes) * 100 stable_pct <- sum(valid_changes == 0) / length(valid_changes) * 100 return(list( positive_pct = positive_pct, negative_pct = negative_pct, stable_pct = stable_pct )) } #' Calculate spatial autocorrelation (Moran's I) for a field calculate_spatial_autocorrelation <- function(ci_raster, field_boundary) { tryCatch({ field_raster <- terra::crop(ci_raster, field_boundary) field_raster <- terra::mask(field_raster, field_boundary) raster_points <- terra::as.points(field_raster, na.rm = TRUE) if (length(raster_points) < 10) { return(list(morans_i = NA, p_value = NA, interpretation = "insufficient_data")) } points_sf <- sf::st_as_sf(raster_points) coords <- sf::st_coordinates(points_sf) k_neighbors <- min(8, max(4, floor(nrow(coords) / 10))) knn_nb <- spdep::knearneigh(coords, k = k_neighbors) knn_listw <- spdep::nb2listw(spdep::knn2nb(knn_nb), style = "W", zero.policy = TRUE) ci_values <- points_sf[[1]] moran_result <- spdep::moran.test(ci_values, knn_listw, zero.policy = TRUE) morans_i <- moran_result$estimate[1] p_value <- moran_result$p.value interpretation <- if (is.na(morans_i)) { "insufficient_data" } else if (p_value > 0.05) { "random" } else if (morans_i > MORAN_THRESHOLD_HIGH) { "very_strong_clustering" } else if (morans_i > MORAN_THRESHOLD_MODERATE) { "strong_clustering" } else if (morans_i > MORAN_THRESHOLD_LOW) { "normal_continuity" } else if (morans_i > 0.3) { "weak_clustering" } else if (morans_i < -0.3) { "dispersed" } else { "low_autocorrelation" } return(list(morans_i = morans_i, p_value = p_value, interpretation = interpretation)) }, error = function(e) { warning(paste("Error calculating spatial autocorrelation:", e$message)) return(list(morans_i = NA, p_value = NA, interpretation = "error")) }) } #' Extract CI band from multi-band raster extract_ci_values <- function(ci_raster, field_vect) { extracted <- terra::extract(ci_raster, field_vect, fun = NULL) if ("CI" %in% names(extracted)) { return(extracted[, "CI"]) } else if (ncol(extracted) > 1) { return(extracted[, ncol(extracted)]) } else { return(extracted[, 1]) } } #' Calculate current and previous week numbers using ISO 8601 calculate_week_numbers <- function(report_date = Sys.Date()) { current_week <- as.numeric(format(report_date, "%V")) current_year <- as.numeric(format(report_date, "%G")) previous_week <- current_week - 1 previous_year <- current_year if (previous_week < 1) { previous_week <- 52 previous_year <- current_year - 1 } return(list( current_week = current_week, previous_week = previous_week, year = current_year, previous_year = previous_year )) } #' Load field CI raster (handles single-file and per-field architectures) load_field_ci_raster <- function(ci_raster_or_obj, field_name, field_vect = NULL) { is_per_field <- !is.null(attr(ci_raster_or_obj, "is_per_field")) && attr(ci_raster_or_obj, "is_per_field") if (is_per_field) { per_field_dir <- attr(ci_raster_or_obj, "per_field_dir") week_file <- attr(ci_raster_or_obj, "week_file") field_mosaic_path <- file.path(per_field_dir, field_name, week_file) if (file.exists(field_mosaic_path)) { tryCatch({ field_mosaic <- terra::rast(field_mosaic_path) if (terra::nlyr(field_mosaic) >= 5) { return(field_mosaic[[5]]) } else { return(field_mosaic[[1]]) } }, error = function(e) { return(NULL) }) } else { return(NULL) } } else { if (!is.null(field_vect)) { return(terra::crop(ci_raster_or_obj, field_vect, mask = TRUE)) } else { return(ci_raster_or_obj) } } } #' Load weekly CI mosaic (single-file or per-field) load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) { week_file <- sprintf("week_%02d_%d.tif", week_num, year) week_path <- file.path(mosaic_dir, week_file) if (file.exists(week_path)) { tryCatch({ mosaic_raster <- terra::rast(week_path) ci_raster <- mosaic_raster[[5]] names(ci_raster) <- "CI" return(ci_raster) }, error = function(e) { return(NULL) }) } if (dir.exists(mosaic_dir)) { field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE) field_dirs <- field_dirs[field_dirs != ""] found_any <- FALSE for (field in field_dirs) { field_mosaic_path <- file.path(mosaic_dir, field, week_file) if (file.exists(field_mosaic_path)) { found_any <- TRUE break } } if (found_any) { dummy_raster <- terra::rast(nrow=1, ncol=1, vals=NA) attr(dummy_raster, "per_field_dir") <- mosaic_dir attr(dummy_raster, "week_file") <- week_file attr(dummy_raster, "is_per_field") <- TRUE return(dummy_raster) } } return(NULL) } #' Prepare predictions with consistent naming prepare_predictions <- function(predictions, newdata) { return(predictions %>% as.data.frame() %>% dplyr::rename(predicted_Tcha = ".") %>% dplyr::mutate( sub_field = newdata$sub_field, field = newdata$field, Age_days = newdata$DOY, total_CI = round(newdata$cumulative_CI, 0), predicted_Tcha = round(predicted_Tcha, 0), season = newdata$season ) %>% dplyr::select(field, sub_field, Age_days, predicted_Tcha, season) %>% dplyr::left_join(., newdata, by = c("field", "sub_field", "season")) ) }