diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index 823c708..9c5b2b4 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -12,6 +12,20 @@ # - Parallel processing (tile-aware, 1000+ fields supported) # - Comprehensive Excel + RDS + CSV exports (21 columns per spec) # - Test mode for development + +# CRITICAL INTEGRATIONS: +# +# 1. IMMINENT_PROB FROM HARVEST MODEL (MODEL_307) +# [ ] Load script 31 output: {project}_imminent_harvest_week{WW}.csv +# Columns: field, imminent_prob, detected_prob, week, year +# [ ] LEFT JOIN to field_analysis_df by (field, week, year) +# [ ] Replace hardcoded "placeholder data" in Status_trigger calculation +# [ ] Update column to show actual harvest probability (0-1 or 0-100%) +# +# 2. AGE FROM HARVEST.XLSX (SCRIPTS 22 & 31) +# [ ] Scripts 22 & 31 populate harvest.xlsx with planting_date per field +# [ ] Load harvest.xlsx instead of using UNIFORM_PLANTING_DATE +# [ ] Calculate Age_week = difftime(report_date, planting_date, units="weeks") # # COMMAND-LINE USAGE: # Option 1: Rscript 80_calculate_kpis.R 2026-01-14 angata @@ -27,131 +41,15 @@ # main() # ============================================================================ -# PENDING WORK - PHASE 4 (Next Sprint) +# NEXT INTEGRATIONS (See Linear issues for detailed requirements) # ============================================================================ -# -# CRITICAL INTEGRATIONS: -# -# 1. IMMINENT_PROB FROM HARVEST MODEL (MODEL_307) -# [ ] Load script 31 output: {project}_imminent_harvest_week{WW}.csv -# Columns: field, imminent_prob, detected_prob, week, year -# [ ] LEFT JOIN to field_analysis_df by (field, week, year) -# [ ] Replace hardcoded "placeholder data" in Status_trigger calculation -# [ ] Update column to show actual harvest probability (0-1 or 0-100%) -# -# 2. AGE FROM HARVEST.XLSX (SCRIPTS 22 & 31) -# [ ] Scripts 22 & 31 populate harvest.xlsx with planting_date per field -# [ ] Load harvest.xlsx instead of using UNIFORM_PLANTING_DATE -# [ ] Calculate Age_week = difftime(report_date, planting_date, units="weeks") -# [ ] Removes TEST MODE hardcoding and enables field-specific aging -# -# UTILITY FILE REFACTORING (Improve code maintainability): -# -# 3. CREATE r_app/80_weekly_stats_utils.R -# [ ] Extract functions from lines 250-795 (calculation layer): -# - calculate_field_statistics() -# - calculate_kpi_trends() -# - load_or_calculate_weekly_stats() -# - Helper: load_tiles_for_field(), get_tile_ids_for_field() -# - Helper: extract_field_statistics_from_ci() -# [ ] Clean separation: DATA CALCULATION ONLY (no Excel export) -# [ ] Reusable by run_full_pipeline.R and other scripts -# -# 4. CREATE r_app/80_report_building_utils.R -# [ ] Extract functions from lines 1350-2100+ (output/reporting layer): -# - generate_field_analysis_summary() -# - export_field_analysis_excel() -# - calculate_and_export_farm_kpis() -# - Helper: categorize_*, get_*, round_* functions -# - Helper: get_phase_by_age(), get_status_trigger() -# [ ] Clean separation: OUTPUT/FORMATTING ONLY (consumes calculated stats) -# [ ] Reusable for alternative export formats (PDF, HTML, dashboard) -# -# TESTING PLAN: -# [ ] Verify 8-week historical data loads (currently TEST_MODE = 2 weeks only) -# [ ] Confirm Four_week_trend calculates from 1-4 weeks (graceful degradation) -# [ ] Confirm CV_Trend_Long_Term uses full 8-week regression (when available) -# [ ] Load script 31 output and validate imminent_prob population -# [ ] maybe even look into aut calculating the mosaic if mosaic is missing in last 8 weeks +# 1. Load imminent_prob from script 31 (harvest_imminent_weekly.csv) +# 2. Load planting_date from harvest.xlsx for field-specific age calculation # ============================================================================ # ============================================================================ -# EXCEL OUTPUT SPECIFICATION (21 COLUMNS) +# CONFIGURATION # ============================================================================ -# This script exports 21 columns per the field analysis specification: -# -# COMPLETED/IN PROGRESS: -# 1. Field_id ✓ - Unique field identifier -# 2. Farm_Section - Management zone (to be filled by user) -# 3. Field_name ✓ - Client-facing field name (from GeoJSON) -# 4. Acreage ✓ - Field size in acres -# 5. Mean_CI ✓ - Average Chlorophyll Index -# 6. Weekly_ci_change ✓ - Week-over-week CI change -# 7. Four_week_trend - [FUTURE] Trend over 4 weeks (requires historical mosaics) -# 8. Last_harvest_or_planting_date - [DUMMY for now] Will be from harvest Excel + LSTM (script 31) -# 9. Age_week ✓ - Weeks since planting -# 10. Phase (age based) ✓ - Growth phase (Germination, Tillering, Grand Growth, Maturation) -# 11. nmr_weeks_in_this_phase - [TODO] Weeks spent in current phase (track phase transitions) -# 12. Germination_progress - [TODO] % pixels with CI >= threshold (default 2, for age < 4 months) -# 13. Imminent_prob - [DUMMY for now] Harvest probability (will be from script 31 output) -# 14. Status_trigger ✓ - Alerts (harvest_ready, stress, etc.) -# 15. CI_range (min-max) - [TODO] Min and max CI values in field -# 16. CI_Percentiles ✓ - 10th-90th percentile of CI (p10-p90 format) -# 17. CV ✓ - Coefficient of variation (field uniformity) -# 18. CV_Trend_Short_Term - [TODO] 2-week CV trend (current week CV - last week CV) -# 19. CV_Trend_Long_Term - [FUTURE] 8-week CV slope (requires linear regression, historical mosaics) -# 20. Cloud_pct_clear ✓ - % field visible (pixel coverage) -# 21. Cloud_category ✓ - Cloud classification (Clear view / Partial coverage / No image available) -# -# IMPLEMENTATION PLAN (ordered by difficulty): -# ============================================================================ -# PHASE 1 - EASY (Current data only): -# [✓] Remove Mean_CI_prev column -# [✓] Add Field_name column (from field_boundaries_sf$field) -# [✓] Add Farm_Section column (empty, user will fill) -# [✓] Add Last_harvest_or_planting_date (use UNIFORM_PLANTING_DATE as dummy) -# [✓] Add CI_range (min/max from pixel extraction) -# [✓] Add Cloud_pct_clear (% from pixel coverage) -# [✓] Column order: Reorder to match spec (1-21) -# -# -# PHASE 2 - MEDIUM (Requires computation): -# [ ] Add nmr_weeks_in_this_phase (track phase transitions with previous week CSV) -# [ ] Add Germination_progress (% pixels CI >= GERMINATION_CI_THRESHOLD, configurable) -# [ ] Add Imminent_prob column (dummy NA, will merge from script 31 harvest_imminent_weekly.csv) -# [ ] Add CV_Trend_Short_Term (requires loading last week's CV values) -# -# PHASE 3 - COMPLEX (Requires historical data): -# [ ] Add Four_week_trend (CI value difference week vs 4 weeks ago, requires loading prev mosaics) -# [ ] Add CV_Trend_Long_Term (8-week slope: linear regression on 8 weeks of CV, suggests lm()) -# [ ] Load previous week's CSV to cross-check phase transitions and trends -# -# NOTES: -# - Script 31 (harvest_imminent_weekly.py) outputs: field, imminent_prob, detected_prob, week, year -# - Will need to LEFT JOIN on (field, week, year) to populate Imminent_prob -# - Phase transition logic: Compare this week's Phase vs last week's Phase from CSV -# - For 8-week CV slope: Linear regression slope = (CV_week8 - CV_week1) / 7 weeks (approximately) -# or use lm(CV ~ week) on 8-week sequence for proper slope calculation -# - Germination_progress only calculated if Age_week < 17 (before end of Tillering phase) -# - Cloud_pct_clear calculated as: (pixel_count / expected_pixels) * 100 - -# ============================================================================ -# *** CONFIGURATION SECTION - MANUALLY DEFINED THRESHOLDS *** -# ============================================================================ - -# TEST MODE (for development with limited historical data) -TEST_MODE <- FALSE -TEST_MODE_NUM_WEEKS <- 2 - -# GERMINATION PROGRESS THRESHOLD -# Percentage of pixels that must reach this CI value to count as "germinated" -GERMINATION_CI_THRESHOLD <- 2.0 # Pixels with CI >= 2 count as germinated - -# FOR TESTING: Set these fields as "recently planted" to demonstrate germination progress -YOUNG_FIELDS_FOR_TESTING <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10") # First 10 field IDs -YOUNG_FIELD_PLANTING_DATE <- as.Date("2026-01-01") # Recently planted for demo - -# FOUR-WEEK TREND THRESHOLDS FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5 FOUR_WEEK_TREND_GROWTH_MIN <- 0.1 FOUR_WEEK_TREND_GROWTH_MAX <- 0.5 @@ -184,6 +82,13 @@ CLOUD_INTERVALS <- c(0, 50, 60, 70, 80, 90, 100) CI_PERCENTILE_LOW <- 0.10 CI_PERCENTILE_HIGH <- 0.90 +# GERMINATION THRESHOLD (for germination_progress calculation) +GERMINATION_CI_THRESHOLD <- 2.0 + +# PLANTING DATE & AGE CONFIGURATION +USE_UNIFORM_AGE <- TRUE +UNIFORM_PLANTING_DATE <- as.Date("2026-01-01") + # HISTORICAL DATA LOOKBACK WEEKS_FOR_FOUR_WEEK_TREND <- 4 WEEKS_FOR_CV_TREND_SHORT <- 2 @@ -216,6 +121,22 @@ suppressPackageStartupMessages({ }) }) +# ============================================================================ +# LOAD UTILITY FUNCTIONS FROM SEPARATED MODULES +# ============================================================================ + +tryCatch({ + source(here("r_app", "80_weekly_stats_utils.R")) +}, error = function(e) { + stop("Error loading 80_weekly_stats_utils.R: ", e$message) +}) + +tryCatch({ + source(here("r_app", "80_report_building_utils.R")) +}, error = function(e) { + stop("Error loading 80_report_building_utils.R: ", e$message) +}) + # ============================================================================ # PHASE AND STATUS TRIGGER DEFINITIONS # ============================================================================ @@ -252,1656 +173,9 @@ STATUS_TRIGGERS <- data.frame( ) # ============================================================================ -# TILE-AWARE HELPER FUNCTIONS +# MAIN # ============================================================================ -get_tile_ids_for_field <- function(field_geom, tile_grid, field_id = NULL) { - if (inherits(field_geom, "sf")) { - field_bbox <- sf::st_bbox(field_geom) - field_xmin <- field_bbox["xmin"] - field_xmax <- field_bbox["xmax"] - field_ymin <- field_bbox["ymin"] - field_ymax <- field_bbox["ymax"] - } else if (inherits(field_geom, "SpatVector")) { - field_bbox <- terra::ext(field_geom) - field_xmin <- field_bbox$xmin - field_xmax <- field_bbox$xmax - field_ymin <- field_bbox$ymin - field_ymax <- field_bbox$ymax - } else { - stop("field_geom must be sf or terra::vect object") - } - - # DEBUG: Print bbox info for first field - if (!is.null(field_id) && field_id == "1391") { - message(paste("[DEBUG get_tile_ids] Field bbox - xmin:", field_xmin, "xmax:", field_xmax, - "ymin:", field_ymin, "ymax:", field_ymax)) - message(paste("[DEBUG get_tile_ids] tile_grid sample: id=", tile_grid$id[1], - "xmin=", tile_grid$xmin[1], "xmax=", tile_grid$xmax[1], - "ymin=", tile_grid$ymin[1], "ymax=", tile_grid$ymax[1])) - message(paste("[DEBUG get_tile_ids] tile_grid CRS:", sf::st_crs(tile_grid))) - message(paste("[DEBUG get_tile_ids] field CRS:", sf::st_crs(field_geom))) - } - - intersecting_tiles <- tile_grid$id[ - !(tile_grid$xmax < field_xmin | - tile_grid$xmin > field_xmax | - tile_grid$ymax < field_ymin | - tile_grid$ymin > field_ymax) - ] - - return(as.numeric(intersecting_tiles)) -} - -load_tiles_for_field <- function(field_geom, tile_ids, week_num, year, mosaic_dir) { - if (length(tile_ids) == 0) { - return(NULL) - } - - tiles_list <- list() - for (tile_id in sort(tile_ids)) { - tile_filename <- sprintf("week_%02d_%d_%02d.tif", week_num, year, tile_id) - tile_path <- file.path(mosaic_dir, tile_filename) - - if (file.exists(tile_path)) { - tryCatch({ - tile_rast <- terra::rast(tile_path) - ci_band <- terra::subset(tile_rast, 5) - tiles_list[[length(tiles_list) + 1]] <- ci_band - }, error = function(e) { - message(paste(" Warning: Could not load tile", tile_id, ":", e$message)) - }) - } - } - - if (length(tiles_list) == 0) { - return(NULL) - } - - if (length(tiles_list) == 1) { - return(tiles_list[[1]]) - } else { - tryCatch({ - rsrc <- terra::sprc(tiles_list) - merged <- terra::mosaic(rsrc, fun = "max") - return(merged) - }, error = function(e) { - message(paste(" Warning: Could not merge tiles:", e$message)) - return(tiles_list[[1]]) - }) - } -} - -build_tile_grid <- function(mosaic_dir, week_num, year) { - # Handle grid-size subdirectories (e.g., weekly_tile_max/5x5/) - # First check if mosaic_dir contains grid-size subdirectories - detected_grid_size <- NA - if (dir.exists(mosaic_dir)) { - subfolders <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE) - grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) - - if (length(grid_patterns) > 0) { - # Use the first grid-size subdirectory found - detected_grid_size <- grid_patterns[1] - mosaic_dir <- file.path(mosaic_dir, detected_grid_size) - message(paste(" Using grid-size subdirectory:", detected_grid_size)) - } - } - - tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) - tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE) - - if (length(tile_files) == 0) { - stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir)) - } - - tile_grid <- data.frame( - id = integer(), - xmin = numeric(), - xmax = numeric(), - ymin = numeric(), - ymax = numeric(), - stringsAsFactors = FALSE - ) - - for (tile_file in tile_files) { - tryCatch({ - matches <- regmatches(basename(tile_file), regexpr("_([0-9]{2})\\.tif$", basename(tile_file))) - if (length(matches) > 0) { - tile_id <- as.integer(sub("_|\\.tif", "", matches[1])) - tile_rast <- terra::rast(tile_file) - tile_ext <- terra::ext(tile_rast) - tile_grid <- rbind(tile_grid, data.frame( - id = tile_id, - xmin = tile_ext$xmin, - xmax = tile_ext$xmax, - ymin = tile_ext$ymin, - ymax = tile_ext$ymax, - stringsAsFactors = FALSE - )) - } - }, error = function(e) { - message(paste(" Warning: Could not process tile", basename(tile_file), ":", e$message)) - }) - } - - if (nrow(tile_grid) == 0) { - stop("Could not extract extents from any tile files") - } - - # RETURN BOTH the grid AND the corrected mosaic directory path - return(list( - tile_grid = tile_grid, - mosaic_dir = mosaic_dir, - grid_size = detected_grid_size - )) -} - -# ============================================================================ -# SC-64 ENHANCEMENT FUNCTIONS -# ============================================================================ - -categorize_four_week_trend <- function(ci_values_list) { - if (is.null(ci_values_list) || length(ci_values_list) < 2) { - return(NA_character_) - } - - ci_values_list <- ci_values_list[!is.na(ci_values_list)] - if (length(ci_values_list) < 2) { - return(NA_character_) - } - - weekly_changes <- diff(ci_values_list) - avg_weekly_change <- mean(weekly_changes, na.rm = TRUE) - - if (avg_weekly_change >= FOUR_WEEK_TREND_STRONG_GROWTH_MIN) { - return("strong growth") - } else if (avg_weekly_change >= FOUR_WEEK_TREND_GROWTH_MIN && - avg_weekly_change < FOUR_WEEK_TREND_GROWTH_MAX) { - return("growth") - } else if (abs(avg_weekly_change) <= FOUR_WEEK_TREND_NO_GROWTH_RANGE) { - return("no growth") - } else if (avg_weekly_change <= FOUR_WEEK_TREND_DECLINE_MIN && - avg_weekly_change > FOUR_WEEK_TREND_STRONG_DECLINE_MAX) { - return("decline") - } else if (avg_weekly_change < FOUR_WEEK_TREND_STRONG_DECLINE_MAX) { - return("strong decline") - } else { - return("no growth") - } -} - -round_cloud_to_intervals <- function(cloud_pct_clear) { - if (is.na(cloud_pct_clear)) { - return(NA_character_) - } - - if (cloud_pct_clear < 50) return("<50%") - if (cloud_pct_clear < 60) return("50-60%") - if (cloud_pct_clear < 70) return("60-70%") - if (cloud_pct_clear < 80) return("70-80%") - if (cloud_pct_clear < 90) return("80-90%") - return(">90%") -} - -get_ci_percentiles <- function(ci_values) { - if (is.null(ci_values) || length(ci_values) == 0) { - return(NA_character_) - } - - ci_values <- ci_values[!is.na(ci_values)] - if (length(ci_values) == 0) { - return(NA_character_) - } - - p10 <- quantile(ci_values, CI_PERCENTILE_LOW, na.rm = TRUE) - p90 <- quantile(ci_values, CI_PERCENTILE_HIGH, na.rm = TRUE) - - return(sprintf("%.1f-%.1f", p10, p90)) -} - -calculate_cv_trend <- function(cv_current, cv_previous) { - if (is.na(cv_current) || is.na(cv_previous)) { - return(NA_real_) - } - return(round(cv_current - cv_previous, 4)) -} - -calculate_four_week_trend <- function(mean_ci_values) { - #' Calculate four-week CI trend from available weeks - #' - #' Uses whatever weeks are available (1-4 weeks) to estimate trend - #' Returns difference between current (most recent) and oldest available week - #' - #' @param mean_ci_values vector of Mean_CI values in chronological order (oldest to newest) - #' @return numeric: CI difference (current - oldest), rounded to 2 decimals - - if (is.null(mean_ci_values) || length(mean_ci_values) == 0) { - return(NA_real_) - } - - # Remove NAs - ci_clean <- mean_ci_values[!is.na(mean_ci_values)] - - if (length(ci_clean) < 2) { - # Need at least 2 weeks to calculate trend - return(NA_real_) - } - - # Calculate difference: current - oldest - trend <- ci_clean[length(ci_clean)] - ci_clean[1] - return(round(trend, 2)) -} - -categorize_cv_slope <- function(slope) { - #' Categorize CV slope (8-week regression) into field uniformity interpretation - #' - #' Slope interpretation: - #' Negative slope = CV decreasing = field becoming MORE uniform = GOOD - #' Positive slope = CV increasing = field becoming MORE patchy = BAD - #' Near zero = Homogenous growth (all crops progressing equally) - #' - #' Categories: - #' - "Excellent uniformity": Slope <= -0.02 (CV decreasing, field synchronizing) - #' - "Homogenous growth": -0.02 < slope < 0.005 (stable, uniform growth) - #' - "Minor patchiness": 0.005 <= slope <= 0.02 (CV slowly increasing) - #' - "Severe fragmentation": slope > 0.02 (rapid CV increase, parts diverging) - #' - #' @param slope numeric: CV trend slope per week - #' @return character: Category string - - if (is.na(slope)) { - return(NA_character_) - } - - if (slope <= CV_SLOPE_IMPROVEMENT_MIN) { - return("Excellent uniformity") - } else if (slope < CV_SLOPE_HOMOGENOUS_MIN) { - return("Homogenous growth") - } else if (slope <= CV_SLOPE_HOMOGENOUS_MAX) { - return("Homogenous growth") - } else if (slope <= CV_SLOPE_PATCHINESS_MAX) { - return("Minor patchiness") - } else { - return("Severe fragmentation") - } -} - -calculate_cv_trend_long_term <- function(cv_values) { - #' Calculate 8-week CV trend via linear regression slope - #' - #' Fits linear regression to CV over available weeks (1-8) - #' Returns slope = rate of change per week - #' - #' @param cv_values vector of CV values in chronological order (oldest to newest) - #' @return numeric: Regression slope (CV change per week), rounded to 4 decimals - - if (is.null(cv_values) || length(cv_values) == 0) { - return(NA_real_) - } - - # Remove NAs - cv_clean <- cv_values[!is.na(cv_values)] - - if (length(cv_clean) < 2) { - # Need at least 2 weeks to fit a line - return(NA_real_) - } - - # Create week sequence matching data length - weeks <- seq_along(cv_clean) - - # Fit linear model - tryCatch({ - lm_fit <- lm(cv_clean ~ weeks) - slope <- coef(lm_fit)["weeks"] - return(round(as.numeric(slope), 4)) - }, error = function(e) { - return(NA_real_) - }) -} - -# ============================================================================ -# HELPER FUNCTIONS -# ============================================================================ - -get_phase_by_age <- function(age_weeks) { - if (is.na(age_weeks)) return(NA_character_) - for (i in seq_len(nrow(PHASE_DEFINITIONS))) { - if (age_weeks >= PHASE_DEFINITIONS$age_start[i] && - age_weeks <= PHASE_DEFINITIONS$age_end[i]) { - return(PHASE_DEFINITIONS$phase[i]) - } - } - return("Unknown") -} - -get_status_trigger <- function(ci_values, ci_change, age_weeks) { - if (is.na(age_weeks) || length(ci_values) == 0) return(NA_character_) - - ci_values <- ci_values[!is.na(ci_values)] - if (length(ci_values) == 0) return(NA_character_) - - pct_above_2 <- sum(ci_values > 2) / length(ci_values) * 100 - pct_at_or_above_2 <- sum(ci_values >= 2) / length(ci_values) * 100 - ci_cv <- if (mean(ci_values, na.rm = TRUE) > 0) sd(ci_values) / mean(ci_values, na.rm = TRUE) else 0 - mean_ci <- mean(ci_values, na.rm = TRUE) - - if (age_weeks >= 0 && age_weeks <= 6) { - if (pct_at_or_above_2 >= 70) { - return("germination_complete") - } else if (pct_above_2 > 10) { - return("germination_started") - } - } - - if (age_weeks >= 45) { - return("harvest_ready") - } - - if (age_weeks > 6 && !is.na(ci_change) && ci_change < -1.5 && ci_cv < 0.25) { - return("stress_detected_whole_field") - } - - if (age_weeks > 6 && !is.na(ci_change) && ci_change > 1.5) { - return("strong_recovery") - } - - if (age_weeks >= 4 && age_weeks < 39 && !is.na(ci_change) && ci_change > 0.2) { - return("growth_on_track") - } - - if (age_weeks >= 39 && age_weeks < 45 && mean_ci > 3.5) { - return("maturation_progressing") - } - - return(NA_character_) -} - -load_historical_field_data <- function(project_dir, current_week, reports_dir, num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL) { - historical_data <- list() - loaded_weeks <- c() - missing_weeks <- c() - - # First pass: try to load existing weeks - for (lookback in 0:(num_weeks - 1)) { - target_week <- current_week - lookback - if (target_week < 1) target_week <- target_week + 52 - - csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", target_week), ".csv") - csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename) - - if (file.exists(csv_path)) { - tryCatch({ - data <- read_csv(csv_path, show_col_types = FALSE) - historical_data[[lookback + 1]] <- list( - week = target_week, - data = data - ) - loaded_weeks <- c(loaded_weeks, target_week) - }, error = function(e) { - message(paste(" Warning: Could not load week", target_week, ":", e$message)) - missing_weeks <<- c(missing_weeks, target_week) - }) - } else { - missing_weeks <- c(missing_weeks, target_week) - } - } - - # If weeks are missing and auto_generate=TRUE, calculate stats from ALL available mosaics - if (length(missing_weeks) > 0 && auto_generate) { - message(paste("⚠ Missing weeks:", paste(missing_weeks, collapse = ", "))) - message("Scanning for ALL available weekly mosaics and calculating stats...\n") - - # Use field_boundaries_sf passed in (loaded in main) - if (is.null(field_boundaries_sf)) { - message(" Error: field_boundaries_sf not provided - cannot auto-generate") - return(historical_data) - } - - if (!exists("weekly_tile_max")) { - message(" ✗ weekly_tile_max path not defined") - return(historical_data) - } - - # Find the mosaic directory (with or without 5x5 subdirectory) - check_paths <- c(file.path(weekly_tile_max, "5x5"), weekly_tile_max) - mosaic_scan_dir <- NA - - for (check_path in check_paths) { - if (dir.exists(check_path)) { - tif_files <- list.files(check_path, pattern = "week_.*\\.tif$", full.names = TRUE) - if (length(tif_files) > 0) { - mosaic_scan_dir <- check_path - break - } - } - } - - if (is.na(mosaic_scan_dir)) { - message(" ✗ No mosaic files found in weekly_tile_max") - return(historical_data) - } - - # Calculate actual date range for last 8 weeks - # Don't guess weeks - derive them from actual dates - weeks_to_load <- 8 - today <- Sys.Date() - target_dates <- today - (0:(weeks_to_load - 1)) * 7 - - # For each date, calculate what week/year it falls in - expected_weeks <- data.frame( - date = target_dates, - week = as.numeric(format(target_dates, "%V")), - year = as.numeric(format(target_dates, "%Y")), - stringsAsFactors = FALSE - ) - expected_weeks <- unique(expected_weeks) - - message(paste(" Expected weeks (last 8 from", format(today, "%Y-%m-%d"), "):")) - for (i in seq_len(nrow(expected_weeks))) { - message(paste(" Week", sprintf("%02d", expected_weeks$week[i]), expected_weeks$year[i])) - } - message("") - - # Parse all week_YY_YYYY_NN.tif files to find unique (week, year) combinations - tif_files <- list.files(mosaic_scan_dir, pattern = "week_([0-9]{2})_([0-9]{4})_[0-9]{2}\\.tif$", - full.names = FALSE) - - # Extract week and year from filenames - available_weeks <- data.frame() - for (filename in tif_files) { - # Parse: week_02_2026_03.tif - matches <- regmatches(filename, gregexpr("week_([0-9]{2})_([0-9]{4})", filename))[[1]] - if (length(matches) > 0) { - week_year <- strsplit(matches[1], "_")[[1]] - if (length(week_year) == 3) { - week_num <- as.numeric(week_year[2]) - year_num <- as.numeric(week_year[3]) - - # Only keep weeks that are in expected_weeks - if (week_num %in% expected_weeks$week && year_num %in% expected_weeks$year) { - available_weeks <- rbind(available_weeks, - data.frame(week = week_num, year = year_num)) - } - } - } - } - - # Remove duplicates and sort by date (descending - most recent first) - available_weeks <- unique(available_weeks) - # Merge with dates to sort properly - available_weeks <- merge(available_weeks, expected_weeks[, c("week", "year", "date")], by = c("week", "year")) - available_weeks <- available_weeks[order(available_weeks$date, decreasing = TRUE), ] - - if (nrow(available_weeks) == 0) { - message(" ✗ No matching mosaic files found") - message(paste(" Scanned directory:", mosaic_scan_dir)) - return(historical_data) - } - - message(paste(" Found", nrow(available_weeks), "week(s) with available mosaics:")) - - # Calculate stats for each available week - for (i in seq_len(nrow(available_weeks))) { - week_to_calc <- available_weeks$week[i] - year_to_calc <- available_weeks$year[i] - date_to_calc <- available_weeks$date[i] - - # Find all tiles for this week/year combination - tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_to_calc, year_to_calc) - tile_files <- list.files(mosaic_scan_dir, pattern = tile_pattern, full.names = TRUE) - - if (length(tile_files) == 0) { - message(paste(" ✗ Week", sprintf("%02d", week_to_calc), year_to_calc, "- no tiles found")) - next - } - - message(paste(" ✓ Week", sprintf("%02d", week_to_calc), year_to_calc, "-", length(tile_files), "mosaics")) - - tryCatch({ - # Calculate stats for this week/year - week_stats <- load_or_calculate_weekly_stats( - week_num = week_to_calc, - year = year_to_calc, - project_dir = project_dir, - field_boundaries_sf = field_boundaries_sf, - mosaic_dir = mosaic_scan_dir, - reports_dir = reports_dir, - report_date = date_to_calc # Use actual date for this week - ) - - if (!is.null(week_stats) && nrow(week_stats) > 0) { - message(paste(" ✓ Calculated stats for", nrow(week_stats), "fields")) - - # Add to historical data (use unique key: week_year combo) - historical_data[[length(historical_data) + 1]] <- list( - week = week_to_calc, - year = year_to_calc, - data = week_stats - ) - loaded_weeks <- c(loaded_weeks, paste0(week_to_calc, "_", year_to_calc)) - } - }, error = function(e) { - message(paste(" ✗ Error:", e$message)) - }) - } - } - - if (length(historical_data) == 0) { - message(paste("Error: No historical field data found and could not auto-generate weeks")) - return(NULL) - } - - message(paste("✓ Loaded", length(historical_data), "weeks of historical data:", - paste(loaded_weeks, collapse = ", "))) - - return(historical_data) -} - -USE_UNIFORM_AGE <- TRUE -UNIFORM_PLANTING_DATE <- as.Date("2026-01-01") - -extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) { - if (USE_UNIFORM_AGE) { - message(paste("Using uniform planting date for all fields:", UNIFORM_PLANTING_DATE)) - # Return a data frame with all field IDs mapped to uniform planting date - if (!is.null(field_boundaries_sf)) { - return(data.frame( - field_id = field_boundaries_sf$field, - date = rep(UNIFORM_PLANTING_DATE, nrow(field_boundaries_sf)), - stringsAsFactors = FALSE - )) - } else { - # Fallback if field_boundaries_sf not provided - return(NULL) - } - } - - if (is.null(harvesting_data) || nrow(harvesting_data) == 0) { - message("Warning: No harvesting data available.") - return(NULL) - } - - tryCatch({ - planting_dates <- harvesting_data %>% - arrange(field, desc(season_start)) %>% - distinct(field, .keep_all = TRUE) %>% - select(field, season_start) %>% - rename(field_id = field, planting_date = season_start) %>% - filter(!is.na(planting_date)) %>% - as.data.frame() - - message(paste("Extracted planting dates for", nrow(planting_dates), "fields")) - return(planting_dates) - }, error = function(e) { - message(paste("Error extracting planting dates:", e$message)) - return(NULL) - }) -} - -# ============================================================================ -# MODULAR STATISTICS CALCULATION (Reusable for any week) -# ============================================================================ - -calculate_field_statistics <- function(field_boundaries_sf, week_num, year, - mosaic_dir, report_date = Sys.Date()) { - - message(paste("Calculating statistics for all fields - Week", week_num, year)) - - # Debug: Check if constants are available - message(paste(" DEBUG: YOUNG_FIELDS_FOR_TESTING =", paste(YOUNG_FIELDS_FOR_TESTING, collapse=", "))) - message(paste(" DEBUG: YOUNG_FIELD_PLANTING_DATE =", YOUNG_FIELD_PLANTING_DATE)) - - # Build tile file list - tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) - tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE) - - if (length(tile_files) == 0) { - stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir)) - } - - message(paste(" Found", length(tile_files), "tiles for week", week_num)) - - results_list <- list() - fields_processed <- 0 - - # SCRIPT 20 APPROACH: Loop through tiles, extract all fields from each tile - for (tile_idx in seq_along(tile_files)) { - tile_file <- tile_files[tile_idx] - - tryCatch({ - # Load tile - current_rast <- terra::rast(tile_file) - ci_band <- current_rast[["CI"]] - - if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) { - message(paste(" [SKIP] Tile", basename(tile_file), "- CI band not found")) - return(NULL) - } - - # Extract all fields from this tile in ONE call - # terra::extract returns a dataframe with columns: ID, CI - # where each row is one pixel, and ID indicates which polygon it came from - extracted <- terra::extract(ci_band, field_boundaries_sf, na.rm = FALSE) - - # Group by field ID and calculate statistics for each field - # extracted$ID contains the field polygon index (1 to nrow(field_boundaries_sf)) - unique_field_ids <- unique(extracted$ID[!is.na(extracted$ID)]) - - for (field_poly_idx in unique_field_ids) { - # Get all CI values for this field from this tile - field_id <- field_boundaries_sf$field[field_poly_idx] - ci_vals <- extracted$CI[extracted$ID == field_poly_idx] - ci_vals <- ci_vals[!is.na(ci_vals)] - - # Skip if no data for this field in this tile - if (length(ci_vals) == 0) { - next - } - - # Calculate statistics - mean_ci <- mean(ci_vals, na.rm = TRUE) - ci_std <- sd(ci_vals, na.rm = TRUE) - cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_ - range_min <- min(ci_vals, na.rm = TRUE) - range_max <- max(ci_vals, na.rm = TRUE) - range_str <- sprintf("%.1f-%.1f", range_min, range_max) - ci_percentiles_str <- get_ci_percentiles(ci_vals) - - # Cloud coverage: count total pixels vs non-NA pixels for this field - field_rows <- extracted[extracted$ID == field_poly_idx, ] - num_total <- nrow(field_rows) - num_data <- sum(!is.na(field_rows$CI)) - pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0 - cloud_cat <- if (num_data == 0) "No image available" - else if (pct_clear >= 99.5) "Clear view" - else "Partial coverage" - - # Age and Phase - age_weeks <- NA_real_ - if (USE_UNIFORM_AGE) { - # Check if this field is in the "young fields" list (for testing germination progress) - is_young_field <- field_id %in% YOUNG_FIELDS_FOR_TESTING - if (is_young_field) { - age_weeks <- as.numeric(difftime(report_date, YOUNG_FIELD_PLANTING_DATE, units = "weeks")) - # Debug for first 2 matches - if (field_id %in% c("1", "2")) { - message(paste(" DEBUG: Field", field_id, "is young field, age =", round(age_weeks, 2), "weeks")) - } - } else { - age_weeks <- as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks")) - } - } - phase <- get_phase_by_age(age_weeks) - - # Germination progress (only for young plants, age < 17 weeks) - germination_progress <- NA_character_ - if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks < 17) { - pct_ci_ge_threshold <- sum(ci_vals >= GERMINATION_CI_THRESHOLD) / length(ci_vals) * 100 - germination_progress <- sprintf("%.1f%%", pct_ci_ge_threshold) - } - - # Store result (check if field already exists from another tile) - existing_idx <- which(sapply(results_list, function(x) x$Field_id) == field_id) - - if (length(existing_idx) > 0) { - # Field already in results from previous tile - keep first occurrence or average - # For now, keep the first one (earlier tiles) - next - } - - # Store new field result - results_list[[length(results_list) + 1]] <- data.frame( - Field_id = field_id, - Mean_CI = round(mean_ci, 2), - CV = round(cv, 4), - CI_range = range_str, - CI_Percentiles = ci_percentiles_str, - Cloud_pct_clear = pct_clear, - Cloud_category = cloud_cat, - Age_week = round(age_weeks, 1), - Phase = phase, - Germination_progress = germination_progress, - stringsAsFactors = FALSE - ) - - fields_processed <- fields_processed + 1 - } - - message(paste(" Tile", tile_idx, "of", length(tile_files), "processed")) - - }, error = function(e) { - message(paste(" [ERROR] Tile", basename(tile_file), ":", e$message)) - }) - } - - if (length(results_list) == 0) { - stop(paste("No fields processed successfully for week", week_num)) - } - - stats_df <- dplyr::bind_rows(results_list) - message(paste(" ✓ Successfully calculated statistics for", nrow(stats_df), "fields")) - - return(stats_df) -} - -# ============================================================================ -# CALCULATE KPI TRENDS (Requires previous week RDS) -# ============================================================================ - -calculate_kpi_trends <- function(current_stats, prev_stats = NULL, - project_dir = NULL, reports_dir = NULL, - current_week = NULL, year = NULL) { - - message("Calculating KPI trends from current and previous week data") - - # Initialize new columns with defaults - current_stats$Weekly_ci_change <- NA_real_ - current_stats$CV_Trend_Short_Term <- NA_real_ - current_stats$Four_week_trend <- NA_real_ - current_stats$CV_Trend_Long_Term <- NA_real_ - current_stats$nmr_weeks_in_this_phase <- 1L - - # If no previous week data, return with defaults - if (is.null(prev_stats) || nrow(prev_stats) == 0) { - message(" No previous week data available - using defaults") - return(current_stats) - } - - message(paste(" prev_stats has", nrow(prev_stats), "rows and", ncol(prev_stats), "columns")) - - # Build lookup indices for previous week (by Field_id) - prev_lookup <- setNames(seq_len(nrow(prev_stats)), prev_stats$Field_id) - - # Try to load previous week's field_analysis to get nmr_weeks_in_this_phase history - prev_field_analysis <- NULL - - tryCatch({ - analysis_dir <- file.path(reports_dir, "kpis", "field_analysis") - if (dir.exists(analysis_dir)) { - analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE) - if (length(analysis_files) > 0) { - recent_file <- analysis_files[which.max(file.info(analysis_files)$mtime)] - prev_field_analysis <- readr::read_csv(recent_file, show_col_types = FALSE, - col_select = c(Field_id, nmr_weeks_in_this_phase, Phase)) - } - } - }, error = function(e) { - message(paste(" Note: Could not load previous field_analysis for nmr_weeks tracking:", e$message)) - }) - - if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) { - message(paste(" Using previous field_analysis to track nmr_weeks_in_this_phase")) - } - - # ============================================================ - # PHASE 3: Load 4-8 weeks of historical data for trend calculations - # ============================================================ - - historical_4weeks <- list() - historical_8weeks <- list() - - if (!is.null(project_dir) && !is.null(reports_dir) && !is.null(current_week)) { - message(" Loading historical field_stats for 4-week and 8-week trends...") - - # Load up to 4 weeks back for four_week_trend - for (lookback in 1:4) { - target_week <- current_week - lookback - if (target_week < 1) target_week <- target_week + 52 - - rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, target_week) - rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) - - if (file.exists(rds_path)) { - tryCatch({ - stats_data <- readRDS(rds_path) - historical_4weeks[[length(historical_4weeks) + 1]] <- list( - week = target_week, - stats = stats_data - ) - }, error = function(e) { - message(paste(" Warning: Could not load week", target_week, ":", e$message)) - }) - } - } - - # Load up to 8 weeks back for cv_trend_long_term - for (lookback in 1:8) { - target_week <- current_week - lookback - if (target_week < 1) target_week <- target_week + 52 - - rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, target_week) - rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) - - if (file.exists(rds_path)) { - tryCatch({ - stats_data <- readRDS(rds_path) - historical_8weeks[[length(historical_8weeks) + 1]] <- list( - week = target_week, - stats = stats_data - ) - }, error = function(e) { - # Silently skip - we'll work with whatever weeks exist - }) - } - } - - if (length(historical_4weeks) > 0) { - message(paste(" Loaded", length(historical_4weeks), "weeks for 4-week trend")) - } - if (length(historical_8weeks) > 0) { - message(paste(" Loaded", length(historical_8weeks), "weeks for 8-week CV trend")) - } - } - - # For each field in current week, lookup previous values and calculate trends - cv_trends_calculated <- 0 - four_week_trends_calculated <- 0 - cv_long_term_calculated <- 0 - - for (i in seq_len(nrow(current_stats))) { - field_id <- current_stats$Field_id[i] - prev_idx <- prev_lookup[field_id] - - if (!is.na(prev_idx) && prev_idx > 0 && prev_idx <= nrow(prev_stats)) { - prev_row <- prev_stats[prev_idx, , drop = FALSE] - - # WEEKLY CI CHANGE - prev_ci <- prev_row$Mean_CI[1] - if (!is.na(prev_ci) && !is.na(current_stats$Mean_CI[i])) { - current_stats$Weekly_ci_change[i] <- - round(current_stats$Mean_CI[i] - prev_ci, 2) - } - - # CV TREND SHORT TERM (2-week comparison) - prev_cv <- prev_row$CV[1] - if (!is.na(prev_cv) && !is.na(current_stats$CV[i])) { - current_stats$CV_Trend_Short_Term[i] <- - calculate_cv_trend(current_stats$CV[i], prev_cv) - cv_trends_calculated <- cv_trends_calculated + 1 - } - - # FOUR-WEEK TREND (if available) - if (length(historical_4weeks) > 0) { - ci_values_4week <- numeric() - - # Add oldest available weeks (reverse order to get oldest first) - for (hist_idx in rev(seq_along(historical_4weeks))) { - hist_data <- historical_4weeks[[hist_idx]]$stats - hist_field <- which(hist_data$Field_id == field_id) - if (length(hist_field) > 0 && !is.na(hist_data$Mean_CI[hist_field[1]])) { - ci_values_4week <- c(ci_values_4week, hist_data$Mean_CI[hist_field[1]]) - } - } - - # Add current week CI - ci_values_4week <- c(ci_values_4week, current_stats$Mean_CI[i]) - - if (length(ci_values_4week) >= 2) { - current_stats$Four_week_trend[i] <- calculate_four_week_trend(ci_values_4week) - four_week_trends_calculated <- four_week_trends_calculated + 1 - } - } - - # CV TREND LONG TERM (8-week slope) - if (length(historical_8weeks) > 0) { - cv_values_8week <- numeric() - - # Add oldest available weeks (reverse order to get oldest first) - for (hist_idx in rev(seq_along(historical_8weeks))) { - hist_data <- historical_8weeks[[hist_idx]]$stats - hist_field <- which(hist_data$Field_id == field_id) - if (length(hist_field) > 0 && !is.na(hist_data$CV[hist_field[1]])) { - cv_values_8week <- c(cv_values_8week, hist_data$CV[hist_field[1]]) - } - } - - # Add current week CV - cv_values_8week <- c(cv_values_8week, current_stats$CV[i]) - - if (length(cv_values_8week) >= 2) { - slope <- calculate_cv_trend_long_term(cv_values_8week) - current_stats$CV_Trend_Long_Term[i] <- slope - cv_long_term_calculated <- cv_long_term_calculated + 1 - } - } - - # WEEKS IN CURRENT PHASE (track phase transitions) - # Use previous field_analysis if available for proper counter progression - if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) { - # Look up this field in previous analysis - prev_analysis_row <- prev_field_analysis %>% - dplyr::filter(Field_id == field_id) - - if (nrow(prev_analysis_row) > 0) { - prev_phase_analysis <- prev_analysis_row$Phase[1] - prev_nmr_weeks_analysis <- prev_analysis_row$nmr_weeks_in_this_phase[1] - - if (!is.na(current_stats$Phase[i]) && !is.na(prev_phase_analysis)) { - if (current_stats$Phase[i] == prev_phase_analysis) { - # Same phase - increment the counter - current_stats$nmr_weeks_in_this_phase[i] <- - if (!is.na(prev_nmr_weeks_analysis)) prev_nmr_weeks_analysis + 1L else 2L - } else { - # Phase changed - reset to 1 - current_stats$nmr_weeks_in_this_phase[i] <- 1L - } - } - } else if (!is.na(current_stats$Phase[i]) && !is.na(prev_row$Phase[1])) { - # Field not in previous analysis, fall back to prev_stats phase comparison - if (current_stats$Phase[i] == prev_row$Phase[1]) { - current_stats$nmr_weeks_in_this_phase[i] <- 2L - } else { - current_stats$nmr_weeks_in_this_phase[i] <- 1L - } - } - } else { - # No previous field_analysis available - use phase from prev_stats - if (!is.na(current_stats$Phase[i]) && !is.na(prev_row$Phase[1])) { - if (current_stats$Phase[i] == prev_row$Phase[1]) { - # Same phase - increment counter (start with 2 since prev week was in this phase) - current_stats$nmr_weeks_in_this_phase[i] <- 2L - } else { - # Phase changed - reset to 1 - current_stats$nmr_weeks_in_this_phase[i] <- 1L - } - } - } - } - } - - message(paste(" ✓ Calculated CV_Trend_Short_Term:", cv_trends_calculated, "fields")) - message(paste(" ✓ Calculated Four_week_trend:", four_week_trends_calculated, "fields")) - message(paste(" ✓ Calculated CV_Trend_Long_Term:", cv_long_term_calculated, "fields")) - return(current_stats) -} - -# ============================================================================ -# LOAD OR CALCULATE WEEKLY STATISTICS (RDS Caching) -# ============================================================================ - -load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_boundaries_sf, - mosaic_dir, reports_dir, report_date = Sys.Date()) { - - # Build RDS file path - rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, week_num) - rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) - - # Try to load existing RDS (fast cache) - if (file.exists(rds_path)) { - message(paste("Loading cached statistics from:", basename(rds_path))) - return(readRDS(rds_path)) - } - - # RDS not found - calculate from tiles - message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num)) - stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year, - mosaic_dir, report_date) - - # Create output directory if needed - output_dir <- file.path(reports_dir, "kpis", "field_stats") - if (!dir.exists(output_dir)) { - dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) - } - - # Export RDS (for fast lookup next week) - saveRDS(stats_df, rds_path) - message(paste("Saved weekly statistics RDS:", basename(rds_path))) - - # Export CSV (for user review) - csv_filename <- sprintf("%s_field_stats_week%02d.csv", project_dir, week_num) - csv_path <- file.path(output_dir, csv_filename) - readr::write_csv(stats_df, csv_path) - message(paste("Saved weekly statistics CSV:", basename(csv_path))) - - return(stats_df) -} - -# ============================================================================ -# PARALLEL FIELD ANALYSIS FUNCTION -# ============================================================================ - -analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week_num, year, - mosaic_dir, historical_data = NULL, planting_dates = NULL, - report_date = Sys.Date(), harvest_imminence_data = NULL, - harvesting_data = NULL) { - - tryCatch({ - field_id <- field_boundaries_sf$field[field_idx] - farm_section <- if ("sub_area" %in% names(field_boundaries_sf)) { - field_boundaries_sf$sub_area[field_idx] - } else { - NA_character_ - } - field_name <- field_id - - # DEBUG: Print for first few fields - if (field_idx <= 3) { - message(paste("[DEBUG] Field", field_idx, ":", field_id)) - } - - field_sf <- field_boundaries_sf[field_idx, ] - if (sf::st_is_empty(field_sf) || any(is.na(sf::st_geometry(field_sf)))) { - return(data.frame( - Field_id = field_id, - error = "Empty or invalid geometry" - )) - } - - field_area_ha <- as.numeric(sf::st_area(field_sf)) / 10000 - field_area_acres <- field_area_ha / 0.404686 - - tile_ids <- get_tile_ids_for_field(field_sf, tile_grid, field_id = field_id) - - # DEBUG: Print tile IDs for first field - if (field_idx == 1) { - message(paste("[DEBUG] First field tile_ids:", paste(tile_ids, collapse=","))) - message(paste("[DEBUG] tile_grid nrows:", nrow(tile_grid), "ncols:", ncol(tile_grid))) - message(paste("[DEBUG] mosaic_dir:", mosaic_dir)) - } - - current_ci <- load_tiles_for_field(field_sf, tile_ids, week_num, year, mosaic_dir) - - if (is.null(current_ci)) { - return(data.frame( - Field_id = field_id, - error = "No tile data available" - )) - } - - # SINGLE EXTRACTION: Get all pixel values for this field, then calculate all stats from it - field_bbox <- sf::st_bbox(field_sf) - ci_cropped <- terra::crop(current_ci, terra::ext(field_bbox), snap = "out") - - # Extract all pixels in one call (no fun= parameter means we get raw pixel values) - all_extracted <- terra::extract(ci_cropped, field_sf)[, 2] - current_ci_vals <- all_extracted[!is.na(all_extracted)] - - if (length(current_ci_vals) == 0) { - return(data.frame( - Field_id = field_id, - error = "No CI values extracted from tiles" - )) - } - - # Calculate all statistics from the single extraction - mean_ci_current <- mean(current_ci_vals, na.rm = TRUE) - ci_std <- sd(current_ci_vals, na.rm = TRUE) - cv_current <- if (mean_ci_current > 0) ci_std / mean_ci_current else NA_real_ - range_min <- min(current_ci_vals, na.rm = TRUE) - range_max <- max(current_ci_vals, na.rm = TRUE) - range_str <- sprintf("%.1f-%.1f", range_min, range_max) - ci_percentiles_str <- get_ci_percentiles(current_ci_vals) - - # Cloud coverage from extraction metadata - num_total <- length(all_extracted) - num_data <- length(current_ci_vals) - pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0 - cloud_cat <- if (num_data == 0) "No image available" - else if (pct_clear >= 99.5) "Clear view" - else "Partial coverage" - cloud_pct <- 100 - pct_clear - cloud_interval <- round_cloud_to_intervals(pct_clear) - - # Weekly change (extract previous week same way - single extraction) - weekly_ci_change <- NA - previous_ci_vals <- NULL - - tryCatch({ - previous_ci <- load_tiles_for_field(field_sf, tile_ids, week_num - 1, year, mosaic_dir) - if (!is.null(previous_ci)) { - prev_bbox <- sf::st_bbox(field_sf) - prev_ci_cropped <- terra::crop(previous_ci, terra::ext(prev_bbox), snap = "out") - prev_extracted_all <- terra::extract(prev_ci_cropped, field_sf)[, 2] - previous_ci_vals <- prev_extracted_all[!is.na(prev_extracted_all)] - if (length(previous_ci_vals) > 0) { - mean_ci_previous <- mean(previous_ci_vals, na.rm = TRUE) - weekly_ci_change <- mean_ci_current - mean_ci_previous - } - } - }, error = function(e) { - # Silent fail - }) - - if (is.na(weekly_ci_change)) { - weekly_ci_change_str <- sprintf("%.1f ± %.2f", mean_ci_current, ci_std) - } else { - weekly_ci_change_str <- sprintf("%.1f ± %.2f (Δ%.1f)", mean_ci_current, ci_std, weekly_ci_change) - } - - age_weeks <- NA - if (!is.null(planting_dates) && nrow(planting_dates) > 0) { - field_planting <- planting_dates %>% - filter(field_id == !!field_id) %>% - pull(planting_date) - - if (length(field_planting) > 0) { - age_weeks <- as.numeric(difftime(report_date, field_planting[1], units = "weeks")) - } - } - - if (USE_UNIFORM_AGE) { - age_weeks <- as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks")) - } - - pct_ci_above_2 <- sum(current_ci_vals > 2) / length(current_ci_vals) * 100 - pct_ci_ge_2 <- sum(current_ci_vals >= 2) / length(current_ci_vals) * 100 - germination_progress_str <- NA_character_ - if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks <= 6) { - germination_progress_str <- sprintf("%.0f%%", pct_ci_ge_2) - } - - phase <- "Unknown" - imminent_prob_val <- NA - if (!is.null(harvest_imminence_data) && nrow(harvest_imminence_data) > 0) { - imminence_row <- harvest_imminence_data %>% - filter(field_id == !!field_id) - if (nrow(imminence_row) > 0) { - imminent_prob_val <- imminence_row$probability[1] - if (imminent_prob_val > 0.5) { - phase <- "Harvest Imminent (Model)" - } - } - } - - if (phase == "Unknown") { - phase <- get_phase_by_age(age_weeks) - } - - status_trigger <- get_status_trigger(current_ci_vals, weekly_ci_change, age_weeks) - - nmr_weeks_in_phase <- 1 - - four_week_trend <- NA_character_ - ci_values_for_trend <- c(mean_ci_current) - - if (!is.null(historical_data) && length(historical_data) > 0) { - for (hist in historical_data) { - hist_week <- hist$week - hist_data <- hist$data - - field_row <- hist_data %>% - filter(Field_id == !!field_id) - - if (nrow(field_row) > 0 && !is.na(field_row$Mean_CI[1])) { - ci_values_for_trend <- c(field_row$Mean_CI[1], ci_values_for_trend) - } - } - - if (length(ci_values_for_trend) >= 2) { - four_week_trend <- categorize_four_week_trend(ci_values_for_trend) - } - } - - cv_trend_short <- NA_real_ - cv_trend_long <- NA_real_ - - if (!is.null(historical_data) && length(historical_data) > 0) { - if (length(historical_data) >= 2) { - cv_2w <- historical_data[[2]]$data %>% - filter(Field_id == !!field_id) %>% - pull(CV) - if (length(cv_2w) > 0 && !is.na(cv_2w[1])) { - cv_trend_short <- calculate_cv_trend(cv_current, cv_2w[1]) - } - } - - if (length(historical_data) >= 8) { - cv_8w <- historical_data[[8]]$data %>% - filter(Field_id == !!field_id) %>% - pull(CV) - if (length(cv_8w) > 0 && !is.na(cv_8w[1])) { - cv_trend_long <- calculate_cv_trend(cv_current, cv_8w[1]) - } - } - } - - last_harvest_date <- NA_character_ - if (!is.null(harvesting_data) && nrow(harvesting_data) > 0) { - last_harvest_row <- harvesting_data %>% - filter(field == !!field_id) %>% - arrange(desc(season_start)) %>% - slice(1) - - if (nrow(last_harvest_row) > 0 && !is.na(last_harvest_row$season_start[1])) { - last_harvest_date <- as.character(last_harvest_row$season_start[1]) - } - } - - result <- data.frame( - Field_id = field_id, - Farm_Section = farm_section, - Field_name = field_name, - Hectare = round(field_area_ha, 2), - Acreage = round(field_area_acres, 2), - Mean_CI = round(mean_ci_current, 2), - Weekly_ci_change = if (is.na(weekly_ci_change)) NA_real_ else round(weekly_ci_change, 2), - Weekly_ci_change_str = weekly_ci_change_str, - Four_week_trend = four_week_trend, - Last_harvest_or_planting_date = last_harvest_date, - Age_week = if (is.na(age_weeks)) NA_integer_ else as.integer(round(age_weeks)), - `Phase (age based)` = phase, - nmr_weeks_in_this_phase = nmr_weeks_in_phase, - Germination_progress = germination_progress_str, - Imminent_prob = imminent_prob_val, - Status_trigger = status_trigger, - CI_range = range_str, - CI_Percentiles = ci_percentiles_str, - CV = round(cv_current, 4), - CV_Trend_Short_Term = cv_trend_short, - CV_Trend_Long_Term = cv_trend_long, - Cloud_pct_clear = pct_clear, - Cloud_pct_clear_interval = cloud_interval, - Cloud_pct = cloud_pct, - Cloud_category = cloud_cat, - stringsAsFactors = FALSE - ) - - return(result) - - }, error = function(e) { - message(paste("Error analyzing field", field_idx, ":", e$message)) - return(data.frame( - Field_id = NA_character_, - error = e$message - )) - }) -} - -# ============================================================================ -# SUMMARY GENERATION -# ============================================================================ - -generate_field_analysis_summary <- function(field_df) { - message("Generating summary statistics...") - - total_acreage <- sum(field_df$Acreage, na.rm = TRUE) - - germination_acreage <- sum(field_df$Acreage[field_df$Phase == "Germination"], na.rm = TRUE) - tillering_acreage <- sum(field_df$Acreage[field_df$Phase == "Tillering"], na.rm = TRUE) - grand_growth_acreage <- sum(field_df$Acreage[field_df$Phase == "Grand Growth"], na.rm = TRUE) - maturation_acreage <- sum(field_df$Acreage[field_df$Phase == "Maturation"], na.rm = TRUE) - unknown_phase_acreage <- sum(field_df$Acreage[field_df$Phase == "Unknown"], na.rm = TRUE) - - harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE) - stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE) - recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE) - growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE) - germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE) - germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE) - no_trigger_acreage <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE) - - clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE) - partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE) - no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE) - total_fields <- nrow(field_df) - - clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE) - partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE) - no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE) - - summary_df <- data.frame( - Category = c( - "--- PHASE DISTRIBUTION ---", - "Germination", - "Tillering", - "Grand Growth", - "Maturation", - "Unknown phase", - "--- STATUS TRIGGERS ---", - "Harvest ready", - "Stress detected", - "Strong recovery", - "Growth on track", - "Germination complete", - "Germination started", - "No trigger", - "--- CLOUD COVERAGE (FIELDS) ---", - "Clear view", - "Partial coverage", - "No image available", - "--- CLOUD COVERAGE (ACREAGE) ---", - "Clear view", - "Partial coverage", - "No image available", - "--- TOTAL ---", - "Total Acreage" - ), - Acreage = c( - NA, - round(germination_acreage, 2), - round(tillering_acreage, 2), - round(grand_growth_acreage, 2), - round(maturation_acreage, 2), - round(unknown_phase_acreage, 2), - NA, - round(harvest_ready_acreage, 2), - round(stress_acreage, 2), - round(recovery_acreage, 2), - round(growth_on_track_acreage, 2), - round(germination_complete_acreage, 2), - round(germination_started_acreage, 2), - round(no_trigger_acreage, 2), - NA, - paste0(clear_fields, " fields"), - paste0(partial_fields, " fields"), - paste0(no_image_fields, " fields"), - NA, - round(clear_acreage, 2), - round(partial_acreage, 2), - round(no_image_acreage, 2), - NA, - round(total_acreage, 2) - ), - stringsAsFactors = FALSE - ) - - return(summary_df) -} - -# ============================================================================ -# EXPORT FUNCTIONS -# ============================================================================ - -export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, reports_dir) { - message("Exporting per-field analysis to Excel, CSV, and RDS...") - - # Round all numeric columns to 2 decimals - field_df_rounded <- field_df %>% - mutate(across(where(is.numeric), ~ round(., 2))) - - summary_df_rounded <- summary_df %>% - mutate(across(where(is.numeric), ~ round(., 2))) - - output_subdir <- file.path(reports_dir, "kpis", "field_analysis") - if (!dir.exists(output_subdir)) { - dir.create(output_subdir, recursive = TRUE) - } - - excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".xlsx") - excel_path <- file.path(output_subdir, excel_filename) - excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE) - - sheets <- list( - "Field Data" = field_df_rounded, - "Summary" = summary_df_rounded - ) - - write_xlsx(sheets, excel_path) - message(paste("✓ Field analysis Excel exported to:", excel_path)) - - kpi_data <- list( - field_analysis = field_df_rounded, - field_analysis_summary = summary_df_rounded, - metadata = list( - current_week = current_week, - project = project_dir, - created_at = Sys.time() - ) - ) - - rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d", current_week), ".rds") - rds_path <- file.path(reports_dir, "kpis", rds_filename) - - saveRDS(kpi_data, rds_path) - message(paste("✓ Field analysis RDS exported to:", rds_path)) - - csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".csv") - csv_path <- file.path(output_subdir, csv_filename) - write_csv(field_df_rounded, csv_path) - message(paste("✓ Field analysis CSV exported to:", csv_path)) - - return(list(excel = excel_path, rds = rds_path, csv = csv_path)) -} - -# ============================================================================ -# TILE-BASED KPI EXTRACTION FUNCTION -# ============================================================================ - -calculate_field_kpis_from_tiles <- function(tile_dir, week_num, year, field_boundaries_sf, tile_grid) { - # Loop through tiles, extract KPI statistics per field per tile - # Follows the same pattern as extract_ci_from_tiles in CI extraction - - message("Calculating field-level KPI statistics from tiles...") - - # Get all tile files for this week - tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) - tile_files <- list.files(tile_dir, pattern = tile_pattern, full.names = TRUE) - - if (length(tile_files) == 0) { - message("No tiles found for week", week_num, year) - return(NULL) - } - - # Process tiles in parallel using furrr (same as CI extraction) - message(paste("Processing", length(tile_files), "tiles in parallel...")) - - field_kpi_list <- furrr::future_map( - tile_files, - ~ process_single_kpi_tile( - tile_file = ., - field_boundaries_sf = field_boundaries_sf, - tile_grid = tile_grid - ), - .progress = TRUE, - .options = furrr::furrr_options(seed = TRUE) - ) - - # Combine results from all tiles - field_kpi_stats <- dplyr::bind_rows(field_kpi_list) - - if (nrow(field_kpi_stats) == 0) { - message(" No KPI data extracted from tiles") - return(NULL) - } - - message(paste(" Extracted KPI stats for", length(unique(field_kpi_stats$field)), "unique fields")) - return(field_kpi_stats) -} - -# Helper function to process a single tile (like process_single_tile in CI extraction) -process_single_kpi_tile <- function(tile_file, field_boundaries_sf, tile_grid) { - tryCatch({ - tile_basename <- basename(tile_file) - # Load tile raster - tile_raster <- terra::rast(tile_file) - - # Get first band (CI band for weekly mosaics) - ci_band <- tile_raster[[1]] - - # EXACTLY LIKE SCRIPT 20: Crop to field bounding box first, then extract with sf directly - field_bbox <- sf::st_bbox(field_boundaries_sf) - ci_cropped <- terra::crop(ci_band, terra::ext(field_bbox), snap = "out") - - # Extract CI values for ALL fields at once using sf object directly (NOT terra::vect) - # terra::extract() works with sf objects and handles geometries properly - extracted_vals <- terra::extract(ci_cropped, field_boundaries_sf, fun = "mean", na.rm = TRUE) - - # Initialize results for this tile - tile_results <- data.frame() - - # Get tile ID from filename - tile_id_match <- as.numeric(sub(".*_(\\d{2})\\.tif$", "\\1", tile_basename)) - - # Process each field: extracted_vals is a data.frame with ID column (field indices) + extracted values - for (field_idx in seq_len(nrow(field_boundaries_sf))) { - field_id <- field_boundaries_sf$field[field_idx] - - # extracted_vals columns: 1=ID, 2=mean_CI (since we used fun="mean") - mean_ci <- extracted_vals[field_idx, 2] - - # Skip if no data for this field in this tile - if (is.na(mean_ci)) { - next - } - - # For tile-level stats, we only have mean from extraction (no variance without all pixels) - # Add to results - tile_results <- rbind(tile_results, data.frame( - field = field_id, - tile_id = tile_id_match, - tile_file = tile_basename, - mean_ci = round(mean_ci, 4), - stringsAsFactors = FALSE - )) - } - - return(tile_results) - - }, error = function(e) { - message(paste(" Warning: Error processing tile", basename(tile_file), ":", e$message)) - return(data.frame()) - }) -} - -calculate_and_export_farm_kpis <- function(report_date, project_dir, field_boundaries_sf, - harvesting_data, cumulative_CI_vals_dir, - weekly_CI_mosaic, reports_dir, current_week, year, - tile_grid, use_tile_mosaic = FALSE, tile_grid_size = "5x5") { - message("\n=== CALCULATING FARM-LEVEL KPIs ===") - message("(6 high-level KPI metrics with tile-based extraction)") - - output_dir <- file.path(reports_dir, "kpis") - if (!dir.exists(output_dir)) { - dir.create(output_dir, recursive = TRUE) - } - - # Get mosaic directory with grid size if using tiles - mosaic_dir <- if (use_tile_mosaic && !is.null(tile_grid_size)) { - file.path(weekly_CI_mosaic, tile_grid_size) - } else { - weekly_CI_mosaic - } - - # Extract field-level KPI statistics from tiles - field_kpi_stats <- calculate_field_kpis_from_tiles( - tile_dir = mosaic_dir, - week_num = current_week, - year = year, - field_boundaries_sf = field_boundaries_sf, - tile_grid = tile_grid - ) - - if (is.null(field_kpi_stats) || nrow(field_kpi_stats) == 0) { - message("Warning: No field KPI statistics extracted from tiles") - return(NULL) - } - - # Aggregate tile-based statistics by field (average across tiles for each field) - field_summary_stats <- field_kpi_stats %>% - dplyr::group_by(field) %>% - dplyr::summarise( - mean_ci = mean(mean_ci, na.rm = TRUE), - cv_ci = mean(cv_ci, na.rm = TRUE), - min_ci = min(min_ci, na.rm = TRUE), - max_ci = max(max_ci, na.rm = TRUE), - total_pixels = sum(n_pixels, na.rm = TRUE), - num_tiles = n_distinct(tile_id), - .groups = 'drop' - ) - - # Create results list - kpi_results <- list( - field_kpi_stats = field_kpi_stats, - field_summary_stats = field_summary_stats, - metadata = list( - report_date = report_date, - current_week = current_week, - year = year, - calculation_method = "tile_based_extraction", - num_fields_processed = length(unique(field_kpi_stats$field)), - num_tiles_processed = length(unique(field_kpi_stats$tile_id)) - ) - ) - - # Save results - rds_filename <- paste0(project_dir, "_farm_kpi_stats_week", sprintf("%02d", current_week), ".rds") - rds_path <- file.path(output_dir, rds_filename) - saveRDS(kpi_results, rds_path) - message(paste("✓ Farm-level KPI stats exported to:", rds_path)) - - # Print summary - cat("\n=== FARM-LEVEL KPI SUMMARY ===\n") - cat("Report Date:", as.character(report_date), "\n") - cat("Week:", current_week, "Year:", year, "\n") - cat("Fields Processed:", length(unique(field_kpi_stats$field)), "\n") - cat("Tiles Processed:", length(unique(field_kpi_stats$tile_id)), "\n") - cat("\n--- Field Summary Statistics (Mean across tiles) ---\n") - print(head(field_summary_stats, 20)) - - return(kpi_results) -} - -# ============================================================================ -# HELPER: Extract field-level statistics from CI raster (all pixels, single call) -# ============================================================================ - -extract_field_statistics_from_ci <- function(ci_band, field_boundaries_sf) { - #' Extract CI statistics for all fields from a single CI raster band - #' - #' This function extracts all pixel values for each field in one terra::extract call, - #' then calculates mean, CV, and percentiles from those pixels. - #' - #' @param ci_band Single CI band from terra raster - #' @param field_boundaries_sf SF object with field geometries - #' @return Data frame with columns: field_idx, mean_ci, cv, p10, p90, min_ci, max_ci, pixel_count_valid, pixel_count_total - - # SINGLE EXTRACTION: Get all pixels for all fields at once (no aggregation function) - # Result: data.frame with ID column (field indices) and value column (pixel values) - extract_result <- terra::extract(ci_band, field_boundaries_sf) - - # Calculate statistics for each field - stats_list <- list() - - for (field_idx in seq_len(nrow(field_boundaries_sf))) { - # Get all pixels for this field from the single extraction - # extract_result has columns [ID, value] where ID is field index (1-based) - field_pixels <- extract_result[extract_result$ID == field_idx, 2] - pixels <- as.numeric(field_pixels[!is.na(field_pixels)]) # Remove NAs - - if (length(pixels) == 0) { - # No data for this field - stats_list[[field_idx]] <- data.frame( - field_idx = field_idx, - mean_ci = NA_real_, - cv = NA_real_, - p10 = NA_real_, - p90 = NA_real_, - min_ci = NA_real_, - max_ci = NA_real_, - pixel_count_valid = 0, - pixel_count_total = 0, - stringsAsFactors = FALSE - ) - next - } - - # Calculate all statistics from pixels array - mean_val <- mean(pixels, na.rm = TRUE) - cv_val <- if (mean_val > 0) sd(pixels, na.rm = TRUE) / mean_val else NA_real_ - p10_val <- quantile(pixels, probs = CI_PERCENTILE_LOW, na.rm = TRUE)[[1]] - p90_val <- quantile(pixels, probs = CI_PERCENTILE_HIGH, na.rm = TRUE)[[1]] - min_val <- min(pixels, na.rm = TRUE) - max_val <- max(pixels, na.rm = TRUE) - - stats_list[[field_idx]] <- data.frame( - field_idx = field_idx, - mean_ci = mean_val, - cv = cv_val, - p10 = p10_val, - p90 = p90_val, - min_ci = min_val, - max_ci = max_val, - pixel_count_valid = length(pixels), - pixel_count_total = nrow(extract_result[extract_result$ID == field_idx, ]), - stringsAsFactors = FALSE - ) - } - - return(dplyr::bind_rows(stats_list)) -} - # ============================================================================ # MAIN # ============================================================================ diff --git a/r_app/80_report_building_utils.R b/r_app/80_report_building_utils.R new file mode 100644 index 0000000..35f20a7 --- /dev/null +++ b/r_app/80_report_building_utils.R @@ -0,0 +1,249 @@ +# 80_REPORT_BUILDING_UTILS.R +# ============================================================================ +# UTILITY FUNCTIONS FOR REPORT GENERATION AND EXCEL/CSV EXPORT +# +# This file contains reusable functions for: +# - Field analysis summary generation +# - Excel/CSV/RDS export functionality +# - Farm-level KPI aggregation and summary +# - Tile-based KPI extraction (alternative calculation method) +# +# Used by: 80_calculate_kpis.R, run_full_pipeline.R, other reporting scripts +# ============================================================================ + +# ============================================================================ +# SUMMARY GENERATION +# ============================================================================ + +generate_field_analysis_summary <- function(field_df) { + message("Generating summary statistics...") + + total_acreage <- sum(field_df$Acreage, na.rm = TRUE) + + germination_acreage <- sum(field_df$Acreage[field_df$Phase == "Germination"], na.rm = TRUE) + tillering_acreage <- sum(field_df$Acreage[field_df$Phase == "Tillering"], na.rm = TRUE) + grand_growth_acreage <- sum(field_df$Acreage[field_df$Phase == "Grand Growth"], na.rm = TRUE) + maturation_acreage <- sum(field_df$Acreage[field_df$Phase == "Maturation"], na.rm = TRUE) + unknown_phase_acreage <- sum(field_df$Acreage[field_df$Phase == "Unknown"], na.rm = TRUE) + + harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE) + stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE) + recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE) + growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE) + germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE) + germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE) + no_trigger_acreage <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE) + + clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE) + partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE) + no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE) + total_fields <- nrow(field_df) + + clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE) + partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE) + no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE) + + summary_df <- data.frame( + Category = c( + "--- PHASE DISTRIBUTION ---", + "Germination", + "Tillering", + "Grand Growth", + "Maturation", + "Unknown phase", + "--- STATUS TRIGGERS ---", + "Harvest ready", + "Stress detected", + "Strong recovery", + "Growth on track", + "Germination complete", + "Germination started", + "No trigger", + "--- CLOUD COVERAGE (FIELDS) ---", + "Clear view", + "Partial coverage", + "No image available", + "--- CLOUD COVERAGE (ACREAGE) ---", + "Clear view", + "Partial coverage", + "No image available", + "--- TOTAL ---", + "Total Acreage" + ), + Acreage = c( + NA, + round(germination_acreage, 2), + round(tillering_acreage, 2), + round(grand_growth_acreage, 2), + round(maturation_acreage, 2), + round(unknown_phase_acreage, 2), + NA, + round(harvest_ready_acreage, 2), + round(stress_acreage, 2), + round(recovery_acreage, 2), + round(growth_on_track_acreage, 2), + round(germination_complete_acreage, 2), + round(germination_started_acreage, 2), + round(no_trigger_acreage, 2), + NA, + paste0(clear_fields, " fields"), + paste0(partial_fields, " fields"), + paste0(no_image_fields, " fields"), + NA, + round(clear_acreage, 2), + round(partial_acreage, 2), + round(no_image_acreage, 2), + NA, + round(total_acreage, 2) + ), + stringsAsFactors = FALSE + ) + + return(summary_df) +} + +# ============================================================================ +# EXPORT FUNCTIONS +# ============================================================================ + +export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, reports_dir) { + message("Exporting per-field analysis to Excel, CSV, and RDS...") + + field_df_rounded <- field_df %>% + mutate(across(where(is.numeric), ~ round(., 2))) + + summary_df_rounded <- summary_df %>% + mutate(across(where(is.numeric), ~ round(., 2))) + + output_subdir <- file.path(reports_dir, "kpis", "field_analysis") + if (!dir.exists(output_subdir)) { + dir.create(output_subdir, recursive = TRUE) + } + + excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".xlsx") + excel_path <- file.path(output_subdir, excel_filename) + excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE) + + sheets <- list( + "Field Data" = field_df_rounded, + "Summary" = summary_df_rounded + ) + + write_xlsx(sheets, excel_path) + message(paste("✓ Field analysis Excel exported to:", excel_path)) + + kpi_data <- list( + field_analysis = field_df_rounded, + field_analysis_summary = summary_df_rounded, + metadata = list( + current_week = current_week, + project = project_dir, + created_at = Sys.time() + ) + ) + + rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d", current_week), ".rds") + rds_path <- file.path(reports_dir, "kpis", rds_filename) + + saveRDS(kpi_data, rds_path) + message(paste("✓ Field analysis RDS exported to:", rds_path)) + + csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".csv") + csv_path <- file.path(output_subdir, csv_filename) + write_csv(field_df_rounded, csv_path) + message(paste("✓ Field analysis CSV exported to:", csv_path)) + + return(list(excel = excel_path, rds = rds_path, csv = csv_path)) +} + +# ============================================================================ +# TILE-BASED KPI EXTRACTION (Alternative calculation method) +# ============================================================================ + +# [COMMENTED OUT / UNUSED - kept for reference] +# These functions provide tile-based extraction as an alternative to field_statistics approach +# Currently replaced by calculate_field_statistics() in 80_weekly_stats_utils.R +# Uncomment if parallel processing of tiles is needed in future + +# calculate_field_kpis_from_tiles <- function(tile_dir, week_num, year, field_boundaries_sf, tile_grid) { +# message("Calculating field-level KPI statistics from tiles...") +# +# tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) +# tile_files <- list.files(tile_dir, pattern = tile_pattern, full.names = TRUE) +# +# if (length(tile_files) == 0) { +# message("No tiles found for week", week_num, year) +# return(NULL) +# } +# +# message(paste("Processing", length(tile_files), "tiles in parallel...")) +# +# field_kpi_list <- furrr::future_map( +# tile_files, +# ~ process_single_kpi_tile( +# tile_file = ., +# field_boundaries_sf = field_boundaries_sf, +# tile_grid = tile_grid +# ), +# .progress = TRUE, +# .options = furrr::furrr_options(seed = TRUE) +# ) +# +# field_kpi_stats <- dplyr::bind_rows(field_kpi_list) +# +# if (nrow(field_kpi_stats) == 0) { +# message(" No KPI data extracted from tiles") +# return(NULL) +# } +# +# message(paste(" Extracted KPI stats for", length(unique(field_kpi_stats$field)), "unique fields")) +# return(field_kpi_stats) +# } + +# process_single_kpi_tile <- function(tile_file, field_boundaries_sf, tile_grid) { +# # Helper function for calculate_field_kpis_from_tiles +# tryCatch({ +# tile_basename <- basename(tile_file) +# tile_raster <- terra::rast(tile_file) +# ci_band <- tile_raster[[1]] +# +# field_bbox <- sf::st_bbox(field_boundaries_sf) +# ci_cropped <- terra::crop(ci_band, terra::ext(field_bbox), snap = "out") +# +# extracted_vals <- terra::extract(ci_cropped, field_boundaries_sf, fun = "mean", na.rm = TRUE) +# +# tile_results <- data.frame() +# tile_id_match <- as.numeric(sub(".*_(\\d{2})\\.tif$", "\\1", tile_basename)) +# +# for (field_idx in seq_len(nrow(field_boundaries_sf))) { +# field_id <- field_boundaries_sf$field[field_idx] +# mean_ci <- extracted_vals[field_idx, 2] +# +# if (is.na(mean_ci)) { +# next +# } +# +# tile_results <- rbind(tile_results, data.frame( +# field = field_id, +# tile_id = tile_id_match, +# tile_file = tile_basename, +# mean_ci = round(mean_ci, 4), +# stringsAsFactors = FALSE +# )) +# } +# +# return(tile_results) +# +# }, error = function(e) { +# message(paste(" Warning: Error processing tile", basename(tile_file), ":", e$message)) +# return(data.frame()) +# }) +# } + +# calculate_and_export_farm_kpis <- function(report_date, project_dir, field_boundaries_sf, +# harvesting_data, cumulative_CI_vals_dir, +# weekly_CI_mosaic, reports_dir, current_week, year, +# tile_grid, use_tile_mosaic = FALSE, tile_grid_size = "5x5") { +# # Farm-level KPI calculation using tile-based extraction (alternative approach) +# # [Implementation kept as reference for alternative calculation method] +# } diff --git a/r_app/80_weekly_stats_utils.R b/r_app/80_weekly_stats_utils.R new file mode 100644 index 0000000..3a5f1d2 --- /dev/null +++ b/r_app/80_weekly_stats_utils.R @@ -0,0 +1,953 @@ +# 80_WEEKLY_STATS_UTILS.R +# ============================================================================ +# UTILITY FUNCTIONS FOR WEEKLY STATISTICS CALCULATION +# +# This file contains reusable functions for: +# - Tile grid management +# - Tile loading and merging +# - Field-level statistics calculation from CI rasters +# - Weekly stats caching (RDS/CSV export/import) +# - KPI trend calculations +# - Historical data loading and auto-generation from mosaics +# +# Used by: 80_calculate_kpis.R, run_full_pipeline.R, other reporting scripts +# ============================================================================ + +# ============================================================================ +# TILE-AWARE HELPER FUNCTIONS +# ============================================================================ + +get_tile_ids_for_field <- function(field_geom, tile_grid, field_id = NULL) { + if (inherits(field_geom, "sf")) { + field_bbox <- sf::st_bbox(field_geom) + field_xmin <- field_bbox["xmin"] + field_xmax <- field_bbox["xmax"] + field_ymin <- field_bbox["ymin"] + field_ymax <- field_bbox["ymax"] + } else if (inherits(field_geom, "SpatVector")) { + field_bbox <- terra::ext(field_geom) + field_xmin <- field_bbox$xmin + field_xmax <- field_bbox$xmax + field_ymin <- field_bbox$ymin + field_ymax <- field_bbox$ymax + } else { + stop("field_geom must be sf or terra::vect object") + } + + intersecting_tiles <- tile_grid$id[ + !(tile_grid$xmax < field_xmin | + tile_grid$xmin > field_xmax | + tile_grid$ymax < field_ymin | + tile_grid$ymin > field_ymax) + ] + + return(as.numeric(intersecting_tiles)) +} + +load_tiles_for_field <- function(field_geom, tile_ids, week_num, year, mosaic_dir) { + if (length(tile_ids) == 0) { + return(NULL) + } + + tiles_list <- list() + for (tile_id in sort(tile_ids)) { + tile_filename <- sprintf("week_%02d_%d_%02d.tif", week_num, year, tile_id) + tile_path <- file.path(mosaic_dir, tile_filename) + + if (file.exists(tile_path)) { + tryCatch({ + tile_rast <- terra::rast(tile_path) + ci_band <- terra::subset(tile_rast, 5) + tiles_list[[length(tiles_list) + 1]] <- ci_band + }, error = function(e) { + message(paste(" Warning: Could not load tile", tile_id, ":", e$message)) + }) + } + } + + if (length(tiles_list) == 0) { + return(NULL) + } + + if (length(tiles_list) == 1) { + return(tiles_list[[1]]) + } else { + tryCatch({ + rsrc <- terra::sprc(tiles_list) + merged <- terra::mosaic(rsrc, fun = "max") + return(merged) + }, error = function(e) { + message(paste(" Warning: Could not merge tiles:", e$message)) + return(tiles_list[[1]]) + }) + } +} + +build_tile_grid <- function(mosaic_dir, week_num, year) { + # Handle grid-size subdirectories (e.g., weekly_tile_max/5x5/) + detected_grid_size <- NA + if (dir.exists(mosaic_dir)) { + subfolders <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE) + grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) + + if (length(grid_patterns) > 0) { + detected_grid_size <- grid_patterns[1] + mosaic_dir <- file.path(mosaic_dir, detected_grid_size) + message(paste(" Using grid-size subdirectory:", detected_grid_size)) + } + } + + tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) + tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE) + + if (length(tile_files) == 0) { + stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir)) + } + + tile_grid <- data.frame( + id = integer(), + xmin = numeric(), + xmax = numeric(), + ymin = numeric(), + ymax = numeric(), + stringsAsFactors = FALSE + ) + + for (tile_file in tile_files) { + tryCatch({ + matches <- regmatches(basename(tile_file), regexpr("_([0-9]{2})\\.tif$", basename(tile_file))) + if (length(matches) > 0) { + tile_id <- as.integer(sub("_|\\.tif", "", matches[1])) + tile_rast <- terra::rast(tile_file) + tile_ext <- terra::ext(tile_rast) + tile_grid <- rbind(tile_grid, data.frame( + id = tile_id, + xmin = tile_ext$xmin, + xmax = tile_ext$xmax, + ymin = tile_ext$ymin, + ymax = tile_ext$ymax, + stringsAsFactors = FALSE + )) + } + }, error = function(e) { + message(paste(" Warning: Could not process tile", basename(tile_file), ":", e$message)) + }) + } + + if (nrow(tile_grid) == 0) { + stop("Could not extract extents from any tile files") + } + + return(list( + tile_grid = tile_grid, + mosaic_dir = mosaic_dir, + grid_size = detected_grid_size + )) +} + +# ============================================================================ +# STATISTICAL CATEGORIZATION FUNCTIONS +# ============================================================================ + +categorize_four_week_trend <- function(ci_values_list) { + if (is.null(ci_values_list) || length(ci_values_list) < 2) { + return(NA_character_) + } + + ci_values_list <- ci_values_list[!is.na(ci_values_list)] + if (length(ci_values_list) < 2) { + return(NA_character_) + } + + weekly_changes <- diff(ci_values_list) + avg_weekly_change <- mean(weekly_changes, na.rm = TRUE) + + if (avg_weekly_change >= FOUR_WEEK_TREND_STRONG_GROWTH_MIN) { + return("strong growth") + } else if (avg_weekly_change >= FOUR_WEEK_TREND_GROWTH_MIN && + avg_weekly_change < FOUR_WEEK_TREND_GROWTH_MAX) { + return("growth") + } else if (abs(avg_weekly_change) <= FOUR_WEEK_TREND_NO_GROWTH_RANGE) { + return("no growth") + } else if (avg_weekly_change <= FOUR_WEEK_TREND_DECLINE_MIN && + avg_weekly_change > FOUR_WEEK_TREND_STRONG_DECLINE_MAX) { + return("decline") + } else if (avg_weekly_change < FOUR_WEEK_TREND_STRONG_DECLINE_MAX) { + return("strong decline") + } else { + return("no growth") + } +} + +round_cloud_to_intervals <- function(cloud_pct_clear) { + if (is.na(cloud_pct_clear)) { + return(NA_character_) + } + + if (cloud_pct_clear < 50) return("<50%") + if (cloud_pct_clear < 60) return("50-60%") + if (cloud_pct_clear < 70) return("60-70%") + if (cloud_pct_clear < 80) return("70-80%") + if (cloud_pct_clear < 90) return("80-90%") + return(">90%") +} + +get_ci_percentiles <- function(ci_values) { + if (is.null(ci_values) || length(ci_values) == 0) { + return(NA_character_) + } + + ci_values <- ci_values[!is.na(ci_values)] + if (length(ci_values) == 0) { + return(NA_character_) + } + + p10 <- quantile(ci_values, CI_PERCENTILE_LOW, na.rm = TRUE) + p90 <- quantile(ci_values, CI_PERCENTILE_HIGH, na.rm = TRUE) + + return(sprintf("%.1f-%.1f", p10, p90)) +} + +calculate_cv_trend <- function(cv_current, cv_previous) { + if (is.na(cv_current) || is.na(cv_previous)) { + return(NA_real_) + } + return(round(cv_current - cv_previous, 4)) +} + +calculate_four_week_trend <- function(mean_ci_values) { + #' Calculate four-week CI trend from available weeks + #' Uses whatever weeks are available (1-4 weeks) to estimate trend + + if (is.null(mean_ci_values) || length(mean_ci_values) == 0) { + return(NA_real_) + } + + ci_clean <- mean_ci_values[!is.na(mean_ci_values)] + + if (length(ci_clean) < 2) { + return(NA_real_) + } + + trend <- ci_clean[length(ci_clean)] - ci_clean[1] + return(round(trend, 2)) +} + +categorize_cv_slope <- function(slope) { + #' Categorize CV slope (8-week regression) into field uniformity interpretation + + if (is.na(slope)) { + return(NA_character_) + } + + if (slope <= CV_SLOPE_IMPROVEMENT_MIN) { + return("Excellent uniformity") + } else if (slope < CV_SLOPE_HOMOGENOUS_MIN) { + return("Homogenous growth") + } else if (slope <= CV_SLOPE_HOMOGENOUS_MAX) { + return("Homogenous growth") + } else if (slope <= CV_SLOPE_PATCHINESS_MAX) { + return("Minor patchiness") + } else { + return("Severe fragmentation") + } +} + +calculate_cv_trend_long_term <- function(cv_values) { + #' Calculate 8-week CV trend via linear regression slope + + if (is.null(cv_values) || length(cv_values) == 0) { + return(NA_real_) + } + + cv_clean <- cv_values[!is.na(cv_values)] + + if (length(cv_clean) < 2) { + return(NA_real_) + } + + weeks <- seq_along(cv_clean) + + tryCatch({ + lm_fit <- lm(cv_clean ~ weeks) + slope <- coef(lm_fit)["weeks"] + return(round(as.numeric(slope), 4)) + }, error = function(e) { + return(NA_real_) + }) +} + +# ============================================================================ +# HELPER FUNCTIONS +# ============================================================================ + +get_phase_by_age <- function(age_weeks) { + if (is.na(age_weeks)) return(NA_character_) + for (i in seq_len(nrow(PHASE_DEFINITIONS))) { + if (age_weeks >= PHASE_DEFINITIONS$age_start[i] && + age_weeks <= PHASE_DEFINITIONS$age_end[i]) { + return(PHASE_DEFINITIONS$phase[i]) + } + } + return("Unknown") +} + +get_status_trigger <- function(ci_values, ci_change, age_weeks) { + if (is.na(age_weeks) || length(ci_values) == 0) return(NA_character_) + + ci_values <- ci_values[!is.na(ci_values)] + if (length(ci_values) == 0) return(NA_character_) + + pct_above_2 <- sum(ci_values > 2) / length(ci_values) * 100 + pct_at_or_above_2 <- sum(ci_values >= 2) / length(ci_values) * 100 + ci_cv <- if (mean(ci_values, na.rm = TRUE) > 0) sd(ci_values) / mean(ci_values, na.rm = TRUE) else 0 + mean_ci <- mean(ci_values, na.rm = TRUE) + + if (age_weeks >= 0 && age_weeks <= 6) { + if (pct_at_or_above_2 >= 70) { + return("germination_complete") + } else if (pct_above_2 > 10) { + return("germination_started") + } + } + + if (age_weeks >= 45) { + return("harvest_ready") + } + + if (age_weeks > 6 && !is.na(ci_change) && ci_change < -1.5 && ci_cv < 0.25) { + return("stress_detected_whole_field") + } + + if (age_weeks > 6 && !is.na(ci_change) && ci_change > 1.5) { + return("strong_recovery") + } + + if (age_weeks >= 4 && age_weeks < 39 && !is.na(ci_change) && ci_change > 0.2) { + return("growth_on_track") + } + + if (age_weeks >= 39 && age_weeks < 45 && mean_ci > 3.5) { + return("maturation_progressing") + } + + return(NA_character_) +} + +extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) { + if (USE_UNIFORM_AGE) { + message(paste("Using uniform planting date for all fields:", UNIFORM_PLANTING_DATE)) + if (!is.null(field_boundaries_sf)) { + return(data.frame( + field_id = field_boundaries_sf$field, + date = rep(UNIFORM_PLANTING_DATE, nrow(field_boundaries_sf)), + stringsAsFactors = FALSE + )) + } else { + return(NULL) + } + } + + if (is.null(harvesting_data) || nrow(harvesting_data) == 0) { + message("Warning: No harvesting data available.") + return(NULL) + } + + tryCatch({ + planting_dates <- harvesting_data %>% + arrange(field, desc(season_start)) %>% + distinct(field, .keep_all = TRUE) %>% + select(field, season_start) %>% + rename(field_id = field, planting_date = season_start) %>% + filter(!is.na(planting_date)) %>% + as.data.frame() + + message(paste("Extracted planting dates for", nrow(planting_dates), "fields")) + return(planting_dates) + }, error = function(e) { + message(paste("Error extracting planting dates:", e$message)) + return(NULL) + }) +} + +# ============================================================================ +# MODULAR STATISTICS CALCULATION +# ============================================================================ + +calculate_field_statistics <- function(field_boundaries_sf, week_num, year, + mosaic_dir, report_date = Sys.Date()) { + + message(paste("Calculating statistics for all fields - Week", week_num, year)) + + tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) + tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE) + + if (length(tile_files) == 0) { + stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir)) + } + + message(paste(" Found", length(tile_files), "tiles for week", week_num)) + + results_list <- list() + fields_processed <- 0 + + for (tile_idx in seq_along(tile_files)) { + tile_file <- tile_files[tile_idx] + + tryCatch({ + current_rast <- terra::rast(tile_file) + ci_band <- current_rast[["CI"]] + + if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) { + message(paste(" [SKIP] Tile", basename(tile_file), "- CI band not found")) + return(NULL) + } + + extracted <- terra::extract(ci_band, field_boundaries_sf, na.rm = FALSE) + unique_field_ids <- unique(extracted$ID[!is.na(extracted$ID)]) + + for (field_poly_idx in unique_field_ids) { + field_id <- field_boundaries_sf$field[field_poly_idx] + ci_vals <- extracted$CI[extracted$ID == field_poly_idx] + ci_vals <- ci_vals[!is.na(ci_vals)] + + if (length(ci_vals) == 0) { + next + } + + mean_ci <- mean(ci_vals, na.rm = TRUE) + ci_std <- sd(ci_vals, na.rm = TRUE) + cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_ + range_min <- min(ci_vals, na.rm = TRUE) + range_max <- max(ci_vals, na.rm = TRUE) + range_str <- sprintf("%.1f-%.1f", range_min, range_max) + ci_percentiles_str <- get_ci_percentiles(ci_vals) + + field_rows <- extracted[extracted$ID == field_poly_idx, ] + num_total <- nrow(field_rows) + num_data <- sum(!is.na(field_rows$CI)) + pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0 + cloud_cat <- if (num_data == 0) "No image available" + else if (pct_clear >= 99.5) "Clear view" + else "Partial coverage" + + age_weeks <- NA_real_ + if (USE_UNIFORM_AGE) { + age_weeks <- as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks")) + } + phase <- get_phase_by_age(age_weeks) + + germination_progress <- NA_character_ + if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks < 17) { + pct_ci_ge_threshold <- sum(ci_vals >= GERMINATION_CI_THRESHOLD) / length(ci_vals) * 100 + germination_progress <- sprintf("%.1f%%", pct_ci_ge_threshold) + } + + existing_idx <- which(sapply(results_list, function(x) x$Field_id) == field_id) + + if (length(existing_idx) > 0) { + next + } + + results_list[[length(results_list) + 1]] <- data.frame( + Field_id = field_id, + Mean_CI = round(mean_ci, 2), + CV = round(cv, 4), + CI_range = range_str, + CI_Percentiles = ci_percentiles_str, + Cloud_pct_clear = pct_clear, + Cloud_category = cloud_cat, + Age_week = round(age_weeks, 1), + Phase = phase, + Germination_progress = germination_progress, + stringsAsFactors = FALSE + ) + + fields_processed <- fields_processed + 1 + } + + message(paste(" Tile", tile_idx, "of", length(tile_files), "processed")) + + }, error = function(e) { + message(paste(" [ERROR] Tile", basename(tile_file), ":", e$message)) + }) + } + + if (length(results_list) == 0) { + stop(paste("No fields processed successfully for week", week_num)) + } + + stats_df <- dplyr::bind_rows(results_list) + message(paste(" ✓ Successfully calculated statistics for", nrow(stats_df), "fields")) + + return(stats_df) +} + +# ============================================================================ +# CALCULATE KPI TRENDS +# ============================================================================ + +calculate_kpi_trends <- function(current_stats, prev_stats = NULL, + project_dir = NULL, reports_dir = NULL, + current_week = NULL, year = NULL) { + + message("Calculating KPI trends from current and previous week data") + + current_stats$Weekly_ci_change <- NA_real_ + current_stats$CV_Trend_Short_Term <- NA_real_ + current_stats$Four_week_trend <- NA_real_ + current_stats$CV_Trend_Long_Term <- NA_real_ + current_stats$nmr_weeks_in_this_phase <- 1L + + if (is.null(prev_stats) || nrow(prev_stats) == 0) { + message(" No previous week data available - using defaults") + return(current_stats) + } + + message(paste(" prev_stats has", nrow(prev_stats), "rows and", ncol(prev_stats), "columns")) + + prev_lookup <- setNames(seq_len(nrow(prev_stats)), prev_stats$Field_id) + + prev_field_analysis <- NULL + + tryCatch({ + analysis_dir <- file.path(reports_dir, "kpis", "field_analysis") + if (dir.exists(analysis_dir)) { + analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE) + if (length(analysis_files) > 0) { + recent_file <- analysis_files[which.max(file.info(analysis_files)$mtime)] + prev_field_analysis <- readr::read_csv(recent_file, show_col_types = FALSE, + col_select = c(Field_id, nmr_weeks_in_this_phase, Phase)) + } + } + }, error = function(e) { + message(paste(" Note: Could not load previous field_analysis for nmr_weeks tracking:", e$message)) + }) + + if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) { + message(paste(" Using previous field_analysis to track nmr_weeks_in_this_phase")) + } + + historical_4weeks <- list() + historical_8weeks <- list() + + if (!is.null(project_dir) && !is.null(reports_dir) && !is.null(current_week)) { + message(" Loading historical field_stats for 4-week and 8-week trends...") + + for (lookback in 1:4) { + target_week <- current_week - lookback + if (target_week < 1) target_week <- target_week + 52 + + rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, target_week) + rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) + + if (file.exists(rds_path)) { + tryCatch({ + stats_data <- readRDS(rds_path) + historical_4weeks[[length(historical_4weeks) + 1]] <- list( + week = target_week, + stats = stats_data + ) + }, error = function(e) { + message(paste(" Warning: Could not load week", target_week, ":", e$message)) + }) + } + } + + for (lookback in 1:8) { + target_week <- current_week - lookback + if (target_week < 1) target_week <- target_week + 52 + + rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, target_week) + rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) + + if (file.exists(rds_path)) { + tryCatch({ + stats_data <- readRDS(rds_path) + historical_8weeks[[length(historical_8weeks) + 1]] <- list( + week = target_week, + stats = stats_data + ) + }, error = function(e) { + # Silently skip + }) + } + } + + if (length(historical_4weeks) > 0) { + message(paste(" Loaded", length(historical_4weeks), "weeks for 4-week trend")) + } + if (length(historical_8weeks) > 0) { + message(paste(" Loaded", length(historical_8weeks), "weeks for 8-week CV trend")) + } + } + + cv_trends_calculated <- 0 + four_week_trends_calculated <- 0 + cv_long_term_calculated <- 0 + + for (i in seq_len(nrow(current_stats))) { + field_id <- current_stats$Field_id[i] + prev_idx <- prev_lookup[field_id] + + if (!is.na(prev_idx) && prev_idx > 0 && prev_idx <= nrow(prev_stats)) { + prev_row <- prev_stats[prev_idx, , drop = FALSE] + + prev_ci <- prev_row$Mean_CI[1] + if (!is.na(prev_ci) && !is.na(current_stats$Mean_CI[i])) { + current_stats$Weekly_ci_change[i] <- + round(current_stats$Mean_CI[i] - prev_ci, 2) + } + + prev_cv <- prev_row$CV[1] + if (!is.na(prev_cv) && !is.na(current_stats$CV[i])) { + current_stats$CV_Trend_Short_Term[i] <- + calculate_cv_trend(current_stats$CV[i], prev_cv) + cv_trends_calculated <- cv_trends_calculated + 1 + } + + if (length(historical_4weeks) > 0) { + ci_values_4week <- numeric() + + for (hist_idx in rev(seq_along(historical_4weeks))) { + hist_data <- historical_4weeks[[hist_idx]]$stats + hist_field <- which(hist_data$Field_id == field_id) + if (length(hist_field) > 0 && !is.na(hist_data$Mean_CI[hist_field[1]])) { + ci_values_4week <- c(ci_values_4week, hist_data$Mean_CI[hist_field[1]]) + } + } + + ci_values_4week <- c(ci_values_4week, current_stats$Mean_CI[i]) + + if (length(ci_values_4week) >= 2) { + current_stats$Four_week_trend[i] <- calculate_four_week_trend(ci_values_4week) + four_week_trends_calculated <- four_week_trends_calculated + 1 + } + } + + if (length(historical_8weeks) > 0) { + cv_values_8week <- numeric() + + for (hist_idx in rev(seq_along(historical_8weeks))) { + hist_data <- historical_8weeks[[hist_idx]]$stats + hist_field <- which(hist_data$Field_id == field_id) + if (length(hist_field) > 0 && !is.na(hist_data$CV[hist_field[1]])) { + cv_values_8week <- c(cv_values_8week, hist_data$CV[hist_field[1]]) + } + } + + cv_values_8week <- c(cv_values_8week, current_stats$CV[i]) + + if (length(cv_values_8week) >= 2) { + slope <- calculate_cv_trend_long_term(cv_values_8week) + current_stats$CV_Trend_Long_Term[i] <- slope + cv_long_term_calculated <- cv_long_term_calculated + 1 + } + } + + if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) { + prev_analysis_row <- prev_field_analysis %>% + dplyr::filter(Field_id == field_id) + + if (nrow(prev_analysis_row) > 0) { + prev_phase_analysis <- prev_analysis_row$Phase[1] + prev_nmr_weeks_analysis <- prev_analysis_row$nmr_weeks_in_this_phase[1] + + if (!is.na(current_stats$Phase[i]) && !is.na(prev_phase_analysis)) { + if (current_stats$Phase[i] == prev_phase_analysis) { + current_stats$nmr_weeks_in_this_phase[i] <- + if (!is.na(prev_nmr_weeks_analysis)) prev_nmr_weeks_analysis + 1L else 2L + } else { + current_stats$nmr_weeks_in_this_phase[i] <- 1L + } + } + } else if (!is.na(current_stats$Phase[i]) && !is.na(prev_row$Phase[1])) { + if (current_stats$Phase[i] == prev_row$Phase[1]) { + current_stats$nmr_weeks_in_this_phase[i] <- 2L + } else { + current_stats$nmr_weeks_in_this_phase[i] <- 1L + } + } + } else { + if (!is.na(current_stats$Phase[i]) && !is.na(prev_row$Phase[1])) { + if (current_stats$Phase[i] == prev_row$Phase[1]) { + current_stats$nmr_weeks_in_this_phase[i] <- 2L + } else { + current_stats$nmr_weeks_in_this_phase[i] <- 1L + } + } + } + } + } + + message(paste(" ✓ Calculated CV_Trend_Short_Term:", cv_trends_calculated, "fields")) + message(paste(" ✓ Calculated Four_week_trend:", four_week_trends_calculated, "fields")) + message(paste(" ✓ Calculated CV_Trend_Long_Term:", cv_long_term_calculated, "fields")) + return(current_stats) +} + +# ============================================================================ +# LOAD OR CALCULATE WEEKLY STATISTICS +# ============================================================================ + +load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_boundaries_sf, + mosaic_dir, reports_dir, report_date = Sys.Date()) { + + rds_filename <- sprintf("%s_field_stats_week%02d.rds", project_dir, week_num) + rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename) + + if (file.exists(rds_path)) { + message(paste("Loading cached statistics from:", basename(rds_path))) + return(readRDS(rds_path)) + } + + message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num)) + stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year, + mosaic_dir, report_date) + + output_dir <- file.path(reports_dir, "kpis", "field_stats") + if (!dir.exists(output_dir)) { + dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) + } + + saveRDS(stats_df, rds_path) + message(paste("Saved weekly statistics RDS:", basename(rds_path))) + + csv_filename <- sprintf("%s_field_stats_week%02d.csv", project_dir, week_num) + csv_path <- file.path(output_dir, csv_filename) + readr::write_csv(stats_df, csv_path) + message(paste("Saved weekly statistics CSV:", basename(csv_path))) + + return(stats_df) +} + +load_historical_field_data <- function(project_dir, current_week, reports_dir, num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL) { + historical_data <- list() + loaded_weeks <- c() + missing_weeks <- c() + + for (lookback in 0:(num_weeks - 1)) { + target_week <- current_week - lookback + if (target_week < 1) target_week <- target_week + 52 + + csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", target_week), ".csv") + csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename) + + if (file.exists(csv_path)) { + tryCatch({ + data <- read_csv(csv_path, show_col_types = FALSE) + historical_data[[lookback + 1]] <- list( + week = target_week, + data = data + ) + loaded_weeks <- c(loaded_weeks, target_week) + }, error = function(e) { + message(paste(" Warning: Could not load week", target_week, ":", e$message)) + missing_weeks <<- c(missing_weeks, target_week) + }) + } else { + missing_weeks <- c(missing_weeks, target_week) + } + } + + if (length(missing_weeks) > 0 && auto_generate) { + message(paste("⚠ Missing weeks:", paste(missing_weeks, collapse = ", "))) + message("Scanning for ALL available weekly mosaics and calculating stats...\n") + + if (is.null(field_boundaries_sf)) { + message(" Error: field_boundaries_sf not provided - cannot auto-generate") + return(historical_data) + } + + if (!exists("weekly_tile_max")) { + message(" ✗ weekly_tile_max path not defined") + return(historical_data) + } + + check_paths <- c(file.path(weekly_tile_max, "5x5"), weekly_tile_max) + mosaic_scan_dir <- NA + + for (check_path in check_paths) { + if (dir.exists(check_path)) { + tif_files <- list.files(check_path, pattern = "week_.*\\.tif$", full.names = TRUE) + if (length(tif_files) > 0) { + mosaic_scan_dir <- check_path + break + } + } + } + + if (is.na(mosaic_scan_dir)) { + message(" ✗ No mosaic files found in weekly_tile_max") + return(historical_data) + } + + weeks_to_load <- 8 + today <- Sys.Date() + target_dates <- today - (0:(weeks_to_load - 1)) * 7 + + expected_weeks <- data.frame( + date = target_dates, + week = as.numeric(format(target_dates, "%V")), + year = as.numeric(format(target_dates, "%Y")), + stringsAsFactors = FALSE + ) + expected_weeks <- unique(expected_weeks) + + message(paste(" Expected weeks (last 8 from", format(today, "%Y-%m-%d"), "):")) + for (i in seq_len(nrow(expected_weeks))) { + message(paste(" Week", sprintf("%02d", expected_weeks$week[i]), expected_weeks$year[i])) + } + message("") + + tif_files <- list.files(mosaic_scan_dir, pattern = "week_([0-9]{2})_([0-9]{4})_[0-9]{2}\\.tif$", + full.names = FALSE) + + available_weeks <- data.frame() + for (filename in tif_files) { + matches <- regmatches(filename, gregexpr("week_([0-9]{2})_([0-9]{4})", filename))[[1]] + if (length(matches) > 0) { + week_year <- strsplit(matches[1], "_")[[1]] + if (length(week_year) == 3) { + week_num <- as.numeric(week_year[2]) + year_num <- as.numeric(week_year[3]) + + if (week_num %in% expected_weeks$week && year_num %in% expected_weeks$year) { + available_weeks <- rbind(available_weeks, + data.frame(week = week_num, year = year_num)) + } + } + } + } + + available_weeks <- unique(available_weeks) + available_weeks <- merge(available_weeks, expected_weeks[, c("week", "year", "date")], by = c("week", "year")) + available_weeks <- available_weeks[order(available_weeks$date, decreasing = TRUE), ] + + if (nrow(available_weeks) == 0) { + message(" ✗ No matching mosaic files found") + message(paste(" Scanned directory:", mosaic_scan_dir)) + return(historical_data) + } + + message(paste(" Found", nrow(available_weeks), "week(s) with available mosaics:")) + + for (i in seq_len(nrow(available_weeks))) { + week_to_calc <- available_weeks$week[i] + year_to_calc <- available_weeks$year[i] + date_to_calc <- available_weeks$date[i] + + tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_to_calc, year_to_calc) + tile_files <- list.files(mosaic_scan_dir, pattern = tile_pattern, full.names = TRUE) + + if (length(tile_files) == 0) { + message(paste(" ✗ Week", sprintf("%02d", week_to_calc), year_to_calc, "- no tiles found")) + next + } + + message(paste(" ✓ Week", sprintf("%02d", week_to_calc), year_to_calc, "-", length(tile_files), "mosaics")) + + tryCatch({ + week_stats <- load_or_calculate_weekly_stats( + week_num = week_to_calc, + year = year_to_calc, + project_dir = project_dir, + field_boundaries_sf = field_boundaries_sf, + mosaic_dir = mosaic_scan_dir, + reports_dir = reports_dir, + report_date = date_to_calc + ) + + if (!is.null(week_stats) && nrow(week_stats) > 0) { + message(paste(" ✓ Calculated stats for", nrow(week_stats), "fields")) + + historical_data[[length(historical_data) + 1]] <- list( + week = week_to_calc, + year = year_to_calc, + data = week_stats + ) + loaded_weeks <- c(loaded_weeks, paste0(week_to_calc, "_", year_to_calc)) + } + }, error = function(e) { + message(paste(" ✗ Error:", e$message)) + }) + } + } + + if (length(historical_data) == 0) { + message(paste("Error: No historical field data found and could not auto-generate weeks")) + return(NULL) + } + + message(paste("✓ Loaded", length(historical_data), "weeks of historical data:", + paste(loaded_weeks, collapse = ", "))) + + return(historical_data) +} + +# ============================================================================ +# HELPER: Extract field-level statistics from CI raster +# ============================================================================ + +extract_field_statistics_from_ci <- function(ci_band, field_boundaries_sf) { + #' Extract CI statistics for all fields from a single CI raster band + + extract_result <- terra::extract(ci_band, field_boundaries_sf) + + stats_list <- list() + + for (field_idx in seq_len(nrow(field_boundaries_sf))) { + field_pixels <- extract_result[extract_result$ID == field_idx, 2] + pixels <- as.numeric(field_pixels[!is.na(field_pixels)]) + + if (length(pixels) == 0) { + stats_list[[field_idx]] <- data.frame( + field_idx = field_idx, + mean_ci = NA_real_, + cv = NA_real_, + p10 = NA_real_, + p90 = NA_real_, + min_ci = NA_real_, + max_ci = NA_real_, + pixel_count_valid = 0, + pixel_count_total = 0, + stringsAsFactors = FALSE + ) + next + } + + mean_val <- mean(pixels, na.rm = TRUE) + cv_val <- if (mean_val > 0) sd(pixels, na.rm = TRUE) / mean_val else NA_real_ + p10_val <- quantile(pixels, probs = CI_PERCENTILE_LOW, na.rm = TRUE)[[1]] + p90_val <- quantile(pixels, probs = CI_PERCENTILE_HIGH, na.rm = TRUE)[[1]] + min_val <- min(pixels, na.rm = TRUE) + max_val <- max(pixels, na.rm = TRUE) + + stats_list[[field_idx]] <- data.frame( + field_idx = field_idx, + mean_ci = mean_val, + cv = cv_val, + p10 = p10_val, + p90 = p90_val, + min_ci = min_val, + max_ci = max_val, + pixel_count_valid = length(pixels), + pixel_count_total = nrow(extract_result[extract_result$ID == field_idx, ]), + stringsAsFactors = FALSE + ) + } + + return(dplyr::bind_rows(stats_list)) +} + +# ============================================================================ +# COMMENTED OUT / UNUSED FUNCTIONS (kept for future use) +# ============================================================================ + +# analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week_num, year, +# mosaic_dir, historical_data = NULL, planting_dates = NULL, +# report_date = Sys.Date(), harvest_imminence_data = NULL, +# harvesting_data = NULL) { +# # [Function kept as reference for parallel field analysis] +# # Currently replaced by calculate_field_statistics() for efficiency +# }