diff --git a/.renvignore b/.renvignore new file mode 100644 index 0000000..d48919b --- /dev/null +++ b/.renvignore @@ -0,0 +1,15 @@ +# Ignore large Python experiment directories during renv dependency discovery +# These slow down startup and contain no R dependencies + +laravel_app/ +data_validation_tool/ +python_app/harvest_detection_experiments/ +python_app/experiments/ +phase2_refinement/ +webapps/ +tools/ +output/ +renv/ +*.py +*.ipynb +.git/ diff --git a/r_app/02_ci_extraction.R b/r_app/02_ci_extraction.R index b430c02..42cea28 100644 --- a/r_app/02_ci_extraction.R +++ b/r_app/02_ci_extraction.R @@ -31,6 +31,7 @@ suppressPackageStartupMessages({ library(readxl) library(here) library(furrr) + library(future) }) # 2. Process command line arguments @@ -118,19 +119,44 @@ main <- function() { stop(e) }) + # 4. Generate date list for processing # --------------------------------- dates <- date_list(end_date, 7) log_message(paste("Processing data for week", dates$week, "of", dates$year)) - # 5. Find and filter raster files by date + # 5. Find and filter raster files by date - with grid size detection # ----------------------------------- log_message("Searching for raster files") - # Check if tiles exist (Script 01 output) - tile_folder <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split") + # Check if tiles exist (Script 01 output) - detect grid size dynamically + tiles_split_base <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split") + + # Detect grid size from daily_tiles_split folder structure + # Expected structure: daily_tiles_split/5x5/ or daily_tiles_split/10x10/ etc. + grid_size <- NA + if (dir.exists(tiles_split_base)) { + subfolders <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE) + # Look for grid size patterns like "5x5", "10x10", "20x20" + grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) + if (length(grid_patterns) > 0) { + grid_size <- grid_patterns[1] # Use first grid size found + log_message(paste("Detected grid size:", grid_size)) + } + } + + # Construct tile folder path with grid size + if (!is.na(grid_size)) { + tile_folder <- file.path(tiles_split_base, grid_size) + } else { + tile_folder <- tiles_split_base + } + use_tiles <- dir.exists(tile_folder) + # Make grid_size available globally for other functions + assign("grid_size", grid_size, envir = .GlobalEnv) + tryCatch({ if (use_tiles) { # Use tile-based processing @@ -145,7 +171,8 @@ main <- function() { field_boundaries_sf = field_boundaries_sf, daily_CI_vals_dir = daily_CI_vals_dir, cumulative_CI_vals_dir = cumulative_CI_vals_dir, - merged_final_dir = merged_final + merged_final_dir = merged_final, + grid_size = grid_size ) } else { diff --git a/r_app/09c_field_analysis_weekly.R b/r_app/09c_field_analysis_weekly.R new file mode 100644 index 0000000..159245e --- /dev/null +++ b/r_app/09c_field_analysis_weekly.R @@ -0,0 +1,1057 @@ +# 09c_FIELD_ANALYSIS_WEEKLY.R (ENHANCED - SC-64 NEW COLUMNS) +# ============================================================================ +# Per-field weekly analysis with NEW columns for trend analysis and advanced metrics +# +# ENHANCEMENTS OVER 09b: +# - Four_week_trend: Smoothed CI trend categorization (strong growth, growth, no growth, decline, strong decline) +# - Last_harvest_or_planting_date: Date of most recent harvest/planting from harvesting_data +# - CI_Percentiles: 10th and 90th percentiles (robust to outliers from roads/trees) +# - CV_Trend_Short_Term: Week-over-week CV change (2-week comparison) +# - CV_Trend_Long_Term: Long-term CV trend (8-week comparison) +# - Cloud_pct_clear: Rounded to 5% intervals for client reporting +# +# Key improvement: All threshold values are MANUALLY DEFINED at the top of this script +# and can be easily updated. In future, these may be replaced with model-derived parameters. +# +# Usage: Rscript 09c_field_analysis_weekly.R [end_date] [project_dir] +# - end_date: End date for analysis (YYYY-MM-DD format), default: today +# - project_dir: Project directory name (e.g., "aura", "esa", "angata") +# +# Example: +# Rscript 09c_field_analysis_weekly.R 2026-01-08 angata +# + +# ============================================================================ +# *** CONFIGURATION SECTION - MANUALLY DEFINED THRESHOLDS *** +# *** These values define decision logic and can be easily updated or replaced +# by model-derived parameters in the future *** +# ============================================================================ + +# TEST MODE (for development with limited historical data) +# Set to TRUE to test with fewer historical weeks; set to FALSE for production +TEST_MODE <- TRUE +TEST_MODE_NUM_WEEKS <- 2 # Number of historical weeks to load in test mode + +# FOUR-WEEK TREND THRESHOLDS (for categorizing mean_CI trends) +# These define the boundaries for growth categorization based on weekly change rate +FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5 # Average weekly increase >= 0.5 = "strong growth" +FOUR_WEEK_TREND_GROWTH_MIN <- 0.1 # Average weekly increase >= 0.1 = "growth" +FOUR_WEEK_TREND_GROWTH_MAX <- 0.5 # Average weekly increase < 0.5 +FOUR_WEEK_TREND_NO_GROWTH_RANGE <- 0.1 # ±0.1 around 0 = "no growth" +FOUR_WEEK_TREND_DECLINE_MAX <- -0.1 # Average weekly change > -0.1 = "no growth" +FOUR_WEEK_TREND_DECLINE_MIN <- -0.5 # Average weekly decrease >= -0.1 = "decline" +FOUR_WEEK_TREND_STRONG_DECLINE_MAX <- -0.5 # Average weekly decrease < -0.5 = "strong decline" + +# CV TREND THRESHOLDS (for categorizing field uniformity trends) +# These determine if CV change is significant enough to report +CV_TREND_THRESHOLD_SIGNIFICANT <- 0.05 # CV change >= 0.05 is considered significant + +# CLOUD COVER ROUNDING INTERVALS (for client-friendly reporting) +# Rounds cloud_pct_clear to 5% intervals to show impact while avoiding false precision +CLOUD_INTERVALS <- c(0, 50, 60, 70, 80, 90, 100) # Used for bucketing: <50%, 50-60%, 60-70%, etc. + +# PERCENTILE CALCULATIONS (for robust CI range estimation) +CI_PERCENTILE_LOW <- 0.10 # 10th percentile +CI_PERCENTILE_HIGH <- 0.90 # 90th percentile + +# HISTORICAL DATA LOOKBACK (for trend calculations) +WEEKS_FOR_FOUR_WEEK_TREND <- 4 # Use 4 weeks of data for trend +WEEKS_FOR_CV_TREND_SHORT <- 2 # Compare CV over 2 weeks +WEEKS_FOR_CV_TREND_LONG <- 8 # Compare CV over 8 weeks + +# ============================================================================ +# 1. Load required libraries +# ============================================================================ + +suppressPackageStartupMessages({ + library(here) + library(sf) + library(terra) + library(dplyr) + library(tidyr) + library(lubridate) + library(readr) + library(readxl) + library(writexl) + library(purrr) + library(furrr) + library(future) + tryCatch({ + library(torch) + }, error = function(e) { + message("Note: torch package not available - harvest model inference will be skipped") + }) +}) + +# ============================================================================ +# PHASE AND STATUS TRIGGER DEFINITIONS +# ============================================================================ + +PHASE_DEFINITIONS <- data.frame( + phase = c("Germination", "Tillering", "Grand Growth", "Maturation"), + age_start = c(0, 4, 17, 39), + age_end = c(6, 16, 39, 200), + stringsAsFactors = FALSE +) + +STATUS_TRIGGERS <- data.frame( + trigger = c( + "germination_started", + "germination_complete", + "stress_detected_whole_field", + "strong_recovery", + "growth_on_track", + "maturation_progressing", + "harvest_ready" + ), + age_min = c(0, 0, NA, NA, 4, 39, 45), + age_max = c(6, 6, NA, NA, 39, 200, 200), + description = c( + "10% of field CI > 2", + "70% of field CI >= 2", + "CI decline > -1.5 + low CV", + "CI increase > +1.5", + "CI increasing consistently", + "High CI, stable/declining", + "Age 45+ weeks (ready to harvest)" + ), + stringsAsFactors = FALSE +) + +# ============================================================================ +# TILE-AWARE HELPER FUNCTIONS +# ============================================================================ + +#' Get tile IDs that a field geometry intersects +#' +#' @param field_geom Single field geometry (sf or terra::vect) +#' @param tile_grid Data frame with tile extents (id, xmin, xmax, ymin, ymax) +#' @return Numeric vector of tile IDs that field intersects +#' +get_tile_ids_for_field <- function(field_geom, tile_grid) { + if (inherits(field_geom, "sf")) { + field_bbox <- sf::st_bbox(field_geom) + field_xmin <- field_bbox["xmin"] + field_xmax <- field_bbox["xmax"] + field_ymin <- field_bbox["ymin"] + field_ymax <- field_bbox["ymax"] + } else if (inherits(field_geom, "SpatVector")) { + field_bbox <- terra::ext(field_geom) + field_xmin <- field_bbox$xmin + field_xmax <- field_bbox$xmax + field_ymin <- field_bbox$ymin + field_ymax <- field_bbox$ymax + } else { + stop("field_geom must be sf or terra::vect object") + } + + intersecting_tiles <- tile_grid$id[ + !(tile_grid$xmax < field_xmin | + tile_grid$xmin > field_xmax | + tile_grid$ymax < field_ymin | + tile_grid$ymin > field_ymax) + ] + + return(as.numeric(intersecting_tiles)) +} + +#' Load CI tiles that a field intersects +#' +#' @param field_geom Single field geometry +#' @param tile_ids Numeric vector of tile IDs to load +#' @param week_num Week number +#' @param year Year +#' @param mosaic_dir Directory with weekly tiles +#' @return Single CI raster (merged if multiple tiles, or single tile) +#' +load_tiles_for_field <- function(field_geom, tile_ids, week_num, year, mosaic_dir) { + if (length(tile_ids) == 0) { + return(NULL) + } + + tiles_list <- list() + for (tile_id in sort(tile_ids)) { + tile_filename <- sprintf("week_%02d_%d_%02d.tif", week_num, year, tile_id) + tile_path <- file.path(mosaic_dir, tile_filename) + + if (file.exists(tile_path)) { + tryCatch({ + tile_rast <- terra::rast(tile_path) + ci_band <- terra::subset(tile_rast, 5) + tiles_list[[length(tiles_list) + 1]] <- ci_band + }, error = function(e) { + message(paste(" Warning: Could not load tile", tile_id, ":", e$message)) + }) + } + } + + if (length(tiles_list) == 0) { + return(NULL) + } + + if (length(tiles_list) == 1) { + return(tiles_list[[1]]) + } else { + tryCatch({ + rsrc <- terra::sprc(tiles_list) + merged <- terra::mosaic(rsrc, fun = "max") + return(merged) + }, error = function(e) { + message(paste(" Warning: Could not merge tiles:", e$message)) + return(tiles_list[[1]]) + }) + } +} + +#' Build tile grid from available weekly tile files +#' +#' @param mosaic_dir Directory with weekly tiles +#' @param week_num Week number to discover tiles +#' @param year Year to discover tiles +#' @return Data frame with columns: id, xmin, xmax, ymin, ymax +#' +build_tile_grid <- function(mosaic_dir, week_num, year) { + tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year) + tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE) + + if (length(tile_files) == 0) { + stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir)) + } + + tile_grid <- data.frame( + id = integer(), + xmin = numeric(), + xmax = numeric(), + ymin = numeric(), + ymax = numeric(), + stringsAsFactors = FALSE + ) + + for (tile_file in tile_files) { + tryCatch({ + matches <- regmatches(basename(tile_file), regexpr("_([0-9]{2})\\.tif$", basename(tile_file))) + if (length(matches) > 0) { + tile_id <- as.integer(sub("_|\\.tif", "", matches[1])) + tile_rast <- terra::rast(tile_file) + tile_ext <- terra::ext(tile_rast) + tile_grid <- rbind(tile_grid, data.frame( + id = tile_id, + xmin = tile_ext$xmin, + xmax = tile_ext$xmax, + ymin = tile_ext$ymin, + ymax = tile_ext$ymax, + stringsAsFactors = FALSE + )) + } + }, error = function(e) { + message(paste(" Warning: Could not process tile", basename(tile_file), ":", e$message)) + }) + } + + if (nrow(tile_grid) == 0) { + stop("Could not extract extents from any tile files") + } + + return(tile_grid) +} + +# ============================================================================ +# HELPER FUNCTIONS FOR NEW COLUMNS (SC-64) +# ============================================================================ + +#' Categorize four-week trend based on average weekly CI change +#' +#' @param ci_values_list List of CI mean values (chronological order, oldest to newest) +#' @return Character: "strong growth", "growth", "no growth", "decline", "strong decline" +#' +categorize_four_week_trend <- function(ci_values_list) { + if (is.null(ci_values_list) || length(ci_values_list) < 2) { + return(NA_character_) + } + + ci_values_list <- ci_values_list[!is.na(ci_values_list)] + if (length(ci_values_list) < 2) { + return(NA_character_) + } + + # Calculate average weekly change + weekly_changes <- diff(ci_values_list) + avg_weekly_change <- mean(weekly_changes, na.rm = TRUE) + + # Categorize based on thresholds + if (avg_weekly_change >= FOUR_WEEK_TREND_STRONG_GROWTH_MIN) { + return("strong growth") + } else if (avg_weekly_change >= FOUR_WEEK_TREND_GROWTH_MIN && + avg_weekly_change < FOUR_WEEK_TREND_GROWTH_MAX) { + return("growth") + } else if (abs(avg_weekly_change) <= FOUR_WEEK_TREND_NO_GROWTH_RANGE) { + return("no growth") + } else if (avg_weekly_change <= FOUR_WEEK_TREND_DECLINE_MIN && + avg_weekly_change > FOUR_WEEK_TREND_STRONG_DECLINE_MAX) { + return("decline") + } else if (avg_weekly_change < FOUR_WEEK_TREND_STRONG_DECLINE_MAX) { + return("strong decline") + } else { + return("no growth") # Default fallback + } +} + +#' Round cloud percentage to 5% intervals for client reporting +#' +#' @param cloud_pct_clear Numeric cloud clear percentage (0-100) +#' @return Character representing the interval bucket +#' +round_cloud_to_intervals <- function(cloud_pct_clear) { + if (is.na(cloud_pct_clear)) { + return(NA_character_) + } + + if (cloud_pct_clear < 50) return("<50%") + if (cloud_pct_clear < 60) return("50-60%") + if (cloud_pct_clear < 70) return("60-70%") + if (cloud_pct_clear < 80) return("70-80%") + if (cloud_pct_clear < 90) return("80-90%") + return(">90%") +} + +#' Extract CI percentiles (10th and 90th) to avoid outlier distortion +#' +#' @param ci_values Numeric vector of CI values +#' @return Character string: "p10-p90" format +#' +get_ci_percentiles <- function(ci_values) { + if (is.null(ci_values) || length(ci_values) == 0) { + return(NA_character_) + } + + ci_values <- ci_values[!is.na(ci_values)] + if (length(ci_values) == 0) { + return(NA_character_) + } + + p10 <- quantile(ci_values, CI_PERCENTILE_LOW, na.rm = TRUE) + p90 <- quantile(ci_values, CI_PERCENTILE_HIGH, na.rm = TRUE) + + return(sprintf("%.1f-%.1f", p10, p90)) +} + +#' Calculate CV trend between two weeks +#' +#' @param cv_current Current week's CV +#' @param cv_previous Previous week's CV +#' @return Numeric: change in CV (positive = increased heterogeneity) +#' +calculate_cv_trend <- function(cv_current, cv_previous) { + if (is.na(cv_current) || is.na(cv_previous)) { + return(NA_real_) + } + return(round(cv_current - cv_previous, 4)) +} + +# ============================================================================ +# HELPER FUNCTIONS (FROM 09b) +# ============================================================================ + +get_phase_by_age <- function(age_weeks) { + if (is.na(age_weeks)) return(NA_character_) + for (i in seq_len(nrow(PHASE_DEFINITIONS))) { + if (age_weeks >= PHASE_DEFINITIONS$age_start[i] && + age_weeks <= PHASE_DEFINITIONS$age_end[i]) { + return(PHASE_DEFINITIONS$phase[i]) + } + } + return("Unknown") +} + +get_status_trigger <- function(ci_values, ci_change, age_weeks) { + if (is.na(age_weeks) || length(ci_values) == 0) return(NA_character_) + + ci_values <- ci_values[!is.na(ci_values)] + if (length(ci_values) == 0) return(NA_character_) + + pct_above_2 <- sum(ci_values > 2) / length(ci_values) * 100 + pct_at_or_above_2 <- sum(ci_values >= 2) / length(ci_values) * 100 + ci_cv <- if (mean(ci_values, na.rm = TRUE) > 0) sd(ci_values) / mean(ci_values, na.rm = TRUE) else 0 + mean_ci <- mean(ci_values, na.rm = TRUE) + + if (age_weeks >= 0 && age_weeks <= 6) { + if (pct_at_or_above_2 >= 70) { + return("germination_complete") + } else if (pct_above_2 > 10) { + return("germination_started") + } + } + + if (age_weeks >= 45) { + return("harvest_ready") + } + + if (age_weeks > 6 && !is.na(ci_change) && ci_change < -1.5 && ci_cv < 0.25) { + return("stress_detected_whole_field") + } + + if (age_weeks > 6 && !is.na(ci_change) && ci_change > 1.5) { + return("strong_recovery") + } + + if (age_weeks >= 4 && age_weeks < 39 && !is.na(ci_change) && ci_change > 0.2) { + return("growth_on_track") + } + + if (age_weeks >= 39 && age_weeks < 45 && mean_ci > 3.5) { + return("maturation_progressing") + } + + return(NA_character_) +} + +#' Load multiple weeks of CSV data for trend calculations +#' +#' @param project_dir Project name +#' @param current_week Current week number +#' @param reports_dir Reports directory +#' @param num_weeks Number of weeks to load (default 4) +#' @return List with data frames for each week, or NULL if not enough data +#' +load_historical_field_data <- function(project_dir, current_week, reports_dir, num_weeks = 4) { + historical_data <- list() + loaded_weeks <- c() + + for (lookback in 0:(num_weeks - 1)) { + target_week <- current_week - lookback + if (target_week < 1) target_week <- target_week + 52 + + csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", target_week), ".csv") + csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename) + + if (file.exists(csv_path)) { + tryCatch({ + data <- read_csv(csv_path, show_col_types = FALSE) + historical_data[[lookback + 1]] <- list( + week = target_week, + data = data + ) + loaded_weeks <- c(loaded_weeks, target_week) + }, error = function(e) { + message(paste(" Warning: Could not load week", target_week, ":", e$message)) + }) + } + } + + if (length(historical_data) == 0) { + message(paste("Warning: No historical field data found for trend calculations")) + return(NULL) + } + + message(paste("Loaded", length(historical_data), "weeks of historical data:", + paste(loaded_weeks, collapse = ", "))) + + return(historical_data) +} + +USE_UNIFORM_AGE <- TRUE +UNIFORM_PLANTING_DATE <- as.Date("2025-01-01") + +extract_planting_dates <- function(harvesting_data) { + if (USE_UNIFORM_AGE) { + message(paste("Using uniform planting date for all fields:", UNIFORM_PLANTING_DATE)) + return(data.frame( + field_id = character(), + planting_date = as.Date(character()), + stringsAsFactors = FALSE + )) + } + + if (is.null(harvesting_data) || nrow(harvesting_data) == 0) { + message("Warning: No harvesting data available.") + return(NULL) + } + + tryCatch({ + planting_dates <- harvesting_data %>% + arrange(field, desc(season_start)) %>% + distinct(field, .keep_all = TRUE) %>% + select(field, season_start) %>% + rename(field_id = field, planting_date = season_start) %>% + filter(!is.na(planting_date)) %>% + as.data.frame() + + message(paste("Extracted planting dates for", nrow(planting_dates), "fields")) + return(planting_dates) + }, error = function(e) { + message(paste("Error extracting planting dates:", e$message)) + return(NULL) + }) +} + +# ============================================================================ +# PARALLEL FIELD ANALYSIS FUNCTION +# ============================================================================ + +#' Analyze single field with SC-64 enhancements +#' +#' @param field_idx Index in field_boundaries_sf +#' @param field_boundaries_sf All field boundaries (sf object) +#' @param tile_grid Data frame with tile extents +#' @param week_num Current week number +#' @param year Current year +#' @param mosaic_dir Directory with weekly tiles +#' @param historical_data Historical weekly data for trend calculations +#' @param planting_dates Planting dates lookup +#' @param report_date Report date +#' @param harvest_imminence_data Harvest imminence predictions (optional) +#' +#' @return Single-row data frame with field analysis including new SC-64 columns +#' +analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week_num, year, + mosaic_dir, historical_data = NULL, planting_dates = NULL, + report_date = Sys.Date(), harvest_imminence_data = NULL) { + + tryCatch({ + # Get field info + field_id <- field_boundaries_sf$field[field_idx] + farm_section <- if ("sub_area" %in% names(field_boundaries_sf)) { + field_boundaries_sf$sub_area[field_idx] + } else { + NA_character_ + } + field_name <- field_id + + # Get field geometry and validate + field_sf <- field_boundaries_sf[field_idx, ] + if (sf::st_is_empty(field_sf) || any(is.na(sf::st_geometry(field_sf)))) { + return(data.frame( + Field_id = field_id, + error = "Empty or invalid geometry" + )) + } + + # Calculate field area + field_area_ha <- as.numeric(sf::st_area(field_sf)) / 10000 + field_area_acres <- field_area_ha / 0.404686 + + # Determine which tiles this field intersects + tile_ids <- get_tile_ids_for_field(field_sf, tile_grid) + + # Load current CI tiles for this field + current_ci <- load_tiles_for_field(field_sf, tile_ids, week_num, year, mosaic_dir) + + if (is.null(current_ci)) { + return(data.frame( + Field_id = field_id, + error = "No tile data available" + )) + } + + # Extract CI values for current field + field_vect <- terra::vect(sf::as_Spatial(field_sf)) + terra::crs(field_vect) <- terra::crs(current_ci) + + all_extracted <- terra::extract(current_ci, field_vect)[, 2] + current_ci_vals <- all_extracted[!is.na(all_extracted)] + + # Calculate cloud coverage + num_total <- length(all_extracted) + num_data <- sum(!is.na(all_extracted)) + pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0 + + cloud_cat <- if (num_data == 0) "No image available" + else if (pct_clear >= 99.5) "Clear view" + else "Partial coverage" + cloud_pct <- 100 - pct_clear + cloud_interval <- round_cloud_to_intervals(pct_clear) # NEW: Rounded intervals + + if (length(current_ci_vals) == 0) { + return(data.frame( + Field_id = field_id, + error = "No CI values extracted" + )) + } + + # Calculate current CI statistics + mean_ci_current <- mean(current_ci_vals, na.rm = TRUE) + ci_std <- sd(current_ci_vals, na.rm = TRUE) + cv_current <- ci_std / mean_ci_current + range_min <- min(current_ci_vals, na.rm = TRUE) + range_max <- max(current_ci_vals, na.rm = TRUE) + range_str <- sprintf("%.1f-%.1f", range_min, range_max) + + # NEW: Get CI percentiles (10th-90th) + ci_percentiles_str <- get_ci_percentiles(current_ci_vals) + + # Calculate weekly CI change + weekly_ci_change <- NA + previous_ci_vals <- NULL + + tryCatch({ + previous_ci <- load_tiles_for_field(field_sf, tile_ids, week_num - 1, year, mosaic_dir) + if (!is.null(previous_ci)) { + prev_extracted <- terra::extract(previous_ci, field_vect)[, 2] + previous_ci_vals <- prev_extracted[!is.na(prev_extracted)] + if (length(previous_ci_vals) > 0) { + mean_ci_previous <- mean(previous_ci_vals, na.rm = TRUE) + weekly_ci_change <- mean_ci_current - mean_ci_previous + } + } + }, error = function(e) { + # Silent fail + }) + + if (is.na(weekly_ci_change)) { + weekly_ci_change_str <- sprintf("%.1f ± %.2f", mean_ci_current, ci_std) + } else { + weekly_ci_change_str <- sprintf("%.1f ± %.2f (Δ%.1f)", mean_ci_current, ci_std, weekly_ci_change) + } + + # Calculate age + age_weeks <- NA + if (!is.null(planting_dates) && nrow(planting_dates) > 0) { + field_planting <- planting_dates %>% + filter(field_id == !!field_id) %>% + pull(planting_date) + + if (length(field_planting) > 0) { + age_weeks <- as.numeric(difftime(report_date, field_planting[1], units = "weeks")) + } + } + + if (USE_UNIFORM_AGE) { + age_weeks <- as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks")) + } + + # Calculate germination progress + pct_ci_above_2 <- sum(current_ci_vals > 2) / length(current_ci_vals) * 100 + pct_ci_ge_2 <- sum(current_ci_vals >= 2) / length(current_ci_vals) * 100 + germination_progress_str <- NA_character_ + if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks <= 6) { + germination_progress_str <- sprintf("%.0f%%", pct_ci_ge_2) + } + + # Assign phase and trigger + phase <- "Unknown" + imminent_prob_val <- NA + if (!is.null(harvest_imminence_data) && nrow(harvest_imminence_data) > 0) { + imminence_row <- harvest_imminence_data %>% + filter(field_id == !!field_id) + if (nrow(imminence_row) > 0) { + imminent_prob_val <- imminence_row$probability[1] + if (imminent_prob_val > 0.5) { + phase <- "Harvest Imminent (Model)" + } + } + } + + if (phase == "Unknown") { + phase <- get_phase_by_age(age_weeks) + } + + status_trigger <- get_status_trigger(current_ci_vals, weekly_ci_change, age_weeks) + + nmr_weeks_in_phase <- 1 + + # NEW: Load historical data to calculate four_week_trend + four_week_trend <- NA_character_ + ci_values_for_trend <- c(mean_ci_current) + + if (!is.null(historical_data) && length(historical_data) > 0) { + # Extract this field's CI mean values from historical weeks + for (hist in historical_data) { + hist_week <- hist$week + hist_data <- hist$data + + field_row <- hist_data %>% + filter(Field_id == !!field_id) + + if (nrow(field_row) > 0 && !is.na(field_row$Mean_CI[1])) { + ci_values_for_trend <- c(field_row$Mean_CI[1], ci_values_for_trend) + } + } + + if (length(ci_values_for_trend) >= 2) { + four_week_trend <- categorize_four_week_trend(ci_values_for_trend) + } + } + + # NEW: Load previous weeks for CV trends + cv_trend_short <- NA_real_ + cv_trend_long <- NA_real_ + + if (!is.null(historical_data) && length(historical_data) > 0) { + # CV from 2 weeks ago (short-term trend) + if (length(historical_data) >= 2) { + cv_2w <- historical_data[[2]]$data %>% + filter(Field_id == !!field_id) %>% + pull(CV) + if (length(cv_2w) > 0 && !is.na(cv_2w[1])) { + cv_trend_short <- calculate_cv_trend(cv_current, cv_2w[1]) + } + } + + # CV from 8 weeks ago (long-term trend) + if (length(historical_data) >= 8) { + cv_8w <- historical_data[[8]]$data %>% + filter(Field_id == !!field_id) %>% + pull(CV) + if (length(cv_8w) > 0 && !is.na(cv_8w[1])) { + cv_trend_long <- calculate_cv_trend(cv_current, cv_8w[1]) + } + } + } + + # NEW: Last harvest/planting date (from harvesting_data if available) + last_harvest_date <- NA_character_ + if (!is.null(harvesting_data) && nrow(harvesting_data) > 0) { + last_harvest_row <- harvesting_data %>% + filter(field == !!field_id) %>% + arrange(desc(season_start)) %>% + slice(1) + + if (nrow(last_harvest_row) > 0 && !is.na(last_harvest_row$season_start[1])) { + last_harvest_date <- as.character(last_harvest_row$season_start[1]) + } + } + + # Compile result with all SC-64 columns + result <- data.frame( + Field_id = field_id, + Farm_Section = farm_section, + Field_name = field_name, + Hectare = round(field_area_ha, 2), + Acreage = round(field_area_acres, 2), + Mean_CI = round(mean_ci_current, 2), + Weekly_ci_change = if (is.na(weekly_ci_change)) NA_real_ else round(weekly_ci_change, 2), + Weekly_ci_change_str = weekly_ci_change_str, + Four_week_trend = four_week_trend, # NEW + Last_harvest_or_planting_date = last_harvest_date, # NEW + Age_week = if (is.na(age_weeks)) NA_integer_ else as.integer(round(age_weeks)), + `Phase (age based)` = phase, + nmr_weeks_in_this_phase = nmr_weeks_in_phase, + Germination_progress = germination_progress_str, + Imminent_prob = imminent_prob_val, + Status_trigger = status_trigger, + CI_range = range_str, + CI_Percentiles = ci_percentiles_str, # NEW + CV = round(cv_current, 4), + CV_Trend_Short_Term = cv_trend_short, # NEW (2-week) + CV_Trend_Long_Term = cv_trend_long, # NEW (8-week) + Cloud_pct_clear = pct_clear, + Cloud_pct_clear_interval = cloud_interval, # NEW: Rounded intervals + Cloud_pct = cloud_pct, + Cloud_category = cloud_cat, + stringsAsFactors = FALSE + ) + + return(result) + + }, error = function(e) { + message(paste("Error analyzing field", field_idx, ":", e$message)) + return(data.frame( + Field_id = NA_character_, + error = e$message + )) + }) +} + +# ============================================================================ +# SUMMARY GENERATION +# ============================================================================ + +generate_field_analysis_summary <- function(field_df) { + message("Generating summary statistics...") + + total_acreage <- sum(field_df$Acreage, na.rm = TRUE) + + germination_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Germination"], na.rm = TRUE) + tillering_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Tillering"], na.rm = TRUE) + grand_growth_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Grand Growth"], na.rm = TRUE) + maturation_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Maturation"], na.rm = TRUE) + unknown_phase_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Unknown"], na.rm = TRUE) + + harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE) + stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE) + recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE) + growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE) + germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE) + germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE) + no_trigger_acreage <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE) + + clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE) + partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE) + no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE) + total_fields <- nrow(field_df) + + clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE) + partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE) + no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE) + + summary_df <- data.frame( + Category = c( + "--- PHASE DISTRIBUTION ---", + "Germination", + "Tillering", + "Grand Growth", + "Maturation", + "Unknown phase", + "--- STATUS TRIGGERS ---", + "Harvest ready", + "Stress detected", + "Strong recovery", + "Growth on track", + "Germination complete", + "Germination started", + "No trigger", + "--- CLOUD COVERAGE (FIELDS) ---", + "Clear view", + "Partial coverage", + "No image available", + "--- CLOUD COVERAGE (ACREAGE) ---", + "Clear view", + "Partial coverage", + "No image available", + "--- TOTAL ---", + "Total Acreage" + ), + Acreage = c( + NA, + round(germination_acreage, 2), + round(tillering_acreage, 2), + round(grand_growth_acreage, 2), + round(maturation_acreage, 2), + round(unknown_phase_acreage, 2), + NA, + round(harvest_ready_acreage, 2), + round(stress_acreage, 2), + round(recovery_acreage, 2), + round(growth_on_track_acreage, 2), + round(germination_complete_acreage, 2), + round(germination_started_acreage, 2), + round(no_trigger_acreage, 2), + NA, + paste0(clear_fields, " fields"), + paste0(partial_fields, " fields"), + paste0(no_image_fields, " fields"), + NA, + round(clear_acreage, 2), + round(partial_acreage, 2), + round(no_image_acreage, 2), + NA, + round(total_acreage, 2) + ), + stringsAsFactors = FALSE + ) + + attr(summary_df, "cloud_fields_clear") <- clear_fields + attr(summary_df, "cloud_fields_partial") <- partial_fields + attr(summary_df, "cloud_fields_no_image") <- no_image_fields + attr(summary_df, "cloud_fields_total") <- total_fields + + return(summary_df) +} + +# ============================================================================ +# EXPORT FUNCTIONS +# ============================================================================ + +export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, reports_dir) { + message("Exporting per-field analysis to Excel, CSV, and RDS...") + + output_subdir <- file.path(reports_dir, "kpis", "field_analysis") + if (!dir.exists(output_subdir)) { + dir.create(output_subdir, recursive = TRUE) + } + + excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), "_test.xlsx") + excel_path <- file.path(output_subdir, excel_filename) + excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE) + + sheets <- list( + "Field Data" = field_df, + "Summary" = summary_df + ) + + write_xlsx(sheets, excel_path) + message(paste("✓ Field analysis Excel exported to:", excel_path)) + + kpi_data <- list( + field_analysis = field_df, + field_analysis_summary = summary_df, + metadata = list( + current_week = current_week, + project = project_dir, + created_at = Sys.time() + ) + ) + + rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d", current_week), ".rds") + rds_path <- file.path(reports_dir, "kpis", rds_filename) + + saveRDS(kpi_data, rds_path) + message(paste("✓ Field analysis RDS exported to:", rds_path)) + + csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".csv") + csv_path <- file.path(output_subdir, csv_filename) + write_csv(field_df, csv_path) + message(paste("✓ Field analysis CSV exported to:", csv_path)) + + return(list(excel = excel_path, rds = rds_path, csv = csv_path)) +} + +# ============================================================================ +# MAIN +# ============================================================================ + +main <- function() { + args <- commandArgs(trailingOnly = TRUE) + + end_date <- if (length(args) >= 1 && !is.na(args[1])) { + as.Date(args[1]) + } else if (exists("end_date_str", envir = .GlobalEnv)) { + as.Date(get("end_date_str", envir = .GlobalEnv)) + } else { + Sys.Date() + } + + project_dir <- if (length(args) >= 2 && !is.na(args[2])) { + as.character(args[2]) + } else if (exists("project_dir", envir = .GlobalEnv)) { + get("project_dir", envir = .GlobalEnv) + } else { + "angata" + } + + assign("project_dir", project_dir, envir = .GlobalEnv) + + source(here("r_app", "crop_messaging_utils.R")) + source(here("r_app", "parameters_project.R")) + + message("=== FIELD ANALYSIS WEEKLY (SC-64 ENHANCEMENTS) ===") + message(paste("Date:", end_date)) + message(paste("Project:", project_dir)) + message("") + message("CONFIGURATION:") + message(paste(" Four-week trend thresholds: growth >= ", FOUR_WEEK_TREND_GROWTH_MIN, + ", strong growth >= ", FOUR_WEEK_TREND_STRONG_GROWTH_MIN, sep = "")) + message(paste(" CV trend significant threshold:", CV_TREND_THRESHOLD_SIGNIFICANT)) + message(paste(" Cloud intervals:", paste(CLOUD_INTERVALS, collapse = ", "))) + message("") + + current_week <- as.numeric(format(end_date, "%V")) + year <- as.numeric(format(end_date, "%Y")) + previous_week <- current_week - 1 + if (previous_week < 1) previous_week <- 52 + + message(paste("Week:", current_week, "/ Year:", year)) + + message("Building tile grid from available weekly tiles...") + tile_grid <- build_tile_grid(weekly_tile_max, current_week, year) + message(paste(" Found", nrow(tile_grid), "tiles for analysis")) + + tryCatch({ + boundaries_result <- load_field_boundaries(data_dir) + + if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) { + field_boundaries_sf <- boundaries_result$field_boundaries_sf + } else { + field_boundaries_sf <- boundaries_result + } + + if (!is.data.frame(field_boundaries_sf) && !inherits(field_boundaries_sf, "sf")) { + stop("field_boundaries_sf is not a valid SF object") + } + + if (nrow(field_boundaries_sf) == 0) { + stop("No fields loaded from boundaries") + } + + message(paste(" Loaded", nrow(field_boundaries_sf), "fields from boundaries")) + }, error = function(e) { + stop("ERROR loading field boundaries: ", e$message, + "\nCheck that pivot.geojson exists in ", data_dir) + }) + + # Load historical data for trend calculations + message("Loading historical field data for trend calculations...") + num_weeks_to_load <- if (TEST_MODE) TEST_MODE_NUM_WEEKS else max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG) + if (TEST_MODE) { + message(paste(" TEST MODE: Loading only", num_weeks_to_load, "weeks of historical data")) + } + historical_data <- load_historical_field_data( + project_dir, current_week, reports_dir, + num_weeks = num_weeks_to_load + ) + + planting_dates <- extract_planting_dates(harvesting_data) + + message("Setting up parallel processing...") + current_plan <- class(future::plan())[1] + if (current_plan == "sequential") { + num_workers <- parallel::detectCores() - 1 + message(paste(" Using", num_workers, "workers for parallel processing")) + future::plan(future::multisession, workers = num_workers) + } else { + message(paste(" Using existing plan:", current_plan)) + } + + message("Analyzing fields in parallel...") + + field_analysis_list <- furrr::future_map( + seq_len(nrow(field_boundaries_sf)), + ~ analyze_single_field( + field_idx = ., + field_boundaries_sf = field_boundaries_sf, + tile_grid = tile_grid, + week_num = current_week, + year = year, + mosaic_dir = weekly_tile_max, + historical_data = historical_data, + planting_dates = planting_dates, + report_date = end_date, + harvest_imminence_data = NULL + ), + .progress = TRUE, + .options = furrr::furrr_options(seed = TRUE) + ) + + field_analysis_df <- dplyr::bind_rows(field_analysis_list) + + if (nrow(field_analysis_df) == 0) { + stop("No fields analyzed successfully!") + } + + message(paste("✓ Analyzed", nrow(field_analysis_df), "fields")) + + summary_statistics_df <- generate_field_analysis_summary(field_analysis_df) + + export_paths <- export_field_analysis_excel( + field_analysis_df, + summary_statistics_df, + project_dir, + current_week, + reports_dir + ) + + cat("\n=== FIELD ANALYSIS SUMMARY ===\n") + cat("Fields analyzed:", nrow(field_analysis_df), "\n") + cat("Excel export:", export_paths$excel, "\n") + cat("RDS export:", export_paths$rds, "\n") + cat("CSV export:", export_paths$csv, "\n\n") + + cat("--- Per-field results (first 10) ---\n") + available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", + "Four_week_trend", "Status_trigger", "Cloud_category") + available_cols <- available_cols[available_cols %in% names(field_analysis_df)] + if (length(available_cols) > 0) { + print(head(field_analysis_df[, available_cols], 10)) + } else { + print(head(field_analysis_df, 10)) + } + + cat("\n--- Summary statistics ---\n") + print(summary_statistics_df) + + message("\n✓ Field analysis complete!") +} + +if (sys.nframe() == 0) { + main() +} diff --git a/r_app/ANGATA_KPI_UPDATES.md b/r_app/ANGATA_KPI_UPDATES.md deleted file mode 100644 index eda35b1..0000000 --- a/r_app/ANGATA_KPI_UPDATES.md +++ /dev/null @@ -1,155 +0,0 @@ -# Angata KPI Script Updates - 09_calculate_kpis_Angata.R - -## Overview -The script has been restructured to focus on **4 required KPIs** for Angata, with legacy KPIs disabled by default but retained for future use. - -## Changes Made - -### 1. **Script Configuration** -- **File**: `09_calculate_kpis_Angata.R` -- **Toggle Variable**: `ENABLE_LEGACY_KPIS` (default: `FALSE`) - - Set to `TRUE` to run the 6 original KPIs - - Set to `FALSE` for Angata's 4 KPIs only - -### 2. **Angata KPIs (4 Required)** - -#### KPI 1: **Area Change Summary** ✅ REAL DATA -- **File**: Embedded in script as `calculate_area_change_kpi()` -- **Method**: Compares current week CI to previous week CI -- **Classification**: - - **Improving areas**: Mean change > +0.5 CI units - - **Stable areas**: Mean change between -0.5 and +0.5 CI units - - **Declining areas**: Mean change < -0.5 CI units -- **Output**: Hectares, Acres, and % of farm for each category -- **Data Type**: REAL DATA (processed from satellite imagery) - -#### KPI 2: **Germination Acreage** ✅ REAL DATA -- **Function**: `calculate_germination_acreage_kpi()` -- **Germination Phase Detection**: - - **Start germination**: When 10% of field's CI > 2 - - **End germination**: When 70% of field's CI ≥ 2 -- **Output**: - - Count of fields in germination phase - - Count of fields in post-germination phase - - Total acres and % of farm for each phase -- **Data Type**: REAL DATA (CI-based, calculated from satellite imagery) - -#### KPI 3: **Harvested Acreage** ⚠️ DUMMY DATA -- **Function**: `calculate_harvested_acreage_kpi()` -- **Current Status**: Returns zero values with clear "DUMMY DATA - Detection TBD" label -- **TODO**: Implement harvesting detection logic - - Likely indicators: CI drops below 1.5, sudden backscatter change, etc. -- **Output Format**: - - Number of harvested fields - - Total acres - - % of farm - - Clearly marked as DUMMY DATA in output table - -#### KPI 4: **Mature Acreage** ⚠️ DUMMY DATA -- **Function**: `calculate_mature_acreage_kpi()` -- **Current Status**: Returns zero values with clear "DUMMY DATA - Definition TBD" label -- **Concept**: Mature fields have high and stable CI for several weeks -- **TODO**: Implement stability-based maturity detection - - Calculate CI trend over last 3-4 weeks per field - - Stability metric: low CV over period, high CI relative to field max - - Threshold: e.g., field reaches 80%+ of max CI and stable for 3+ weeks -- **Output Format**: - - Number of mature fields - - Total acres - - % of farm - - Clearly marked as DUMMY DATA in output table - -### 3. **Legacy KPIs (Disabled by Default)** - -These original 6 KPIs are **disabled** but code is preserved for future use: -1. Field Uniformity Summary -2. TCH Forecasted -3. Growth Decline Index -4. Weed Presence Score -5. Gap Filling Score - -To enable: Set `ENABLE_LEGACY_KPIS <- TRUE` in the script - -### 4. **Output & Logging** - -#### Console Output (STDOUT) -``` -=== ANGATA KPI CALCULATION SUMMARY === -Report Date: [date] -Current Week: [week] -Previous Week: [week] -Total Fields Analyzed: [count] -Project: [project_name] -Calculation Time: [timestamp] -Legacy KPIs Enabled: FALSE - ---- REQUIRED ANGATA KPIs --- - -1. Area Change Summary (REAL DATA): - [table] - -2. Germination Acreage (CI-based, REAL DATA): - [table] - -3. Harvested Acreage (DUMMY DATA - Detection TBD): - [table with "DUMMY" marker] - -4. Mature Acreage (DUMMY DATA - Definition TBD): - [table with "DUMMY" marker] - -=== ANGATA KPI CALCULATION COMPLETED === -``` - -#### File Output (RDS) -- **Location**: `laravel_app/storage/app/[project]/reports/kpis/` -- **Filename**: `[project]_kpi_summary_tables_week[XX].rds` -- **Contents**: - - `area_change_summary`: Summary table - - `germination_summary`: Summary table - - `harvested_summary`: Summary table (DUMMY) - - `mature_summary`: Summary table (DUMMY) - - Field-level results for each KPI - - Metadata (report_date, weeks, total_fields, etc.) - -### 5. **Data Clarity Markers** - -All tables in output clearly indicate: -- **REAL DATA**: Derived from satellite CI measurements -- **DUMMY DATA - [TBD Item]**: Placeholder values; actual method to be implemented - -This prevents misinterpretation of preliminary results. - -## Usage - -```powershell -# Run Angata KPIs only (default, legacy disabled) -Rscript r_app/09_calculate_kpis_Angata.R 2025-11-27 7 angata - -# With specific date -Rscript r_app/09_calculate_kpis_Angata.R 2025-11-20 7 angata -``` - -## Future Work - -1. **Harvesting Detection**: Implement CI threshold + temporal pattern analysis -2. **Maturity Definition**: Define stability metrics and thresholds based on field CI ranges -3. **Legacy KPIs**: Adapt or retire based on Angata's needs -4. **Integration**: Connect outputs to reporting system (R Markdown, Word reports, etc.) - -## File Structure - -``` -r_app/ -├── 09_calculate_kpis_Angata.R (main script - UPDATED) -├── kpi_utils.R (optional - legacy functions) -├── crop_messaging_utils.R (dependencies) -├── parameters_project.R (project config) -└── growth_model_utils.R (optional) - -Output: -└── laravel_app/storage/app/angata/reports/kpis/ - └── angata_kpi_summary_tables_week[XX].rds -``` - ---- -**Updated**: November 27, 2025 diff --git a/r_app/ci_extraction_utils.R b/r_app/ci_extraction_utils.R index 99a1fee..d75ff6a 100644 --- a/r_app/ci_extraction_utils.R +++ b/r_app/ci_extraction_utils.R @@ -653,21 +653,27 @@ process_ci_values <- function(dates, field_boundaries, merged_final_dir, #' Process CI values from pre-split tiles (Script 01 output) #' #' This function processes CI values from tiles instead of full-extent rasters. -#' Tiles are created by Script 01 and stored in daily_tiles_split/[DATE]/ folders. +#' Tiles are created by Script 01 and stored in daily_tiles_split/[GRID_SIZE]/[DATE]/ folders. #' For each field, it aggregates CI statistics from all tiles that intersect that field. +#' Output follows the same grid structure: merged_final_tif/[GRID_SIZE]/[DATE]/ +#' +#' NOTE: Processes dates SEQUENTIALLY but tiles WITHIN EACH DATE in parallel (furrr) +#' This avoids worker process communication issues while still getting good speedup. #' #' @param dates List of dates from date_list() -#' @param tile_folder Path to the tile folder (daily_tiles_split) +#' @param tile_folder Path to the tile folder (daily_tiles_split/[GRID_SIZE]) #' @param field_boundaries Field boundaries as vector object #' @param field_boundaries_sf Field boundaries as SF object #' @param daily_CI_vals_dir Directory to save daily CI values #' @param cumulative_CI_vals_dir Directory to save cumulative CI values -#' @param merged_final_dir Directory to save processed tiles with CI band +#' @param merged_final_dir Base directory to save processed tiles with CI band +#' @param grid_size Grid size label (e.g., "5x5", "10x10") for output path structure #' @return NULL (used for side effects) #' process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries, field_boundaries_sf, daily_CI_vals_dir, - cumulative_CI_vals_dir, merged_final_dir) { + cumulative_CI_vals_dir, merged_final_dir, + grid_size = NA) { # Define path for combined CI data combined_ci_path <- here::here(cumulative_CI_vals_dir, "combined_CI_data.rds") @@ -691,11 +697,29 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries, if (!file.exists(combined_ci_path)) { safe_log("combined_CI_data.rds does not exist. Creating new file with all available tile data.") - # Process all tile dates + # Process all tile dates SEQUENTIALLY but with parallel tile processing + # Tiles within each date are processed in parallel via extract_ci_from_tiles() all_pivot_stats <- list() - for (date in tile_dates) { - safe_log(paste("Processing tiles for date:", date)) + for (i in seq_along(tile_dates)) { + date <- tile_dates[i] + + # SKIP: Check if this date already has processed output tiles + if (!is.na(grid_size)) { + output_date_folder <- file.path(merged_final_dir, grid_size, date) + } else { + output_date_folder <- file.path(merged_final_dir, date) + } + + if (dir.exists(output_date_folder)) { + existing_tiles <- list.files(output_date_folder, pattern = "\\.tif$") + if (length(existing_tiles) > 0) { + safe_log(paste("[", i, "/", length(tile_dates), "] SKIP:", date, "- already has", length(existing_tiles), "tiles")) + next + } + } + + safe_log(paste("[", i, "/", length(tile_dates), "] Processing tiles for date:", date)) date_tile_dir <- file.path(tile_folder, date) tile_files <- list.files(date_tile_dir, pattern = "\\.tif$", full.names = TRUE) @@ -705,15 +729,17 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries, next } - safe_log(paste(" Found", length(tile_files), "tiles for date", date)) + safe_log(paste(" Found", length(tile_files), "tiles - processing in parallel")) # Process all tiles for this date and aggregate to fields + # Tiles are processed in parallel via furrr::future_map() inside extract_ci_from_tiles() date_stats <- extract_ci_from_tiles( tile_files = tile_files, date = date, field_boundaries_sf = field_boundaries_sf, daily_CI_vals_dir = daily_CI_vals_dir, - merged_final_tif_dir = merged_final_dir + merged_final_tif_dir = merged_final_dir, + grid_size = grid_size ) if (!is.null(date_stats)) { @@ -735,13 +761,37 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries, } } else { - # Process only new dates + # Process only new dates SEQUENTIALLY but with parallel tile processing safe_log("combined_CI_data.rds exists, adding new tile data.") + if (length(dates_to_process) == 0) { + safe_log("No new dates to process", "WARNING") + return(invisible(NULL)) + } + + safe_log(paste("Processing", length(dates_to_process), "new dates...")) + new_pivot_stats_list <- list() - for (date in dates_to_process[1:2]) { - safe_log(paste("Processing tiles for date:", date)) + for (i in seq_along(dates_to_process)) { + date <- dates_to_process[i] + + # SKIP: Check if this date already has processed output tiles + if (!is.na(grid_size)) { + output_date_folder <- file.path(merged_final_dir, grid_size, date) + } else { + output_date_folder <- file.path(merged_final_dir, date) + } + + if (dir.exists(output_date_folder)) { + existing_tiles <- list.files(output_date_folder, pattern = "\\.tif$") + if (length(existing_tiles) > 0) { + safe_log(paste("[", i, "/", length(dates_to_process), "] SKIP:", date, "- already has", length(existing_tiles), "tiles")) + next + } + } + + safe_log(paste("[", i, "/", length(dates_to_process), "] Processing tiles for date:", date)) date_tile_dir <- file.path(tile_folder, date) tile_files <- list.files(date_tile_dir, pattern = "\\.tif$", full.names = TRUE) @@ -751,7 +801,7 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries, next } - safe_log(paste(" Found", length(tile_files), "tiles for date", date)) + safe_log(paste(" Found", length(tile_files), "tiles - processing in parallel")) # Extract CI from tiles for this date date_stats <- extract_ci_from_tiles( @@ -759,7 +809,8 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries, date = date, field_boundaries_sf = field_boundaries_sf, daily_CI_vals_dir = daily_CI_vals_dir, - merged_final_tif_dir = merged_final_dir + merged_final_tif_dir = merged_final_dir, + grid_size = grid_size ) if (!is.null(date_stats)) { @@ -788,17 +839,18 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries, #' 1. Loads tile #' 2. Creates/extracts CI band #' 3. Creates output raster with Red, Green, Blue, NIR, CI bands -#' 4. Saves to merged_final_tif_dir/[DATE]/ mirroring daily_tiles_split structure +#' 4. Saves to merged_final_tif_dir/[GRID_SIZE]/[DATE]/ mirroring daily_tiles_split structure #' 5. Extracts field-level CI statistics #' Returns statistics aggregated to field level. #' #' @param tile_file Path to a single tile TIF file #' @param field_boundaries_sf Field boundaries as SF object #' @param date Character string of the date (YYYY-MM-DD format) -#' @param merged_final_tif_dir Directory to save processed tiles with CI band +#' @param merged_final_tif_dir Base directory to save processed tiles with CI band +#' @param grid_size Grid size label (e.g., "5x5", "10x10") for output path structure #' @return Data frame with field CI statistics for this tile, or NULL if processing failed #' -process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_final_tif_dir) { +process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_final_tif_dir, grid_size = NA) { tryCatch({ tile_filename <- basename(tile_file) safe_log(paste(" [TILE] Loading:", tile_filename)) @@ -845,8 +897,14 @@ process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_fin output_raster <- c(red_band, green_band, blue_band, nir_band, ci_band) names(output_raster) <- c("Red", "Green", "Blue", "NIR", "CI") - # Save processed tile to merged_final_tif_dir/[DATE]/ with same filename - date_dir <- file.path(merged_final_tif_dir, date) + # Save processed tile to merged_final_tif_dir/[GRID_SIZE]/[DATE]/ with same filename + # This mirrors the input structure: daily_tiles_split/[GRID_SIZE]/[DATE]/ + if (!is.na(grid_size)) { + date_dir <- file.path(merged_final_tif_dir, grid_size, date) + } else { + date_dir <- file.path(merged_final_tif_dir, date) + } + if (!dir.exists(date_dir)) { dir.create(date_dir, recursive = TRUE, showWarnings = FALSE) } @@ -883,7 +941,7 @@ process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_fin #' Given a set of tile files for a single date, this function: #' 1. Loads each tile IN PARALLEL using furrr #' 2. Creates/extracts CI band -#' 3. Saves processed tile (Red, Green, Blue, NIR, CI) to merged_final_tif_dir/[DATE]/ +#' 3. Saves processed tile (Red, Green, Blue, NIR, CI) to merged_final_tif_dir/[GRID_SIZE]/[DATE]/ #' 4. Calculates field statistics from CI band #' 5. Aggregates field statistics across tiles #' 6. Saves individual date file (matching legacy workflow) @@ -894,24 +952,43 @@ process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_fin #' @param date Character string of the date (YYYY-MM-DD format) #' @param field_boundaries_sf Field boundaries as SF object #' @param daily_CI_vals_dir Directory to save individual date RDS files -#' @param merged_final_tif_dir Directory to save processed tiles with CI band (mirrors daily_tiles_split structure) +#' @param merged_final_tif_dir Base directory to save processed tiles with CI band +#' @param grid_size Grid size label (e.g., "5x5", "10x10") for output path structure #' @return Data frame with field CI statistics for the date #' -extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_CI_vals_dir = NULL, merged_final_tif_dir = NULL) { +extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_CI_vals_dir = NULL, merged_final_tif_dir = NULL, grid_size = NA) { if (!inherits(field_boundaries_sf, "sf")) { field_boundaries_sf <- sf::st_as_sf(field_boundaries_sf) } - safe_log(paste(" Processing", length(tile_files), "tiles for date", date, "(parallel processing)")) + safe_log(paste(" Processing", length(tile_files), "tiles for date", date, "(3-tile parallel batch)")) - # Process tiles in parallel using furrr::future_map - # This replaces the sequential for loop, processing 2-4 tiles simultaneously - stats_list <- furrr::future_map( - .x = tile_files, - .f = ~ process_single_tile(.x, field_boundaries_sf, date, merged_final_tif_dir), - .options = furrr::furrr_options(seed = TRUE) - ) + # Windows-compatible parallelization: Process tiles in small batches + # Use future_map with 3 workers - stable and efficient on Windows + + # Set up minimal future plan (3 workers max) + future::plan(future::multisession, workers = 3) + + # Process tiles using furrr with 2 workers + # Use retry logic for worker stability + stats_list <- tryCatch({ + furrr::future_map( + tile_files, + ~ process_single_tile(.x, field_boundaries_sf, date, merged_final_tif_dir, grid_size = grid_size), + .progress = FALSE, + .options = furrr::furrr_options(seed = TRUE) + ) + }, error = function(e) { + safe_log(paste("Parallel processing failed:", e$message, "- falling back to sequential"), "WARNING") + # Fallback to sequential if parallel fails + lapply( + tile_files, + function(tile_file) { + process_single_tile(tile_file, field_boundaries_sf, date, merged_final_tif_dir, grid_size = grid_size) + } + ) + }) # Extract names and filter out NULL results (failed tiles) tile_names <- basename(tile_files) diff --git a/webapps.zip b/webapps.zip deleted file mode 100644 index cf2f49b..0000000 Binary files a/webapps.zip and /dev/null differ diff --git a/webapps/geojson_viewer.html b/webapps/geojson_viewer.html index 36511a1..df29392 100644 --- a/webapps/geojson_viewer.html +++ b/webapps/geojson_viewer.html @@ -56,8 +56,9 @@ border-radius: 8px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); display: flex; - align-items: center; - gap: 10px; + flex-direction: column; + align-items: flex-start; + gap: 12px; } .map-controls label { @@ -70,6 +71,19 @@ font-weight: 500; } + .map-controls-group { + display: flex; + flex-direction: column; + gap: 8px; + border-top: 1px solid #e0e0e0; + padding-top: 8px; + } + + .map-controls-group:first-child { + border-top: none; + padding-top: 0; + } + .toggle-switch { position: relative; display: inline-block; @@ -764,14 +778,32 @@