diff --git a/.renvignore b/.renvignore new file mode 100644 index 0000000..d48919b --- /dev/null +++ b/.renvignore @@ -0,0 +1,15 @@ +# Ignore large Python experiment directories during renv dependency discovery +# These slow down startup and contain no R dependencies + +laravel_app/ +data_validation_tool/ +python_app/harvest_detection_experiments/ +python_app/experiments/ +phase2_refinement/ +webapps/ +tools/ +output/ +renv/ +*.py +*.ipynb +.git/ diff --git a/r_app/02_ci_extraction.R b/r_app/02_ci_extraction.R index b430c02..42cea28 100644 --- a/r_app/02_ci_extraction.R +++ b/r_app/02_ci_extraction.R @@ -31,6 +31,7 @@ suppressPackageStartupMessages({ library(readxl) library(here) library(furrr) + library(future) }) # 2. Process command line arguments @@ -118,19 +119,44 @@ main <- function() { stop(e) }) + # 4. Generate date list for processing # --------------------------------- dates <- date_list(end_date, 7) log_message(paste("Processing data for week", dates$week, "of", dates$year)) - # 5. Find and filter raster files by date + # 5. Find and filter raster files by date - with grid size detection # ----------------------------------- log_message("Searching for raster files") - # Check if tiles exist (Script 01 output) - tile_folder <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split") + # Check if tiles exist (Script 01 output) - detect grid size dynamically + tiles_split_base <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split") + + # Detect grid size from daily_tiles_split folder structure + # Expected structure: daily_tiles_split/5x5/ or daily_tiles_split/10x10/ etc. 
+ grid_size <- NA + if (dir.exists(tiles_split_base)) { + subfolders <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE) + # Look for grid size patterns like "5x5", "10x10", "20x20" + grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE) + if (length(grid_patterns) > 0) { + grid_size <- grid_patterns[1] # Use first grid size found + log_message(paste("Detected grid size:", grid_size)) + } + } + + # Construct tile folder path with grid size + if (!is.na(grid_size)) { + tile_folder <- file.path(tiles_split_base, grid_size) + } else { + tile_folder <- tiles_split_base + } + use_tiles <- dir.exists(tile_folder) + # Make grid_size available globally for other functions + assign("grid_size", grid_size, envir = .GlobalEnv) + tryCatch({ if (use_tiles) { # Use tile-based processing @@ -145,7 +171,8 @@ main <- function() { field_boundaries_sf = field_boundaries_sf, daily_CI_vals_dir = daily_CI_vals_dir, cumulative_CI_vals_dir = cumulative_CI_vals_dir, - merged_final_dir = merged_final + merged_final_dir = merged_final, + grid_size = grid_size ) } else { diff --git a/r_app/09c_field_analysis_weekly.R b/r_app/09c_field_analysis_weekly.R new file mode 100644 index 0000000..159245e --- /dev/null +++ b/r_app/09c_field_analysis_weekly.R @@ -0,0 +1,1057 @@ +# 09c_FIELD_ANALYSIS_WEEKLY.R (ENHANCED - SC-64 NEW COLUMNS) +# ============================================================================ +# Per-field weekly analysis with NEW columns for trend analysis and advanced metrics +# +# ENHANCEMENTS OVER 09b: +# - Four_week_trend: Smoothed CI trend categorization (strong growth, growth, no growth, decline, strong decline) +# - Last_harvest_or_planting_date: Date of most recent harvest/planting from harvesting_data +# - CI_Percentiles: 10th and 90th percentiles (robust to outliers from roads/trees) +# - CV_Trend_Short_Term: Week-over-week CV change (2-week comparison) +# - CV_Trend_Long_Term: Long-term CV trend (8-week comparison) +# - Cloud_pct_clear: 
Rounded into coarse interval buckets for client reporting
+#
+# Key improvement: All threshold values are MANUALLY DEFINED at the top of this script
+# and can be easily updated. In future, these may be replaced with model-derived parameters.
+#
+# Usage: Rscript 09c_field_analysis_weekly.R [end_date] [project_dir]
+#   - end_date: End date for analysis (YYYY-MM-DD format), default: today
+#   - project_dir: Project directory name (e.g., "aura", "esa", "angata")
+#
+# Example:
+#   Rscript 09c_field_analysis_weekly.R 2026-01-08 angata
+#
+
+# ============================================================================
+# *** CONFIGURATION SECTION - MANUALLY DEFINED THRESHOLDS ***
+# *** These values define decision logic and can be easily updated or replaced
+#     by model-derived parameters in the future ***
+# ============================================================================
+
+# TEST MODE (for development with limited historical data)
+# Set to TRUE to test with fewer historical weeks; set to FALSE for production
+TEST_MODE <- TRUE
+TEST_MODE_NUM_WEEKS <- 2  # Number of historical weeks to load in test mode
+
+# FOUR-WEEK TREND THRESHOLDS (for categorizing mean_CI trends)
+# These define the boundaries for growth categorization based on the average
+# weekly change rate of mean CI (see categorize_four_week_trend):
+#   >=  0.5          -> "strong growth"
+#   [0.1, 0.5)       -> "growth"
+#   within ±0.1      -> "no growth"
+#   (-0.5, -0.1)     -> "decline"
+#   <= -0.5          -> "strong decline"
+FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5   # Average weekly increase >= 0.5 = "strong growth"
+FOUR_WEEK_TREND_GROWTH_MIN <- 0.1          # Average weekly increase >= 0.1 = "growth"
+FOUR_WEEK_TREND_GROWTH_MAX <- 0.5          # ...and < 0.5 (otherwise "strong growth")
+FOUR_WEEK_TREND_NO_GROWTH_RANGE <- 0.1     # Average weekly change within ±0.1 = "no growth"
+FOUR_WEEK_TREND_DECLINE_MAX <- -0.1        # Upper bound of "decline": average weekly change <= -0.1
+FOUR_WEEK_TREND_DECLINE_MIN <- -0.5        # Lower bound of "decline": average weekly change > -0.5
+FOUR_WEEK_TREND_STRONG_DECLINE_MAX <- -0.5 # Average weekly change <= -0.5 = "strong decline"
+
+# CV TREND THRESHOLDS (for categorizing field uniformity trends)
+# These determine if CV change is significant enough to report
+CV_TREND_THRESHOLD_SIGNIFICANT <- 0.05 # CV change >= 0.05 is considered significant + +# CLOUD COVER ROUNDING INTERVALS (for client-friendly reporting) +# Rounds cloud_pct_clear to 5% intervals to show impact while avoiding false precision +CLOUD_INTERVALS <- c(0, 50, 60, 70, 80, 90, 100) # Used for bucketing: <50%, 50-60%, 60-70%, etc. + +# PERCENTILE CALCULATIONS (for robust CI range estimation) +CI_PERCENTILE_LOW <- 0.10 # 10th percentile +CI_PERCENTILE_HIGH <- 0.90 # 90th percentile + +# HISTORICAL DATA LOOKBACK (for trend calculations) +WEEKS_FOR_FOUR_WEEK_TREND <- 4 # Use 4 weeks of data for trend +WEEKS_FOR_CV_TREND_SHORT <- 2 # Compare CV over 2 weeks +WEEKS_FOR_CV_TREND_LONG <- 8 # Compare CV over 8 weeks + +# ============================================================================ +# 1. Load required libraries +# ============================================================================ + +suppressPackageStartupMessages({ + library(here) + library(sf) + library(terra) + library(dplyr) + library(tidyr) + library(lubridate) + library(readr) + library(readxl) + library(writexl) + library(purrr) + library(furrr) + library(future) + tryCatch({ + library(torch) + }, error = function(e) { + message("Note: torch package not available - harvest model inference will be skipped") + }) +}) + +# ============================================================================ +# PHASE AND STATUS TRIGGER DEFINITIONS +# ============================================================================ + +PHASE_DEFINITIONS <- data.frame( + phase = c("Germination", "Tillering", "Grand Growth", "Maturation"), + age_start = c(0, 4, 17, 39), + age_end = c(6, 16, 39, 200), + stringsAsFactors = FALSE +) + +STATUS_TRIGGERS <- data.frame( + trigger = c( + "germination_started", + "germination_complete", + "stress_detected_whole_field", + "strong_recovery", + "growth_on_track", + "maturation_progressing", + "harvest_ready" + ), + age_min = c(0, 0, NA, NA, 4, 39, 45), + age_max = 
c(6, 6, NA, NA, 39, 200, 200), + description = c( + "10% of field CI > 2", + "70% of field CI >= 2", + "CI decline > -1.5 + low CV", + "CI increase > +1.5", + "CI increasing consistently", + "High CI, stable/declining", + "Age 45+ weeks (ready to harvest)" + ), + stringsAsFactors = FALSE +) + +# ============================================================================ +# TILE-AWARE HELPER FUNCTIONS +# ============================================================================ + +#' Get tile IDs that a field geometry intersects +#' +#' @param field_geom Single field geometry (sf or terra::vect) +#' @param tile_grid Data frame with tile extents (id, xmin, xmax, ymin, ymax) +#' @return Numeric vector of tile IDs that field intersects +#' +get_tile_ids_for_field <- function(field_geom, tile_grid) { + if (inherits(field_geom, "sf")) { + field_bbox <- sf::st_bbox(field_geom) + field_xmin <- field_bbox["xmin"] + field_xmax <- field_bbox["xmax"] + field_ymin <- field_bbox["ymin"] + field_ymax <- field_bbox["ymax"] + } else if (inherits(field_geom, "SpatVector")) { + field_bbox <- terra::ext(field_geom) + field_xmin <- field_bbox$xmin + field_xmax <- field_bbox$xmax + field_ymin <- field_bbox$ymin + field_ymax <- field_bbox$ymax + } else { + stop("field_geom must be sf or terra::vect object") + } + + intersecting_tiles <- tile_grid$id[ + !(tile_grid$xmax < field_xmin | + tile_grid$xmin > field_xmax | + tile_grid$ymax < field_ymin | + tile_grid$ymin > field_ymax) + ] + + return(as.numeric(intersecting_tiles)) +} + +#' Load CI tiles that a field intersects +#' +#' @param field_geom Single field geometry +#' @param tile_ids Numeric vector of tile IDs to load +#' @param week_num Week number +#' @param year Year +#' @param mosaic_dir Directory with weekly tiles +#' @return Single CI raster (merged if multiple tiles, or single tile) +#' +load_tiles_for_field <- function(field_geom, tile_ids, week_num, year, mosaic_dir) { + if (length(tile_ids) == 0) { + return(NULL) + } + + 
tiles_list <- list()
+  for (tile_id in sort(tile_ids)) {
+    tile_filename <- sprintf("week_%02d_%d_%02d.tif", week_num, year, tile_id)
+    tile_path <- file.path(mosaic_dir, tile_filename)
+
+    if (file.exists(tile_path)) {
+      tryCatch({
+        tile_rast <- terra::rast(tile_path)
+        # Band 5 is assumed to be the CI band -- TODO confirm against Script 01 output
+        ci_band <- terra::subset(tile_rast, 5)
+        tiles_list[[length(tiles_list) + 1]] <- ci_band
+      }, error = function(e) {
+        message(paste(" Warning: Could not load tile", tile_id, ":", e$message))
+      })
+    }
+  }
+
+  if (length(tiles_list) == 0) {
+    return(NULL)
+  }
+
+  if (length(tiles_list) == 1) {
+    return(tiles_list[[1]])
+  } else {
+    tryCatch({
+      rsrc <- terra::sprc(tiles_list)
+      merged <- terra::mosaic(rsrc, fun = "max")
+      return(merged)
+    }, error = function(e) {
+      # Best effort: fall back to the first tile rather than failing the field
+      message(paste(" Warning: Could not merge tiles:", e$message))
+      return(tiles_list[[1]])
+    })
+  }
+}
+
+#' Build tile grid from available weekly tile files
+#'
+#' @param mosaic_dir Directory with weekly tiles
+#' @param week_num Week number to discover tiles
+#' @param year Year to discover tiles
+#' @return Data frame with columns: id, xmin, xmax, ymin, ymax
+#'
+build_tile_grid <- function(mosaic_dir, week_num, year) {
+  tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
+  tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
+
+  if (length(tile_files) == 0) {
+    stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir))
+  }
+
+  tile_grid <- data.frame(
+    id = integer(),
+    xmin = numeric(),
+    xmax = numeric(),
+    ymin = numeric(),
+    ymax = numeric(),
+    stringsAsFactors = FALSE
+  )
+
+  for (tile_file in tile_files) {
+    tryCatch({
+      matches <- regmatches(basename(tile_file), regexpr("_([0-9]{2})\\.tif$", basename(tile_file)))
+      if (length(matches) > 0) {
+        # BUG FIX: sub("_|\\.tif", "", ...) replaces only the FIRST alternation
+        # match, leaving "NN.tif" behind, so as.integer() returned NA for every
+        # tile. Extract the two-digit id via a capture group instead.
+        tile_id <- as.integer(sub("^_([0-9]{2})\\.tif$", "\\1", matches[1]))
+        tile_rast <- terra::rast(tile_file)
+        tile_ext <- terra::ext(tile_rast)
+        tile_grid <- rbind(tile_grid, data.frame(
+          id = tile_id,
+          xmin = tile_ext$xmin,
+          xmax =
tile_ext$xmax,
+          ymin = tile_ext$ymin,
+          ymax = tile_ext$ymax,
+          stringsAsFactors = FALSE
+        ))
+      }
+    }, error = function(e) {
+      message(paste(" Warning: Could not process tile", basename(tile_file), ":", e$message))
+    })
+  }
+
+  if (nrow(tile_grid) == 0) {
+    stop("Could not extract extents from any tile files")
+  }
+
+  return(tile_grid)
+}
+
+# ============================================================================
+# HELPER FUNCTIONS FOR NEW COLUMNS (SC-64)
+# ============================================================================
+
+#' Categorize four-week trend based on average weekly CI change
+#'
+#' @param ci_values_list List of CI mean values (chronological order, oldest to newest)
+#' @return Character: "strong growth", "growth", "no growth", "decline", "strong decline"
+#'
+categorize_four_week_trend <- function(ci_values_list) {
+  if (is.null(ci_values_list) || length(ci_values_list) < 2) {
+    return(NA_character_)
+  }
+
+  ci_values_list <- ci_values_list[!is.na(ci_values_list)]
+  if (length(ci_values_list) < 2) {
+    return(NA_character_)
+  }
+
+  # Calculate average weekly change
+  weekly_changes <- diff(ci_values_list)
+  avg_weekly_change <- mean(weekly_changes, na.rm = TRUE)
+
+  # Categorize based on thresholds
+  if (avg_weekly_change >= FOUR_WEEK_TREND_STRONG_GROWTH_MIN) {
+    return("strong growth")
+  } else if (avg_weekly_change >= FOUR_WEEK_TREND_GROWTH_MIN &&
+             avg_weekly_change < FOUR_WEEK_TREND_GROWTH_MAX) {
+    return("growth")
+  } else if (abs(avg_weekly_change) <= FOUR_WEEK_TREND_NO_GROWTH_RANGE) {
+    return("no growth")
+  } else if (avg_weekly_change <= FOUR_WEEK_TREND_DECLINE_MAX &&
+             avg_weekly_change > FOUR_WEEK_TREND_DECLINE_MIN) {
+    # BUG FIX: the original compared against FOUR_WEEK_TREND_DECLINE_MIN (-0.5)
+    # on both sides (avg <= -0.5 && avg > -0.5), which is impossible, so
+    # "decline" was unreachable and changes in (-0.5, -0.1) fell through to
+    # the "no growth" default. Compare against DECLINE_MAX/DECLINE_MIN instead.
+    return("decline")
+  } else if (avg_weekly_change <= FOUR_WEEK_TREND_STRONG_DECLINE_MAX) {
+    # BUG FIX: use <= so an average change of exactly -0.5 is classified as
+    # "strong decline" instead of falling through to the default.
+    return("strong decline")
+  } else {
+    return("no growth") # Default fallback
+  }
+}
+
+#' Round cloud percentage to 5% intervals for client reporting
+#'
+#' @param cloud_pct_clear Numeric cloud clear
percentage (0-100) +#' @return Character representing the interval bucket +#' +round_cloud_to_intervals <- function(cloud_pct_clear) { + if (is.na(cloud_pct_clear)) { + return(NA_character_) + } + + if (cloud_pct_clear < 50) return("<50%") + if (cloud_pct_clear < 60) return("50-60%") + if (cloud_pct_clear < 70) return("60-70%") + if (cloud_pct_clear < 80) return("70-80%") + if (cloud_pct_clear < 90) return("80-90%") + return(">90%") +} + +#' Extract CI percentiles (10th and 90th) to avoid outlier distortion +#' +#' @param ci_values Numeric vector of CI values +#' @return Character string: "p10-p90" format +#' +get_ci_percentiles <- function(ci_values) { + if (is.null(ci_values) || length(ci_values) == 0) { + return(NA_character_) + } + + ci_values <- ci_values[!is.na(ci_values)] + if (length(ci_values) == 0) { + return(NA_character_) + } + + p10 <- quantile(ci_values, CI_PERCENTILE_LOW, na.rm = TRUE) + p90 <- quantile(ci_values, CI_PERCENTILE_HIGH, na.rm = TRUE) + + return(sprintf("%.1f-%.1f", p10, p90)) +} + +#' Calculate CV trend between two weeks +#' +#' @param cv_current Current week's CV +#' @param cv_previous Previous week's CV +#' @return Numeric: change in CV (positive = increased heterogeneity) +#' +calculate_cv_trend <- function(cv_current, cv_previous) { + if (is.na(cv_current) || is.na(cv_previous)) { + return(NA_real_) + } + return(round(cv_current - cv_previous, 4)) +} + +# ============================================================================ +# HELPER FUNCTIONS (FROM 09b) +# ============================================================================ + +get_phase_by_age <- function(age_weeks) { + if (is.na(age_weeks)) return(NA_character_) + for (i in seq_len(nrow(PHASE_DEFINITIONS))) { + if (age_weeks >= PHASE_DEFINITIONS$age_start[i] && + age_weeks <= PHASE_DEFINITIONS$age_end[i]) { + return(PHASE_DEFINITIONS$phase[i]) + } + } + return("Unknown") +} + +get_status_trigger <- function(ci_values, ci_change, age_weeks) { + if (is.na(age_weeks) 
|| length(ci_values) == 0) return(NA_character_) + + ci_values <- ci_values[!is.na(ci_values)] + if (length(ci_values) == 0) return(NA_character_) + + pct_above_2 <- sum(ci_values > 2) / length(ci_values) * 100 + pct_at_or_above_2 <- sum(ci_values >= 2) / length(ci_values) * 100 + ci_cv <- if (mean(ci_values, na.rm = TRUE) > 0) sd(ci_values) / mean(ci_values, na.rm = TRUE) else 0 + mean_ci <- mean(ci_values, na.rm = TRUE) + + if (age_weeks >= 0 && age_weeks <= 6) { + if (pct_at_or_above_2 >= 70) { + return("germination_complete") + } else if (pct_above_2 > 10) { + return("germination_started") + } + } + + if (age_weeks >= 45) { + return("harvest_ready") + } + + if (age_weeks > 6 && !is.na(ci_change) && ci_change < -1.5 && ci_cv < 0.25) { + return("stress_detected_whole_field") + } + + if (age_weeks > 6 && !is.na(ci_change) && ci_change > 1.5) { + return("strong_recovery") + } + + if (age_weeks >= 4 && age_weeks < 39 && !is.na(ci_change) && ci_change > 0.2) { + return("growth_on_track") + } + + if (age_weeks >= 39 && age_weeks < 45 && mean_ci > 3.5) { + return("maturation_progressing") + } + + return(NA_character_) +} + +#' Load multiple weeks of CSV data for trend calculations +#' +#' @param project_dir Project name +#' @param current_week Current week number +#' @param reports_dir Reports directory +#' @param num_weeks Number of weeks to load (default 4) +#' @return List with data frames for each week, or NULL if not enough data +#' +load_historical_field_data <- function(project_dir, current_week, reports_dir, num_weeks = 4) { + historical_data <- list() + loaded_weeks <- c() + + for (lookback in 0:(num_weeks - 1)) { + target_week <- current_week - lookback + if (target_week < 1) target_week <- target_week + 52 + + csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", target_week), ".csv") + csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename) + + if (file.exists(csv_path)) { + tryCatch({ + data <- read_csv(csv_path, 
show_col_types = FALSE) + historical_data[[lookback + 1]] <- list( + week = target_week, + data = data + ) + loaded_weeks <- c(loaded_weeks, target_week) + }, error = function(e) { + message(paste(" Warning: Could not load week", target_week, ":", e$message)) + }) + } + } + + if (length(historical_data) == 0) { + message(paste("Warning: No historical field data found for trend calculations")) + return(NULL) + } + + message(paste("Loaded", length(historical_data), "weeks of historical data:", + paste(loaded_weeks, collapse = ", "))) + + return(historical_data) +} + +USE_UNIFORM_AGE <- TRUE +UNIFORM_PLANTING_DATE <- as.Date("2025-01-01") + +extract_planting_dates <- function(harvesting_data) { + if (USE_UNIFORM_AGE) { + message(paste("Using uniform planting date for all fields:", UNIFORM_PLANTING_DATE)) + return(data.frame( + field_id = character(), + planting_date = as.Date(character()), + stringsAsFactors = FALSE + )) + } + + if (is.null(harvesting_data) || nrow(harvesting_data) == 0) { + message("Warning: No harvesting data available.") + return(NULL) + } + + tryCatch({ + planting_dates <- harvesting_data %>% + arrange(field, desc(season_start)) %>% + distinct(field, .keep_all = TRUE) %>% + select(field, season_start) %>% + rename(field_id = field, planting_date = season_start) %>% + filter(!is.na(planting_date)) %>% + as.data.frame() + + message(paste("Extracted planting dates for", nrow(planting_dates), "fields")) + return(planting_dates) + }, error = function(e) { + message(paste("Error extracting planting dates:", e$message)) + return(NULL) + }) +} + +# ============================================================================ +# PARALLEL FIELD ANALYSIS FUNCTION +# ============================================================================ + +#' Analyze single field with SC-64 enhancements +#' +#' @param field_idx Index in field_boundaries_sf +#' @param field_boundaries_sf All field boundaries (sf object) +#' @param tile_grid Data frame with tile extents 
+#' @param week_num Current week number +#' @param year Current year +#' @param mosaic_dir Directory with weekly tiles +#' @param historical_data Historical weekly data for trend calculations +#' @param planting_dates Planting dates lookup +#' @param report_date Report date +#' @param harvest_imminence_data Harvest imminence predictions (optional) +#' +#' @return Single-row data frame with field analysis including new SC-64 columns +#' +analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week_num, year, + mosaic_dir, historical_data = NULL, planting_dates = NULL, + report_date = Sys.Date(), harvest_imminence_data = NULL) { + + tryCatch({ + # Get field info + field_id <- field_boundaries_sf$field[field_idx] + farm_section <- if ("sub_area" %in% names(field_boundaries_sf)) { + field_boundaries_sf$sub_area[field_idx] + } else { + NA_character_ + } + field_name <- field_id + + # Get field geometry and validate + field_sf <- field_boundaries_sf[field_idx, ] + if (sf::st_is_empty(field_sf) || any(is.na(sf::st_geometry(field_sf)))) { + return(data.frame( + Field_id = field_id, + error = "Empty or invalid geometry" + )) + } + + # Calculate field area + field_area_ha <- as.numeric(sf::st_area(field_sf)) / 10000 + field_area_acres <- field_area_ha / 0.404686 + + # Determine which tiles this field intersects + tile_ids <- get_tile_ids_for_field(field_sf, tile_grid) + + # Load current CI tiles for this field + current_ci <- load_tiles_for_field(field_sf, tile_ids, week_num, year, mosaic_dir) + + if (is.null(current_ci)) { + return(data.frame( + Field_id = field_id, + error = "No tile data available" + )) + } + + # Extract CI values for current field + field_vect <- terra::vect(sf::as_Spatial(field_sf)) + terra::crs(field_vect) <- terra::crs(current_ci) + + all_extracted <- terra::extract(current_ci, field_vect)[, 2] + current_ci_vals <- all_extracted[!is.na(all_extracted)] + + # Calculate cloud coverage + num_total <- length(all_extracted) + num_data <- 
sum(!is.na(all_extracted)) + pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0 + + cloud_cat <- if (num_data == 0) "No image available" + else if (pct_clear >= 99.5) "Clear view" + else "Partial coverage" + cloud_pct <- 100 - pct_clear + cloud_interval <- round_cloud_to_intervals(pct_clear) # NEW: Rounded intervals + + if (length(current_ci_vals) == 0) { + return(data.frame( + Field_id = field_id, + error = "No CI values extracted" + )) + } + + # Calculate current CI statistics + mean_ci_current <- mean(current_ci_vals, na.rm = TRUE) + ci_std <- sd(current_ci_vals, na.rm = TRUE) + cv_current <- ci_std / mean_ci_current + range_min <- min(current_ci_vals, na.rm = TRUE) + range_max <- max(current_ci_vals, na.rm = TRUE) + range_str <- sprintf("%.1f-%.1f", range_min, range_max) + + # NEW: Get CI percentiles (10th-90th) + ci_percentiles_str <- get_ci_percentiles(current_ci_vals) + + # Calculate weekly CI change + weekly_ci_change <- NA + previous_ci_vals <- NULL + + tryCatch({ + previous_ci <- load_tiles_for_field(field_sf, tile_ids, week_num - 1, year, mosaic_dir) + if (!is.null(previous_ci)) { + prev_extracted <- terra::extract(previous_ci, field_vect)[, 2] + previous_ci_vals <- prev_extracted[!is.na(prev_extracted)] + if (length(previous_ci_vals) > 0) { + mean_ci_previous <- mean(previous_ci_vals, na.rm = TRUE) + weekly_ci_change <- mean_ci_current - mean_ci_previous + } + } + }, error = function(e) { + # Silent fail + }) + + if (is.na(weekly_ci_change)) { + weekly_ci_change_str <- sprintf("%.1f ± %.2f", mean_ci_current, ci_std) + } else { + weekly_ci_change_str <- sprintf("%.1f ± %.2f (Δ%.1f)", mean_ci_current, ci_std, weekly_ci_change) + } + + # Calculate age + age_weeks <- NA + if (!is.null(planting_dates) && nrow(planting_dates) > 0) { + field_planting <- planting_dates %>% + filter(field_id == !!field_id) %>% + pull(planting_date) + + if (length(field_planting) > 0) { + age_weeks <- as.numeric(difftime(report_date, field_planting[1], 
units = "weeks")) + } + } + + if (USE_UNIFORM_AGE) { + age_weeks <- as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks")) + } + + # Calculate germination progress + pct_ci_above_2 <- sum(current_ci_vals > 2) / length(current_ci_vals) * 100 + pct_ci_ge_2 <- sum(current_ci_vals >= 2) / length(current_ci_vals) * 100 + germination_progress_str <- NA_character_ + if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks <= 6) { + germination_progress_str <- sprintf("%.0f%%", pct_ci_ge_2) + } + + # Assign phase and trigger + phase <- "Unknown" + imminent_prob_val <- NA + if (!is.null(harvest_imminence_data) && nrow(harvest_imminence_data) > 0) { + imminence_row <- harvest_imminence_data %>% + filter(field_id == !!field_id) + if (nrow(imminence_row) > 0) { + imminent_prob_val <- imminence_row$probability[1] + if (imminent_prob_val > 0.5) { + phase <- "Harvest Imminent (Model)" + } + } + } + + if (phase == "Unknown") { + phase <- get_phase_by_age(age_weeks) + } + + status_trigger <- get_status_trigger(current_ci_vals, weekly_ci_change, age_weeks) + + nmr_weeks_in_phase <- 1 + + # NEW: Load historical data to calculate four_week_trend + four_week_trend <- NA_character_ + ci_values_for_trend <- c(mean_ci_current) + + if (!is.null(historical_data) && length(historical_data) > 0) { + # Extract this field's CI mean values from historical weeks + for (hist in historical_data) { + hist_week <- hist$week + hist_data <- hist$data + + field_row <- hist_data %>% + filter(Field_id == !!field_id) + + if (nrow(field_row) > 0 && !is.na(field_row$Mean_CI[1])) { + ci_values_for_trend <- c(field_row$Mean_CI[1], ci_values_for_trend) + } + } + + if (length(ci_values_for_trend) >= 2) { + four_week_trend <- categorize_four_week_trend(ci_values_for_trend) + } + } + + # NEW: Load previous weeks for CV trends + cv_trend_short <- NA_real_ + cv_trend_long <- NA_real_ + + if (!is.null(historical_data) && length(historical_data) > 0) { + # CV from 2 weeks ago (short-term trend) + if 
(length(historical_data) >= 2) { + cv_2w <- historical_data[[2]]$data %>% + filter(Field_id == !!field_id) %>% + pull(CV) + if (length(cv_2w) > 0 && !is.na(cv_2w[1])) { + cv_trend_short <- calculate_cv_trend(cv_current, cv_2w[1]) + } + } + + # CV from 8 weeks ago (long-term trend) + if (length(historical_data) >= 8) { + cv_8w <- historical_data[[8]]$data %>% + filter(Field_id == !!field_id) %>% + pull(CV) + if (length(cv_8w) > 0 && !is.na(cv_8w[1])) { + cv_trend_long <- calculate_cv_trend(cv_current, cv_8w[1]) + } + } + } + + # NEW: Last harvest/planting date (from harvesting_data if available) + last_harvest_date <- NA_character_ + if (!is.null(harvesting_data) && nrow(harvesting_data) > 0) { + last_harvest_row <- harvesting_data %>% + filter(field == !!field_id) %>% + arrange(desc(season_start)) %>% + slice(1) + + if (nrow(last_harvest_row) > 0 && !is.na(last_harvest_row$season_start[1])) { + last_harvest_date <- as.character(last_harvest_row$season_start[1]) + } + } + + # Compile result with all SC-64 columns + result <- data.frame( + Field_id = field_id, + Farm_Section = farm_section, + Field_name = field_name, + Hectare = round(field_area_ha, 2), + Acreage = round(field_area_acres, 2), + Mean_CI = round(mean_ci_current, 2), + Weekly_ci_change = if (is.na(weekly_ci_change)) NA_real_ else round(weekly_ci_change, 2), + Weekly_ci_change_str = weekly_ci_change_str, + Four_week_trend = four_week_trend, # NEW + Last_harvest_or_planting_date = last_harvest_date, # NEW + Age_week = if (is.na(age_weeks)) NA_integer_ else as.integer(round(age_weeks)), + `Phase (age based)` = phase, + nmr_weeks_in_this_phase = nmr_weeks_in_phase, + Germination_progress = germination_progress_str, + Imminent_prob = imminent_prob_val, + Status_trigger = status_trigger, + CI_range = range_str, + CI_Percentiles = ci_percentiles_str, # NEW + CV = round(cv_current, 4), + CV_Trend_Short_Term = cv_trend_short, # NEW (2-week) + CV_Trend_Long_Term = cv_trend_long, # NEW (8-week) + Cloud_pct_clear = 
pct_clear, + Cloud_pct_clear_interval = cloud_interval, # NEW: Rounded intervals + Cloud_pct = cloud_pct, + Cloud_category = cloud_cat, + stringsAsFactors = FALSE + ) + + return(result) + + }, error = function(e) { + message(paste("Error analyzing field", field_idx, ":", e$message)) + return(data.frame( + Field_id = NA_character_, + error = e$message + )) + }) +} + +# ============================================================================ +# SUMMARY GENERATION +# ============================================================================ + +generate_field_analysis_summary <- function(field_df) { + message("Generating summary statistics...") + + total_acreage <- sum(field_df$Acreage, na.rm = TRUE) + + germination_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Germination"], na.rm = TRUE) + tillering_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Tillering"], na.rm = TRUE) + grand_growth_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Grand Growth"], na.rm = TRUE) + maturation_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Maturation"], na.rm = TRUE) + unknown_phase_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Unknown"], na.rm = TRUE) + + harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE) + stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE) + recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE) + growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE) + germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE) + germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE) + no_trigger_acreage <- 
sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE) + + clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE) + partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE) + no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE) + total_fields <- nrow(field_df) + + clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE) + partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE) + no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE) + + summary_df <- data.frame( + Category = c( + "--- PHASE DISTRIBUTION ---", + "Germination", + "Tillering", + "Grand Growth", + "Maturation", + "Unknown phase", + "--- STATUS TRIGGERS ---", + "Harvest ready", + "Stress detected", + "Strong recovery", + "Growth on track", + "Germination complete", + "Germination started", + "No trigger", + "--- CLOUD COVERAGE (FIELDS) ---", + "Clear view", + "Partial coverage", + "No image available", + "--- CLOUD COVERAGE (ACREAGE) ---", + "Clear view", + "Partial coverage", + "No image available", + "--- TOTAL ---", + "Total Acreage" + ), + Acreage = c( + NA, + round(germination_acreage, 2), + round(tillering_acreage, 2), + round(grand_growth_acreage, 2), + round(maturation_acreage, 2), + round(unknown_phase_acreage, 2), + NA, + round(harvest_ready_acreage, 2), + round(stress_acreage, 2), + round(recovery_acreage, 2), + round(growth_on_track_acreage, 2), + round(germination_complete_acreage, 2), + round(germination_started_acreage, 2), + round(no_trigger_acreage, 2), + NA, + paste0(clear_fields, " fields"), + paste0(partial_fields, " fields"), + paste0(no_image_fields, " fields"), + NA, + round(clear_acreage, 2), + round(partial_acreage, 2), + round(no_image_acreage, 2), + NA, + round(total_acreage, 2) + ), + stringsAsFactors = FALSE + ) + + attr(summary_df, "cloud_fields_clear") 
<- clear_fields + attr(summary_df, "cloud_fields_partial") <- partial_fields + attr(summary_df, "cloud_fields_no_image") <- no_image_fields + attr(summary_df, "cloud_fields_total") <- total_fields + + return(summary_df) +} + +# ============================================================================ +# EXPORT FUNCTIONS +# ============================================================================ + +export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, reports_dir) { + message("Exporting per-field analysis to Excel, CSV, and RDS...") + + output_subdir <- file.path(reports_dir, "kpis", "field_analysis") + if (!dir.exists(output_subdir)) { + dir.create(output_subdir, recursive = TRUE) + } + + excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), "_test.xlsx") + excel_path <- file.path(output_subdir, excel_filename) + excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE) + + sheets <- list( + "Field Data" = field_df, + "Summary" = summary_df + ) + + write_xlsx(sheets, excel_path) + message(paste("✓ Field analysis Excel exported to:", excel_path)) + + kpi_data <- list( + field_analysis = field_df, + field_analysis_summary = summary_df, + metadata = list( + current_week = current_week, + project = project_dir, + created_at = Sys.time() + ) + ) + + rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d", current_week), ".rds") + rds_path <- file.path(reports_dir, "kpis", rds_filename) + + saveRDS(kpi_data, rds_path) + message(paste("✓ Field analysis RDS exported to:", rds_path)) + + csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".csv") + csv_path <- file.path(output_subdir, csv_filename) + write_csv(field_df, csv_path) + message(paste("✓ Field analysis CSV exported to:", csv_path)) + + return(list(excel = excel_path, rds = rds_path, csv = csv_path)) +} + +# 
============================================================================ +# MAIN +# ============================================================================ + +main <- function() { + args <- commandArgs(trailingOnly = TRUE) + + end_date <- if (length(args) >= 1 && !is.na(args[1])) { + as.Date(args[1]) + } else if (exists("end_date_str", envir = .GlobalEnv)) { + as.Date(get("end_date_str", envir = .GlobalEnv)) + } else { + Sys.Date() + } + + project_dir <- if (length(args) >= 2 && !is.na(args[2])) { + as.character(args[2]) + } else if (exists("project_dir", envir = .GlobalEnv)) { + get("project_dir", envir = .GlobalEnv) + } else { + "angata" + } + + assign("project_dir", project_dir, envir = .GlobalEnv) + + source(here("r_app", "crop_messaging_utils.R")) + source(here("r_app", "parameters_project.R")) + + message("=== FIELD ANALYSIS WEEKLY (SC-64 ENHANCEMENTS) ===") + message(paste("Date:", end_date)) + message(paste("Project:", project_dir)) + message("") + message("CONFIGURATION:") + message(paste(" Four-week trend thresholds: growth >= ", FOUR_WEEK_TREND_GROWTH_MIN, + ", strong growth >= ", FOUR_WEEK_TREND_STRONG_GROWTH_MIN, sep = "")) + message(paste(" CV trend significant threshold:", CV_TREND_THRESHOLD_SIGNIFICANT)) + message(paste(" Cloud intervals:", paste(CLOUD_INTERVALS, collapse = ", "))) + message("") + + current_week <- as.numeric(format(end_date, "%V")) + year <- as.numeric(format(end_date, "%Y")) + previous_week <- current_week - 1 + if (previous_week < 1) previous_week <- 52 + + message(paste("Week:", current_week, "/ Year:", year)) + + message("Building tile grid from available weekly tiles...") + tile_grid <- build_tile_grid(weekly_tile_max, current_week, year) + message(paste(" Found", nrow(tile_grid), "tiles for analysis")) + + tryCatch({ + boundaries_result <- load_field_boundaries(data_dir) + + if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) { + field_boundaries_sf <- 
boundaries_result$field_boundaries_sf + } else { + field_boundaries_sf <- boundaries_result + } + + if (!is.data.frame(field_boundaries_sf) && !inherits(field_boundaries_sf, "sf")) { + stop("field_boundaries_sf is not a valid SF object") + } + + if (nrow(field_boundaries_sf) == 0) { + stop("No fields loaded from boundaries") + } + + message(paste(" Loaded", nrow(field_boundaries_sf), "fields from boundaries")) + }, error = function(e) { + stop("ERROR loading field boundaries: ", e$message, + "\nCheck that pivot.geojson exists in ", data_dir) + }) + + # Load historical data for trend calculations + message("Loading historical field data for trend calculations...") + num_weeks_to_load <- if (TEST_MODE) TEST_MODE_NUM_WEEKS else max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG) + if (TEST_MODE) { + message(paste(" TEST MODE: Loading only", num_weeks_to_load, "weeks of historical data")) + } + historical_data <- load_historical_field_data( + project_dir, current_week, reports_dir, + num_weeks = num_weeks_to_load + ) + + planting_dates <- extract_planting_dates(harvesting_data) + + message("Setting up parallel processing...") + current_plan <- class(future::plan())[1] + if (current_plan == "sequential") { + num_workers <- parallel::detectCores() - 1 + message(paste(" Using", num_workers, "workers for parallel processing")) + future::plan(future::multisession, workers = num_workers) + } else { + message(paste(" Using existing plan:", current_plan)) + } + + message("Analyzing fields in parallel...") + + field_analysis_list <- furrr::future_map( + seq_len(nrow(field_boundaries_sf)), + ~ analyze_single_field( + field_idx = ., + field_boundaries_sf = field_boundaries_sf, + tile_grid = tile_grid, + week_num = current_week, + year = year, + mosaic_dir = weekly_tile_max, + historical_data = historical_data, + planting_dates = planting_dates, + report_date = end_date, + harvest_imminence_data = NULL + ), + .progress = TRUE, + .options = furrr::furrr_options(seed = TRUE) + ) 
+ + field_analysis_df <- dplyr::bind_rows(field_analysis_list) + + if (nrow(field_analysis_df) == 0) { + stop("No fields analyzed successfully!") + } + + message(paste("✓ Analyzed", nrow(field_analysis_df), "fields")) + + summary_statistics_df <- generate_field_analysis_summary(field_analysis_df) + + export_paths <- export_field_analysis_excel( + field_analysis_df, + summary_statistics_df, + project_dir, + current_week, + reports_dir + ) + + cat("\n=== FIELD ANALYSIS SUMMARY ===\n") + cat("Fields analyzed:", nrow(field_analysis_df), "\n") + cat("Excel export:", export_paths$excel, "\n") + cat("RDS export:", export_paths$rds, "\n") + cat("CSV export:", export_paths$csv, "\n\n") + + cat("--- Per-field results (first 10) ---\n") + available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", + "Four_week_trend", "Status_trigger", "Cloud_category") + available_cols <- available_cols[available_cols %in% names(field_analysis_df)] + if (length(available_cols) > 0) { + print(head(field_analysis_df[, available_cols], 10)) + } else { + print(head(field_analysis_df, 10)) + } + + cat("\n--- Summary statistics ---\n") + print(summary_statistics_df) + + message("\n✓ Field analysis complete!") +} + +if (sys.nframe() == 0) { + main() +} diff --git a/r_app/ANGATA_KPI_UPDATES.md b/r_app/ANGATA_KPI_UPDATES.md deleted file mode 100644 index eda35b1..0000000 --- a/r_app/ANGATA_KPI_UPDATES.md +++ /dev/null @@ -1,155 +0,0 @@ -# Angata KPI Script Updates - 09_calculate_kpis_Angata.R - -## Overview -The script has been restructured to focus on **4 required KPIs** for Angata, with legacy KPIs disabled by default but retained for future use. - -## Changes Made - -### 1. **Script Configuration** -- **File**: `09_calculate_kpis_Angata.R` -- **Toggle Variable**: `ENABLE_LEGACY_KPIS` (default: `FALSE`) - - Set to `TRUE` to run the 6 original KPIs - - Set to `FALSE` for Angata's 4 KPIs only - -### 2. 
**Angata KPIs (4 Required)** - -#### KPI 1: **Area Change Summary** ✅ REAL DATA -- **File**: Embedded in script as `calculate_area_change_kpi()` -- **Method**: Compares current week CI to previous week CI -- **Classification**: - - **Improving areas**: Mean change > +0.5 CI units - - **Stable areas**: Mean change between -0.5 and +0.5 CI units - - **Declining areas**: Mean change < -0.5 CI units -- **Output**: Hectares, Acres, and % of farm for each category -- **Data Type**: REAL DATA (processed from satellite imagery) - -#### KPI 2: **Germination Acreage** ✅ REAL DATA -- **Function**: `calculate_germination_acreage_kpi()` -- **Germination Phase Detection**: - - **Start germination**: When 10% of field's CI > 2 - - **End germination**: When 70% of field's CI ≥ 2 -- **Output**: - - Count of fields in germination phase - - Count of fields in post-germination phase - - Total acres and % of farm for each phase -- **Data Type**: REAL DATA (CI-based, calculated from satellite imagery) - -#### KPI 3: **Harvested Acreage** ⚠️ DUMMY DATA -- **Function**: `calculate_harvested_acreage_kpi()` -- **Current Status**: Returns zero values with clear "DUMMY DATA - Detection TBD" label -- **TODO**: Implement harvesting detection logic - - Likely indicators: CI drops below 1.5, sudden backscatter change, etc. 
-- **Output Format**: - - Number of harvested fields - - Total acres - - % of farm - - Clearly marked as DUMMY DATA in output table - -#### KPI 4: **Mature Acreage** ⚠️ DUMMY DATA -- **Function**: `calculate_mature_acreage_kpi()` -- **Current Status**: Returns zero values with clear "DUMMY DATA - Definition TBD" label -- **Concept**: Mature fields have high and stable CI for several weeks -- **TODO**: Implement stability-based maturity detection - - Calculate CI trend over last 3-4 weeks per field - - Stability metric: low CV over period, high CI relative to field max - - Threshold: e.g., field reaches 80%+ of max CI and stable for 3+ weeks -- **Output Format**: - - Number of mature fields - - Total acres - - % of farm - - Clearly marked as DUMMY DATA in output table - -### 3. **Legacy KPIs (Disabled by Default)** - -These original 6 KPIs are **disabled** but code is preserved for future use: -1. Field Uniformity Summary -2. TCH Forecasted -3. Growth Decline Index -4. Weed Presence Score -5. Gap Filling Score - -To enable: Set `ENABLE_LEGACY_KPIS <- TRUE` in the script - -### 4. **Output & Logging** - -#### Console Output (STDOUT) -``` -=== ANGATA KPI CALCULATION SUMMARY === -Report Date: [date] -Current Week: [week] -Previous Week: [week] -Total Fields Analyzed: [count] -Project: [project_name] -Calculation Time: [timestamp] -Legacy KPIs Enabled: FALSE - ---- REQUIRED ANGATA KPIs --- - -1. Area Change Summary (REAL DATA): - [table] - -2. Germination Acreage (CI-based, REAL DATA): - [table] - -3. Harvested Acreage (DUMMY DATA - Detection TBD): - [table with "DUMMY" marker] - -4. 
Mature Acreage (DUMMY DATA - Definition TBD): - [table with "DUMMY" marker] - -=== ANGATA KPI CALCULATION COMPLETED === -``` - -#### File Output (RDS) -- **Location**: `laravel_app/storage/app/[project]/reports/kpis/` -- **Filename**: `[project]_kpi_summary_tables_week[XX].rds` -- **Contents**: - - `area_change_summary`: Summary table - - `germination_summary`: Summary table - - `harvested_summary`: Summary table (DUMMY) - - `mature_summary`: Summary table (DUMMY) - - Field-level results for each KPI - - Metadata (report_date, weeks, total_fields, etc.) - -### 5. **Data Clarity Markers** - -All tables in output clearly indicate: -- **REAL DATA**: Derived from satellite CI measurements -- **DUMMY DATA - [TBD Item]**: Placeholder values; actual method to be implemented - -This prevents misinterpretation of preliminary results. - -## Usage - -```powershell -# Run Angata KPIs only (default, legacy disabled) -Rscript r_app/09_calculate_kpis_Angata.R 2025-11-27 7 angata - -# With specific date -Rscript r_app/09_calculate_kpis_Angata.R 2025-11-20 7 angata -``` - -## Future Work - -1. **Harvesting Detection**: Implement CI threshold + temporal pattern analysis -2. **Maturity Definition**: Define stability metrics and thresholds based on field CI ranges -3. **Legacy KPIs**: Adapt or retire based on Angata's needs -4. **Integration**: Connect outputs to reporting system (R Markdown, Word reports, etc.) 
- -## File Structure - -``` -r_app/ -├── 09_calculate_kpis_Angata.R (main script - UPDATED) -├── kpi_utils.R (optional - legacy functions) -├── crop_messaging_utils.R (dependencies) -├── parameters_project.R (project config) -└── growth_model_utils.R (optional) - -Output: -└── laravel_app/storage/app/angata/reports/kpis/ - └── angata_kpi_summary_tables_week[XX].rds -``` - ---- -**Updated**: November 27, 2025 diff --git a/r_app/ci_extraction_utils.R b/r_app/ci_extraction_utils.R index 99a1fee..d75ff6a 100644 --- a/r_app/ci_extraction_utils.R +++ b/r_app/ci_extraction_utils.R @@ -653,21 +653,27 @@ process_ci_values <- function(dates, field_boundaries, merged_final_dir, #' Process CI values from pre-split tiles (Script 01 output) #' #' This function processes CI values from tiles instead of full-extent rasters. -#' Tiles are created by Script 01 and stored in daily_tiles_split/[DATE]/ folders. +#' Tiles are created by Script 01 and stored in daily_tiles_split/[GRID_SIZE]/[DATE]/ folders. #' For each field, it aggregates CI statistics from all tiles that intersect that field. +#' Output follows the same grid structure: merged_final_tif/[GRID_SIZE]/[DATE]/ +#' +#' NOTE: Processes dates SEQUENTIALLY but tiles WITHIN EACH DATE in parallel (furrr) +#' This avoids worker process communication issues while still getting good speedup. 
#' #' @param dates List of dates from date_list() -#' @param tile_folder Path to the tile folder (daily_tiles_split) +#' @param tile_folder Path to the tile folder (daily_tiles_split/[GRID_SIZE]) #' @param field_boundaries Field boundaries as vector object #' @param field_boundaries_sf Field boundaries as SF object #' @param daily_CI_vals_dir Directory to save daily CI values #' @param cumulative_CI_vals_dir Directory to save cumulative CI values -#' @param merged_final_dir Directory to save processed tiles with CI band +#' @param merged_final_dir Base directory to save processed tiles with CI band +#' @param grid_size Grid size label (e.g., "5x5", "10x10") for output path structure #' @return NULL (used for side effects) #' process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries, field_boundaries_sf, daily_CI_vals_dir, - cumulative_CI_vals_dir, merged_final_dir) { + cumulative_CI_vals_dir, merged_final_dir, + grid_size = NA) { # Define path for combined CI data combined_ci_path <- here::here(cumulative_CI_vals_dir, "combined_CI_data.rds") @@ -691,11 +697,29 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries, if (!file.exists(combined_ci_path)) { safe_log("combined_CI_data.rds does not exist. 
Creating new file with all available tile data.") - # Process all tile dates + # Process all tile dates SEQUENTIALLY but with parallel tile processing + # Tiles within each date are processed in parallel via extract_ci_from_tiles() all_pivot_stats <- list() - for (date in tile_dates) { - safe_log(paste("Processing tiles for date:", date)) + for (i in seq_along(tile_dates)) { + date <- tile_dates[i] + + # SKIP: Check if this date already has processed output tiles + if (!is.na(grid_size)) { + output_date_folder <- file.path(merged_final_dir, grid_size, date) + } else { + output_date_folder <- file.path(merged_final_dir, date) + } + + if (dir.exists(output_date_folder)) { + existing_tiles <- list.files(output_date_folder, pattern = "\\.tif$") + if (length(existing_tiles) > 0) { + safe_log(paste("[", i, "/", length(tile_dates), "] SKIP:", date, "- already has", length(existing_tiles), "tiles")) + next + } + } + + safe_log(paste("[", i, "/", length(tile_dates), "] Processing tiles for date:", date)) date_tile_dir <- file.path(tile_folder, date) tile_files <- list.files(date_tile_dir, pattern = "\\.tif$", full.names = TRUE) @@ -705,15 +729,17 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries, next } - safe_log(paste(" Found", length(tile_files), "tiles for date", date)) + safe_log(paste(" Found", length(tile_files), "tiles - processing in parallel")) # Process all tiles for this date and aggregate to fields + # Tiles are processed in parallel via furrr::future_map() inside extract_ci_from_tiles() date_stats <- extract_ci_from_tiles( tile_files = tile_files, date = date, field_boundaries_sf = field_boundaries_sf, daily_CI_vals_dir = daily_CI_vals_dir, - merged_final_tif_dir = merged_final_dir + merged_final_tif_dir = merged_final_dir, + grid_size = grid_size ) if (!is.null(date_stats)) { @@ -735,13 +761,37 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries, } } else { - # Process only new dates + # Process only 
new dates SEQUENTIALLY but with parallel tile processing safe_log("combined_CI_data.rds exists, adding new tile data.") + if (length(dates_to_process) == 0) { + safe_log("No new dates to process", "WARNING") + return(invisible(NULL)) + } + + safe_log(paste("Processing", length(dates_to_process), "new dates...")) + new_pivot_stats_list <- list() - for (date in dates_to_process[1:2]) { - safe_log(paste("Processing tiles for date:", date)) + for (i in seq_along(dates_to_process)) { + date <- dates_to_process[i] + + # SKIP: Check if this date already has processed output tiles + if (!is.na(grid_size)) { + output_date_folder <- file.path(merged_final_dir, grid_size, date) + } else { + output_date_folder <- file.path(merged_final_dir, date) + } + + if (dir.exists(output_date_folder)) { + existing_tiles <- list.files(output_date_folder, pattern = "\\.tif$") + if (length(existing_tiles) > 0) { + safe_log(paste("[", i, "/", length(dates_to_process), "] SKIP:", date, "- already has", length(existing_tiles), "tiles")) + next + } + } + + safe_log(paste("[", i, "/", length(dates_to_process), "] Processing tiles for date:", date)) date_tile_dir <- file.path(tile_folder, date) tile_files <- list.files(date_tile_dir, pattern = "\\.tif$", full.names = TRUE) @@ -751,7 +801,7 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries, next } - safe_log(paste(" Found", length(tile_files), "tiles for date", date)) + safe_log(paste(" Found", length(tile_files), "tiles - processing in parallel")) # Extract CI from tiles for this date date_stats <- extract_ci_from_tiles( @@ -759,7 +809,8 @@ process_ci_values_from_tiles <- function(dates, tile_folder, field_boundaries, date = date, field_boundaries_sf = field_boundaries_sf, daily_CI_vals_dir = daily_CI_vals_dir, - merged_final_tif_dir = merged_final_dir + merged_final_tif_dir = merged_final_dir, + grid_size = grid_size ) if (!is.null(date_stats)) { @@ -788,17 +839,18 @@ process_ci_values_from_tiles <- function(dates, 
tile_folder, field_boundaries, #' 1. Loads tile #' 2. Creates/extracts CI band #' 3. Creates output raster with Red, Green, Blue, NIR, CI bands -#' 4. Saves to merged_final_tif_dir/[DATE]/ mirroring daily_tiles_split structure +#' 4. Saves to merged_final_tif_dir/[GRID_SIZE]/[DATE]/ mirroring daily_tiles_split structure #' 5. Extracts field-level CI statistics #' Returns statistics aggregated to field level. #' #' @param tile_file Path to a single tile TIF file #' @param field_boundaries_sf Field boundaries as SF object #' @param date Character string of the date (YYYY-MM-DD format) -#' @param merged_final_tif_dir Directory to save processed tiles with CI band +#' @param merged_final_tif_dir Base directory to save processed tiles with CI band +#' @param grid_size Grid size label (e.g., "5x5", "10x10") for output path structure #' @return Data frame with field CI statistics for this tile, or NULL if processing failed #' -process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_final_tif_dir) { +process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_final_tif_dir, grid_size = NA) { tryCatch({ tile_filename <- basename(tile_file) safe_log(paste(" [TILE] Loading:", tile_filename)) @@ -845,8 +897,14 @@ process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_fin output_raster <- c(red_band, green_band, blue_band, nir_band, ci_band) names(output_raster) <- c("Red", "Green", "Blue", "NIR", "CI") - # Save processed tile to merged_final_tif_dir/[DATE]/ with same filename - date_dir <- file.path(merged_final_tif_dir, date) + # Save processed tile to merged_final_tif_dir/[GRID_SIZE]/[DATE]/ with same filename + # This mirrors the input structure: daily_tiles_split/[GRID_SIZE]/[DATE]/ + if (!is.na(grid_size)) { + date_dir <- file.path(merged_final_tif_dir, grid_size, date) + } else { + date_dir <- file.path(merged_final_tif_dir, date) + } + if (!dir.exists(date_dir)) { dir.create(date_dir, recursive = TRUE, 
showWarnings = FALSE) } @@ -883,7 +941,7 @@ process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_fin #' Given a set of tile files for a single date, this function: #' 1. Loads each tile IN PARALLEL using furrr #' 2. Creates/extracts CI band -#' 3. Saves processed tile (Red, Green, Blue, NIR, CI) to merged_final_tif_dir/[DATE]/ +#' 3. Saves processed tile (Red, Green, Blue, NIR, CI) to merged_final_tif_dir/[GRID_SIZE]/[DATE]/ #' 4. Calculates field statistics from CI band #' 5. Aggregates field statistics across tiles #' 6. Saves individual date file (matching legacy workflow) @@ -894,24 +952,43 @@ process_single_tile <- function(tile_file, field_boundaries_sf, date, merged_fin #' @param date Character string of the date (YYYY-MM-DD format) #' @param field_boundaries_sf Field boundaries as SF object #' @param daily_CI_vals_dir Directory to save individual date RDS files -#' @param merged_final_tif_dir Directory to save processed tiles with CI band (mirrors daily_tiles_split structure) +#' @param merged_final_tif_dir Base directory to save processed tiles with CI band +#' @param grid_size Grid size label (e.g., "5x5", "10x10") for output path structure #' @return Data frame with field CI statistics for the date #' -extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_CI_vals_dir = NULL, merged_final_tif_dir = NULL) { +extract_ci_from_tiles <- function(tile_files, date, field_boundaries_sf, daily_CI_vals_dir = NULL, merged_final_tif_dir = NULL, grid_size = NA) { if (!inherits(field_boundaries_sf, "sf")) { field_boundaries_sf <- sf::st_as_sf(field_boundaries_sf) } - safe_log(paste(" Processing", length(tile_files), "tiles for date", date, "(parallel processing)")) + safe_log(paste(" Processing", length(tile_files), "tiles for date", date, "(3-tile parallel batch)")) - # Process tiles in parallel using furrr::future_map - # This replaces the sequential for loop, processing 2-4 tiles simultaneously - stats_list <- 
furrr::future_map( - .x = tile_files, - .f = ~ process_single_tile(.x, field_boundaries_sf, date, merged_final_tif_dir), - .options = furrr::furrr_options(seed = TRUE) - ) + # Windows-compatible parallelization: Process tiles in small batches + # Use future_map with 3 workers - stable and efficient on Windows + + # Set up minimal future plan (3 workers max) + future::plan(future::multisession, workers = 3) + + # Process tiles using furrr with 2 workers + # Use retry logic for worker stability + stats_list <- tryCatch({ + furrr::future_map( + tile_files, + ~ process_single_tile(.x, field_boundaries_sf, date, merged_final_tif_dir, grid_size = grid_size), + .progress = FALSE, + .options = furrr::furrr_options(seed = TRUE) + ) + }, error = function(e) { + safe_log(paste("Parallel processing failed:", e$message, "- falling back to sequential"), "WARNING") + # Fallback to sequential if parallel fails + lapply( + tile_files, + function(tile_file) { + process_single_tile(tile_file, field_boundaries_sf, date, merged_final_tif_dir, grid_size = grid_size) + } + ) + }) # Extract names and filter out NULL results (failed tiles) tile_names <- basename(tile_files) diff --git a/webapps.zip b/webapps.zip deleted file mode 100644 index cf2f49b..0000000 Binary files a/webapps.zip and /dev/null differ diff --git a/webapps/geojson_viewer.html b/webapps/geojson_viewer.html index 36511a1..df29392 100644 --- a/webapps/geojson_viewer.html +++ b/webapps/geojson_viewer.html @@ -56,8 +56,9 @@ border-radius: 8px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); display: flex; - align-items: center; - gap: 10px; + flex-direction: column; + align-items: flex-start; + gap: 12px; } .map-controls label { @@ -70,6 +71,19 @@ font-weight: 500; } + .map-controls-group { + display: flex; + flex-direction: column; + gap: 8px; + border-top: 1px solid #e0e0e0; + padding-top: 8px; + } + + .map-controls-group:first-child { + border-top: none; + padding-top: 0; + } + .toggle-switch { position: relative; display: 
inline-block; @@ -764,14 +778,32 @@
- +
+ +
+
+ + +
@@ -807,8 +839,11 @@ let currentLayer = 'osm'; let geojsonLayer = null; + let labelsLayer = null; let currentGeojsonData = null; let featuresList = []; + let showLabels = true; + let labelPosition = 'center'; // 'center' or 'side' // Toggle map layer const mapToggle = document.getElementById('mapToggle'); @@ -830,6 +865,31 @@ } }); + // Toggle labels visibility + const labelsToggle = document.getElementById('labelsToggle'); + labelsToggle.addEventListener('change', () => { + showLabels = labelsToggle.checked; + if (labelsLayer) { + if (showLabels) { + labelsLayer.addTo(map); + } else { + map.removeLayer(labelsLayer); + } + } + }); + + // Toggle label position + const labelPositionToggle = document.getElementById('labelPositionToggle'); + const labelPositionText = document.getElementById('labelPositionText'); + labelPositionToggle.addEventListener('change', () => { + labelPosition = labelPositionToggle.checked ? 'side' : 'center'; + labelPositionText.textContent = labelPositionToggle.checked ? 
'Side' : 'Center'; + // Recreate labels with new position + if (currentGeojsonData) { + loadGeojson(currentGeojsonData, ''); + } + }); + // Elements const geojsonInput = document.getElementById('geojsonFile'); const fileNameDisplay = document.getElementById('fileName'); @@ -863,6 +923,64 @@ setTimeout(() => el.classList.remove('active'), 5000); } + // Resolve label overlaps using collision detection algorithm + function resolveLabelOverlaps(labelPositions) { + const labelWidth = 150; // Approximate label width in pixels + const labelHeight = 30; // Approximate label height in pixels + const minDistance = Math.sqrt(labelWidth * labelWidth + labelHeight * labelHeight); + + const adjustedPositions = labelPositions.map(pos => ({ ...pos })); + + // Iteratively adjust positions to avoid overlaps + for (let iteration = 0; iteration < 5; iteration++) { + let hasAdjustment = false; + + for (let i = 0; i < adjustedPositions.length; i++) { + for (let j = i + 1; j < adjustedPositions.length; j++) { + const pos1 = adjustedPositions[i]; + const pos2 = adjustedPositions[j]; + + const p1 = map.project(pos1.latlng); + const p2 = map.project(pos2.latlng); + + const dx = p2.x - p1.x; + const dy = p2.y - p1.y; + const distance = Math.sqrt(dx * dx + dy * dy); + + // If labels are too close, push them apart + if (distance < minDistance && distance > 0) { + hasAdjustment = true; + const angle = Math.atan2(dy, dx); + const pushDistance = (minDistance - distance) / 2 + 5; + + // Move labels away from each other + const offset1 = map.unproject([ + p1.x - Math.cos(angle) * pushDistance, + p1.y - Math.sin(angle) * pushDistance + ]); + const offset2 = map.unproject([ + p2.x + Math.cos(angle) * pushDistance, + p2.y + Math.sin(angle) * pushDistance + ]); + + // Only adjust if original feature hasn't moved too far + const maxDrift = 0.003; + if (Math.abs(offset1.lat - pos1.originalLatlng.lat) < maxDrift) { + pos1.latlng = offset1; + } + if (Math.abs(offset2.lat - pos2.originalLatlng.lat) < 
maxDrift) { + pos2.latlng = offset2; + } + } + } + } + + if (!hasAdjustment) break; + } + + return adjustedPositions; + } + // Calculate area in square meters using Turf.js function getFeatureArea(feature) { try { @@ -957,6 +1075,11 @@ map.removeLayer(geojsonLayer); } + // Clear previous labels + if (labelsLayer) { + map.removeLayer(labelsLayer); + } + currentGeojsonData = geojson; featuresList = []; @@ -1018,6 +1141,86 @@ } }).addTo(map); + // Create labels layer + labelsLayer = L.featureGroup([]); + + // First pass: collect all label positions + const labelPositions = []; + features.forEach((feature, index) => { + const props = feature.properties || {}; + const fieldName = props.field || props.name || props.field_name || props.fieldName || props.id || `Field ${index + 1}`; + + if (feature.geometry && feature.geometry.type !== 'Point') { + // Get centroid for polygon features + const bounds = L.geoJSON(feature).getBounds(); + const center = bounds.getCenter(); + + let labelLatlng = center; + let iconAnchor = [0, 0]; + + // If side position, offset label to the top-left of the bounds + if (labelPosition === 'side') { + const ne = bounds.getNorthEast(); + labelLatlng = L.latLng(ne.lat, ne.lng); + iconAnchor = [-10, -30]; + } + + labelPositions.push({ + fieldName, + latlng: labelLatlng, + originalLatlng: labelLatlng, + iconAnchor, + isPoint: false + }); + } else if (feature.geometry && feature.geometry.type === 'Point') { + // For points, add label above the marker + const latlng = L.GeoJSON.coordsToLatLng(feature.geometry.coordinates); + labelPositions.push({ + fieldName, + latlng: latlng, + originalLatlng: latlng, + iconAnchor: [0, 30], + isPoint: true + }); + } + }); + + // Apply collision detection to resolve overlaps + const adjustedPositions = resolveLabelOverlaps(labelPositions); + + // Second pass: create markers with adjusted positions + adjustedPositions.forEach((pos) => { + const label = L.marker(pos.latlng, { + icon: L.divIcon({ + className: 
'field-label', + html: `
${pos.fieldName}
`, + iconSize: [null, null], + iconAnchor: pos.iconAnchor + }), + interactive: false + }); + labelsLayer.addLayer(label); + }); + + if (showLabels) { + labelsLayer.addTo(map); + } + // Fit bounds const bounds = geojsonLayer.getBounds(); map.fitBounds(bounds, { padding: [50, 50] }); diff --git a/webapps/login.html b/webapps/login.html index 4dc5d83..39d7b9b 100644 --- a/webapps/login.html +++ b/webapps/login.html @@ -162,25 +162,48 @@ }); // Handle login form - document.getElementById('loginForm').addEventListener('submit', function(e) { + document.getElementById('loginForm').addEventListener('submit', async function(e) { e.preventDefault(); const password = document.getElementById('password').value; - const correctPassword = 'Activity3-Quaking4-Unashamed5-Penholder6'; const errorMessage = document.getElementById('errorMessage'); + const button = this.querySelector('button'); - if (password === correctPassword) { - // Store authentication in session - sessionStorage.setItem('authenticated', 'true'); + // Disable button during request + button.disabled = true; + button.textContent = 'Verifying...'; + + try { + // Send password to Netlify function for server-side verification + const response = await fetch('/.netlify/functions/login', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ password }) + }); - // Redirect to main apps page - window.location.href = 'index.html'; - } else { - // Show error message - errorMessage.textContent = 'Invalid password. Please try again.'; + const data = await response.json(); + + if (response.ok) { + // Authentication successful + sessionStorage.setItem('authenticated', 'true'); + window.location.href = 'index.html'; + } else { + // Authentication failed + errorMessage.textContent = data.error || 'Invalid password. 
Please try again.'; + errorMessage.classList.add('show'); + document.getElementById('password').value = ''; + document.getElementById('password').focus(); + button.disabled = false; + button.textContent = 'Access Tools'; + } + } catch (error) { + console.error('Login error:', error); + errorMessage.textContent = 'Connection error. Please check your internet and try again.'; errorMessage.classList.add('show'); - document.getElementById('password').value = ''; - document.getElementById('password').focus(); + button.disabled = false; + button.textContent = 'Access Tools'; } }); diff --git a/webapps/netlify.toml b/webapps/netlify.toml new file mode 100644 index 0000000..acb2ef5 --- /dev/null +++ b/webapps/netlify.toml @@ -0,0 +1,11 @@ +[build] + command = "# No build needed" + functions = "netlify/functions" + publish = "." + +# Environment variables - Set these in Netlify UI (Site settings > Build & deploy > Environment) +# DO NOT commit actual values to git +[env] + SMARTCANE_PASSWORD = "set_in_netlify_ui" + GOOGLE_SHEET_ID = "1ZHEIyhupNDHVd1EScBn0DnuiAzMFoZcAPZm3U65abkY" + GOOGLE_SHEET_PASSWORD = "optional_if_needed" diff --git a/webapps/netlify/functions/get-mills.js b/webapps/netlify/functions/get-mills.js new file mode 100644 index 0000000..f0a43e0 --- /dev/null +++ b/webapps/netlify/functions/get-mills.js @@ -0,0 +1,52 @@ +/** + * Netlify Function to fetch Google Sheets data securely + * Credentials are stored as environment variables, not exposed to frontend + */ + +exports.handler = async (event) => { + try { + // Read credentials from environment variables (set in Netlify UI) + const sheetId = process.env.GOOGLE_SHEET_ID; + const sheetPassword = process.env.GOOGLE_SHEET_PASSWORD; // If needed for authentication + + if (!sheetId) { + return { + statusCode: 500, + body: JSON.stringify({ error: 'GOOGLE_SHEET_ID not configured' }) + }; + } + + // Construct the export URL + const csvUrl = `https://docs.google.com/spreadsheets/d/${sheetId}/export?format=csv`; + + 
// Fetch the CSV from Google Sheets + const response = await fetch(csvUrl); + + if (!response.ok) { + return { + statusCode: response.status, + body: JSON.stringify({ error: `Google Sheets returned ${response.status}` }) + }; + } + + const csv = await response.text(); + + return { + statusCode: 200, + headers: { + 'Content-Type': 'text/csv', + 'Cache-Control': 'max-age=300' // Cache for 5 minutes + }, + body: csv + }; + } catch (error) { + console.error('Error fetching Google Sheet:', error); + return { + statusCode: 500, + body: JSON.stringify({ + error: 'Failed to fetch data', + message: error.message + }) + }; + } +}; diff --git a/webapps/netlify/functions/login.js b/webapps/netlify/functions/login.js new file mode 100644 index 0000000..aad31dc --- /dev/null +++ b/webapps/netlify/functions/login.js @@ -0,0 +1,66 @@ +/** + * Netlify Function for secure login + * Password is stored as environment variable on server, never exposed to frontend + */ + +exports.handler = async (event) => { + // Only allow POST requests + if (event.httpMethod !== 'POST') { + return { + statusCode: 405, + body: JSON.stringify({ error: 'Method not allowed' }) + }; + } + + try { + // Parse request body + const body = JSON.parse(event.body); + const submittedPassword = body.password; + + if (!submittedPassword) { + return { + statusCode: 400, + body: JSON.stringify({ error: 'Password required' }) + }; + } + + // Get correct password from environment variable (set in Netlify UI) + const correctPassword = process.env.SMARTCANE_PASSWORD; + + if (!correctPassword) { + console.error('SMARTCANE_PASSWORD environment variable not set'); + return { + statusCode: 500, + body: JSON.stringify({ error: 'Server configuration error' }) + }; + } + + // Compare passwords + if (submittedPassword === correctPassword) { + return { + statusCode: 200, + headers: { + 'Set-Cookie': `auth_token=smartcane_${Date.now()}; Path=/; SameSite=Strict; Secure; HttpOnly` + }, + body: JSON.stringify({ + success: true, + 
message: 'Login successful' + }) + }; + } else { + // Add delay to prevent brute force + await new Promise(resolve => setTimeout(resolve, 1000)); + + return { + statusCode: 401, + body: JSON.stringify({ error: 'Invalid password' }) + }; + } + } catch (error) { + console.error('Login error:', error); + return { + statusCode: 500, + body: JSON.stringify({ error: 'Server error' }) + }; + } +}; diff --git a/webapps/sugar_mill_locator/app.js b/webapps/sugar_mill_locator/app.js index 02dcb84..1542b24 100644 --- a/webapps/sugar_mill_locator/app.js +++ b/webapps/sugar_mill_locator/app.js @@ -225,6 +225,10 @@ function initMap() { // Load CSV data loadMillsData(); + // Initialize Google Sheets auto-refresh + initGoogleSheetsAutoRefresh(); + showGoogleSheetsSetup(); + // Attach mode button listeners attachModeListeners(); @@ -277,11 +281,22 @@ function updateMeasurementPanel() { // Load mills from CSV async function loadMillsData() { try { - const response = await fetch('sugar_cane_factories_africa.csv'); - if (!response.ok) { - throw new Error(`HTTP error! status: ${response.status}`); + // Try to load from Google Sheets first + let csvText = await fetchGoogleSheetData(); + + // Fallback to local CSV if Google Sheets fails + if (!csvText) { + console.log('Falling back to local CSV file...'); + const response = await fetch('sugar_cane_factories_africa.csv'); + if (!response.ok) { + throw new Error(`HTTP error! 
status: ${response.status}`); + } + csvText = await response.text(); + showNotification('Using local data (Google Sheet unavailable)', 'warning'); + } else { + showNotification('✓ Data loaded from Google Sheet', 'success'); } - const csvText = await response.text(); + parseCSV(csvText); console.log('Mills loaded:', mills.length, mills.slice(0, 2)); renderMills(); @@ -289,11 +304,10 @@ async function loadMillsData() { updateLegend(); console.log('Legend updated'); } catch (error) { - console.error('Error loading CSV:', error); - // Show a notification to user + console.error('Error loading mills data:', error); const notification = document.createElement('div'); notification.style.cssText = 'position: fixed; top: 20px; right: 20px; background: #ff6b6b; color: white; padding: 15px 20px; border-radius: 5px; z-index: 9999;'; - notification.textContent = '⚠️ Could not load mill data. Make sure sugar_cane_factories_africa.csv is in the same folder.'; + notification.textContent = '⚠️ Could not load mill data. 
Check console for details.'; document.body.appendChild(notification); setTimeout(() => notification.remove(), 5000); } diff --git a/webapps/sugar_mill_locator/google-sheets-config.js b/webapps/sugar_mill_locator/google-sheets-config.js new file mode 100644 index 0000000..f6fa752 --- /dev/null +++ b/webapps/sugar_mill_locator/google-sheets-config.js @@ -0,0 +1,151 @@ +// Google Sheets Configuration +// This file connects to your Google Sheet for live data updates + +const GOOGLE_SHEETS_CONFIG = { + // Your Google Sheet ID (from the URL) + SHEET_ID: '1ZHEIyhupNDHVd1EScBn0DnuiAzMFoZcAPZm3U65abkY', + + // The sheet name or gid (the sheet tab you want to read from) + // If using gid=220881066, you can reference it this way + SHEET_NAME: 'Sheet1', // Change this to your actual sheet name if different + + // Auto-refresh interval in milliseconds (5 minutes = 300000ms) + REFRESH_INTERVAL: 300000, + + // Enable auto-refresh (set to false to disable) + AUTO_REFRESH_ENABLED: true +}; + +/** + * Fetch data from Google Sheet via Netlify Function + * The function keeps credentials secret on the server + */ +async function fetchGoogleSheetData() { + try { + // Call Netlify function instead of Google Sheets directly + // This keeps the Sheet ID and password hidden from browser dev tools + const response = await fetch('/.netlify/functions/get-mills'); + + if (!response.ok) { + throw new Error(`HTTP error! status: ${response.status}`); + } + + const csvText = await response.text(); + console.log('✓ Data fetched from Netlify Function (Google Sheet)'); + return csvText; + } catch (error) { + console.error('Error fetching data from Netlify Function:', error); + showNotification('Could not fetch data. 
Check your connection.', 'warning'); + return null; + } +} + +/** + * Initialize auto-refresh of data from Google Sheet + */ +function initGoogleSheetsAutoRefresh() { + if (!GOOGLE_SHEETS_CONFIG.AUTO_REFRESH_ENABLED) { + console.log('Google Sheets auto-refresh is disabled'); + return; + } + + console.log(`✓ Auto-refresh enabled (every ${GOOGLE_SHEETS_CONFIG.REFRESH_INTERVAL / 1000 / 60} minutes)`); + + // Refresh immediately on load + // (already done in initApp) + + // Then refresh periodically + setInterval(async () => { + console.log('🔄 Refreshing data from Google Sheet...'); + const csvData = await fetchGoogleSheetData(); + + if (csvData) { + // Clear old markers + Object.values(millMarkers).forEach(marker => map.removeLayer(marker)); + millMarkers = {}; + + // Parse new data + parseCSV(csvData); + + // Render updated mills + renderMills(); + updateLegend(); + + // Reapply filters + applyFilters(); + + console.log(`✓ Updated ${mills.length} mills from Google Sheet`); + showNotification(`Map updated with latest data (${mills.length} mills)`, 'success'); + } + }, GOOGLE_SHEETS_CONFIG.REFRESH_INTERVAL); +} + +/** + * Show notification to user + */ +function showNotification(message, type = 'info') { + const colors = { + 'success': '#4CAF50', + 'warning': '#FF9800', + 'error': '#F44336', + 'info': '#2196F3' + }; + + const notification = document.createElement('div'); + notification.style.cssText = ` + position: fixed; + top: 20px; + right: 20px; + background: ${colors[type] || colors.info}; + color: white; + padding: 15px 20px; + border-radius: 5px; + z-index: 9999; + box-shadow: 0 2px 8px rgba(0,0,0,0.2); + font-weight: 500; + `; + notification.textContent = message; + document.body.appendChild(notification); + + setTimeout(() => { + notification.style.transition = 'opacity 0.3s ease'; + notification.style.opacity = '0'; + setTimeout(() => notification.remove(), 300); + }, 4000); +} + +/** + * Provide setup instructions to the user + */ +function 
showGoogleSheetsSetup() { + console.log(` + ╔════════════════════════════════════════════════════════════╗ + ║ Google Sheets Integration Setup Instructions ║ + ╠════════════════════════════════════════════════════════════╣ + ║ ║ + ║ 1. Your Google Sheet is configured and ready! ║ + ║ ║ + ║ 2. Share the sheet with your colleagues: ║ + ║ - Click "Share" in the top-right ║ + ║ - Add their email addresses ║ + ║ - They must have "Editor" access ║ + ║ ║ + ║ 3. Column headers required (case-sensitive): ║ + ║ - Mill/Factory Name (or similar) ║ + ║ - Country ║ + ║ - Latitude ║ + ║ - Longitude ║ + ║ - Crushing Capacity (optional) ║ + ║ - Annual Sugar Production (optional) ║ + ║ - Notes (optional) ║ + ║ - Data Year (optional) ║ + ║ ║ + ║ 4. The map will automatically update every 5 minutes ║ + ║ with new data from the sheet ║ + ║ ║ + ║ 5. To change refresh interval, edit: ║ + ║ GOOGLE_SHEETS_CONFIG.REFRESH_INTERVAL ║ + ║ ║ + ╚════════════════════════════════════════════════════════════╝ + `); +} diff --git a/webapps/sugar_mill_locator/index.html b/webapps/sugar_mill_locator/index.html index bf866d7..b3b3286 100644 --- a/webapps/sugar_mill_locator/index.html +++ b/webapps/sugar_mill_locator/index.html @@ -619,6 +619,7 @@ +