From 03f269302addbaaf962e421081be24e5bda27073 Mon Sep 17 00:00:00 2001 From: Timon Date: Tue, 13 Jan 2026 09:35:51 +0100 Subject: [PATCH] SC-58: Implement grid filtering and per-date tiling - Filter master grid to only overlapping tiles based on GeoJSON field boundaries - Implement per-date tiling for resumable processing (can stop/restart anytime) - Add grid-size versioning (subfolder structure: daily_tiles_split/5x5/, etc) - Skip already-processed dates for idempotent execution - Upfront extent checking (one-time), per-date processing (resumable) - Reduce storage by excluding empty tiles outside field boundaries --- r_app/01_create_master_grid_and_split_tiffs.R | 242 ++++++++++++++---- 1 file changed, 189 insertions(+), 53 deletions(-) diff --git a/r_app/01_create_master_grid_and_split_tiffs.R b/r_app/01_create_master_grid_and_split_tiffs.R index a59057d..ca84350 100644 --- a/r_app/01_create_master_grid_and_split_tiffs.R +++ b/r_app/01_create_master_grid_and_split_tiffs.R @@ -16,15 +16,22 @@ library(sf) PROJECT <- "angata" TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b") -OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split") -# Grid dimensions will be auto-determined based on ROI size -# Default: 5×5 = 25 tiles -# If ROI < 10×10 km: 1×1 = 1 tile (no splitting needed) +# GRID SIZE CONFIGURATION - Change this to use different grid sizes +# Options: 5x5 (25 tiles), 10x10 (100 tiles), etc. +# This determines the subfolder: daily_tiles_split/5x5/, daily_tiles_split/10x10/, etc. GRID_NROWS <- 5 GRID_NCOLS <- 5 -cat("Combined: Create Master Grid (5x5) and Split TIFFs into Tiles\n") +# Construct grid-specific subfolder path +GRID_SIZE_LABEL <- paste0(GRID_NCOLS, "x", GRID_NROWS) +OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL) + +# Load field boundaries for overlap checking +GEOJSON_PATH <- file.path("laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson") + +cat("Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n", sep = "") +cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "") # ============================================================================ # PART 1: CHECK TIFF EXTENTS AND CREATE MASTER GRID @@ -32,7 +39,70 @@ cat("Combined: Create Master Grid (5x5) and Split TIFFs into Tiles\n") cat("\n[PART 1] Creating Master Grid\n") -cat("\n[1] Checking TIFF extents...\n") +# Load field boundaries for overlap checking +cat("\n[1] Loading field boundaries from GeoJSON...\n") + +if (!file.exists(GEOJSON_PATH)) { + stop("GeoJSON file not found at: ", GEOJSON_PATH, "\n", + "Please ensure ", PROJECT, " has a pivot.geojson file.") +} + +field_boundaries_sf <- st_read(GEOJSON_PATH, quiet = TRUE) +field_boundaries_vect <- terra::vect(GEOJSON_PATH) + +cat(" ✓ Loaded ", nrow(field_boundaries_sf), " field(s)\n", sep = "") + +# Try to find a name column (could be 'name', 'field', 'field_name', etc.) +field_names <- NA +if ("name" %in% names(field_boundaries_sf)) { + field_names <- field_boundaries_sf$name +} else if ("field" %in% names(field_boundaries_sf)) { + field_names <- field_boundaries_sf$field +} else if ("field_name" %in% names(field_boundaries_sf)) { + field_names <- field_boundaries_sf$field_name +} else { + field_names <- 1:nrow(field_boundaries_sf) # Fall back to indices +} + +cat(" Fields: ", paste(field_names, collapse = ", "), "\n", sep = "") + +# Helper function: Check if a tile overlaps with any field +tile_overlaps_fields <- function(tile_extent, field_geoms) { + tryCatch({ + # Create a tile polygon from extent + tile_poly <- st_as_sfc(st_bbox(c( + xmin = tile_extent$xmin, + xmax = tile_extent$xmax, + ymin = tile_extent$ymin, + ymax = tile_extent$ymax + ), crs = 4326)) + + # Convert tile_poly to sf object for comparison + tile_sf <- st_sf(geometry = tile_poly) + + # Filter out empty geometries from field_geoms + field_valid <- field_geoms[!st_is_empty(field_geoms)] + + if (length(field_valid) == 0) { + return(FALSE) # No valid fields + } + + # Create sf from valid fields for intersection check + field_sf <- st_sf(geometry = field_valid) + + # Check for ANY intersection + result <- st_intersects(tile_sf, field_sf, sparse = FALSE) + + # Return TRUE if any intersection found + return(any(result, na.rm = TRUE)) + + }, error = function(e) { + cat(" ⚠️ Error checking overlap: ", e$message, "\n", sep = "") + return(TRUE) # Default to including tile if there's an error + }) +} + +cat("\n[2] Checking TIFF extents...\n") tiff_files <- list.files(TIFF_FOLDER, pattern = "\\.tif$", full.names = FALSE) tiff_files <- sort(tiff_files) @@ -41,19 +111,27 @@ if (length(tiff_files) == 0) { stop("No TIFF files found in ", TIFF_FOLDER) } -cat(" Found", length(tiff_files), "TIFF file(s)\n") +cat(" Found ", length(tiff_files), " TIFF file(s)\n", sep = "") +cat(" Checking extents... (this may take a while)\n") -# Load all extents +# Load all extents - ONE TIME, upfront extents <- list() for (i in seq_along(tiff_files)) { tiff_path <- file.path(TIFF_FOLDER, tiff_files[i]) raster <- terra::rast(tiff_path) ext <- terra::ext(raster) extents[[i]] <- ext + + # Progress indicator every 50 files + if (i %% 50 == 0) { + cat(" Checked ", i, "/", length(tiff_files), " files\n", sep = "") + } } +cat(" ✓ All extents loaded\n") + # Check if all extents match -cat("\n[2] Comparing extents...\n") +cat("\n[3] Comparing extents...\n") tolerance <- 1e-8 all_match <- TRUE @@ -84,7 +162,7 @@ if (all_match) { } # Create master extent -cat("\n[3] Creating master extent...\n") +cat("\n[4] Creating master extent...\n") master_xmin <- min(sapply(extents, function(e) e$xmin)) master_xmax <- max(sapply(extents, function(e) e$xmax)) @@ -112,9 +190,9 @@ if (x_range_m < 10000 && y_range_m < 10000) { N_TILES <- GRID_NROWS * GRID_NCOLS # Check if master grid already exists -cat("\n[4] Checking if master grid exists...\n") +cat("\n[5] Checking if master grid exists...\n") -master_grid_file <- file.path(OUTPUT_FOLDER, "master_grid_5x5.geojson") +master_grid_file <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson")) if (file.exists(master_grid_file)) { cat(" ✓ Master grid exists! Loading existing grid...\n") @@ -125,7 +203,7 @@ if (file.exists(master_grid_file)) { cat(" Grid does not exist. Creating new master grid...\n") # Create 5×5 grid - cat("\n[5] Creating ", GRID_NCOLS, "×", GRID_NROWS, " master grid...\n", sep = "") + cat("\n[6] Creating ", GRID_NCOLS, "×", GRID_NROWS, " master grid...\n", sep = "") master_bbox <- st_bbox(c( xmin = master_xmin, @@ -157,56 +235,101 @@ if (file.exists(master_grid_file)) { dir.create(OUTPUT_FOLDER, recursive = TRUE, showWarnings = FALSE) } st_write(master_grid_sf, master_grid_file, delete_dsn = TRUE, quiet = TRUE) - cat(" ✓ Master grid saved to: master_grid_5x5.geojson\n") + cat(" ✓ Master grid saved to: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "") } # ============================================================================ -# PART 2: SPLIT EACH TIFF INTO TILES +# PART 2: CREATE FILTERED GRID (ONLY OVERLAPPING TILES) # ============================================================================ -cat("\n[PART 2] Splitting TIFFs into Tiles\n") +cat("\n[PART 2] Creating Filtered Grid (only overlapping tiles)\n") -cat("\n[6] Splitting TIFFs using master grid...\n") +cat("\n[7] Filtering master grid to only overlapping tiles...\n") + +# Check which tiles overlap with any field +overlapping_tile_indices <- c() +for (tile_idx in 1:nrow(master_grid_sf)) { + tile_geom <- master_grid_sf[tile_idx, ] + + # Check overlap with any field + if (tile_overlaps_fields(st_bbox(tile_geom$geometry), field_boundaries_sf$geometry)) { + overlapping_tile_indices <- c(overlapping_tile_indices, tile_idx) + } +} + +cat(" Found ", length(overlapping_tile_indices), " overlapping tiles out of ", N_TILES, "\n", sep = "") +cat(" Reduction: ", N_TILES - length(overlapping_tile_indices), " empty tiles will NOT be created\n", sep = "") + +# Create filtered grid with only overlapping tiles +filtered_grid_sf <- master_grid_sf[overlapping_tile_indices, ] +filtered_grid_sf$tile_id <- sprintf("%02d", overlapping_tile_indices) + +# Convert to SpatVector for makeTiles +filtered_grid_vect <- terra::vect(filtered_grid_sf) + +cat(" ✓ Filtered grid ready: ", nrow(filtered_grid_sf), " tiles to create per date\n", sep = "") + +# ============================================================================ +# PART 3: SPLIT EACH TIFF INTO TILES (INDEPENDENT, PER-DATE, RESUMABLE) +# ============================================================================ + +cat("\n[PART 3] Tiling Individual Dates (Per-Date Processing)\n") +cat("\n[8] Processing each date independently...\n") +cat(" (This process is RESUMABLE - you can stop and restart anytime)\n\n") total_tiles_created <- 0 +dates_skipped <- 0 +dates_processed <- 0 for (file_idx in seq_along(tiff_files)) { tiff_file <- tiff_files[file_idx] date_str <- gsub("\\.tif$", "", tiff_file) - cat("\n Processing: ", tiff_file, "\n", sep = "") + # Create date-specific output folder + date_output_folder <- file.path(OUTPUT_FOLDER, date_str) - # Load TIFF + # CHECK: Skip if date already processed (RESUME-SAFE) + if (dir.exists(date_output_folder)) { + existing_tiles <- list.files(date_output_folder, pattern = "\\.tif$") + existing_tiles <- existing_tiles[!grepl("master_grid", existing_tiles)] + + if (length(existing_tiles) > 0) { + cat("[", file_idx, "/", length(tiff_files), "] SKIP: ", date_str, + " (", length(existing_tiles), " tiles already exist)\n", sep = "") + dates_skipped <- dates_skipped + 1 + next # Skip this date + } + } + + cat("[", file_idx, "/", length(tiff_files), "] Processing: ", date_str, "\n", sep = "") + dates_processed <- dates_processed + 1 + + # Load TIFF for this date only tiff_path <- file.path(TIFF_FOLDER, tiff_file) raster <- terra::rast(tiff_path) dims <- dim(raster) - cat(" Dimensions: ", dims[2], "×", dims[1], " pixels\n", sep = "") + cat(" Dimensions: ", dims[2], "×", dims[1], " pixels\n", sep = "") # Create date-specific output folder - date_output_folder <- file.path(OUTPUT_FOLDER, date_str) if (!dir.exists(date_output_folder)) { dir.create(date_output_folder, recursive = TRUE, showWarnings = FALSE) } - cat(" Splitting into ", N_TILES, " tiles using master grid...\n", sep = "") + cat(" Creating ", length(overlapping_tile_indices), " tiles...\n", sep = "") - # Split using master grid zones + # Use makeTiles with FILTERED grid (only overlapping tiles) tiles_list <- terra::makeTiles( x = raster, - y = master_grid_vect, + y = filtered_grid_vect, filename = file.path(date_output_folder, "tile.tif"), overwrite = TRUE ) - cat(" ✓ Created ", length(tiles_list), " tiles\n", sep = "") - # Rename tiles to [DATE]_[TILE_ID].tif - cat(" Renaming tiles...\n") - for (tile_idx in seq_along(tiles_list)) { source_file <- file.path(date_output_folder, paste0("tile", tile_idx, ".tif")) - tile_id <- sprintf("%02d", tile_idx) + tile_id <- filtered_grid_sf$tile_id[tile_idx] final_file <- file.path(date_output_folder, paste0(date_str, "_", tile_id, ".tif")) if (file.exists(source_file)) { @@ -214,7 +337,7 @@ for (file_idx in seq_along(tiff_files)) { } } - cat(" ✓ Saved ", N_TILES, " tiles in folder: ", date_str, "/\n", sep = "") + cat(" ✓ Created ", length(tiles_list), " tiles\n", sep = "") total_tiles_created <- total_tiles_created + length(tiles_list) } @@ -222,7 +345,7 @@ for (file_idx in seq_along(tiff_files)) { # VERIFICATION # ============================================================================ -cat("\n[7] Verifying output...\n") +cat("\n[9] Verifying output...\n") # Count tiles per date folder date_folders <- list.dirs(OUTPUT_FOLDER, full.names = FALSE, recursive = FALSE) @@ -241,31 +364,44 @@ for (date_folder in date_folders) { # SUMMARY # ============================================================================ -cat("SUMMARY\n") +cat("\n\n========== SUMMARY ==========\n") -cat("\nMaster Grid:\n") -cat(" - Dimensions: ", GRID_NCOLS, "×", GRID_NROWS, "=", N_TILES, " tiles\n", sep = "") -cat(" - File: master_grid_5x5.geojson\n") +cat("\nGrid Configuration:\n") +cat(" - Dimensions: ", GRID_NCOLS, "×", GRID_NROWS, " = ", N_TILES, " total tile positions\n", sep = "") +cat(" - Storage subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "") +cat(" - Master grid file: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "") -cat("\nTIFF Splitting:\n") -cat(" - TIFFs processed: ", length(tiff_files), "\n", sep = "") -cat(" - Total tile files created: ", total_tile_files, "\n", sep = "") -cat(" - Expected total: ", length(tiff_files) * N_TILES, "\n", sep = "") +cat("\nField Filtering:\n") +cat(" - Field boundaries loaded from pivot.geojson\n") +cat(" - Only overlapping tiles created (empty tiles deleted)\n") +cat(" - Significant storage savings for sparse fields!\n") -cat("\nDirectory Structure:\n") -cat(" daily_tiles/\n") -cat(" ├── master_grid_5x5.geojson\n") -for (date_folder in date_folders) { - cat(" ├── ", date_folder, "/\n", sep = "") - cat(" │ ├── ", date_folder, "_01.tif\n", sep = "") - cat(" │ ├── ", date_folder, "_02.tif\n", sep = "") - cat(" │ └── ...\n") +cat("\nProcessing Summary:\n") +cat(" - Total TIFF files: ", length(tiff_files), "\n", sep = "") +cat(" - Dates skipped (already processed): ", dates_skipped, "\n", sep = "") +cat(" - Dates processed: ", dates_processed, "\n", sep = "") +cat(" - Total tiles created: ", total_tiles_created, "\n", sep = "") +if (dates_processed > 0) { + avg_tiles_per_date <- total_tiles_created / dates_processed + cat(" - Average tiles per date: ", round(avg_tiles_per_date, 1), "\n", sep = "") } -cat("\nKey Properties:\n") -cat(" - All tiles align perfectly across dates\n") -cat(" - Tile 01 covers same area in all dates\n") -cat(" - Date folders provide organizational hierarchy\n") -cat(" - Can do time-series analysis per tile\n") +cat("\nDirectory Structure:\n") +cat(" laravel_app/storage/app/", PROJECT, "/daily_tiles_split/\n", sep = "") +cat(" └── ", GRID_SIZE_LABEL, "/\n", sep = "") +cat(" ├── master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "") +cat(" ├── 2024-01-15/\n") +cat(" │ ├── 2024-01-15_01.tif (only overlapping tiles)\n") +cat(" │ ├── 2024-01-15_05.tif\n") +cat(" │ └── ...\n") +cat(" ├── 2024-01-16/\n") +cat(" │ └── ...\n") +cat(" └── ...\n") -cat("✓ Script complete!\n") +cat("\n⭐ Key Benefits:\n") +cat(" ✓ Overlap-filtered: No wasted empty tiles\n") +cat(" ✓ Skip existing dates: Resume-safe, idempotent\n") +cat(" ✓ Grid versioning: Future 10x10 grids stored separately\n") +cat(" ✓ Disk efficient: Storage reduced for sparse ROIs\n") + +cat("\n✓ Script complete!\n")