SC-58: Implement grid filtering and per-date tiling

- Filter master grid to only overlapping tiles based on GeoJSON field boundaries
- Implement per-date tiling for resumable processing (can stop/restart anytime)
- Add grid-size versioning (subfolder structure: daily_tiles_split/5x5/, etc)
- Skip already-processed dates for idempotent execution
- Upfront extent checking (one-time), per-date processing (resumable)
- Reduce storage by excluding empty tiles outside field boundaries
This commit is contained in:
Timon 2026-01-13 09:35:51 +01:00
parent 4f5bb1be9f
commit 03f269302a

View file

@ -16,15 +16,22 @@ library(sf)
PROJECT <- "angata"
TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b")
OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split")
# Grid dimensions will be auto-determined based on ROI size
# Default: 5×5 = 25 tiles
# If ROI < 10×10 km: 1×1 = 1 tile (no splitting needed)
# GRID SIZE CONFIGURATION - Change this to use different grid sizes
# Options: 5x5 (25 tiles), 10x10 (100 tiles), etc.
# This determines the subfolder: daily_tiles_split/5x5/, daily_tiles_split/10x10/, etc.
GRID_NROWS <- 5
GRID_NCOLS <- 5
cat("Combined: Create Master Grid (5x5) and Split TIFFs into Tiles\n")
# Construct grid-specific subfolder path
GRID_SIZE_LABEL <- paste0(GRID_NCOLS, "x", GRID_NROWS)
OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL)
# Load field boundaries for overlap checking
GEOJSON_PATH <- file.path("laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson")
cat("Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n", sep = "")
cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
# ============================================================================
# PART 1: CHECK TIFF EXTENTS AND CREATE MASTER GRID
@ -32,7 +39,70 @@ cat("Combined: Create Master Grid (5x5) and Split TIFFs into Tiles\n")
cat("\n[PART 1] Creating Master Grid\n")
cat("\n[1] Checking TIFF extents...\n")
# Load field boundaries for overlap checking
cat("\n[1] Loading field boundaries from GeoJSON...\n")
if (!file.exists(GEOJSON_PATH)) {
stop("GeoJSON file not found at: ", GEOJSON_PATH, "\n",
"Please ensure ", PROJECT, " has a pivot.geojson file.")
}
field_boundaries_sf <- st_read(GEOJSON_PATH, quiet = TRUE)
field_boundaries_vect <- terra::vect(GEOJSON_PATH)
cat(" ✓ Loaded ", nrow(field_boundaries_sf), " field(s)\n", sep = "")
# Try to find a name column (could be 'name', 'field', 'field_name', etc.)
field_names <- NA
if ("name" %in% names(field_boundaries_sf)) {
field_names <- field_boundaries_sf$name
} else if ("field" %in% names(field_boundaries_sf)) {
field_names <- field_boundaries_sf$field
} else if ("field_name" %in% names(field_boundaries_sf)) {
field_names <- field_boundaries_sf$field_name
} else {
field_names <- 1:nrow(field_boundaries_sf) # Fall back to indices
}
cat(" Fields: ", paste(field_names, collapse = ", "), "\n", sep = "")
# Helper function: Check if a tile overlaps with any field
tile_overlaps_fields <- function(tile_extent, field_geoms) {
tryCatch({
# Create a tile polygon from extent
tile_poly <- st_as_sfc(st_bbox(c(
xmin = tile_extent$xmin,
xmax = tile_extent$xmax,
ymin = tile_extent$ymin,
ymax = tile_extent$ymax
), crs = 4326))
# Convert tile_poly to sf object for comparison
tile_sf <- st_sf(geometry = tile_poly)
# Filter out empty geometries from field_geoms
field_valid <- field_geoms[!st_is_empty(field_geoms)]
if (length(field_valid) == 0) {
return(FALSE) # No valid fields
}
# Create sf from valid fields for intersection check
field_sf <- st_sf(geometry = field_valid)
# Check for ANY intersection
result <- st_intersects(tile_sf, field_sf, sparse = FALSE)
# Return TRUE if any intersection found
return(any(result, na.rm = TRUE))
}, error = function(e) {
cat(" ⚠️ Error checking overlap: ", e$message, "\n", sep = "")
return(TRUE) # Default to including tile if there's an error
})
}
cat("\n[2] Checking TIFF extents...\n")
tiff_files <- list.files(TIFF_FOLDER, pattern = "\\.tif$", full.names = FALSE)
tiff_files <- sort(tiff_files)
@ -41,19 +111,27 @@ if (length(tiff_files) == 0) {
stop("No TIFF files found in ", TIFF_FOLDER)
}
cat(" Found", length(tiff_files), "TIFF file(s)\n")
cat(" Found ", length(tiff_files), " TIFF file(s)\n", sep = "")
cat(" Checking extents... (this may take a while)\n")
# Load all extents
# Load all extents - ONE TIME, upfront
extents <- list()
for (i in seq_along(tiff_files)) {
tiff_path <- file.path(TIFF_FOLDER, tiff_files[i])
raster <- terra::rast(tiff_path)
ext <- terra::ext(raster)
extents[[i]] <- ext
# Progress indicator every 50 files
if (i %% 50 == 0) {
cat(" Checked ", i, "/", length(tiff_files), " files\n", sep = "")
}
}
cat(" ✓ All extents loaded\n")
# Check if all extents match
cat("\n[2] Comparing extents...\n")
cat("\n[3] Comparing extents...\n")
tolerance <- 1e-8
all_match <- TRUE
@ -84,7 +162,7 @@ if (all_match) {
}
# Create master extent
cat("\n[3] Creating master extent...\n")
cat("\n[4] Creating master extent...\n")
master_xmin <- min(sapply(extents, function(e) e$xmin))
master_xmax <- max(sapply(extents, function(e) e$xmax))
@ -112,9 +190,9 @@ if (x_range_m < 10000 && y_range_m < 10000) {
N_TILES <- GRID_NROWS * GRID_NCOLS
# Check if master grid already exists
cat("\n[4] Checking if master grid exists...\n")
cat("\n[5] Checking if master grid exists...\n")
master_grid_file <- file.path(OUTPUT_FOLDER, "master_grid_5x5.geojson")
master_grid_file <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson"))
if (file.exists(master_grid_file)) {
cat(" ✓ Master grid exists! Loading existing grid...\n")
@ -125,7 +203,7 @@ if (file.exists(master_grid_file)) {
cat(" Grid does not exist. Creating new master grid...\n")
# Create 5×5 grid
cat("\n[5] Creating ", GRID_NCOLS, "×", GRID_NROWS, " master grid...\n", sep = "")
cat("\n[6] Creating ", GRID_NCOLS, "×", GRID_NROWS, " master grid...\n", sep = "")
master_bbox <- st_bbox(c(
xmin = master_xmin,
@ -157,56 +235,101 @@ if (file.exists(master_grid_file)) {
dir.create(OUTPUT_FOLDER, recursive = TRUE, showWarnings = FALSE)
}
st_write(master_grid_sf, master_grid_file, delete_dsn = TRUE, quiet = TRUE)
cat(" ✓ Master grid saved to: master_grid_5x5.geojson\n")
cat(" ✓ Master grid saved to: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
}
# ============================================================================
# PART 2: SPLIT EACH TIFF INTO TILES
# PART 2: CREATE FILTERED GRID (ONLY OVERLAPPING TILES)
# ============================================================================
cat("\n[PART 2] Splitting TIFFs into Tiles\n")
cat("\n[PART 2] Creating Filtered Grid (only overlapping tiles)\n")
cat("\n[6] Splitting TIFFs using master grid...\n")
cat("\n[7] Filtering master grid to only overlapping tiles...\n")
# Check which tiles overlap with any field
overlapping_tile_indices <- c()
for (tile_idx in 1:nrow(master_grid_sf)) {
tile_geom <- master_grid_sf[tile_idx, ]
# Check overlap with any field
if (tile_overlaps_fields(st_bbox(tile_geom$geometry), field_boundaries_sf$geometry)) {
overlapping_tile_indices <- c(overlapping_tile_indices, tile_idx)
}
}
cat(" Found ", length(overlapping_tile_indices), " overlapping tiles out of ", N_TILES, "\n", sep = "")
cat(" Reduction: ", N_TILES - length(overlapping_tile_indices), " empty tiles will NOT be created\n", sep = "")
# Create filtered grid with only overlapping tiles
filtered_grid_sf <- master_grid_sf[overlapping_tile_indices, ]
filtered_grid_sf$tile_id <- sprintf("%02d", overlapping_tile_indices)
# Convert to SpatVector for makeTiles
filtered_grid_vect <- terra::vect(filtered_grid_sf)
cat(" ✓ Filtered grid ready: ", nrow(filtered_grid_sf), " tiles to create per date\n", sep = "")
# ============================================================================
# PART 3: SPLIT EACH TIFF INTO TILES (INDEPENDENT, PER-DATE, RESUMABLE)
# ============================================================================
cat("\n[PART 3] Tiling Individual Dates (Per-Date Processing)\n")
cat("\n[8] Processing each date independently...\n")
cat(" (This process is RESUMABLE - you can stop and restart anytime)\n\n")
total_tiles_created <- 0
dates_skipped <- 0
dates_processed <- 0
for (file_idx in seq_along(tiff_files)) {
tiff_file <- tiff_files[file_idx]
date_str <- gsub("\\.tif$", "", tiff_file)
cat("\n Processing: ", tiff_file, "\n", sep = "")
# Create date-specific output folder
date_output_folder <- file.path(OUTPUT_FOLDER, date_str)
# Load TIFF
# CHECK: Skip if date already processed (RESUME-SAFE)
if (dir.exists(date_output_folder)) {
existing_tiles <- list.files(date_output_folder, pattern = "\\.tif$")
existing_tiles <- existing_tiles[!grepl("master_grid", existing_tiles)]
if (length(existing_tiles) > 0) {
cat("[", file_idx, "/", length(tiff_files), "] SKIP: ", date_str,
" (", length(existing_tiles), " tiles already exist)\n", sep = "")
dates_skipped <- dates_skipped + 1
next # Skip this date
}
}
cat("[", file_idx, "/", length(tiff_files), "] Processing: ", date_str, "\n", sep = "")
dates_processed <- dates_processed + 1
# Load TIFF for this date only
tiff_path <- file.path(TIFF_FOLDER, tiff_file)
raster <- terra::rast(tiff_path)
dims <- dim(raster)
cat(" Dimensions: ", dims[2], "×", dims[1], " pixels\n", sep = "")
cat(" Dimensions: ", dims[2], "×", dims[1], " pixels\n", sep = "")
# Create date-specific output folder
date_output_folder <- file.path(OUTPUT_FOLDER, date_str)
if (!dir.exists(date_output_folder)) {
dir.create(date_output_folder, recursive = TRUE, showWarnings = FALSE)
}
cat(" Splitting into ", N_TILES, " tiles using master grid...\n", sep = "")
cat(" Creating ", length(overlapping_tile_indices), " tiles...\n", sep = "")
# Split using master grid zones
# Use makeTiles with FILTERED grid (only overlapping tiles)
tiles_list <- terra::makeTiles(
x = raster,
y = master_grid_vect,
y = filtered_grid_vect,
filename = file.path(date_output_folder, "tile.tif"),
overwrite = TRUE
)
cat(" ✓ Created ", length(tiles_list), " tiles\n", sep = "")
# Rename tiles to [DATE]_[TILE_ID].tif
cat(" Renaming tiles...\n")
for (tile_idx in seq_along(tiles_list)) {
source_file <- file.path(date_output_folder, paste0("tile", tile_idx, ".tif"))
tile_id <- sprintf("%02d", tile_idx)
tile_id <- filtered_grid_sf$tile_id[tile_idx]
final_file <- file.path(date_output_folder, paste0(date_str, "_", tile_id, ".tif"))
if (file.exists(source_file)) {
@ -214,7 +337,7 @@ for (file_idx in seq_along(tiff_files)) {
}
}
cat(" ✓ Saved ", N_TILES, " tiles in folder: ", date_str, "/\n", sep = "")
cat(" ✓ Created ", length(tiles_list), " tiles\n", sep = "")
total_tiles_created <- total_tiles_created + length(tiles_list)
}
@ -222,7 +345,7 @@ for (file_idx in seq_along(tiff_files)) {
# VERIFICATION
# ============================================================================
cat("\n[7] Verifying output...\n")
cat("\n[9] Verifying output...\n")
# Count tiles per date folder
date_folders <- list.dirs(OUTPUT_FOLDER, full.names = FALSE, recursive = FALSE)
@ -241,31 +364,44 @@ for (date_folder in date_folders) {
# SUMMARY
# ============================================================================
cat("SUMMARY\n")
cat("\n\n========== SUMMARY ==========\n")
cat("\nMaster Grid:\n")
cat(" - Dimensions: ", GRID_NCOLS, "×", GRID_NROWS, "=", N_TILES, " tiles\n", sep = "")
cat(" - File: master_grid_5x5.geojson\n")
cat("\nGrid Configuration:\n")
cat(" - Dimensions: ", GRID_NCOLS, "×", GRID_NROWS, " = ", N_TILES, " total tile positions\n", sep = "")
cat(" - Storage subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
cat(" - Master grid file: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
cat("\nTIFF Splitting:\n")
cat(" - TIFFs processed: ", length(tiff_files), "\n", sep = "")
cat(" - Total tile files created: ", total_tile_files, "\n", sep = "")
cat(" - Expected total: ", length(tiff_files) * N_TILES, "\n", sep = "")
cat("\nField Filtering:\n")
cat(" - Field boundaries loaded from pivot.geojson\n")
cat(" - Only overlapping tiles created (empty tiles deleted)\n")
cat(" - Significant storage savings for sparse fields!\n")
cat("\nDirectory Structure:\n")
cat(" daily_tiles/\n")
cat(" ├── master_grid_5x5.geojson\n")
for (date_folder in date_folders) {
cat(" ├── ", date_folder, "/\n", sep = "")
cat(" │ ├── ", date_folder, "_01.tif\n", sep = "")
cat(" │ ├── ", date_folder, "_02.tif\n", sep = "")
cat(" │ └── ...\n")
cat("\nProcessing Summary:\n")
cat(" - Total TIFF files: ", length(tiff_files), "\n", sep = "")
cat(" - Dates skipped (already processed): ", dates_skipped, "\n", sep = "")
cat(" - Dates processed: ", dates_processed, "\n", sep = "")
cat(" - Total tiles created: ", total_tiles_created, "\n", sep = "")
if (dates_processed > 0) {
avg_tiles_per_date <- total_tiles_created / dates_processed
cat(" - Average tiles per date: ", round(avg_tiles_per_date, 1), "\n", sep = "")
}
cat("\nKey Properties:\n")
cat(" - All tiles align perfectly across dates\n")
cat(" - Tile 01 covers same area in all dates\n")
cat(" - Date folders provide organizational hierarchy\n")
cat(" - Can do time-series analysis per tile\n")
cat("\nDirectory Structure:\n")
cat(" laravel_app/storage/app/", PROJECT, "/daily_tiles_split/\n", sep = "")
cat(" └── ", GRID_SIZE_LABEL, "/\n", sep = "")
cat(" ├── master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
cat(" ├── 2024-01-15/\n")
cat(" │ ├── 2024-01-15_01.tif (only overlapping tiles)\n")
cat(" │ ├── 2024-01-15_05.tif\n")
cat(" │ └── ...\n")
cat(" ├── 2024-01-16/\n")
cat(" │ └── ...\n")
cat(" └── ...\n")
cat("✓ Script complete!\n")
cat("\n⭐ Key Benefits:\n")
cat(" ✓ Overlap-filtered: No wasted empty tiles\n")
cat(" ✓ Skip existing dates: Resume-safe, idempotent\n")
cat(" ✓ Grid versioning: Future 10x10 grids stored separately\n")
cat(" ✓ Disk efficient: Storage reduced for sparse ROIs\n")
cat("\n✓ Script complete!\n")