SC-58: Implement grid filtering and per-date tiling
- Filter master grid to only overlapping tiles based on GeoJSON field boundaries - Implement per-date tiling for resumable processing (can stop/restart anytime) - Add grid-size versioning (subfolder structure: daily_tiles_split/5x5/, etc) - Skip already-processed dates for idempotent execution - Upfront extent checking (one-time), per-date processing (resumable) - Reduce storage by excluding empty tiles outside field boundaries
This commit is contained in:
parent
4f5bb1be9f
commit
03f269302a
|
|
@ -16,15 +16,22 @@ library(sf)
|
|||
|
||||
PROJECT <- "angata"
|
||||
TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b")
|
||||
OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split")
|
||||
|
||||
# Grid dimensions will be auto-determined based on ROI size
|
||||
# Default: 5×5 = 25 tiles
|
||||
# If ROI < 10×10 km: 1×1 = 1 tile (no splitting needed)
|
||||
# GRID SIZE CONFIGURATION - Change this to use different grid sizes
|
||||
# Options: 5x5 (25 tiles), 10x10 (100 tiles), etc.
|
||||
# This determines the subfolder: daily_tiles_split/5x5/, daily_tiles_split/10x10/, etc.
|
||||
GRID_NROWS <- 5
|
||||
GRID_NCOLS <- 5
|
||||
|
||||
cat("Combined: Create Master Grid (5x5) and Split TIFFs into Tiles\n")
|
||||
# Construct grid-specific subfolder path
|
||||
GRID_SIZE_LABEL <- paste0(GRID_NCOLS, "x", GRID_NROWS)
|
||||
OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL)
|
||||
|
||||
# Load field boundaries for overlap checking
|
||||
GEOJSON_PATH <- file.path("laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson")
|
||||
|
||||
cat("Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n", sep = "")
|
||||
cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
|
||||
|
||||
# ============================================================================
|
||||
# PART 1: CHECK TIFF EXTENTS AND CREATE MASTER GRID
|
||||
|
|
@ -32,7 +39,70 @@ cat("Combined: Create Master Grid (5x5) and Split TIFFs into Tiles\n")
|
|||
|
||||
cat("\n[PART 1] Creating Master Grid\n")
|
||||
|
||||
cat("\n[1] Checking TIFF extents...\n")
|
||||
# Load field boundaries for overlap checking
|
||||
cat("\n[1] Loading field boundaries from GeoJSON...\n")
|
||||
|
||||
if (!file.exists(GEOJSON_PATH)) {
|
||||
stop("GeoJSON file not found at: ", GEOJSON_PATH, "\n",
|
||||
"Please ensure ", PROJECT, " has a pivot.geojson file.")
|
||||
}
|
||||
|
||||
field_boundaries_sf <- st_read(GEOJSON_PATH, quiet = TRUE)
|
||||
field_boundaries_vect <- terra::vect(GEOJSON_PATH)
|
||||
|
||||
cat(" ✓ Loaded ", nrow(field_boundaries_sf), " field(s)\n", sep = "")
|
||||
|
||||
# Try to find a name column (could be 'name', 'field', 'field_name', etc.)
|
||||
field_names <- NA
|
||||
if ("name" %in% names(field_boundaries_sf)) {
|
||||
field_names <- field_boundaries_sf$name
|
||||
} else if ("field" %in% names(field_boundaries_sf)) {
|
||||
field_names <- field_boundaries_sf$field
|
||||
} else if ("field_name" %in% names(field_boundaries_sf)) {
|
||||
field_names <- field_boundaries_sf$field_name
|
||||
} else {
|
||||
field_names <- 1:nrow(field_boundaries_sf) # Fall back to indices
|
||||
}
|
||||
|
||||
cat(" Fields: ", paste(field_names, collapse = ", "), "\n", sep = "")
|
||||
|
||||
# Helper function: Check if a tile overlaps with any field
|
||||
tile_overlaps_fields <- function(tile_extent, field_geoms) {
|
||||
tryCatch({
|
||||
# Create a tile polygon from extent
|
||||
tile_poly <- st_as_sfc(st_bbox(c(
|
||||
xmin = tile_extent$xmin,
|
||||
xmax = tile_extent$xmax,
|
||||
ymin = tile_extent$ymin,
|
||||
ymax = tile_extent$ymax
|
||||
), crs = 4326))
|
||||
|
||||
# Convert tile_poly to sf object for comparison
|
||||
tile_sf <- st_sf(geometry = tile_poly)
|
||||
|
||||
# Filter out empty geometries from field_geoms
|
||||
field_valid <- field_geoms[!st_is_empty(field_geoms)]
|
||||
|
||||
if (length(field_valid) == 0) {
|
||||
return(FALSE) # No valid fields
|
||||
}
|
||||
|
||||
# Create sf from valid fields for intersection check
|
||||
field_sf <- st_sf(geometry = field_valid)
|
||||
|
||||
# Check for ANY intersection
|
||||
result <- st_intersects(tile_sf, field_sf, sparse = FALSE)
|
||||
|
||||
# Return TRUE if any intersection found
|
||||
return(any(result, na.rm = TRUE))
|
||||
|
||||
}, error = function(e) {
|
||||
cat(" ⚠️ Error checking overlap: ", e$message, "\n", sep = "")
|
||||
return(TRUE) # Default to including tile if there's an error
|
||||
})
|
||||
}
|
||||
|
||||
cat("\n[2] Checking TIFF extents...\n")
|
||||
|
||||
tiff_files <- list.files(TIFF_FOLDER, pattern = "\\.tif$", full.names = FALSE)
|
||||
tiff_files <- sort(tiff_files)
|
||||
|
|
@ -41,19 +111,27 @@ if (length(tiff_files) == 0) {
|
|||
stop("No TIFF files found in ", TIFF_FOLDER)
|
||||
}
|
||||
|
||||
cat(" Found", length(tiff_files), "TIFF file(s)\n")
|
||||
cat(" Found ", length(tiff_files), " TIFF file(s)\n", sep = "")
|
||||
cat(" Checking extents... (this may take a while)\n")
|
||||
|
||||
# Load all extents
|
||||
# Load all extents - ONE TIME, upfront
|
||||
extents <- list()
|
||||
for (i in seq_along(tiff_files)) {
|
||||
tiff_path <- file.path(TIFF_FOLDER, tiff_files[i])
|
||||
raster <- terra::rast(tiff_path)
|
||||
ext <- terra::ext(raster)
|
||||
extents[[i]] <- ext
|
||||
|
||||
# Progress indicator every 50 files
|
||||
if (i %% 50 == 0) {
|
||||
cat(" Checked ", i, "/", length(tiff_files), " files\n", sep = "")
|
||||
}
|
||||
}
|
||||
|
||||
cat(" ✓ All extents loaded\n")
|
||||
|
||||
# Check if all extents match
|
||||
cat("\n[2] Comparing extents...\n")
|
||||
cat("\n[3] Comparing extents...\n")
|
||||
|
||||
tolerance <- 1e-8
|
||||
all_match <- TRUE
|
||||
|
|
@ -84,7 +162,7 @@ if (all_match) {
|
|||
}
|
||||
|
||||
# Create master extent
|
||||
cat("\n[3] Creating master extent...\n")
|
||||
cat("\n[4] Creating master extent...\n")
|
||||
|
||||
master_xmin <- min(sapply(extents, function(e) e$xmin))
|
||||
master_xmax <- max(sapply(extents, function(e) e$xmax))
|
||||
|
|
@ -112,9 +190,9 @@ if (x_range_m < 10000 && y_range_m < 10000) {
|
|||
N_TILES <- GRID_NROWS * GRID_NCOLS
|
||||
|
||||
# Check if master grid already exists
|
||||
cat("\n[4] Checking if master grid exists...\n")
|
||||
cat("\n[5] Checking if master grid exists...\n")
|
||||
|
||||
master_grid_file <- file.path(OUTPUT_FOLDER, "master_grid_5x5.geojson")
|
||||
master_grid_file <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson"))
|
||||
|
||||
if (file.exists(master_grid_file)) {
|
||||
cat(" ✓ Master grid exists! Loading existing grid...\n")
|
||||
|
|
@ -125,7 +203,7 @@ if (file.exists(master_grid_file)) {
|
|||
cat(" Grid does not exist. Creating new master grid...\n")
|
||||
|
||||
# Create 5×5 grid
|
||||
cat("\n[5] Creating ", GRID_NCOLS, "×", GRID_NROWS, " master grid...\n", sep = "")
|
||||
cat("\n[6] Creating ", GRID_NCOLS, "×", GRID_NROWS, " master grid...\n", sep = "")
|
||||
|
||||
master_bbox <- st_bbox(c(
|
||||
xmin = master_xmin,
|
||||
|
|
@ -157,56 +235,101 @@ if (file.exists(master_grid_file)) {
|
|||
dir.create(OUTPUT_FOLDER, recursive = TRUE, showWarnings = FALSE)
|
||||
}
|
||||
st_write(master_grid_sf, master_grid_file, delete_dsn = TRUE, quiet = TRUE)
|
||||
cat(" ✓ Master grid saved to: master_grid_5x5.geojson\n")
|
||||
cat(" ✓ Master grid saved to: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# PART 2: SPLIT EACH TIFF INTO TILES
|
||||
# PART 2: CREATE FILTERED GRID (ONLY OVERLAPPING TILES)
|
||||
# ============================================================================
|
||||
|
||||
cat("\n[PART 2] Splitting TIFFs into Tiles\n")
|
||||
cat("\n[PART 2] Creating Filtered Grid (only overlapping tiles)\n")
|
||||
|
||||
cat("\n[6] Splitting TIFFs using master grid...\n")
|
||||
cat("\n[7] Filtering master grid to only overlapping tiles...\n")
|
||||
|
||||
# Check which tiles overlap with any field
|
||||
overlapping_tile_indices <- c()
|
||||
for (tile_idx in 1:nrow(master_grid_sf)) {
|
||||
tile_geom <- master_grid_sf[tile_idx, ]
|
||||
|
||||
# Check overlap with any field
|
||||
if (tile_overlaps_fields(st_bbox(tile_geom$geometry), field_boundaries_sf$geometry)) {
|
||||
overlapping_tile_indices <- c(overlapping_tile_indices, tile_idx)
|
||||
}
|
||||
}
|
||||
|
||||
cat(" Found ", length(overlapping_tile_indices), " overlapping tiles out of ", N_TILES, "\n", sep = "")
|
||||
cat(" Reduction: ", N_TILES - length(overlapping_tile_indices), " empty tiles will NOT be created\n", sep = "")
|
||||
|
||||
# Create filtered grid with only overlapping tiles
|
||||
filtered_grid_sf <- master_grid_sf[overlapping_tile_indices, ]
|
||||
filtered_grid_sf$tile_id <- sprintf("%02d", overlapping_tile_indices)
|
||||
|
||||
# Convert to SpatVector for makeTiles
|
||||
filtered_grid_vect <- terra::vect(filtered_grid_sf)
|
||||
|
||||
cat(" ✓ Filtered grid ready: ", nrow(filtered_grid_sf), " tiles to create per date\n", sep = "")
|
||||
|
||||
# ============================================================================
|
||||
# PART 3: SPLIT EACH TIFF INTO TILES (INDEPENDENT, PER-DATE, RESUMABLE)
|
||||
# ============================================================================
|
||||
|
||||
cat("\n[PART 3] Tiling Individual Dates (Per-Date Processing)\n")
|
||||
cat("\n[8] Processing each date independently...\n")
|
||||
cat(" (This process is RESUMABLE - you can stop and restart anytime)\n\n")
|
||||
|
||||
total_tiles_created <- 0
|
||||
dates_skipped <- 0
|
||||
dates_processed <- 0
|
||||
|
||||
for (file_idx in seq_along(tiff_files)) {
|
||||
tiff_file <- tiff_files[file_idx]
|
||||
date_str <- gsub("\\.tif$", "", tiff_file)
|
||||
|
||||
cat("\n Processing: ", tiff_file, "\n", sep = "")
|
||||
# Create date-specific output folder
|
||||
date_output_folder <- file.path(OUTPUT_FOLDER, date_str)
|
||||
|
||||
# Load TIFF
|
||||
# CHECK: Skip if date already processed (RESUME-SAFE)
|
||||
if (dir.exists(date_output_folder)) {
|
||||
existing_tiles <- list.files(date_output_folder, pattern = "\\.tif$")
|
||||
existing_tiles <- existing_tiles[!grepl("master_grid", existing_tiles)]
|
||||
|
||||
if (length(existing_tiles) > 0) {
|
||||
cat("[", file_idx, "/", length(tiff_files), "] SKIP: ", date_str,
|
||||
" (", length(existing_tiles), " tiles already exist)\n", sep = "")
|
||||
dates_skipped <- dates_skipped + 1
|
||||
next # Skip this date
|
||||
}
|
||||
}
|
||||
|
||||
cat("[", file_idx, "/", length(tiff_files), "] Processing: ", date_str, "\n", sep = "")
|
||||
dates_processed <- dates_processed + 1
|
||||
|
||||
# Load TIFF for this date only
|
||||
tiff_path <- file.path(TIFF_FOLDER, tiff_file)
|
||||
raster <- terra::rast(tiff_path)
|
||||
|
||||
dims <- dim(raster)
|
||||
cat(" Dimensions: ", dims[2], "×", dims[1], " pixels\n", sep = "")
|
||||
cat(" Dimensions: ", dims[2], "×", dims[1], " pixels\n", sep = "")
|
||||
|
||||
# Create date-specific output folder
|
||||
date_output_folder <- file.path(OUTPUT_FOLDER, date_str)
|
||||
if (!dir.exists(date_output_folder)) {
|
||||
dir.create(date_output_folder, recursive = TRUE, showWarnings = FALSE)
|
||||
}
|
||||
|
||||
cat(" Splitting into ", N_TILES, " tiles using master grid...\n", sep = "")
|
||||
cat(" Creating ", length(overlapping_tile_indices), " tiles...\n", sep = "")
|
||||
|
||||
# Split using master grid zones
|
||||
# Use makeTiles with FILTERED grid (only overlapping tiles)
|
||||
tiles_list <- terra::makeTiles(
|
||||
x = raster,
|
||||
y = master_grid_vect,
|
||||
y = filtered_grid_vect,
|
||||
filename = file.path(date_output_folder, "tile.tif"),
|
||||
overwrite = TRUE
|
||||
)
|
||||
|
||||
cat(" ✓ Created ", length(tiles_list), " tiles\n", sep = "")
|
||||
|
||||
# Rename tiles to [DATE]_[TILE_ID].tif
|
||||
cat(" Renaming tiles...\n")
|
||||
|
||||
for (tile_idx in seq_along(tiles_list)) {
|
||||
source_file <- file.path(date_output_folder, paste0("tile", tile_idx, ".tif"))
|
||||
tile_id <- sprintf("%02d", tile_idx)
|
||||
tile_id <- filtered_grid_sf$tile_id[tile_idx]
|
||||
final_file <- file.path(date_output_folder, paste0(date_str, "_", tile_id, ".tif"))
|
||||
|
||||
if (file.exists(source_file)) {
|
||||
|
|
@ -214,7 +337,7 @@ for (file_idx in seq_along(tiff_files)) {
|
|||
}
|
||||
}
|
||||
|
||||
cat(" ✓ Saved ", N_TILES, " tiles in folder: ", date_str, "/\n", sep = "")
|
||||
cat(" ✓ Created ", length(tiles_list), " tiles\n", sep = "")
|
||||
total_tiles_created <- total_tiles_created + length(tiles_list)
|
||||
}
|
||||
|
||||
|
|
@ -222,7 +345,7 @@ for (file_idx in seq_along(tiff_files)) {
|
|||
# VERIFICATION
|
||||
# ============================================================================
|
||||
|
||||
cat("\n[7] Verifying output...\n")
|
||||
cat("\n[9] Verifying output...\n")
|
||||
|
||||
# Count tiles per date folder
|
||||
date_folders <- list.dirs(OUTPUT_FOLDER, full.names = FALSE, recursive = FALSE)
|
||||
|
|
@ -241,31 +364,44 @@ for (date_folder in date_folders) {
|
|||
# SUMMARY
|
||||
# ============================================================================
|
||||
|
||||
cat("SUMMARY\n")
|
||||
cat("\n\n========== SUMMARY ==========\n")
|
||||
|
||||
cat("\nMaster Grid:\n")
|
||||
cat(" - Dimensions: ", GRID_NCOLS, "×", GRID_NROWS, "=", N_TILES, " tiles\n", sep = "")
|
||||
cat(" - File: master_grid_5x5.geojson\n")
|
||||
cat("\nGrid Configuration:\n")
|
||||
cat(" - Dimensions: ", GRID_NCOLS, "×", GRID_NROWS, " = ", N_TILES, " total tile positions\n", sep = "")
|
||||
cat(" - Storage subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
|
||||
cat(" - Master grid file: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
|
||||
|
||||
cat("\nTIFF Splitting:\n")
|
||||
cat(" - TIFFs processed: ", length(tiff_files), "\n", sep = "")
|
||||
cat(" - Total tile files created: ", total_tile_files, "\n", sep = "")
|
||||
cat(" - Expected total: ", length(tiff_files) * N_TILES, "\n", sep = "")
|
||||
cat("\nField Filtering:\n")
|
||||
cat(" - Field boundaries loaded from pivot.geojson\n")
|
||||
cat(" - Only overlapping tiles created (empty tiles deleted)\n")
|
||||
cat(" - Significant storage savings for sparse fields!\n")
|
||||
|
||||
cat("\nDirectory Structure:\n")
|
||||
cat(" daily_tiles/\n")
|
||||
cat(" ├── master_grid_5x5.geojson\n")
|
||||
for (date_folder in date_folders) {
|
||||
cat(" ├── ", date_folder, "/\n", sep = "")
|
||||
cat(" │ ├── ", date_folder, "_01.tif\n", sep = "")
|
||||
cat(" │ ├── ", date_folder, "_02.tif\n", sep = "")
|
||||
cat(" │ └── ...\n")
|
||||
cat("\nProcessing Summary:\n")
|
||||
cat(" - Total TIFF files: ", length(tiff_files), "\n", sep = "")
|
||||
cat(" - Dates skipped (already processed): ", dates_skipped, "\n", sep = "")
|
||||
cat(" - Dates processed: ", dates_processed, "\n", sep = "")
|
||||
cat(" - Total tiles created: ", total_tiles_created, "\n", sep = "")
|
||||
if (dates_processed > 0) {
|
||||
avg_tiles_per_date <- total_tiles_created / dates_processed
|
||||
cat(" - Average tiles per date: ", round(avg_tiles_per_date, 1), "\n", sep = "")
|
||||
}
|
||||
|
||||
cat("\nKey Properties:\n")
|
||||
cat(" - All tiles align perfectly across dates\n")
|
||||
cat(" - Tile 01 covers same area in all dates\n")
|
||||
cat(" - Date folders provide organizational hierarchy\n")
|
||||
cat(" - Can do time-series analysis per tile\n")
|
||||
cat("\nDirectory Structure:\n")
|
||||
cat(" laravel_app/storage/app/", PROJECT, "/daily_tiles_split/\n", sep = "")
|
||||
cat(" └── ", GRID_SIZE_LABEL, "/\n", sep = "")
|
||||
cat(" ├── master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
|
||||
cat(" ├── 2024-01-15/\n")
|
||||
cat(" │ ├── 2024-01-15_01.tif (only overlapping tiles)\n")
|
||||
cat(" │ ├── 2024-01-15_05.tif\n")
|
||||
cat(" │ └── ...\n")
|
||||
cat(" ├── 2024-01-16/\n")
|
||||
cat(" │ └── ...\n")
|
||||
cat(" └── ...\n")
|
||||
|
||||
cat("✓ Script complete!\n")
|
||||
cat("\n⭐ Key Benefits:\n")
|
||||
cat(" ✓ Overlap-filtered: No wasted empty tiles\n")
|
||||
cat(" ✓ Skip existing dates: Resume-safe, idempotent\n")
|
||||
cat(" ✓ Grid versioning: Future 10x10 grids stored separately\n")
|
||||
cat(" ✓ Disk efficient: Storage reduced for sparse ROIs\n")
|
||||
|
||||
cat("\n✓ Script complete!\n")
|
||||
|
|
|
|||
Loading…
Reference in a new issue