SmartCane/r_app/01_create_master_grid_and_split_tiffs.R
Timon fc7e5f1ee0 Enhance download progress feedback and optimize tile overlap checks
- Added a progress bar to the tile download process for better user feedback.
- Simplified the tile overlap checking logic in the R script to improve performance and readability.
2026-01-13 11:30:38 +01:00

406 lines
15 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#' Combined: Create master grid and split TIFFs into tiles
#' ====================================================================
#'
#' Purpose:
#' 1. Check all daily TIFFs for matching extents
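#' 2. Create a master grid covering all TIFFs (5×5 by default; 1×1 when the ROI is smaller than 10×10 km)
#' 3. Split each daily TIFF into tiles using the master grid (only tiles whose bounding box overlaps a field are written)
#' 4. Save tiles in date-specific folders: daily_tiles_split/[GRID]/[DATE]/[DATE]_[TILE_ID].tif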
library(terra)
library(sf)
# ============================================================================
# CONFIGURATION
# ============================================================================
# Project whose storage tree (laravel_app/storage/app/<PROJECT>/) is read and written
PROJECT <- "angata"

# Folder holding the merged daily input TIFFs
TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b")

# Grid dimensions: 5 x 5 gives 25 tiles, 10 x 10 gives 100 tiles, etc.
GRID_NROWS <- 5
GRID_NCOLS <- 5

# "<cols>x<rows>" label; names the per-grid-size subfolder under daily_tiles_split/
GRID_SIZE_LABEL <- paste(GRID_NCOLS, GRID_NROWS, sep = "x")
OUTPUT_FOLDER <- file.path(
  "laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL
)

# Field boundaries used to decide which tiles are worth writing
GEOJSON_PATH <- file.path("laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson")

# Announce the run configuration
cat(
  "Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n",
  "Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n",
  sep = ""
)
# ============================================================================
# PART 1: CHECK TIFF EXTENTS AND CREATE MASTER GRID
# ============================================================================
cat("\n[PART 1] Creating Master Grid\n")

# Load field boundaries for overlap checking
cat("\n[1] Loading field boundaries from GeoJSON...\n")
if (!file.exists(GEOJSON_PATH)) {
  stop("GeoJSON file not found at: ", GEOJSON_PATH, "\n",
       "Please ensure ", PROJECT, " has a pivot.geojson file.")
}

# The same file is read twice: once as an sf data frame, once as a terra SpatVector
field_boundaries_sf <- st_read(GEOJSON_PATH, quiet = TRUE)
field_boundaries_vect <- terra::vect(GEOJSON_PATH)
cat(" ✓ Loaded ", nrow(field_boundaries_sf), " field(s)\n", sep = "")

# Pick the first available name column, in priority order: name, field, field_name.
# intersect() keeps the order of its first argument, so the priority is preserved.
name_column <- intersect(c("name", "field", "field_name"), names(field_boundaries_sf))
if (length(name_column) > 0) {
  field_names <- field_boundaries_sf[[name_column[1]]]
} else {
  # Fall back to row indices; seq_len() is empty for an empty layer,
  # whereas 1:nrow() would produce c(1, 0)
  field_names <- seq_len(nrow(field_boundaries_sf))
}
cat(" Fields: ", paste(field_names, collapse = ", "), "\n", sep = "")
# Helper: does a tile's bounding box intersect the bounding box of any field?
#
# tile_extent: object exposing $xmin, $xmax, $ymin, $ymax (e.g. the result of st_bbox())
# field_geoms: collection of field geometries; element i is taken with field_geoms[i]
#
# Returns TRUE as soon as one non-empty field's bbox intersects the tile bbox,
# FALSE when none does. Boxes that merely touch count as overlapping.
# If anything fails while checking, the error is printed and TRUE is returned,
# so the tile is kept rather than dropped.
tile_overlaps_fields <- function(tile_extent, field_geoms) {
  tryCatch({
    for (idx in seq_along(field_geoms)) {
      candidate <- field_geoms[idx]
      if (!st_is_empty(candidate)) {
        bb <- st_bbox(candidate)
        # Two boxes are disjoint on an axis when one ends before the other begins
        apart_in_x <- tile_extent$xmax < bb$xmin || tile_extent$xmin > bb$xmax
        apart_in_y <- tile_extent$ymax < bb$ymin || tile_extent$ymin > bb$ymax
        if (!apart_in_x && !apart_in_y) {
          return(TRUE)
        }
      }
    }
    FALSE
  }, error = function(e) {
    cat(" ⚠️ Error checking overlap: ", e$message, "\n", sep = "")
    TRUE
  })
}
cat("\n[2] Checking TIFF extents...\n")

# Input files, sorted by name (bare file names, no directory part)
tiff_files <- sort(list.files(TIFF_FOLDER, pattern = "\\.tif$", full.names = FALSE))
if (length(tiff_files) == 0) {
  stop("No TIFF files found in ", TIFF_FOLDER)
}
cat(" Found ", length(tiff_files), " TIFF file(s)\n", sep = "")
cat(" Checking extents... (this may take a while)\n")

# Read every extent once, up front; the list is sized in advance
extents <- vector("list", length(tiff_files))
for (i in seq_along(tiff_files)) {
  extents[[i]] <- terra::ext(terra::rast(file.path(TIFF_FOLDER, tiff_files[i])))

  # Progress line every 50 files
  if (i %% 50 == 0) {
    cat(" Checked ", i, "/", length(tiff_files), " files\n", sep = "")
  }
}
cat(" ✓ All extents loaded\n")
# Check whether every TIFF has the same extent as the first one
cat("\n[3] Comparing extents...\n")
tolerance <- 1e-8
all_match <- TRUE
first_ext <- extents[[1]]

# seq_along(extents)[-1] is empty when there is a single TIFF; the previous
# 2:length(extents) evaluated to c(2, 1) in that case and failed on extents[[2]].
for (i in seq_along(extents)[-1]) {
  curr_ext <- extents[[i]]
  same_extent <- (
    abs(curr_ext$xmin - first_ext$xmin) < tolerance &&
      abs(curr_ext$xmax - first_ext$xmax) < tolerance &&
      abs(curr_ext$ymin - first_ext$ymin) < tolerance &&
      abs(curr_ext$ymax - first_ext$ymax) < tolerance
  )
  if (!same_extent) {
    all_match <- FALSE
    # Report the reference file and the offending file side by side
    cat(" ✗ Extent mismatch: ", tiff_files[1], " vs ", tiff_files[i], "\n", sep = "")
    cat(" File 1: X [", round(first_ext$xmin, 6), ", ", round(first_ext$xmax, 6), "] ",
        "Y [", round(first_ext$ymin, 6), ", ", round(first_ext$ymax, 6), "]\n", sep = "")
    cat(" File ", i, ": X [", round(curr_ext$xmin, 6), ", ", round(curr_ext$xmax, 6), "] ",
        "Y [", round(curr_ext$ymin, 6), ", ", round(curr_ext$ymax, 6), "]\n", sep = "")
  }
}

if (all_match) {
  cat(" ✓ All TIFF extents MATCH perfectly!\n")
} else {
  cat(" ⚠️ Extents differ - creating master extent covering all\n")
}
# Master extent: the union bounding box of all TIFF extents
cat("\n[4] Creating master extent...\n")
master_xmin <- min(vapply(extents, function(e) e$xmin, numeric(1)))
master_xmax <- max(vapply(extents, function(e) e$xmax, numeric(1)))
master_ymin <- min(vapply(extents, function(e) e$ymin, numeric(1)))
master_ymax <- max(vapply(extents, function(e) e$ymax, numeric(1)))

# Extent spans scaled by 111320 (treated as metres per coordinate unit)
# NOTE(review): presumably assumes coordinates are in degrees - confirm against the TIFF CRS
x_range_m <- 111320 * (master_xmax - master_xmin)
y_range_m <- 111320 * (master_ymax - master_ymin)

cat(
  " Master extent: X [", round(master_xmin, 6), ", ", round(master_xmax, 6), "] ",
  "Y [", round(master_ymin, 6), ", ", round(master_ymax, 6), "]\n",
  " Coverage: ", round(x_range_m / 1000, 1), "km × ", round(y_range_m / 1000, 1), "km\n",
  sep = ""
)
# Auto-determine grid size based on ROI dimensions.
# A small ROI (< 10 km in both directions) is kept as a single tile; otherwise the
# grid configured in GRID_NROWS / GRID_NCOLS is used as-is. Previously this branch
# forced 5x5 and silently discarded any other configured grid size.
# NOTE(review): in the 1x1 case GRID_SIZE_LABEL / OUTPUT_FOLDER were computed earlier
# from the configured size and are not updated here, so the single-tile grid is still
# stored under the configured label - confirm whether downstream code relies on that.
if (x_range_m < 10000 && y_range_m < 10000) {
  cat("\n ⚠️ ROI is small (< 10×10 km). Using single tile (1×1 grid) - no splitting needed!\n")
  GRID_NROWS <- 1
  GRID_NCOLS <- 1
} else {
  cat("\n ROI size allows tiling. Using ", GRID_NCOLS, "×", GRID_NROWS,
      " grid (", GRID_NROWS * GRID_NCOLS, " tiles per date).\n", sep = "")
}

# Total number of tile positions in the grid used for this run
N_TILES <- GRID_NROWS * GRID_NCOLS
# Reuse the master grid if it was already written for this grid label, else build it
cat("\n[5] Checking if master grid exists...\n")
master_grid_file <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson"))

if (file.exists(master_grid_file)) {
  cat(" ✓ Master grid exists! Loading existing grid...\n")
  master_grid_sf <- st_read(master_grid_file, quiet = TRUE)
  master_grid_vect <- terra::vect(master_grid_file)
  cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "")

  # A grid saved by an earlier run may not match the grid size chosen for this run;
  # keep using it (existing tiles depend on it) but make the mismatch visible.
  if (nrow(master_grid_sf) != N_TILES) {
    cat(" ⚠️ Existing grid has ", nrow(master_grid_sf), " tiles but this run expects ",
        N_TILES, " - delete ", master_grid_file, " to regenerate it\n", sep = "")
  }
} else {
  cat(" Grid does not exist. Creating new master grid...\n")

  # Build a GRID_NCOLS x GRID_NROWS grid over the master extent
  cat("\n[6] Creating ", GRID_NCOLS, "×", GRID_NROWS, " master grid...\n", sep = "")
  master_bbox <- st_bbox(c(
    xmin = master_xmin,
    xmax = master_xmax,
    ymin = master_ymin,
    ymax = master_ymax
  ), crs = 4326)
  bbox_sf <- st_as_sfc(master_bbox)
  master_grid <- st_make_grid(
    bbox_sf,
    n = c(GRID_NCOLS, GRID_NROWS),
    what = "polygons"
  )

  # tile_id is the zero-padded cell index in the order st_make_grid() returns cells
  master_grid_sf <- st_sf(
    tile_id = sprintf("%02d", seq_along(master_grid)),
    geometry = master_grid
  )
  cat(" ✓ Created grid with ", length(master_grid), " cells\n", sep = "")

  # Convert to SpatVector for use in makeTiles
  master_grid_vect <- terra::vect(master_grid_sf)

  # Save master grid so later runs reuse exactly the same tiling
  if (!dir.exists(OUTPUT_FOLDER)) {
    dir.create(OUTPUT_FOLDER, recursive = TRUE, showWarnings = FALSE)
  }
  st_write(master_grid_sf, master_grid_file, delete_dsn = TRUE, quiet = TRUE)
  cat(" ✓ Master grid saved to: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
}
# ============================================================================
# PART 2: CREATE FILTERED GRID (ONLY OVERLAPPING TILES)
# ============================================================================
cat("\n[PART 2] Creating Filtered Grid (only overlapping tiles)\n")
cat("\n[7] Filtering master grid to only overlapping tiles...\n")

# st_geometry() works whatever the geometry column is called
field_geoms <- st_geometry(field_boundaries_sf)
tile_geoms <- st_geometry(master_grid_sf)

# One TRUE/FALSE per grid cell: does its bbox overlap the bbox of any field?
tile_has_field <- vapply(
  seq_along(tile_geoms),
  function(tile_idx) tile_overlaps_fields(st_bbox(tile_geoms[tile_idx]), field_geoms),
  logical(1)
)
overlapping_tile_indices <- which(tile_has_field)

# Report against the grid actually in use (a loaded grid may differ from N_TILES)
n_grid_tiles <- length(tile_geoms)
cat(" Found ", length(overlapping_tile_indices), " overlapping tiles out of ", n_grid_tiles, "\n", sep = "")
cat(" Reduction: ", n_grid_tiles - length(overlapping_tile_indices), " empty tiles will NOT be created\n", sep = "")

# With nothing to tile, every later step would operate on an empty grid
if (length(overlapping_tile_indices) == 0) {
  stop("No grid tile overlaps any field boundary - check that the field GeoJSON and ",
       "the TIFFs cover the same area and use the same coordinate system.",
       call. = FALSE)
}

# Create filtered grid with only overlapping tiles; tile_id keeps the cell's
# position in the full master grid (zero-padded)
filtered_grid_sf <- master_grid_sf[overlapping_tile_indices, ]
filtered_grid_sf$tile_id <- sprintf("%02d", overlapping_tile_indices)

# Convert to SpatVector for makeTiles
filtered_grid_vect <- terra::vect(filtered_grid_sf)
cat(" ✓ Filtered grid ready: ", nrow(filtered_grid_sf), " tiles to create per date\n", sep = "")
# ============================================================================
# PART 3: SPLIT EACH TIFF INTO TILES (INDEPENDENT, PER-DATE, RESUMABLE)
# ============================================================================
cat("\n[PART 3] Tiling Individual Dates (Per-Date Processing)\n")
cat("\n[8] Processing each date independently...\n")
cat(" (This process is RESUMABLE - you can stop and restart anytime)\n\n")

total_tiles_created <- 0
dates_skipped <- 0
dates_processed <- 0

# makeTiles() is given the template "tile.tif" and appends the tile number, so
# tile<N>.tif is a temporary name that only survives if a run was interrupted
temp_tile_pattern <- "^tile[0-9]+\\.tif$"

for (file_idx in seq_along(tiff_files)) {
  tiff_file <- tiff_files[file_idx]
  date_str <- sub("\\.tif$", "", tiff_file)

  # Date-specific output folder
  date_output_folder <- file.path(OUTPUT_FOLDER, date_str)

  # CHECK: skip dates that were already completed (resume-safe)
  if (dir.exists(date_output_folder)) {
    existing_tiles <- list.files(date_output_folder, pattern = "\\.tif$")
    existing_tiles <- existing_tiles[!grepl("master_grid", existing_tiles)]
    leftover_tiles <- existing_tiles[grepl(temp_tile_pattern, existing_tiles)]

    if (length(leftover_tiles) > 0) {
      # Unrenamed temporary tiles mean the previous run stopped mid-date:
      # discard them and redo the date instead of treating it as finished
      cat("[", file_idx, "/", length(tiff_files), "] REDO: ", date_str,
          " (", length(leftover_tiles), " temporary tile(s) left by an interrupted run)\n", sep = "")
      file.remove(file.path(date_output_folder, leftover_tiles))
    } else if (length(existing_tiles) > 0) {
      cat("[", file_idx, "/", length(tiff_files), "] SKIP: ", date_str,
          " (", length(existing_tiles), " tiles already exist)\n", sep = "")
      dates_skipped <- dates_skipped + 1
      next # Skip this date
    }
  }

  cat("[", file_idx, "/", length(tiff_files), "] Processing: ", date_str, "\n", sep = "")
  dates_processed <- dates_processed + 1

  # Load TIFF for this date only
  tiff_path <- file.path(TIFF_FOLDER, tiff_file)
  raster <- terra::rast(tiff_path)
  dims <- dim(raster)
  cat(" Dimensions: ", dims[2], "×", dims[1], " pixels\n", sep = "")

  if (!dir.exists(date_output_folder)) {
    dir.create(date_output_folder, recursive = TRUE, showWarnings = FALSE)
  }

  cat(" Creating ", length(overlapping_tile_indices), " tiles...\n", sep = "")

  # Use makeTiles with FILTERED grid (only overlapping tiles)
  tiles_list <- terra::makeTiles(
    x = raster,
    y = filtered_grid_vect,
    filename = file.path(date_output_folder, "tile.tif"),
    overwrite = TRUE
  )

  # Rename tile<N>.tif to [DATE]_[TILE_ID].tif.
  # Iterate over every grid row (not just the number of files returned) so that a
  # tile is still picked up when makeTiles() wrote fewer files than grid cells.
  tiles_renamed <- 0
  for (tile_idx in seq_len(nrow(filtered_grid_sf))) {
    source_file <- file.path(date_output_folder, paste0("tile", tile_idx, ".tif"))
    if (!file.exists(source_file)) {
      next
    }
    tile_id <- filtered_grid_sf$tile_id[tile_idx]
    final_file <- file.path(date_output_folder, paste0(date_str, "_", tile_id, ".tif"))
    if (file.rename(source_file, final_file)) {
      tiles_renamed <- tiles_renamed + 1
    } else {
      cat(" ⚠️ Could not rename ", basename(source_file), " to ", basename(final_file), "\n", sep = "")
    }
  }

  # Count tiles that reached their final name, not just files makeTiles() reported
  cat(" ✓ Created ", tiles_renamed, " tiles\n", sep = "")
  total_tiles_created <- total_tiles_created + tiles_renamed
}
# ============================================================================
# VERIFICATION
# ============================================================================
cat("\n[9] Verifying output...\n")

# Every immediate subfolder of OUTPUT_FOLDER is reported as a date folder
date_folders <- sort(setdiff(
  list.dirs(OUTPUT_FOLDER, full.names = FALSE, recursive = FALSE),
  "."
))

# Count the .tif files in each date folder (anything named like master_grid is ignored)
total_tile_files <- 0
for (date_folder in date_folders) {
  tiles_in_folder <- grep(
    "master_grid",
    list.files(file.path(OUTPUT_FOLDER, date_folder), pattern = "\\.tif$"),
    value = TRUE,
    invert = TRUE
  )
  total_tile_files <- total_tile_files + length(tiles_in_folder)
  cat(" ", date_folder, ": ", length(tiles_in_folder), " tiles\n", sep = "")
}
# ============================================================================
# SUMMARY
# ============================================================================
# Final report: grid configuration, filtering notes, processing counts, layout
cat(
  "\n\n========== SUMMARY ==========\n",
  "\nGrid Configuration:\n",
  " - Dimensions: ", GRID_NCOLS, "×", GRID_NROWS, " = ", N_TILES, " total tile positions\n",
  " - Storage subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n",
  " - Master grid file: master_grid_", GRID_SIZE_LABEL, ".geojson\n",
  sep = ""
)

cat(
  "\nField Filtering:\n",
  " - Field boundaries loaded from pivot.geojson\n",
  " - Only overlapping tiles created (empty tiles deleted)\n",
  " - Significant storage savings for sparse fields!\n",
  sep = ""
)

cat(
  "\nProcessing Summary:\n",
  " - Total TIFF files: ", length(tiff_files), "\n",
  " - Dates skipped (already processed): ", dates_skipped, "\n",
  " - Dates processed: ", dates_processed, "\n",
  " - Total tiles created: ", total_tiles_created, "\n",
  sep = ""
)
# The average is only meaningful when at least one date was tiled in this run
if (dates_processed > 0) {
  avg_tiles_per_date <- total_tiles_created / dates_processed
  cat(" - Average tiles per date: ", round(avg_tiles_per_date, 1), "\n", sep = "")
}

# Illustrative layout of the output tree (dates shown are examples)
cat(
  "\nDirectory Structure:\n",
  " laravel_app/storage/app/", PROJECT, "/daily_tiles_split/\n",
  " └── ", GRID_SIZE_LABEL, "/\n",
  " ├── master_grid_", GRID_SIZE_LABEL, ".geojson\n",
  " ├── 2024-01-15/\n",
  " │ ├── 2024-01-15_01.tif (only overlapping tiles)\n",
  " │ ├── 2024-01-15_05.tif\n",
  " │ └── ...\n",
  " ├── 2024-01-16/\n",
  " │ └── ...\n",
  " └── ...\n",
  sep = ""
)

cat(
  "\n⭐ Key Benefits:\n",
  " ✓ Overlap-filtered: No wasted empty tiles\n",
  " ✓ Skip existing dates: Resume-safe, idempotent\n",
  " ✓ Grid versioning: Future 10x10 grids stored separately\n",
  " ✓ Disk efficient: Storage reduced for sparse ROIs\n",
  "\n✓ Script complete!\n",
  sep = ""
)