#' Combined: Create master grid and split TIFFs into tiles
#' ====================================================================
#'
#' Purpose:
#' 1. Check all daily TIFFs for matching extents
#' 2. Create master 5×5 grid covering all TIFFs
#' 3. Split each daily TIFF into 25 tiles using the master grid
#' 4. Save tiles in date-specific folders: daily_tiles/[DATE]/[DATE]_[TILE_ID].tif
#'
#' Usage (PowerShell), with optional start and end dates:
#' & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-18

library(terra)
library(sf)

# ============================================================================
# CONFIGURATION & COMMAND-LINE ARGUMENTS
# ============================================================================

# Optional positional arguments restrict processing to a date window.
# Example: Rscript 10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-17
args <- commandArgs(trailingOnly = TRUE)

# First argument (if given) is the inclusive start date; NULL means "no limit".
start_date <- if (length(args) >= 1) as.Date(args[1]) else NULL
if (!is.null(start_date)) {
  cat("Filtering: start date =", as.character(start_date), "\n")
}

# Second argument (if given) is the inclusive end date; NULL means "no limit".
end_date <- if (length(args) >= 2) as.Date(args[2]) else NULL
if (!is.null(end_date)) {
  cat("Filtering: end date =", as.character(end_date), "\n")
}

# Project name and the folder holding one merged TIFF per date
PROJECT <- "angata"
TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b")

# GRID SIZE CONFIGURATION - Change this to use different grid sizes
# Options: 5x5 (25 tiles), 10x10 (100 tiles), etc.
# This determines the subfolder: daily_tiles_split/5x5/, daily_tiles_split/10x10/, etc.
GRID_NROWS <- 5
GRID_NCOLS <- 5

# Construct grid-specific subfolder path
GRID_SIZE_LABEL <- paste0(GRID_NCOLS, "x", GRID_NROWS)
OUTPUT_FOLDER <- file.path(
  "laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL
)

# Field boundaries used for overlap checking
GEOJSON_PATH <- file.path(
  "laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson"
)

cat("Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n", sep = "")
cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")

# ============================================================================
# PART 1: CHECK TIFF EXTENTS AND CREATE MASTER GRID
# ============================================================================

cat("\n[PART 1] Creating Master Grid\n")

cat("\n[1] Checking for existing master grid...\n")

# A previously saved grid is reused as-is; field boundaries are only needed
# (and only loaded) when a new grid has to be built.
MASTER_GRID_PATH <- file.path(
  OUTPUT_FOLDER,
  paste0("master_grid_", GRID_SIZE_LABEL, ".geojson")
)

if (file.exists(MASTER_GRID_PATH)) {
  cat(" ✓ Found existing master grid at:\n ", MASTER_GRID_PATH, "\n", sep = "")
  master_grid_sf <- st_read(MASTER_GRID_PATH, quiet = TRUE)
  field_boundaries_sf <- NULL # No need to load pivot.geojson
  field_boundaries_vect <- NULL
  cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "")
} else {
  # No existing grid - need to create one from pivot.geojson
  cat(" No existing grid found. Creating new one from pivot.geojson...\n")

  if (!file.exists(GEOJSON_PATH)) {
    stop("GeoJSON file not found at: ", GEOJSON_PATH, "\n",
         "Please ensure ", PROJECT, " has a pivot.geojson file, or run this script ",
         "from the same directory as a previous successful run (grid already exists).")
  }

  field_boundaries_sf <- st_read(GEOJSON_PATH, quiet = TRUE)
  field_boundaries_vect <- terra::vect(GEOJSON_PATH)

  cat(" ✓ Loaded ", nrow(field_boundaries_sf), " field(s) from GeoJSON\n", sep = "")
}

# Report field names (only when field boundaries were loaded). The first
# matching column among name / field / field_name wins; otherwise fall back
# to row indices.
if (!is.null(field_boundaries_sf)) {
  name_columns <- intersect(
    c("name", "field", "field_name"),
    names(field_boundaries_sf)
  )
  if (length(name_columns) > 0) {
    field_names <- field_boundaries_sf[[name_columns[1]]]
  } else {
    # seq_len() stays empty for zero rows, unlike 1:nrow()
    field_names <- seq_len(nrow(field_boundaries_sf))
  }
  cat(" Fields: ", paste(field_names, collapse = ", "), "\n", sep = "")
}

#' Check whether a tile's bounding box overlaps the bounding box of any field
#'
#' Only bounding boxes are compared (no exact geometry intersection), so a
#' tile can be reported as overlapping when it merely touches a field's bbox.
#'
#' @param tile_extent Object exposing `$xmin`, `$xmax`, `$ymin`, `$ymax`
#'   (called in this script with the result of `st_bbox()`).
#' @param field_geoms Geometry list of the fields; each element must be
#'   accepted by `st_is_empty()` and `st_bbox()`.
#' @return `TRUE` if at least one non-empty field bbox overlaps the tile bbox,
#'   `FALSE` otherwise. On any error the function logs the message and returns
#'   `TRUE`, so a tile is never dropped because of a failed check.
tile_overlaps_fields <- function(tile_extent, field_geoms) {
  tryCatch({
    for (i in seq_along(field_geoms)) {
      # Skip empty geometries
      if (st_is_empty(field_geoms[i])) {
        next
      }

      field_bbox <- st_bbox(field_geoms[i])

      # Two boxes overlap on an axis unless one ends before the other starts
      x_overlap <- !(tile_extent$xmax < field_bbox$xmin ||
                       tile_extent$xmin > field_bbox$xmax)
      y_overlap <- !(tile_extent$ymax < field_bbox$ymin ||
                       tile_extent$ymin > field_bbox$ymax)

      if (x_overlap && y_overlap) {
        return(TRUE) # Found overlap!
      }
    }

    FALSE # No overlap found
  }, error = function(e) {
    cat(" ⚠️ Error checking overlap: ", e$message, "\n", sep = "")
    TRUE # Default to including tile if there's an error
  })
}

cat("\n[2] Checking TIFF extents...\n")

tiff_files <- list.files(TIFF_FOLDER, pattern = "\\.tif$", full.names = FALSE)
tiff_files <- sort(tiff_files)

# Filter by date range if specified
if (!is.null(start_date) || !is.null(end_date)) {
  cat("\nApplying date filter...\n")

  # optional = TRUE yields NA (instead of an error) for names that are not
  # dates; such files are excluded below rather than turning into NA entries.
  file_dates <- as.Date(sub("\\.tif$", "", tiff_files), optional = TRUE)
  is_dated <- !is.na(file_dates)

  if (!is.null(start_date) && !is.null(end_date)) {
    keep_idx <- is_dated & file_dates >= start_date & file_dates <= end_date
    cat(" Date range: ", as.character(start_date), " to ", as.character(end_date), "\n", sep = "")
  } else if (!is.null(start_date)) {
    keep_idx <- is_dated & file_dates >= start_date
    cat(" From: ", as.character(start_date), "\n", sep = "")
  } else {
    keep_idx <- is_dated & file_dates <= end_date
    cat(" Until: ", as.character(end_date), "\n", sep = "")
  }

  tiff_files <- tiff_files[keep_idx]
  cat(" ✓ Filtered to ", length(tiff_files), " file(s)\n", sep = "")
}

if (length(tiff_files) == 0) {
  stop("No TIFF files found in ", TIFF_FOLDER)
}

cat(" Found ", length(tiff_files), " TIFF file(s)\n", sep = "")
cat(" Checking extents... (this may take a while)\n")

# Load all extents - ONE TIME, upfront
extents <- vector("list", length(tiff_files))
for (i in seq_along(tiff_files)) {
  tiff_path <- file.path(TIFF_FOLDER, tiff_files[i])
  raster <- terra::rast(tiff_path)
  extents[[i]] <- terra::ext(raster)

  # Progress indicator every 50 files
  if (i %% 50 == 0) {
    cat(" Checked ", i, "/", length(tiff_files), " files\n", sep = "")
  }
}

cat(" ✓ All extents loaded\n")

# Check if all extents match the first one
cat("\n[3] Comparing extents...\n")

tolerance <- 1e-8
all_match <- TRUE
first_ext <- extents[[1]]

# seq_along(...)[-1] is empty for a single file; 2:length() would yield
# c(2, 1) and index past the end of the list.
for (i in seq_along(extents)[-1]) {
  curr_ext <- extents[[i]]
  same_extent <- (
    abs(curr_ext$xmin - first_ext$xmin) < tolerance &&
      abs(curr_ext$xmax - first_ext$xmax) < tolerance &&
      abs(curr_ext$ymin - first_ext$ymin) < tolerance &&
      abs(curr_ext$ymax - first_ext$ymax) < tolerance
  )

  if (!same_extent) {
    all_match <- FALSE
    cat(" ✗ Extent mismatch: ", tiff_files[1], " vs ", tiff_files[i], "\n", sep = "")
    cat(" File 1: X [", round(first_ext$xmin, 6), ", ", round(first_ext$xmax, 6), "] ",
        "Y [", round(first_ext$ymin, 6), ", ", round(first_ext$ymax, 6), "]\n", sep = "")
    cat(" File ", i, ": X [", round(curr_ext$xmin, 6), ", ", round(curr_ext$xmax, 6), "] ",
        "Y [", round(curr_ext$ymin, 6), ", ", round(curr_ext$ymax, 6), "]\n", sep = "")
  }
}

if (all_match) {
  cat(" ✓ All TIFF extents MATCH perfectly!\n")
} else {
  cat(" ⚠️ Extents differ - creating master extent covering all\n")
}

# Create master extent: the union bounding box of every TIFF
cat("\n[4] Creating master extent...\n")

master_xmin <- min(vapply(extents, function(e) e$xmin, numeric(1)))
master_xmax <- max(vapply(extents, function(e) e$xmax, numeric(1)))
master_ymin <- min(vapply(extents, function(e) e$ymin, numeric(1)))
master_ymax <- max(vapply(extents, function(e) e$ymax, numeric(1)))

# NOTE(review): 111320 looks like metres per degree, i.e. this assumes the
# rasters are in geographic coordinates and ignores the cos(latitude) factor
# for longitude - confirm this is acceptable for the project's ROI.
x_range_m <- (master_xmax - master_xmin) * 111320
y_range_m <- (master_ymax - master_ymin) * 111320

cat(" Master extent: X [", round(master_xmin, 6), ", ", round(master_xmax, 6), "] ",
    "Y [", round(master_ymin, 6), ", ", round(master_ymax, 6), "]\n", sep = "")
cat(" Coverage: ", round(x_range_m / 1000, 1), "km × ", round(y_range_m / 1000, 1), "km\n", sep = "")

# Auto-determine grid size based on ROI dimensions.
# NOTE(review): when the 1x1 branch is taken, GRID_SIZE_LABEL / OUTPUT_FOLDER /
# MASTER_GRID_PATH still carry the configured label (e.g. "5x5"); left as-is
# because other scripts may depend on that location - confirm.
if (x_range_m < 10000 && y_range_m < 10000) {
  cat("\n ⚠️ ROI is small (< 10×10 km). Using single tile (1×1 grid) - no splitting needed!\n")
  GRID_NROWS <- 1
  GRID_NCOLS <- 1
} else {
  # Keep the configured grid size instead of forcing 5x5, so that the grid
  # actually built matches GRID_SIZE_LABEL.
  cat("\n ROI size allows tiling. Using ", GRID_NCOLS, "×", GRID_NROWS,
      " grid (", GRID_NROWS * GRID_NCOLS, " tiles per date).\n", sep = "")
}

N_TILES <- GRID_NROWS * GRID_NCOLS

# Check if master grid already exists
cat("\n[5] Checking if master grid exists...\n")

master_grid_file <- file.path(
  OUTPUT_FOLDER,
  paste0("master_grid_", GRID_SIZE_LABEL, ".geojson")
)

if (file.exists(master_grid_file)) {
  cat(" ✓ Master grid exists! Loading existing grid...\n")
  master_grid_sf <- st_read(master_grid_file, quiet = TRUE)
  master_grid_vect <- terra::vect(master_grid_file)
  cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "")
} else {
  cat(" Grid does not exist. Creating new master grid...\n")

  # Create the GRID_NCOLS x GRID_NROWS grid over the master extent
  cat("\n[6] Creating ", GRID_NCOLS, "×", GRID_NROWS, " master grid...\n", sep = "")

  master_bbox <- st_bbox(c(
    xmin = master_xmin,
    xmax = master_xmax,
    ymin = master_ymin,
    ymax = master_ymax
  ), crs = 4326)

  bbox_sf <- st_as_sfc(master_bbox)

  master_grid <- st_make_grid(
    bbox_sf,
    n = c(GRID_NCOLS, GRID_NROWS),
    what = "polygons"
  )

  # Tile ids are the zero-padded position of each cell in the grid
  master_grid_sf <- st_sf(
    tile_id = sprintf("%02d", seq_along(master_grid)),
    geometry = master_grid
  )

  cat(" ✓ Created grid with ", length(master_grid), " cells\n", sep = "")

  # Convert to SpatVector for use in makeTiles
  master_grid_vect <- terra::vect(master_grid_sf)

  # Save master grid
  if (!dir.exists(OUTPUT_FOLDER)) {
    dir.create(OUTPUT_FOLDER, recursive = TRUE, showWarnings = FALSE)
  }
  st_write(master_grid_sf, master_grid_file, delete_dsn = TRUE, quiet = TRUE)
  cat(" ✓ Master grid saved to: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
}

# ============================================================================
# PART 2: CREATE FILTERED GRID (ONLY OVERLAPPING TILES)
# ============================================================================

cat("\n[PART 2] Creating Filtered Grid (only overlapping tiles)\n")

# field_boundaries_sf is non-NULL only when no grid existed at startup, i.e.
# the grid was built during this run. Testing file.exists(MASTER_GRID_PATH)
# here cannot work: Part 1 has already written that file, so the filter branch
# would never execute.
if (!is.null(field_boundaries_sf)) {
  cat("\n[7] Filtering master grid to only overlapping tiles...\n")

  # Check which tiles overlap with any field (bounding-box test)
  field_geoms <- st_geometry(field_boundaries_sf)
  tile_geoms <- st_geometry(master_grid_sf)
  tile_has_field <- vapply(
    seq_along(tile_geoms),
    function(tile_idx) {
      tile_overlaps_fields(st_bbox(tile_geoms[tile_idx]), field_geoms)
    },
    logical(1)
  )
  overlapping_tile_indices <- which(tile_has_field)

  cat(" Found ", length(overlapping_tile_indices), " overlapping tiles out of ", N_TILES, "\n", sep = "")
  cat(" Reduction: ", N_TILES - length(overlapping_tile_indices), " empty tiles will NOT be created\n", sep = "")

  if (length(overlapping_tile_indices) == 0) {
    # An empty grid cannot be tiled; keep every cell rather than failing later.
    cat(" ⚠️ No tile overlaps any field - keeping the full grid\n")
    overlapping_tile_indices <- seq_len(nrow(master_grid_sf))
  }

  # Create filtered grid with only overlapping tiles; ids keep the position
  # of each tile in the full grid.
  filtered_grid_sf <- master_grid_sf[overlapping_tile_indices, ]
  filtered_grid_sf$tile_id <- sprintf("%02d", overlapping_tile_indices)

  # Persist the filtered grid so that later runs, which load the grid from
  # disk and skip filtering, tile exactly the same cells.
  st_write(filtered_grid_sf, MASTER_GRID_PATH, delete_dsn = TRUE, quiet = TRUE)
} else {
  cat("\n[7] Using pre-filtered grid (already loaded from file)...\n")
  # Grid was loaded from disk - use it exactly as stored
  filtered_grid_sf <- master_grid_sf
}

# Convert to SpatVector for makeTiles
filtered_grid_vect <- terra::vect(filtered_grid_sf)

cat(" ✓ Filtered grid ready: ", nrow(filtered_grid_sf), " tiles to create per date\n", sep = "")

# ============================================================================
# PART 3: SPLIT EACH TIFF INTO TILES (INDEPENDENT, PER-DATE, RESUMABLE)
# ============================================================================

cat("\n[PART 3] Tiling Individual Dates (Per-Date Processing)\n")
cat("\n[8] Processing each date independently...\n")
cat(" (This process is RESUMABLE - you can stop and restart anytime)\n\n")

total_tiles_created <- 0
dates_skipped <- 0
dates_processed <- 0

for (file_idx in seq_along(tiff_files)) {
  tiff_file <- tiff_files[file_idx]
  date_str <- gsub("\\.tif$", "", tiff_file)

  # Date-specific output folder
  date_output_folder <- file.path(OUTPUT_FOLDER, date_str)

  # CHECK: Skip if date already processed (RESUME-SAFE).
  # Only finalised tiles ([DATE]_[TILE_ID].tif) count; un-renamed tileN.tif
  # files left by an interrupted run must not cause the date to be skipped.
  if (dir.exists(date_output_folder)) {
    existing_tiles <- list.files(date_output_folder, pattern = "\\.tif$")
    existing_tiles <- existing_tiles[startsWith(existing_tiles, paste0(date_str, "_"))]

    if (length(existing_tiles) > 0) {
      cat("[", file_idx, "/", length(tiff_files), "] SKIP: ", date_str,
          " (", length(existing_tiles), " tiles already exist)\n", sep = "")
      dates_skipped <- dates_skipped + 1
      next # Skip this date
    }
  }

  cat("[", file_idx, "/", length(tiff_files), "] Processing: ", date_str, "\n", sep = "")
  dates_processed <- dates_processed + 1

  # Load TIFF for this date only
  tiff_path <- file.path(TIFF_FOLDER, tiff_file)
  raster <- terra::rast(tiff_path)

  dims <- dim(raster)
  cat(" Dimensions: ", dims[2], "×", dims[1], " pixels\n", sep = "")

  if (!dir.exists(date_output_folder)) {
    dir.create(date_output_folder, recursive = TRUE, showWarnings = FALSE)
  }

  cat(" Creating ", nrow(filtered_grid_sf), " tiles...\n", sep = "")

  # Use makeTiles with FILTERED grid (only overlapping tiles)
  tiles_list <- terra::makeTiles(
    x = raster,
    y = filtered_grid_vect,
    filename = file.path(date_output_folder, "tile.tif"),
    overwrite = TRUE
  )

  # Rename tiles to [DATE]_[TILE_ID].tif. This relies on makeTiles naming its
  # outputs tile<N>.tif, with N following the row order of the grid.
  tile_positions <- seq_along(tiles_list)
  source_files <- file.path(
    date_output_folder,
    paste0("tile", tile_positions, ".tif")
  )
  final_files <- file.path(
    date_output_folder,
    paste0(date_str, "_", filtered_grid_sf$tile_id[tile_positions], ".tif")
  )
  was_written <- file.exists(source_files)
  if (any(was_written)) {
    file.rename(source_files[was_written], final_files[was_written])
  }

  cat(" ✓ Created ", length(tiles_list), " tiles\n", sep = "")
  total_tiles_created <- total_tiles_created + length(tiles_list)
}

# ============================================================================
# VERIFICATION
# ============================================================================
cat("\n[9] Verifying output...\n")

# Count the .tif files in each immediate subfolder of OUTPUT_FOLDER
date_folders <- list.dirs(OUTPUT_FOLDER, full.names = FALSE, recursive = FALSE)
date_folders <- sort(date_folders[date_folders != "."])

total_tile_files <- 0
for (date_folder in date_folders) {
  folder_tifs <- list.files(file.path(OUTPUT_FOLDER, date_folder), pattern = "\\.tif$")
  is_grid_file <- grepl("master_grid", folder_tifs)
  n_tiles_here <- length(folder_tifs[!is_grid_file])
  total_tile_files <- total_tile_files + n_tiles_here
  cat(" ", date_folder, ": ", n_tiles_here, " tiles\n", sep = "")
}

# ============================================================================
# SUMMARY
# ============================================================================

cat("\n\n========== SUMMARY ==========\n")

cat("\nGrid Configuration:\n")
cat(" - Dimensions: ", GRID_NCOLS, "×", GRID_NROWS, " = ", N_TILES, " total tile positions\n", sep = "")
cat(" - Storage subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
cat(" - Master grid file: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")

cat(
  "\nField Filtering:\n",
  " - Field boundaries loaded from pivot.geojson\n",
  " - Only overlapping tiles created (empty tiles deleted)\n",
  " - Significant storage savings for sparse fields!\n",
  sep = ""
)

cat("\nProcessing Summary:\n")
cat(" - Total TIFF files: ", length(tiff_files), "\n", sep = "")
cat(" - Dates skipped (already processed): ", dates_skipped, "\n", sep = "")
cat(" - Dates processed: ", dates_processed, "\n", sep = "")
cat(" - Total tiles created: ", total_tiles_created, "\n", sep = "")
# The average is only defined when at least one date was tiled in this run
if (dates_processed > 0) {
  avg_tiles_per_date <- total_tiles_created / dates_processed
  cat(" - Average tiles per date: ", round(avg_tiles_per_date, 1), "\n", sep = "")
}

cat("\nDirectory Structure:\n")
cat(" laravel_app/storage/app/", PROJECT, "/daily_tiles_split/\n", sep = "")
cat(" └── ", GRID_SIZE_LABEL, "/\n", sep = "")
cat(" ├── master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
cat(
  " ├── 2024-01-15/\n",
  " │ ├── 2024-01-15_01.tif (only overlapping tiles)\n",
  " │ ├── 2024-01-15_05.tif\n",
  " │ └── ...\n",
  " ├── 2024-01-16/\n",
  " │ └── ...\n",
  " └── ...\n",
  sep = ""
)

cat(
  "\n⭐ Key Benefits:\n",
  " ✓ Overlap-filtered: No wasted empty tiles\n",
  " ✓ Skip existing dates: Resume-safe, idempotent\n",
  " ✓ Grid versioning: Future 10x10 grids stored separately\n",
  " ✓ Disk efficient: Storage reduced for sparse ROIs\n",
  sep = ""
)

# ============================================================================
# WRITE TILING CONFIGURATION METADATA
# ============================================================================
# This metadata file is read by parameters_project.R to determine mosaic mode
# It allows script 40 to know what script 10 decided without re-computing

cat("\n[10] Writing tiling configuration metadata...\n")

config_file <- file.path(OUTPUT_FOLDER, "tiling_config.json")

# "true"/"false" as JSON expects; tiles exist whenever the grid has > 1 cell
has_tiles_flag <- tolower(N_TILES > 1)

# One JSON line per element; joined with newlines plus a trailing newline
config_lines <- c(
  '{',
  paste0(' "project": "', PROJECT, '",'),
  paste0(' "has_tiles": ', has_tiles_flag, ','),
  paste0(' "grid_size": "', GRID_SIZE_LABEL, '",'),
  paste0(' "grid_rows": ', GRID_NROWS, ','),
  paste0(' "grid_cols": ', GRID_NCOLS, ','),
  paste0(' "roi_width_km": ', round(x_range_m / 1000, 1), ','),
  paste0(' "roi_height_km": ', round(y_range_m / 1000, 1), ','),
  paste0(' "created_date": "', Sys.Date(), '",'),
  paste0(' "created_time": "', format(Sys.time(), "%H:%M:%S"), '"'),
  '}'
)
config_json <- paste0(paste(config_lines, collapse = "\n"), "\n")

writeLines(config_json, config_file)
cat(" ✓ Metadata saved to: tiling_config.json\n")
cat(" - has_tiles: ", has_tiles_flag, "\n", sep = "")
cat(" - grid_size: ", GRID_SIZE_LABEL, "\n", sep = "")

cat("\n✓ Script complete!\n")