- Fixed CI calculation: changed from NDVI (NIR-Red)/(NIR+Red) to correct NIR/Green-1 formula in: * process_single_tile() function * create_ci_band() utility function * Updated create_mask_and_crop() documentation - Renamed numbered shell scripts for clarity (matching R script numbering): * 01_run_planet_download -> 10_planet_download.sh * 02_run_ci_extraction -> 20_ci_extraction.sh * 03_run_growth_model -> 30_growth_model.sh * 04_run_mosaic_creation -> 40_mosaic_creation.sh * 09_run_calculate_kpis -> 80_calculate_kpis.sh * 10_run_kpi_report -> 90_kpi_report.sh - Archived obsolete shell scripts to old_sh/: * build_mosaic.sh, build_report.sh, interpolate_growth_model.sh * 05_run_dashboard_report.sh, 06_run_crop_messaging.sh * 11_run_yield_prediction.sh/ps1 * runcane.sh, runpython.sh, smartcane.sh, update_RDS.sh - Deleted test/debug files and temporary outputs: * analyze_*.R, benchmark_gpu_vs_cpu.py, convert_angata_harvest.py * debug_mosaic.R, examine_kpi_results.R, generate_sar_report.R * inspect_8band_structure.R, inspect_tif_bands.R * old_working_utils.R, predict_harvest_operational.R * run_kpi_calculation.R, run_report.R, simple_sar_test.R * data_validation_tool/, harvest_ci_pattern_analysis.png, kpi_debug.out - Enhanced harvest prediction: Added threshold tuning (0.40-0.45) and field type handling - Enhanced mosaic creation: Improved tile detection and routing logic
434 lines
16 KiB
R
434 lines
16 KiB
R
#' Combined: Create master grid and split TIFFs into tiles
|
||
#' ====================================================================
|
||
#'
|
||
#' Purpose:
|
||
#' 1. Check all daily TIFFs for matching extents
|
||
#' 2. Create master 5×5 grid covering all TIFFs
|
||
#' 3. Split each daily TIFF into tiles using the master grid (only tiles whose bounding box overlaps a field are written)
|
||
#' 4. Save tiles in date-specific folders: daily_tiles_split/[GRID]/[DATE]/[DATE]_[TILE_ID].tif
|
||
|
||
library(terra)
|
||
library(sf)
|
||
|
||
# ----------------------------------------------------------------------------
# CONFIGURATION
# ----------------------------------------------------------------------------

# Project whose storage tree (laravel_app/storage/app/<PROJECT>/) is processed.
PROJECT <- "angata"

# Folder that is scanned for the daily TIFFs to split.
TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b")

# Grid size (rows x columns), e.g. 5 x 5 = 25 tiles or 10 x 10 = 100 tiles.
# The label derived from these two values names the output subfolder:
# daily_tiles_split/5x5/, daily_tiles_split/10x10/, ...
GRID_NROWS <- 5
GRID_NCOLS <- 5

# "<cols>x<rows>" label used in folder and file names.
GRID_SIZE_LABEL <- sprintf("%sx%s", GRID_NCOLS, GRID_NROWS)

# Root folder for everything this script writes for the chosen grid size.
OUTPUT_FOLDER <- file.path(
  "laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL
)

# Field boundaries used to decide which tiles overlap a field.
GEOJSON_PATH <- file.path("laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson")

cat(
  "Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n",
  "Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n",
  sep = ""
)
|
||
|
||
# ============================================================================
# PART 1: CHECK TIFF EXTENTS AND CREATE MASTER GRID
# ============================================================================

cat("\n[PART 1] Creating Master Grid\n")

# Field boundaries are needed later to decide which grid tiles are kept.
cat("\n[1] Loading field boundaries from GeoJSON...\n")

if (!file.exists(GEOJSON_PATH)) {
  stop("GeoJSON file not found at: ", GEOJSON_PATH, "\n",
       "Please ensure ", PROJECT, " has a pivot.geojson file.")
}

# The same file is read twice: once as an sf object, once as a terra SpatVector.
field_boundaries_sf <- st_read(GEOJSON_PATH, quiet = TRUE)
field_boundaries_vect <- terra::vect(GEOJSON_PATH)

cat(" ✓ Loaded ", nrow(field_boundaries_sf), " field(s)\n", sep = "")

# Label the fields with the first available name column, checked in this
# order of preference: 'name', 'field', 'field_name'.
name_candidates <- c("name", "field", "field_name")
name_column <- intersect(name_candidates, names(field_boundaries_sf))

field_names <- if (length(name_column) > 0) {
  field_boundaries_sf[[name_column[1]]]
} else {
  # Fall back to row indices. seq_len() gives an empty vector for a layer with
  # no features, where 1:nrow() would give c(1, 0).
  seq_len(nrow(field_boundaries_sf))
}

cat(" Fields: ", paste(field_names, collapse = ", "), "\n", sep = "")
|
||
|
||
# Helper: does a tile's bounding box intersect the bounding box of any field?
#
# Only bounding boxes are compared (plain coordinate comparisons, no geometry
# operations), so a tile can be reported as overlapping even when the field's
# actual outline misses it.
#
# Arguments:
#   tile_extent  Object exposing xmin / xmax / ymin / ymax through `$`
#                (this script passes the result of st_bbox()).
#   field_geoms  Collection of field geometries; each element is handed to
#                st_is_empty() and st_bbox().
#
# Returns TRUE as soon as one non-empty field's bbox intersects the tile bbox
# and FALSE when none does. If anything errors, the message is printed and
# TRUE is returned so the tile is kept rather than dropped.
tile_overlaps_fields <- function(tile_extent, field_geoms) {
  tryCatch({
    for (geom_idx in seq_along(field_geoms)) {
      current_geom <- field_geoms[geom_idx]

      # Empty geometries have no usable bbox
      if (st_is_empty(current_geom)) {
        next
      }

      bbox <- st_bbox(current_geom)

      # Two intervals intersect when each one starts before the other ends
      x_hit <- tile_extent$xmax >= bbox$xmin && tile_extent$xmin <= bbox$xmax
      y_hit <- tile_extent$ymax >= bbox$ymin && tile_extent$ymin <= bbox$ymax

      if (x_hit && y_hit) {
        return(TRUE)
      }
    }

    FALSE
  }, error = function(e) {
    cat(" ⚠️ Error checking overlap: ", e$message, "\n", sep = "")
    TRUE
  })
}
|
||
|
||
cat("\n[2] Checking TIFF extents...\n")

# File names (not full paths) of the daily TIFFs, sorted alphabetically.
tiff_files <- sort(list.files(TIFF_FOLDER, pattern = "\\.tif$", full.names = FALSE))

if (length(tiff_files) == 0) {
  stop("No TIFF files found in ", TIFF_FOLDER)
}

cat(
  " Found ", length(tiff_files), " TIFF file(s)\n",
  " Checking extents... (this may take a while)\n",
  sep = ""
)

# Read the extent of every TIFF once, up front; extents[[i]] belongs to
# tiff_files[i].
extents <- vector("list", length(tiff_files))
for (i in seq_along(tiff_files)) {
  extents[[i]] <- terra::ext(terra::rast(file.path(TIFF_FOLDER, tiff_files[i])))

  # Progress line every 50 files
  if (i %% 50 == 0) {
    cat(" Checked ", i, "/", length(tiff_files), " files\n", sep = "")
  }
}

cat(" ✓ All extents loaded\n")
|
||
|
||
# Check whether every TIFF has the same extent as the first one.
cat("\n[3] Comparing extents...\n")

# Largest coordinate difference still treated as "equal".
tolerance <- 1e-8
all_match <- TRUE
first_ext <- extents[[1]]

# seq_along(extents)[-1] is empty when there is only one TIFF, so the loop is
# skipped; 2:length(extents) would iterate over c(2, 1) and fail on
# extents[[2]].
for (i in seq_along(extents)[-1]) {
  curr_ext <- extents[[i]]

  extent_matches <- (
    abs(curr_ext$xmin - first_ext$xmin) < tolerance &&
      abs(curr_ext$xmax - first_ext$xmax) < tolerance &&
      abs(curr_ext$ymin - first_ext$ymin) < tolerance &&
      abs(curr_ext$ymax - first_ext$ymax) < tolerance
  )

  if (!extent_matches) {
    all_match <- FALSE
    cat(" ✗ Extent mismatch: ", tiff_files[1], " vs ", tiff_files[i], "\n", sep = "")
    cat(" File 1: X [", round(first_ext$xmin, 6), ", ", round(first_ext$xmax, 6), "] ",
        "Y [", round(first_ext$ymin, 6), ", ", round(first_ext$ymax, 6), "]\n", sep = "")
    cat(" File ", i, ": X [", round(curr_ext$xmin, 6), ", ", round(curr_ext$xmax, 6), "] ",
        "Y [", round(curr_ext$ymin, 6), ", ", round(curr_ext$ymax, 6), "]\n", sep = "")
  }
}

if (all_match) {
  cat(" ✓ All TIFF extents MATCH perfectly!\n")
} else {
  cat(" ⚠️ Extents differ - creating master extent covering all\n")
}
|
||
|
||
# Create the master extent: the smallest box that covers every TIFF extent.
cat("\n[4] Creating master extent...\n")

# vapply() guarantees one numeric value per extent.
master_xmin <- min(vapply(extents, function(e) e$xmin, numeric(1)))
master_xmax <- max(vapply(extents, function(e) e$xmax, numeric(1)))
master_ymin <- min(vapply(extents, function(e) e$ymin, numeric(1)))
master_ymax <- max(vapply(extents, function(e) e$ymax, numeric(1)))

# Approximate size in metres using 111320 m per coordinate unit.
# NOTE(review): this presumes the TIFFs are in geographic degrees, and it
# applies no cos(latitude) correction to the X range - confirm this is
# acceptable for sites far from the equator.
x_range_m <- (master_xmax - master_xmin) * 111320
y_range_m <- (master_ymax - master_ymin) * 111320

cat(" Master extent: X [", round(master_xmin, 6), ", ", round(master_xmax, 6), "] ",
    "Y [", round(master_ymin, 6), ", ", round(master_ymax, 6), "]\n", sep = "")
cat(" Coverage: ", round(x_range_m / 1000, 1), "km × ", round(y_range_m / 1000, 1), "km\n", sep = "")

# Auto-determine grid size based on ROI dimensions
if (x_range_m < 10000 && y_range_m < 10000) {
  cat("\n ⚠️ ROI is small (< 10×10 km). Using single tile (1×1 grid) - no splitting needed!\n")
  GRID_NROWS <- 1
  GRID_NCOLS <- 1
  # NOTE(review): GRID_SIZE_LABEL and OUTPUT_FOLDER were derived from the
  # configured grid size before this point and are not recomputed here, so a
  # 1×1 run still uses the configured label/folder - confirm that is intended.
} else {
  # Keep the configured grid size instead of resetting it to 5×5, so a
  # configured 10×10 grid is honoured. With the default 5×5 configuration the
  # message is unchanged.
  cat("\n ROI size allows tiling. Using ", GRID_NCOLS, "×", GRID_NROWS, " grid (",
      GRID_NROWS * GRID_NCOLS, " tiles per date).\n", sep = "")
}

N_TILES <- GRID_NROWS * GRID_NCOLS
|
||
|
||
# Reuse the master grid stored on disk when there is one; otherwise build it
# from the master extent and save it.
cat("\n[5] Checking if master grid exists...\n")

master_grid_file <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson"))

if (!file.exists(master_grid_file)) {
  cat(" Grid does not exist. Creating new master grid...\n")

  cat("\n[6] Creating ", GRID_NCOLS, "×", GRID_NROWS, " master grid...\n", sep = "")

  # Bounding box of the master extent, tagged as EPSG:4326
  master_bbox <- st_bbox(
    c(xmin = master_xmin, xmax = master_xmax, ymin = master_ymin, ymax = master_ymax),
    crs = 4326
  )
  bbox_sf <- st_as_sfc(master_bbox)

  # Regular grid of polygons: GRID_NCOLS cells across, GRID_NROWS cells down
  master_grid <- st_make_grid(bbox_sf, n = c(GRID_NCOLS, GRID_NROWS), what = "polygons")

  # Tile IDs are the zero-padded cell positions: "01", "02", ...
  master_grid_sf <- st_sf(
    tile_id = sprintf("%02d", seq_along(master_grid)),
    geometry = master_grid
  )

  cat(" ✓ Created grid with ", length(master_grid), " cells\n", sep = "")

  # SpatVector copy of the grid for terra
  master_grid_vect <- terra::vect(master_grid_sf)

  # Persist the grid so later runs load it instead of rebuilding it
  if (!dir.exists(OUTPUT_FOLDER)) {
    dir.create(OUTPUT_FOLDER, recursive = TRUE, showWarnings = FALSE)
  }
  st_write(master_grid_sf, master_grid_file, delete_dsn = TRUE, quiet = TRUE)
  cat(" ✓ Master grid saved to: master_grid_", GRID_SIZE_LABEL, ".geojson\n", sep = "")
} else {
  cat(" ✓ Master grid exists! Loading existing grid...\n")
  master_grid_sf <- st_read(master_grid_file, quiet = TRUE)
  master_grid_vect <- terra::vect(master_grid_file)
  cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "")
}
|
||
|
||
# ============================================================================
# PART 2: CREATE FILTERED GRID (ONLY OVERLAPPING TILES)
# ============================================================================

cat("\n[PART 2] Creating Filtered Grid (only overlapping tiles)\n")

cat("\n[7] Filtering master grid to only overlapping tiles...\n")

# st_geometry() returns the geometry column whatever it is named.
grid_geoms <- st_geometry(master_grid_sf)
field_geoms <- st_geometry(field_boundaries_sf)

# Count the tiles actually present in the grid: a grid loaded from disk may
# hold a different number of tiles than N_TILES.
n_grid_tiles <- nrow(master_grid_sf)

# One TRUE/FALSE per grid tile: does its bbox overlap any field bbox?
tile_has_fields <- vapply(
  seq_len(n_grid_tiles),
  function(tile_idx) {
    tile_overlaps_fields(st_bbox(grid_geoms[tile_idx]), field_geoms)
  },
  logical(1)
)
overlapping_tile_indices <- which(tile_has_fields)

cat(" Found ", length(overlapping_tile_indices), " overlapping tiles out of ", n_grid_tiles, "\n", sep = "")
cat(" Reduction: ", n_grid_tiles - length(overlapping_tile_indices), " empty tiles will NOT be created\n", sep = "")

if (length(overlapping_tile_indices) == 0) {
  # Surface the problem here instead of leaving it to the tiling step.
  warning("No grid tile overlaps any field boundary; no tiles will be created. ",
          "Check that ", GEOJSON_PATH, " and the TIFFs cover the same area.",
          call. = FALSE)
}

# Keep only the overlapping tiles; tile_id is the zero-padded row position of
# the tile in the master grid.
filtered_grid_sf <- master_grid_sf[overlapping_tile_indices, ]
filtered_grid_sf$tile_id <- sprintf("%02d", overlapping_tile_indices)

# Convert to SpatVector for makeTiles
filtered_grid_vect <- terra::vect(filtered_grid_sf)

cat(" ✓ Filtered grid ready: ", nrow(filtered_grid_sf), " tiles to create per date\n", sep = "")
|
||
|
||
# ============================================================================
# PART 3: SPLIT EACH TIFF INTO TILES (INDEPENDENT, PER-DATE, RESUMABLE)
# ============================================================================

cat("\n[PART 3] Tiling Individual Dates (Per-Date Processing)\n")
cat("\n[8] Processing each date independently...\n")
cat(" (This process is RESUMABLE - you can stop and restart anytime)\n\n")

total_tiles_created <- 0
dates_skipped <- 0
dates_processed <- 0

for (file_idx in seq_along(tiff_files)) {
  tiff_file <- tiff_files[file_idx]
  date_str <- sub("\\.tif$", "", tiff_file)

  # Date-specific output folder
  date_output_folder <- file.path(OUTPUT_FOLDER, date_str)

  # CHECK: Skip if date already processed (RESUME-SAFE)
  if (dir.exists(date_output_folder)) {
    existing_tiles <- list.files(date_output_folder, pattern = "\\.tif$")
    existing_tiles <- existing_tiles[!grepl("master_grid", existing_tiles)]

    # Files still named tile<N>.tif were written by makeTiles but not yet
    # renamed, i.e. a previous run was interrupted. They do not count as
    # finished output, and their presence forces the date to be redone.
    is_temp_tile <- grepl("^tile[0-9]+\\.tif$", existing_tiles)
    finished_tiles <- existing_tiles[!is_temp_tile]

    if (length(finished_tiles) > 0 && !any(is_temp_tile)) {
      cat("[", file_idx, "/", length(tiff_files), "] SKIP: ", date_str,
          " (", length(finished_tiles), " tiles already exist)\n", sep = "")
      dates_skipped <- dates_skipped + 1
      next
    }
  }

  cat("[", file_idx, "/", length(tiff_files), "] Processing: ", date_str, "\n", sep = "")
  dates_processed <- dates_processed + 1

  # Load TIFF for this date only
  tiff_path <- file.path(TIFF_FOLDER, tiff_file)
  raster <- terra::rast(tiff_path)

  dims <- dim(raster)
  cat(" Dimensions: ", dims[2], "×", dims[1], " pixels\n", sep = "")

  if (!dir.exists(date_output_folder)) {
    dir.create(date_output_folder, recursive = TRUE, showWarnings = FALSE)
  }

  cat(" Creating ", length(overlapping_tile_indices), " tiles...\n", sep = "")

  # Use makeTiles with FILTERED grid (only overlapping tiles)
  tiles_list <- terra::makeTiles(
    x = raster,
    y = filtered_grid_vect,
    filename = file.path(date_output_folder, "tile.tif"),
    overwrite = TRUE
  )

  # Rename tiles to [DATE]_[TILE_ID].tif.
  # The tile number is read from each returned file name (tile<N>.tif) rather
  # than from its position in the list, so the mapping to filtered_grid_sf
  # rows stays right if the two ever differ.
  # NOTE(review): presumes <N> is the row of filtered_grid_vect the tile was
  # cut from - confirm against the terra::makeTiles documentation.
  tile_basenames <- basename(tiles_list)
  grid_rows <- seq_along(tiles_list)
  is_numbered <- grepl("^tile[0-9]+\\.tif$", tile_basenames)
  grid_rows[is_numbered] <- as.integer(gsub("[^0-9]", "", tile_basenames[is_numbered]))

  tile_ids <- filtered_grid_sf$tile_id[grid_rows]
  source_files <- file.path(date_output_folder, tile_basenames)
  final_files <- file.path(date_output_folder, paste0(date_str, "_", tile_ids, ".tif"))

  can_rename <- !is.na(tile_ids) & file.exists(source_files)
  renamed <- file.rename(source_files[can_rename], final_files[can_rename])

  n_not_renamed <- sum(!can_rename) + sum(!renamed)
  if (n_not_renamed > 0) {
    # Report failures instead of ignoring them; the leftover tile<N>.tif files
    # make the next run redo this date.
    warning(n_not_renamed, " tile(s) for ", date_str, " could not be renamed",
            call. = FALSE)
  }

  cat(" ✓ Created ", length(tiles_list), " tiles\n", sep = "")
  total_tiles_created <- total_tiles_created + length(tiles_list)
}
|
||
|
||
# ============================================================================
# VERIFICATION
# ============================================================================

cat("\n[9] Verifying output...\n")

# Every direct subfolder of OUTPUT_FOLDER is treated as one date folder.
date_folders <- list.dirs(OUTPUT_FOLDER, full.names = FALSE, recursive = FALSE)
date_folders <- sort(date_folders[date_folders != "."])

# Number of .tif files per date folder, ignoring anything named master_grid.
tiles_per_folder <- vapply(
  date_folders,
  function(folder_name) {
    tif_names <- list.files(file.path(OUTPUT_FOLDER, folder_name), pattern = "\\.tif$")
    sum(!grepl("master_grid", tif_names))
  },
  integer(1),
  USE.NAMES = FALSE
)

for (folder_pos in seq_along(date_folders)) {
  cat(" ", date_folders[folder_pos], ": ", tiles_per_folder[folder_pos], " tiles\n", sep = "")
}

# Grand total over all date folders
total_tile_files <- sum(as.numeric(tiles_per_folder))
|
||
|
||
# ============================================================================
# SUMMARY
# ============================================================================

cat("\n\n========== SUMMARY ==========\n")

# Grid that was used and where its output lives
cat(
  "\nGrid Configuration:\n",
  " - Dimensions: ", GRID_NCOLS, "×", GRID_NROWS, " = ", N_TILES, " total tile positions\n",
  " - Storage subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n",
  " - Master grid file: master_grid_", GRID_SIZE_LABEL, ".geojson\n",
  sep = ""
)

cat(
  "\nField Filtering:\n",
  " - Field boundaries loaded from pivot.geojson\n",
  " - Only overlapping tiles created (empty tiles deleted)\n",
  " - Significant storage savings for sparse fields!\n",
  sep = ""
)

# Counters collected by the per-date tiling loop
cat(
  "\nProcessing Summary:\n",
  " - Total TIFF files: ", length(tiff_files), "\n",
  " - Dates skipped (already processed): ", dates_skipped, "\n",
  " - Dates processed: ", dates_processed, "\n",
  " - Total tiles created: ", total_tiles_created, "\n",
  sep = ""
)

# The average only makes sense when at least one date was tiled in this run
if (dates_processed > 0) {
  avg_tiles_per_date <- total_tiles_created / dates_processed
  cat(" - Average tiles per date: ", round(avg_tiles_per_date, 1), "\n", sep = "")
}

# Illustrative layout of the output tree (the dates shown are examples)
cat(
  "\nDirectory Structure:\n",
  " laravel_app/storage/app/", PROJECT, "/daily_tiles_split/\n",
  " └── ", GRID_SIZE_LABEL, "/\n",
  " ├── master_grid_", GRID_SIZE_LABEL, ".geojson\n",
  " ├── 2024-01-15/\n",
  " │ ├── 2024-01-15_01.tif (only overlapping tiles)\n",
  " │ ├── 2024-01-15_05.tif\n",
  " │ └── ...\n",
  " ├── 2024-01-16/\n",
  " │ └── ...\n",
  " └── ...\n",
  sep = ""
)

cat(
  "\n⭐ Key Benefits:\n",
  " ✓ Overlap-filtered: No wasted empty tiles\n",
  " ✓ Skip existing dates: Resume-safe, idempotent\n",
  " ✓ Grid versioning: Future 10x10 grids stored separately\n",
  " ✓ Disk efficient: Storage reduced for sparse ROIs\n",
  sep = ""
)
|
||
|
||
# ============================================================================
# WRITE TILING CONFIGURATION METADATA
# ============================================================================
# Per the original note, parameters_project.R reads this metadata file to
# determine the mosaic mode, so script 40 can reuse what script 10 decided
# without recomputing it.

cat("\n[10] Writing tiling configuration metadata...\n")

config_file <- file.path(OUTPUT_FOLDER, "tiling_config.json")

# "true" when the grid has more than one tile, "false" otherwise
has_tiles_flag <- tolower(N_TILES > 1)

# One JSON member per element; they are joined with ",\n" below
config_members <- c(
  paste0(' "project": "', PROJECT, '"'),
  paste0(' "has_tiles": ', has_tiles_flag),
  paste0(' "grid_size": "', GRID_SIZE_LABEL, '"'),
  paste0(' "grid_rows": ', GRID_NROWS),
  paste0(' "grid_cols": ', GRID_NCOLS),
  paste0(' "roi_width_km": ', round(x_range_m / 1000, 1)),
  paste0(' "roi_height_km": ', round(y_range_m / 1000, 1)),
  paste0(' "created_date": "', Sys.Date(), '"'),
  paste0(' "created_time": "', format(Sys.time(), "%H:%M:%S"), '"')
)

config_json <- paste0("{\n", paste(config_members, collapse = ",\n"), "\n}\n")

writeLines(config_json, config_file)

cat(" ✓ Metadata saved to: tiling_config.json\n")
cat(" - has_tiles: ", has_tiles_flag, "\n", sep = "")
cat(" - grid_size: ", GRID_SIZE_LABEL, "\n", sep = "")

cat("\n✓ Script complete!\n")
|