From 20e3d5b4a3d9b59b163b3a6980fa059e58521e3d Mon Sep 17 00:00:00 2001 From: Timon Date: Tue, 3 Feb 2026 16:34:49 +0100 Subject: [PATCH] worked on sc 110 making a 10 utils file and making general log files. --- r_app/00_common_utils.R | 101 ++++++++- r_app/10_create_per_field_tiffs.R | 183 +--------------- r_app/10_create_per_field_tiffs_utils.R | 265 ++++++++++++++++++++++++ r_app/parameters_project.R | 2 +- 4 files changed, 377 insertions(+), 174 deletions(-) create mode 100644 r_app/10_create_per_field_tiffs_utils.R diff --git a/r_app/00_common_utils.R b/r_app/00_common_utils.R index 1d0125f..3e18784 100644 --- a/r_app/00_common_utils.R +++ b/r_app/00_common_utils.R @@ -175,7 +175,7 @@ load_field_boundaries <- function(data_dir) { if (use_pivot_2) { field_boundaries_path <- here(data_dir, "pivot_2.geojson") } else { - field_boundaries_path <- here(data_dir, "pivot.geojson") + field_boundaries_path <- here(data_dir, "Data", "pivot.geojson") } if (!file.exists(field_boundaries_path)) { @@ -191,6 +191,11 @@ load_field_boundaries <- function(data_dir) { field_boundaries_sf <- field_boundaries_sf %>% select(-OBJECTID) } + # **CRITICAL**: Repair invalid geometries (degenerate vertices, self-intersections, etc.) + # This must happen BEFORE any spatial operations (CRS transform, intersect, crop, etc.) + # to prevent S2 geometry validation errors during downstream processing + field_boundaries_sf <- repair_geojson_geometries(field_boundaries_sf) + # Validate and fix CRS if needed tryCatch({ # Simply assign WGS84 if not already set (safe approach) @@ -396,6 +401,100 @@ date_list <- function(end_date, offset) { )) } +# ============================================================================== +#' Repair Invalid GeoJSON Geometries +#' +#' Fixes common geometry issues in GeoJSON/sf objects: +#' - Degenerate vertices (duplicate points) +#' - Self-intersecting polygons +#' - Invalid ring orientation +#' - Empty or NULL geometries +#' +#' Uses sf::st_make_valid() with buffer trick as fallback. +#' +#' @param sf_object sf object (GeoDataFrame) with potentially invalid geometries +#' @return sf object with repaired geometries +#' +#' @details +#' **Why this matters:** +#' Pivot GeoJSON files sometimes contain degenerate vertices or self-intersecting +#' rings from manual editing or GIS data sources. These cause errors when using +#' S2 geometry (strict validation) during cropping operations. +#' +#' **Repair strategy (priority order):** +#' 1. Try st_make_valid() - GEOS-based repair (most reliable) +#' 2. Fallback: st_union() + buffer(0) - Forces polygon validity +#' 3. Last resort: Silently keep original if repair fails +#' +#' @examples +#' \dontrun{ +#' fields <- st_read("pivot.geojson") +#' fields_fixed <- repair_geojson_geometries(fields) +#' cat(paste("Fixed geometries: before=", +#' nrow(fields[!st_is_valid(fields), ]), +#' ", after=", +#' nrow(fields_fixed[!st_is_valid(fields_fixed), ]))) +#' } +#' +repair_geojson_geometries <- function(sf_object) { + if (!inherits(sf_object, "sf")) { + stop("Input must be an sf (Simple Features) object") + } + + # Count invalid geometries BEFORE repair + invalid_before <- sum(!sf::st_is_valid(sf_object), na.rm = TRUE) + + if (invalid_before == 0) { + safe_log("All geometries already valid - no repair needed", "INFO") + return(sf_object) + } + + safe_log(paste("Found", invalid_before, "invalid geometries - attempting repair"), "WARNING") + + # STRATEGY: Apply st_make_valid() to entire sf object (most reliable for GEOS) + # This handles degenerate vertices, self-intersections, invalid rings while preserving all features + repaired <- tryCatch({ + # st_make_valid() on entire sf object preserves all features and attributes + repaired_geom <- sf::st_make_valid(sf_object) + + # Verify we still have the same number of rows + if (nrow(repaired_geom) != nrow(sf_object)) { + warning("st_make_valid() changed number of features - attempting row-wise repair") + + # Fallback: Repair row-by-row to maintain original structure + repaired_geom <- sf_object + for (i in seq_len(nrow(sf_object))) { + tryCatch({ + if (!sf::st_is_valid(sf_object[i, ])) { + repaired_geom[i, ] <- sf::st_make_valid(sf_object[i, ]) + } + }, error = function(e) { + safe_log(paste("Could not repair row", i, "-", e$message), "WARNING") + }) + } + } + + safe_log("✓ st_make_valid() successfully repaired geometries", "INFO") + repaired_geom + }, error = function(e) { + safe_log(paste("st_make_valid() failed:", e$message), "WARNING") + NULL + }) + + # If repair failed, keep original + if (is.null(repaired)) { + safe_log(paste("Could not repair", invalid_before, "invalid geometries - keeping original"), + "WARNING") + return(sf_object) + } + + # Count invalid geometries AFTER repair + invalid_after <- sum(!sf::st_is_valid(repaired), na.rm = TRUE) + safe_log(paste("Repair complete: before =", invalid_before, ", after =", invalid_after), "INFO") + + return(repaired) +} + # ============================================================================== # END 00_COMMON_UTILS.R # ============================================================================== diff --git a/r_app/10_create_per_field_tiffs.R b/r_app/10_create_per_field_tiffs.R index 808967e..fe8b989 100644 --- a/r_app/10_create_per_field_tiffs.R +++ b/r_app/10_create_per_field_tiffs.R @@ -47,6 +47,7 @@ library(sf) # ============================================================================== source(here::here("r_app", "parameters_project.R")) source(here::here("r_app", "00_common_utils.R")) +source(here::here("r_app", "10_create_per_field_tiffs_utils.R")) # Get project parameter from command line args <- commandArgs(trailingOnly = TRUE) @@ -63,172 +64,10 @@ safe_log(paste("Project:", PROJECT)) safe_log(paste("Base path:", paths$laravel_storage_dir)) safe_log(paste("Data dir:", paths$data_dir)) -# Unified function to crop TIFF to field boundaries -# Called by both migration and processing phases -crop_tiff_to_fields <- function(tif_path, tif_date, fields, output_base_dir) { - - created <- 0 - skipped <- 0 - errors <- 0 - - # Load raster - if (!file.exists(tif_path)) { - safe_log(paste("ERROR: TIFF not found:", tif_path)) - return(list(created = 0, skipped = 0, errors = 1)) - } - - rast <- tryCatch({ - rast(tif_path) - }, error = function(e) { - safe_log(paste("ERROR loading raster:", e$message)) - return(NULL) - }) - - if (is.null(rast)) { - return(list(created = 0, skipped = 0, errors = 1)) - } - - # Create raster bounding box in raster CRS - rast_bbox <- st_as_sfc(st_bbox(rast)) - st_crs(rast_bbox) <- st_crs(rast) - - # Reproject fields to match raster CRS - fields_reprojected <- st_transform(fields, st_crs(rast_bbox)) - - # Find which fields intersect this raster (CRITICAL: raster bbox first, then fields) - overlapping_indices <- st_intersects(rast_bbox, fields_reprojected, sparse = TRUE) - overlapping_indices <- unique(unlist(overlapping_indices)) - - if (length(overlapping_indices) == 0) { - safe_log(paste("No fields intersect TIFF:", basename(tif_path))) - return(list(created = 0, skipped = 0, errors = 0)) - } - - # Process each overlapping field - for (field_idx in overlapping_indices) { - field_name <- fields$field_name[field_idx] - field_geom <- fields_reprojected[field_idx, ] - - # Create field directory - field_dir <- file.path(output_base_dir, field_name) - if (!dir.exists(field_dir)) { - dir.create(field_dir, recursive = TRUE, showWarnings = FALSE) - } - - # Output file path - output_path <- file.path(field_dir, paste0(tif_date, ".tif")) - - # Check if file already exists (idempotent) - if (file.exists(output_path)) { - skipped <- skipped + 1 - next - } - - # Crop raster to field boundary - tryCatch({ - field_rast <- crop(rast, field_geom) - writeRaster(field_rast, output_path, overwrite = TRUE) - created <- created + 1 - }, error = function(e) { - safe_log(paste("ERROR cropping field", field_name, ":", e$message)) - errors <<- errors + 1 - }) - } - - return(list(created = created, skipped = skipped, errors = errors)) -} - -# Process new 4-band raw TIFFs from merged_tif -# MIGRATION MODE: If field_tiles_CI/ already populated (from migration), skip those dates -# NORMAL MODE: Otherwise, process merged_tif/ → field_tiles/ -process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir = NULL) { - - safe_log("\n========================================") - safe_log("PHASE 2: PROCESSING NEW DOWNLOADS") - safe_log("========================================") - - # Check if download directory exists - if (!dir.exists(merged_tif_dir)) { - safe_log("No merged_tif/ directory found - no new data to process") - return(list(total_created = 0, total_skipped = 0, total_errors = 0)) - } - - # Create output directory - if (!dir.exists(field_tiles_dir)) { - dir.create(field_tiles_dir, recursive = TRUE, showWarnings = FALSE) - } - - # Find all date-pattern TIFFs in root of merged_tif - tiff_files <- list.files( - merged_tif_dir, - pattern = "^[0-9]{4}-[0-9]{2}-[0-9]{2}\\.tif$", - full.names = TRUE - ) - - safe_log(paste("Found", length(tiff_files), "TIFF(s) to process")) - - if (length(tiff_files) == 0) { - safe_log("No new TIFFs found - nothing to process") - return(list(total_created = 0, total_skipped = 0, total_errors = 0)) - } - - # Process each new TIFF - total_created <- 0 - total_skipped <- 0 - total_errors <- 0 - - for (tif_path in tiff_files) { - tif_date <- gsub("\\.tif$", "", basename(tif_path)) - - # MIGRATION MODE CHECK: Skip if this date was already migrated to field_tiles_CI/ - # (This means Script 20 already processed it and extracted RDS) - if (!is.null(field_tiles_ci_dir) && dir.exists(field_tiles_ci_dir)) { - # Check if ANY field has this date in field_tiles_CI/ - date_migrated <- FALSE - - # Sample check: look for date in field_tiles_CI/*/DATE.tif - sample_field_dirs <- list.dirs(field_tiles_ci_dir, full.names = TRUE, recursive = FALSE) - for (field_dir in sample_field_dirs) { - potential_file <- file.path(field_dir, paste0(tif_date, ".tif")) - if (file.exists(potential_file)) { - date_migrated <- TRUE - break - } - } - - if (date_migrated) { - safe_log(paste("Skipping:", tif_date, "(already migrated and processed by Script 20)")) - total_skipped <- total_skipped + 1 - next - } - } - - safe_log(paste("Processing:", tif_date)) - - result <- crop_tiff_to_fields(tif_path, tif_date, fields, field_tiles_dir) - total_created <- total_created + result$created - total_skipped <- total_skipped + result$skipped - total_errors <- total_errors + result$errors - } - - safe_log(paste("Processing complete: created =", total_created, - ", skipped =", total_skipped, ", errors =", total_errors)) - - return(list(total_created = total_created, total_skipped = total_skipped, - total_errors = total_errors)) -} - -# ============================================================================ -# ============================================================================== -# MAIN EXECUTION -# ============================================================================== - -safe_log("========================================") -safe_log(paste("Script 10: Per-Field TIFF Creation for", PROJECT)) -safe_log("========================================") - -# Load field boundaries using centralized path (no dir.create needed - already created by setup_project_directories) -fields <- load_field_boundaries(paths$field_boundaries_path) +# Load field boundaries using data_dir (not field_boundaries_path) +# load_field_boundaries() expects a directory and builds the file path internally +fields_data <- load_field_boundaries(paths$data_dir) +fields <- fields_data$field_boundaries_sf # Define input and output directories (from centralized paths) merged_tif_dir <- paths$merged_tif_folder @@ -239,11 +78,11 @@ field_tiles_ci_dir <- paths$field_tiles_ci_dir # Pass field_tiles_ci_dir so it can skip dates already migrated process_result <- process_new_merged_tif(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir) -safe_log("\n========================================") -safe_log("FINAL SUMMARY") -safe_log("========================================") +safe_log("\n========================================", "INFO") +safe_log("FINAL SUMMARY", "INFO") +safe_log("========================================", "INFO") safe_log(paste("Processing: created =", process_result$total_created, ", skipped =", process_result$total_skipped, - ", errors =", process_result$total_errors)) -safe_log("Script 10 complete") -safe_log("========================================\n") + ", errors =", process_result$total_errors), "INFO") +safe_log("Script 10 complete", "INFO") +safe_log("========================================\n", "INFO") diff --git a/r_app/10_create_per_field_tiffs_utils.R b/r_app/10_create_per_field_tiffs_utils.R new file mode 100644 index 0000000..36f4eb7 --- /dev/null +++ b/r_app/10_create_per_field_tiffs_utils.R @@ -0,0 +1,265 @@ +# ============================================================================== +# 10_CREATE_PER_FIELD_TIFFS_UTILS.R +# ============================================================================== +# UTILITY FUNCTIONS FOR SCRIPT 10: Per-Field TIFF Creation +# +# PURPOSE: +# Extracted helper functions specific to Script 10 (per-field TIFF splitting). +# These functions are domain-specific (not generic) and handle raster cropping +# and batch processing of satellite imagery for per-field storage. +# +# FUNCTIONS: +# 1. crop_tiff_to_fields() — Crop a single TIFF to field boundaries +# 2. process_new_merged_tif() — Batch process all new merged TIFFs into per-field structure +# +# DEPENDENCIES: +# - terra (raster operations) +# - sf (geometry operations) +# - 00_common_utils.R (logging via safe_log()) +# +# USAGE: +# source(here::here("r_app", "10_create_per_field_tiffs_utils.R")) +# +# ============================================================================== + +library(terra) +library(sf) + +# ============================================================================== +#' Crop Single TIFF to Field Boundaries +#' +#' Loads a single satellite TIFF, finds overlapping field boundaries, and writes +#' cropped per-field versions to output directory. Handles CRS reprojection and +#' idempotent file creation (skips existing files). +#' +#' @param tif_path Character. Absolute path to source TIFF file (4-band or 5-band). +#' @param tif_date Character. Date string (YYYY-MM-DD) for naming output files. +#' @param fields sf object. GeoDataFrame of field boundaries with 'field_name' column. +#' @param output_base_dir Character. Base directory where per-field TIFFs are stored +#' (e.g., "field_tiles" or "field_tiles_CI"). +#' +#' @return List with elements: +#' - created: Integer. Number of field TIFFs created +#' - skipped: Integer. Number of files that already existed +#' - errors: Integer. Number of fields that failed to crop +#' +#' @details +#' **Output Structure:** +#' Creates files at: {output_base_dir}/{field_name}/{tif_date}.tif +#' +#' **CRS Handling:** +#' Automatically reprojects field boundaries to match raster CRS before cropping. +#' +#' **Idempotency:** +#' If output file already exists, skips writing (counted as "skipped"). +#' Safe to re-run without duplicate file creation. +#' +#' @examples +#' \dontrun{ +#' fields <- load_field_boundaries("path/to/pivot.geojson") +#' result <- crop_tiff_to_fields( +#' tif_path = "merged_tif/2024-01-15.tif", +#' tif_date = "2024-01-15", +#' fields = fields, +#' output_base_dir = "field_tiles" +#' ) +#' cat(sprintf("Created: %d, Skipped: %d, Errors: %d\n", +#' result$created, result$skipped, result$errors)) +#' } +#' +crop_tiff_to_fields <- function(tif_path, tif_date, fields, output_base_dir) { + + created <- 0 + skipped <- 0 + errors <- 0 + + # Load raster + if (!file.exists(tif_path)) { + safe_log(paste("ERROR: TIFF not found:", tif_path), "ERROR") + return(list(created = 0, skipped = 0, errors = 1)) + } + + rast <- tryCatch({ + rast(tif_path) + }, error = function(e) { + safe_log(paste("ERROR loading raster:", e$message), "ERROR") + return(NULL) + }) + + if (is.null(rast)) { + return(list(created = 0, skipped = 0, errors = 1)) + } + + # Create raster bounding box in raster CRS + rast_bbox <- st_as_sfc(st_bbox(rast)) + st_crs(rast_bbox) <- st_crs(rast) + + # Reproject fields to match raster CRS + fields_reprojected <- st_transform(fields, st_crs(rast_bbox)) + + # Find which fields intersect this raster (CRITICAL: raster bbox first, then fields) + overlapping_indices <- st_intersects(rast_bbox, fields_reprojected, sparse = TRUE) + overlapping_indices <- unique(unlist(overlapping_indices)) + + if (length(overlapping_indices) == 0) { + safe_log(paste("No fields intersect TIFF:", basename(tif_path)), "INFO") + return(list(created = 0, skipped = 0, errors = 0)) + } + + # Process each overlapping field + for (field_idx in overlapping_indices) { + field_name <- fields$field[field_idx] + field_geom <- fields_reprojected[field_idx, ] + + # Create field directory + field_dir <- file.path(output_base_dir, field_name) + if (!dir.exists(field_dir)) { + dir.create(field_dir, recursive = TRUE, showWarnings = FALSE) + } + + # Output file path + output_path <- file.path(field_dir, paste0(tif_date, ".tif")) + + # Check if file already exists (idempotent) + if (file.exists(output_path)) { + skipped <- skipped + 1 + next + } + + # Crop raster to field boundary + tryCatch({ + field_rast <- crop(rast, field_geom) + writeRaster(field_rast, output_path, overwrite = TRUE) + created <- created + 1 + }, error = function(e) { + safe_log(paste("ERROR cropping field", field_name, ":", e$message), "ERROR") + errors <<- errors + 1 + }) + } + + return(list(created = created, skipped = skipped, errors = errors)) +} + +# ============================================================================== +#' Process New Merged TIFFs into Per-Field Structure +#' +#' Batch processes all new 4-band raw satellite TIFFs from merged_tif/ directory. +#' Crops each TIFF to field boundaries and stores results in per-field subdirectories. +#' Supports migration mode: skips dates already processed and migrated to field_tiles_CI/. +#' +#' @param merged_tif_dir Character. Source directory containing raw merged TIFFs +#' (e.g., "merged_tif/"). +#' @param field_tiles_dir Character. Target directory for per-field TIFFs +#' (e.g., "field_tiles/"). +#' @param fields sf object. GeoDataFrame of field boundaries with 'field_name' column. +#' @param field_tiles_ci_dir Character. Optional. Directory where migrated CI-calculated +#' TIFFs are stored. If provided, skips dates +#' already processed and moved to field_tiles_CI/. +#' Default: NULL (process all dates). +#' +#' @return List with elements: +#' - total_created: Integer. Total field TIFFs created across all dates +#' - total_skipped: Integer. Total files that already existed +#' - total_errors: Integer. Total cropping errors across all dates +#' +#' @details +#' **Migration Logic:** +#' When migration phase is active (field_tiles_CI/ contains CI-calculated TIFFs), +#' this function detects which dates have been migrated and skips reprocessing them. +#' This prevents redundant work during the per-field architecture transition. +#' +#' **Date Detection:** +#' Finds all files in merged_tif_dir matching pattern: YYYY-MM-DD.tif +#' +#' **Output Structure:** +#' Creates per-field subdirectories and stores: field_tiles/{FIELD}/{DATE}.tif +#' +#' @examples +#' \dontrun{ +#' fields <- load_field_boundaries("path/to/pivot.geojson") +#' result <- process_new_merged_tif( +#' merged_tif_dir = "merged_tif", +#' field_tiles_dir = "field_tiles", +#' fields = fields, +#' field_tiles_ci_dir = "field_tiles_CI" +#' ) +#' cat(sprintf("Processing complete: created=%d, skipped=%d, errors=%d\n", +#' result$total_created, result$total_skipped, result$total_errors)) +#' } +#' +process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir = NULL) { + + safe_log("\n========================================", "INFO") + safe_log("PHASE 2: PROCESSING NEW DOWNLOADS", "INFO") + safe_log("========================================", "INFO") + + # Check if download directory exists + if (!dir.exists(merged_tif_dir)) { + safe_log("No merged_tif/ directory found - no new data to process", "WARNING") + return(list(total_created = 0, total_skipped = 0, total_errors = 0)) + } + + # Create output directory + if (!dir.exists(field_tiles_dir)) { + dir.create(field_tiles_dir, recursive = TRUE, showWarnings = FALSE) + } + + # Find all date-pattern TIFFs in root of merged_tif + tiff_files <- list.files( + merged_tif_dir, + pattern = "^[0-9]{4}-[0-9]{2}-[0-9]{2}\\.tif$", + full.names = TRUE + ) + + safe_log(paste("Found", length(tiff_files), "TIFF(s) to process"), "INFO") + + if (length(tiff_files) == 0) { + safe_log("No new TIFFs found - nothing to process", "WARNING") + return(list(total_created = 0, total_skipped = 0, total_errors = 0)) + } + + # Process each new TIFF + total_created <- 0 + total_skipped <- 0 + total_errors <- 0 + + for (tif_path in tiff_files) { + tif_date <- gsub("\\.tif$", "", basename(tif_path)) + + # MIGRATION MODE CHECK: Skip if this date was already migrated to field_tiles_CI/ + # (This means Script 20 already processed it and extracted RDS) + if (!is.null(field_tiles_ci_dir) && dir.exists(field_tiles_ci_dir)) { + # Check if ANY field has this date in field_tiles_CI/ + date_migrated <- FALSE + + # Sample check: look for date in field_tiles_CI/*/DATE.tif + sample_field_dirs <- list.dirs(field_tiles_ci_dir, full.names = TRUE, recursive = FALSE) + for (field_dir in sample_field_dirs) { + potential_file <- file.path(field_dir, paste0(tif_date, ".tif")) + if (file.exists(potential_file)) { + date_migrated <- TRUE + break + } + } + + if (date_migrated) { + safe_log(paste("Skipping:", tif_date, "(already migrated and processed by Script 20)"), "INFO") + total_skipped <- total_skipped + 1 + next + } + } + + safe_log(paste("Processing:", tif_date), "INFO") + + result <- crop_tiff_to_fields(tif_path, tif_date, fields, field_tiles_dir) + total_created <- total_created + result$created + total_skipped <- total_skipped + result$skipped + total_errors <- total_errors + result$errors + } + + safe_log(paste("Processing complete: created =", total_created, + ", skipped =", total_skipped, ", errors =", total_errors), "INFO") + + return(list(total_created = total_created, total_skipped = total_skipped, + total_errors = total_errors)) +} diff --git a/r_app/parameters_project.R b/r_app/parameters_project.R index 9caa6ec..a1c7794 100644 --- a/r_app/parameters_project.R +++ b/r_app/parameters_project.R @@ -445,7 +445,7 @@ load_field_boundaries <- function(data_dir) { if (use_pivot_2) { field_boundaries_path <- here(data_dir, "pivot_2.geojson") } else { - field_boundaries_path <- here(data_dir, "pivot.geojson") + field_boundaries_path <- here(data_dir, "Data", "pivot.geojson") } if (!file.exists(field_boundaries_path)) {