SmartCane/r_app/10_create_per_field_tiffs.R
2026-02-03 15:13:21 +01:00

249 lines
9 KiB
R

# ==============================================================================
# SmartCane Script 10: Create Per-Field TIFFs
# ==============================================================================
#
# PURPOSE:
# Split full-farm satellite TIFFs into per-field file structure across TWO phases:
#
# PHASE 1 - MIGRATION (Legacy Data):
# Input: merged_final_tif/{DATE}.tif (5-band: R,G,B,NIR,CI - with CI calculated)
# Output: field_tiles_CI/{FIELD}/{DATE}.tif
# Status: One-time reorganization of existing data; will be removed after 2-3 weeks
#
# PHASE 2 - PROCESSING (New Downloads):
# Input: merged_tif/{DATE}.tif (4-band: R,G,B,NIR - raw from Planet API)
# Output: field_tiles/{FIELD}/{DATE}.tif
# Status: Ongoing for all new downloads; always runs (not conditional)
#
# INTEGRATION WITH DOWNSTREAM SCRIPTS:
# - Script 20 (CI Extraction):
# Reads from field_tiles/{FIELD}/{DATE}.tif
# Adds CI calculation → outputs to field_tiles_CI/{FIELD}/{DATE}.tif (5-band)
# - Script 40 (Mosaic Creation):
# Reads from field_tiles_CI/{FIELD}/{DATE}.tif (via per-field weekly aggregation)
# Creates weekly_mosaic/{FIELD}/week_{WW}.tif
#
# ARCHITECTURE:
# This script uses field/date folder organization:
# field_tiles/
# ├── field_1/
# │ ├── 2024-01-15.tif
# │ └── 2024-01-16.tif
# └── field_2/
# ├── 2024-01-15.tif
# └── 2024-01-16.tif
#
# Benefits: Downstream scripts iterate per-field → per-date, enabling clean
# aggregation for mosaics (Script 40) and KPIs (Script 80/90).
#
# ==============================================================================
library(terra)
library(sf)
# ==============================================================================
# LOAD CENTRALIZED PARAMETERS & PATHS
# ==============================================================================
source(here::here("r_app", "parameters_project.R"))
# Resolve the project name: first trailing command-line argument if one was
# supplied, otherwise fall back to "angata".
args <- commandArgs(trailingOnly = TRUE)
PROJECT <- if (length(args) == 0) "angata" else args[1]
# Build the centralized path structure for this project (per the original note,
# setup_project_directories() also creates the directories it describes).
paths <- setup_project_directories(PROJECT)
# Log the resolved project and its key locations for traceability.
smartcane_log(paste("Project:", PROJECT))
smartcane_log(paste("Base path:", paths$laravel_storage_dir))
smartcane_log(paste("Data dir:", paths$data_dir))
#' Crop one full-farm TIFF into one TIFF per overlapping field
#'
#' Shared cropping routine for the migration and processing phases. For every
#' field whose geometry intersects the raster's bounding box, the raster is
#' cropped to that field and written to
#' `{output_base_dir}/{field_name}/{tif_date}.tif`. Outputs that already exist
#' are left untouched, so re-running is safe.
#'
#' @param tif_path Path to the source TIFF.
#' @param tif_date Label used as the output file name (without extension).
#' @param fields Field boundaries (sf object); `fields$field_name` supplies the
#'   per-field folder names.
#' @param output_base_dir Directory under which the per-field folders live.
#' @return A list with numeric counts `created`, `skipped` and `errors`.
crop_tiff_to_fields <- function(tif_path, tif_date, fields, output_base_dir) {
  created <- 0
  skipped <- 0
  errors <- 0

  # A missing input counts as a single error for this TIFF.
  if (!file.exists(tif_path)) {
    smartcane_log(paste("ERROR: TIFF not found:", tif_path))
    return(list(created = 0, skipped = 0, errors = 1))
  }

  # Load the raster; an unreadable file is reported and counted as one error.
  # (Named `source_rast` so the local does not shadow terra's rast() function.)
  source_rast <- tryCatch(
    rast(tif_path),
    error = function(e) {
      smartcane_log(paste("ERROR loading raster:", e$message))
      NULL
    }
  )
  if (is.null(source_rast)) {
    return(list(created = 0, skipped = 0, errors = 1))
  }

  # Raster bounding box as an sfc polygon, tagged with the raster's CRS.
  rast_bbox <- st_as_sfc(st_bbox(source_rast))
  st_crs(rast_bbox) <- st_crs(source_rast)

  # Bring the fields into the raster CRS so the intersection test is meaningful.
  fields_reprojected <- st_transform(fields, st_crs(rast_bbox))

  # Argument order matters: with the bbox as `x` and the fields as `y`, the
  # returned indices refer to rows of `fields_reprojected`.
  overlapping_indices <- st_intersects(rast_bbox, fields_reprojected, sparse = TRUE)
  overlapping_indices <- unique(unlist(overlapping_indices))
  if (length(overlapping_indices) == 0) {
    smartcane_log(paste("No fields intersect TIFF:", basename(tif_path)))
    return(list(created = 0, skipped = 0, errors = 0))
  }

  for (field_idx in overlapping_indices) {
    # st_transform() keeps row order, so the same index is valid for both the
    # original and the reprojected fields.
    field_name <- fields$field_name[field_idx]
    field_geom <- fields_reprojected[field_idx, ]

    field_dir <- file.path(output_base_dir, field_name)
    if (!dir.exists(field_dir)) {
      dir.create(field_dir, recursive = TRUE, showWarnings = FALSE)
    }

    output_path <- file.path(field_dir, paste0(tif_date, ".tif"))

    # Idempotency: an existing output means this field/date was already done.
    if (file.exists(output_path)) {
      skipped <- skipped + 1
      next
    }

    # NOTE(review): crop() is called without a mask argument, so this presumably
    # crops to the field's extent rather than its exact outline - confirm.
    cropped_ok <- tryCatch({
      field_rast <- crop(source_rast, field_geom)
      writeRaster(field_rast, output_path, overwrite = TRUE)
      TRUE
    }, error = function(e) {
      smartcane_log(paste("ERROR cropping field", field_name, ":", e$message))
      # The output did not exist before this attempt, so anything at
      # output_path now is a partial write. Remove it; otherwise the
      # existence check above would skip this field/date on every later run.
      if (file.exists(output_path)) {
        unlink(output_path)
      }
      FALSE
    })

    if (cropped_ok) {
      created <- created + 1
    } else {
      errors <- errors + 1
    }
  }

  list(created = created, skipped = skipped, errors = errors)
}
#' Split every dated TIFF in merged_tif/ into per-field tiles
#'
#' Scans the top level of `merged_tif_dir` for files named `YYYY-MM-DD.tif` and
#' passes each one to crop_tiff_to_fields(), writing under `field_tiles_dir`.
#'
#' Migration mode: when `field_tiles_ci_dir` is given and exists, a date is
#' skipped entirely if any immediate sub-folder of that directory already
#' contains `{DATE}.tif`.
#'
#' @param merged_tif_dir Directory holding the downloaded full-farm TIFFs.
#' @param field_tiles_dir Output root for the per-field tiles (created if
#'   missing).
#' @param fields Field boundaries, forwarded to crop_tiff_to_fields().
#' @param field_tiles_ci_dir Optional directory of already-migrated per-field
#'   tiles; `NULL` disables the migration check.
#' @return A list with numeric totals `total_created`, `total_skipped` and
#'   `total_errors`. Note that a date skipped by the migration check adds 1 to
#'   `total_skipped`, whereas skips reported by crop_tiff_to_fields() are
#'   counted per field.
process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir = NULL) {
  smartcane_log("\n========================================")
  smartcane_log("PHASE 2: PROCESSING NEW DOWNLOADS")
  smartcane_log("========================================")

  # Without a download directory there is nothing to do.
  if (!dir.exists(merged_tif_dir)) {
    smartcane_log("No merged_tif/ directory found - no new data to process")
    return(list(total_created = 0, total_skipped = 0, total_errors = 0))
  }

  if (!dir.exists(field_tiles_dir)) {
    dir.create(field_tiles_dir, recursive = TRUE, showWarnings = FALSE)
  }

  # Only date-named TIFFs directly inside merged_tif/ are considered.
  tiff_files <- list.files(
    merged_tif_dir,
    pattern = "^[0-9]{4}-[0-9]{2}-[0-9]{2}\\.tif$",
    full.names = TRUE
  )
  smartcane_log(paste("Found", length(tiff_files), "TIFF(s) to process"))
  if (length(tiff_files) == 0) {
    smartcane_log("No new TIFFs found - nothing to process")
    return(list(total_created = 0, total_skipped = 0, total_errors = 0))
  }

  # The migrated-tiles directory is not written to by this function, so its
  # list of field folders is computed once instead of once per TIFF.
  check_migrated <- !is.null(field_tiles_ci_dir) && dir.exists(field_tiles_ci_dir)
  migrated_field_dirs <- if (check_migrated) {
    list.dirs(field_tiles_ci_dir, full.names = TRUE, recursive = FALSE)
  } else {
    character(0)
  }

  total_created <- 0
  total_skipped <- 0
  total_errors <- 0

  for (tif_path in tiff_files) {
    tif_date <- sub("\\.tif$", "", basename(tif_path))

    # Migration check: the date counts as migrated when at least one field
    # folder already holds {DATE}.tif. With no field folders the candidate
    # vector is empty and any() yields FALSE.
    if (check_migrated) {
      candidate_files <- file.path(migrated_field_dirs, paste0(tif_date, ".tif"))
      if (any(file.exists(candidate_files))) {
        smartcane_log(paste("Skipping:", tif_date, "(already migrated and processed by Script 20)"))
        total_skipped <- total_skipped + 1
        next
      }
    }

    smartcane_log(paste("Processing:", tif_date))
    result <- crop_tiff_to_fields(tif_path, tif_date, fields, field_tiles_dir)
    total_created <- total_created + result$created
    total_skipped <- total_skipped + result$skipped
    total_errors <- total_errors + result$errors
  }

  smartcane_log(paste("Processing complete: created =", total_created,
                      ", skipped =", total_skipped, ", errors =", total_errors))

  list(
    total_created = total_created,
    total_skipped = total_skipped,
    total_errors = total_errors
  )
}
# ============================================================================
# ==============================================================================
# MAIN EXECUTION
# ==============================================================================
smartcane_log("========================================")
smartcane_log(paste("Script 10: Per-Field TIFF Creation for", PROJECT))
smartcane_log("========================================")

# Field boundaries are read from the centralized path; no directory creation is
# done here (the original note says setup_project_directories() handles that).
fields <- load_field_boundaries(paths$field_boundaries_path)

# Input and output locations, all taken from the centralized path list.
merged_tif_dir <- paths$merged_tif_folder
field_tiles_dir <- paths$field_tiles_dir
field_tiles_ci_dir <- paths$field_tiles_ci_dir

# PHASE 2: split new downloads into per-field tiles (runs unconditionally).
# field_tiles_ci_dir is passed so dates that were already migrated are skipped.
process_result <- process_new_merged_tif(
  merged_tif_dir = merged_tif_dir,
  field_tiles_dir = field_tiles_dir,
  fields = fields,
  field_tiles_ci_dir = field_tiles_ci_dir
)

# Closing summary of what this run did.
smartcane_log("\n========================================")
smartcane_log("FINAL SUMMARY")
smartcane_log("========================================")
smartcane_log(with(
  process_result,
  paste("Processing: created =", total_created,
        ", skipped =", total_skipped,
        ", errors =", total_errors)
))
smartcane_log("Script 10 complete")
smartcane_log("========================================\n")