# File: SmartCane/r_app/10_create_per_field_tiffs.R
#
# (listing metadata: 158 lines, 6.5 KiB, R)

# ============================================================================
# SCRIPT 10: Create Per-Field TIFFs (Data Organization & Splitting)
# ============================================================================
# PURPOSE:
# Split full-farm satellite TIFFs into per-field file structure. Supports
# two phases: legacy data migration and ongoing new downloads. Transforms
# single large-file architecture into per-field directory structure for
# clean aggregation in downstream scripts (Script 20, 40, 80/90).
#
# INPUT DATA:
# - Source: laravel_app/storage/app/{project}/merged_tif/ or merged_final_tif/
# - Format: GeoTIFF (4-band RGB+NIR or 5-band with CI)
# - Naming: {YYYY-MM-DD}.tif (full farm mosaic)
#
# OUTPUT DATA:
# - Destination: laravel_app/storage/app/{project}/field_tiles/
# - Format: GeoTIFF (4-band RGB+NIR, same as input)
# - Structure: field_tiles/{FIELD}/{YYYY-MM-DD}.tif
# - Naming: Per-field GeoTIFFs organized by field and date
#
# USAGE:
# Rscript 10_create_per_field_tiffs.R [project]
#
# Example (Windows PowerShell):
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
#
# PARAMETERS:
# - project: Project name (character) - angata, chemba, xinavane, esa, simba.
#   Optional; defaults to "angata" when no argument is supplied.
#
# CLIENT TYPES:
# - cane_supply (ANGATA): Yes - primary data organization script
# - agronomic_support (AURA): Yes - supports field-level analysis
#
# DEPENDENCIES:
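# - Packages: terra, sf, here (loaded by this script); tidyverse is not loaded
#   here and is presumably required by the sourced utility files (verify)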
# - Utils files: parameters_project.R, 00_common_utils.R, 10_create_per_field_tiffs_utils.R
# - External data: Field boundaries (pivot.geojson)
# - Data directories: merged_tif/, field_tiles/ (created if missing)
#
# NOTES:
# - Supports two-phase migration: legacy (merged_final_tif) and ongoing (merged_tif)
# - Automatically detects and handles field boundaries from pivot.geojson
# - Geometry validation and repair applied via st_make_valid()
# - Critical for downstream Scripts 20, 40, and KPI calculations
# - Creates per-field structure that enables efficient per-field processing
#
# RELATED ISSUES:
# SC-111: Script 10 refactoring and geometry repair
# SC-112: Utilities restructuring (uses 00_common_utils.R)
#
# ============================================================================
# Spatial data handling and project-relative path resolution
suppressPackageStartupMessages({
library(terra) # For raster operations (reading/writing GeoTIFFs, cropping to field boundaries)
library(sf) # For spatial operations (reading field boundaries GeoJSON, masking)
library(here) # For relative path resolution
})
# ==============================================================================
# MAIN PROCESSING FUNCTION
# ==============================================================================
# main(): entry point for Script 10.
#
# Reads the project name from the first trailing command-line argument
# (default "angata"), sources the project parameter file and the Script 10
# utility file, resolves the processing date window, loads the field
# boundaries and passes everything to process_new_merged_tif(). A summary of
# created / skipped / errored items is logged at the end.
#
# Arguments: none.
# Returns:   does not return; the R session is ended with quit(status = 0).
# Side effects:
#   - may move the working directory up one level (when started in r_app/)
#   - assigns `project_dir` in the global environment
#   - sources two files (source() evaluates them in the global environment)
#   - terminates the R session
main <- function() {
  # STEP 1: Set working directory to project root (smartcane/)
  # All paths below ("r_app/...") are relative to the project root.
  if (basename(getwd()) == "r_app") {
    setwd("..")
  }

  # STEP 2: Parse command-line arguments FIRST (needed by parameters_project.R)
  args <- commandArgs(trailingOnly = TRUE)
  project_dir <- if (length(args) == 0) "angata" else args[1]
  # Sourced files execute in the global environment, so the project name has
  # to be published there before they are loaded.
  assign("project_dir", project_dir, envir = .GlobalEnv)

  # STEP 3: Source utility scripts (now that project_dir is defined).
  # Each failure is reported on stdout and then re-raised so the run aborts.
  # parameters_project.R is expected to provide safe_log(),
  # setup_project_directories(), etc.
  tryCatch({
    source("r_app/parameters_project.R")
  }, error = function(e) {
    cat(sprintf("Error loading parameters_project.R: %s\n", e$message))
    stop(e)
  })
  # Script 10-specific utilities
  tryCatch({
    source("r_app/10_create_per_field_tiffs_utils.R")
  }, error = function(e) {
    cat(sprintf("Error loading 10_create_per_field_tiffs_utils.R: %s\n", e$message))
    stop(e)
  })

  # STEP 4: Resolve date parameters. A pipeline runner may pre-define
  # `end_date` / `offset` (e.g. via assign()); otherwise defaults are used.
  # Window processed: (end_date - offset days) .. end_date, inclusive.
  if (!exists("end_date") || !inherits(end_date, "Date")) {
    # NOTE(review): the default is a fixed calendar date, not the run date -
    # confirm this is intentional.
    end_date <- as.Date("2026-02-04")
    safe_log(paste("Using default end_date:", end_date), "INFO")
  }

  # A character offset (e.g. inherited from an environment variable) must be
  # converted BEFORE the numeric validation; validating first would discard it
  # and silently fall back to the default. Unparseable strings become NA and
  # are rejected by the validation below.
  if (exists("offset") && is.character(offset)) {
    offset <- suppressWarnings(as.numeric(offset))
  }
  # When nothing overrides it, the name `offset` resolves to the function
  # stats::offset, so exists() is normally TRUE and is.numeric() is what
  # triggers the default. NA and non-scalar values are rejected as well
  # because they cannot produce a valid date sequence.
  if (!exists("offset") || !is.numeric(offset) ||
      length(offset) != 1L || is.na(offset)) {
    offset <- 7
    safe_log(paste("Using default offset:", offset, "days"), "INFO")
  }

  # Calculate date window for processing
  start_date <- end_date - offset
  date_window <- seq(start_date, end_date, by = "day")
  date_window_str <- format(date_window, "%Y-%m-%d")
  safe_log(paste("Processing dates from", start_date, "to", end_date, sprintf("(%d dates)", length(date_window_str))), "INFO")

  # STEP 5: Load centralized path structure
  paths <- setup_project_directories(project_dir)
  safe_log(paste("Project:", project_dir))
  safe_log(paste("Base path:", paths$laravel_storage_dir))
  safe_log(paste("Data dir:", paths$data_dir))

  # Field boundaries are loaded from data_dir (not a file path); the helper is
  # given the directory and returns a list whose field_boundaries_sf element
  # is used here.
  fields_data <- load_field_boundaries(paths$data_dir)
  fields <- fields_data$field_boundaries_sf

  # Input and output directories (from centralized paths)
  merged_tif_dir <- paths$merged_tif_folder
  field_tiles_dir <- paths$field_tiles_dir
  field_tiles_ci_dir <- paths$field_tiles_ci_dir

  # PHASE 1: Process new downloads (always runs)
  # field_tiles_ci_dir is passed so already-migrated dates can be skipped;
  # end_date and offset restrict processing to the date window.
  process_result <- process_new_merged_tif(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir,
                                           end_date = end_date, offset = offset)

  safe_log("\n========================================", "INFO")
  safe_log("FINAL SUMMARY", "INFO")
  safe_log("========================================", "INFO")
  safe_log(paste("Processing: created =", process_result$total_created,
                 ", skipped =", process_result$total_skipped,
                 ", errors =", process_result$total_errors), "INFO")
  safe_log("Script 10 complete", "INFO")
  safe_log("========================================\n", "INFO")

  # Exit status is 0 regardless of process_result$total_errors.
  quit(status = 0)
}
# Entry-point guard: sys.nframe() is 0 only when this file is evaluated at the
# top level (e.g. run directly with Rscript), so main() is not triggered when
# the code is evaluated from inside another function call.
if (sys.nframe() == 0L) {
  main()
}