158 lines
6.5 KiB
R
158 lines
6.5 KiB
R
# ============================================================================
# SCRIPT 10: Create Per-Field TIFFs (Data Organization & Splitting)
# ============================================================================
# PURPOSE:
#   Split full-farm satellite TIFFs into a per-field file structure. Supports
#   two phases: legacy data migration and ongoing new downloads. Transforms
#   the single large-file architecture into a per-field directory structure
#   for clean aggregation in downstream scripts (Script 20, 40, 80/90).
#
# INPUT DATA:
#   - Source: laravel_app/storage/app/{project}/merged_tif/ or merged_final_tif/
#   - Format: GeoTIFF (4-band RGB+NIR or 5-band with CI)
#   - Naming: {YYYY-MM-DD}.tif (full farm mosaic)
#
# OUTPUT DATA:
#   - Destination: laravel_app/storage/app/{project}/field_tiles/
#   - Format: GeoTIFF (4-band RGB+NIR, same as input)
#   - Structure: field_tiles/{FIELD}/{YYYY-MM-DD}.tif
#   - Naming: Per-field GeoTIFFs organized by field and date
#
# USAGE:
#   Rscript 10_create_per_field_tiffs.R [project]
#
#   Example (Windows PowerShell):
#   & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/10_create_per_field_tiffs.R angata
#
# PARAMETERS:
#   - project: Project name (character) - angata, chemba, xinavane, esa, simba
#
# CLIENT TYPES:
#   - cane_supply (ANGATA): Yes - primary data organization script
#   - agronomic_support (AURA): Yes - supports field-level analysis
#
# DEPENDENCIES:
#   - Packages: terra, sf, here (attached by this script); tidyverse (not
#     attached here - presumably required by the sourced utilities; verify)
#   - Utils files: parameters_project.R, 00_common_utils.R, 10_create_per_field_tiffs_utils.R
#   - External data: Field boundaries (pivot.geojson)
#   - Data directories: merged_tif/, field_tiles/ (created if missing)
#
# NOTES:
#   - Supports two-phase migration: legacy (merged_final_tif) and ongoing (merged_tif)
#   - Automatically detects and handles field boundaries from pivot.geojson
#   - Geometry validation and repair applied via st_make_valid()
#   - Critical for downstream Scripts 20, 40, and KPI calculations
#   - Creates per-field structure that enables efficient per-field processing
#
# RELATED ISSUES:
#   SC-111: Script 10 refactoring and geometry repair
#   SC-112: Utilities restructuring (uses 00_common_utils.R)
#
# ============================================================================
|
|
|
|
# Spatial data handling
|
|
suppressPackageStartupMessages({
|
|
|
|
library(terra) # For raster operations (reading/writing GeoTIFFs, cropping to field boundaries)
|
|
library(sf) # For spatial operations (reading field boundaries GeoJSON, masking)
|
|
library(here) # For relative path resolution
|
|
})
|
|
# ==============================================================================
|
|
# MAIN PROCESSING FUNCTION
|
|
# ==============================================================================
|
|
|
|
# Entry point for Script 10.
#
# Resolves the project from the first command-line argument (default
# "angata"), sources the project parameters and the Script 10 utilities,
# derives the processing window [end_date - offset, end_date], loads the field
# boundaries and delegates the actual splitting to process_new_merged_tif().
# A summary is logged and the R session is then terminated with status 0.
#
# `end_date` and `offset` are looked up through the enclosing environments, so
# a pipeline runner may pre-set them (e.g. with assign() into the global
# environment). Values of the wrong type fall back to the defaults below;
# a character `offset` such as "14" is converted to a number first.
#
# Returns: nothing - the function ends the session via quit(status = 0).
main <- function() {
  # STEP 1: Set working directory to project root (smartcane/)
  # The source() calls below use paths relative to the project root, so step
  # up one level when the script was launched from inside r_app/.
  if (basename(getwd()) == "r_app") {
    setwd("..")
  }

  # STEP 2: Parse command-line arguments FIRST (needed by parameters_project.R)
  args <- commandArgs(trailingOnly = TRUE)
  project_dir <- if (length(args) == 0) "angata" else args[1]

  # Make project_dir available to sourced files (they execute in global scope)
  assign("project_dir", project_dir, envir = .GlobalEnv)

  # STEP 3: SOURCE ALL UTILITY SCRIPTS (now that project_dir is defined)
  # Load parameters_project.R (provides safe_log, setup_project_directories, etc.)
  tryCatch({
    source("r_app/parameters_project.R")
  }, error = function(e) {
    cat(sprintf("Error loading parameters_project.R: %s\n", e$message))
    stop(e)
  })

  # Load Script 10-specific utilities
  tryCatch({
    source("r_app/10_create_per_field_tiffs_utils.R")
  }, error = function(e) {
    cat(sprintf("Error loading 10_create_per_field_tiffs_utils.R: %s\n", e$message))
    stop(e)
  })

  # STEP 4: Set default date parameters (can be overridden by pipeline runner via assign())
  # These control which dates Script 10 processes from merged_tif/
  # Window: end_date - offset days to end_date
  if (!exists("end_date") || !inherits(end_date, "Date")) {
    end_date <- as.Date("2026-02-04")
    safe_log(paste("Using default end_date:", end_date), "INFO")
  }

  # Resolve `offset` in one pass. Note that a plain exists("offset") is always
  # TRUE because stats::offset() is on the search path, so the value itself
  # must be inspected. Character input is coerced BEFORE the numeric check;
  # previously the numeric check ran first and replaced a string offset with
  # the default, leaving the coercion unreachable.
  offset_candidate <- get0("offset", ifnotfound = NULL)
  if (is.character(offset_candidate)) {
    # Non-numeric text becomes NA here and is rejected just below.
    offset_candidate <- suppressWarnings(as.numeric(offset_candidate))
  }
  offset_is_usable <- is.numeric(offset_candidate) &&
    length(offset_candidate) == 1 &&
    !is.na(offset_candidate)
  if (offset_is_usable) {
    offset <- offset_candidate
  } else {
    offset <- 7
    safe_log(paste("Using default offset:", offset, "days"), "INFO")
  }

  # Calculate date window for processing
  start_date <- end_date - offset
  date_window <- seq(start_date, end_date, by = "day")
  date_window_str <- format(date_window, "%Y-%m-%d")
  safe_log(paste("Processing dates from", start_date, "to", end_date, sprintf("(%d dates)", length(date_window_str))), "INFO")

  # STEP 5: Load centralized path structure (creates all directories automatically)
  paths <- setup_project_directories(project_dir)

  safe_log(paste("Project:", project_dir))
  safe_log(paste("Base path:", paths$laravel_storage_dir))
  safe_log(paste("Data dir:", paths$data_dir))

  # Load field boundaries using data_dir (not field_boundaries_path)
  # load_field_boundaries() expects a directory and builds the file path internally
  fields_data <- load_field_boundaries(paths$data_dir)
  fields <- fields_data$field_boundaries_sf

  # Define input and output directories (from centralized paths)
  merged_tif_dir <- paths$merged_tif_folder
  field_tiles_dir <- paths$field_tiles_dir
  field_tiles_ci_dir <- paths$field_tiles_ci_dir

  # PHASE 1: Process new downloads (always runs)
  # Pass field_tiles_ci_dir so it can skip dates already migrated
  # Also pass end_date and offset so only dates in window are processed
  process_result <- process_new_merged_tif(
    merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir,
    end_date = end_date, offset = offset
  )

  safe_log("\n========================================", "INFO")
  safe_log("FINAL SUMMARY", "INFO")
  safe_log("========================================", "INFO")
  safe_log(paste("Processing: created =", process_result$total_created,
                 ", skipped =", process_result$total_skipped,
                 ", errors =", process_result$total_errors), "INFO")
  safe_log("Script 10 complete", "INFO")
  safe_log("========================================\n", "INFO")

  # NOTE(review): the exit status is 0 even when total_errors > 0; kept as-is
  # because callers may rely on it - confirm with the pipeline runner.
  quit(status = 0)
}
|
|
|
|
# Script entry point: call main() only when no function frame is active,
# which is the case when this file is executed directly (e.g. via Rscript).
# When the file is loaded through a function call such as source(), the frame
# count is non-zero and main() is not invoked.
if (sys.nframe() == 0L) {
  main()
}
|