SmartCane/r_app/20_ci_extraction_per_field.R
Timon 4cd62ab82e Enhance report utility functions and add validation scripts
- Updated `create_CI_map` and `create_CI_diff_map` functions to enforce a 1:1 aspect ratio for consistent map sizing.
- Modified `ci_plot` function to adjust widths of arranged maps for better layout.
- Changed raster merging method in `aggregate_per_field_mosaics_to_farm_level` from `mosaic` to `merge` for improved handling of field data.
- Introduced `test_kpi_validation.R` script to validate the structure of KPI RDS files, ensuring expected KPIs are present.
- Added `test_overview_maps_aggregation.R` script to test the aggregation pipeline for overview maps, including loading field mosaics, creating a farm-level mosaic, and generating visualizations.
2026-02-11 14:32:36 +01:00

241 lines
9.4 KiB
R

# CI_EXTRACTION_PER_FIELD.R
# =========================
# Script 20 (Refactored for Per-Field Architecture)
#
# This script reads per-field TIFFs from Script 10 output and:
# 1. Calculates Canopy Index (CI) from 4-band imagery (RGB + NIR)
# 2. Outputs 5-band TIFFs with CI as the 5th band to field_tiles_CI/{FIELD}/{DATE}.tif
# 3. Outputs per-field per-date RDS files to daily_vals/{FIELD}/{DATE}.rds
#
# Key differences from legacy Script 20:
# - Input: field_tiles/{FIELD}/{DATE}.tif (4-band, from Script 10)
# - Output: field_tiles_CI/{FIELD}/{DATE}.tif (5-band with CI)
# - Output: daily_vals/{FIELD}/{DATE}.rds (per-field CI statistics)
# - Directly extracts CI statistics per sub_field within each field
#
# Usage:
# Rscript 20_ci_extraction_per_field.R [project_dir] [end_date] [offset]
# Example: Rscript 20_ci_extraction_per_field.R angata 2026-01-02 7
suppressPackageStartupMessages({
library(sf)
library(terra)
library(tidyverse)
library(lubridate)
library(here)
})
# =============================================================================
# Main Processing
# =============================================================================
main <- function() {
# STEP 1: Set working directory to project root (smartcane/)
# This ensures all relative paths resolve correctly
if (basename(getwd()) == "r_app") {
setwd("..")
}
# STEP 2: SOURCE ALL UTILITY SCRIPTS (before any operations)
# Parse command-line arguments FIRST
args <- commandArgs(trailingOnly = TRUE)
project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
end_date <- if (length(args) >= 2 && args[2] != "") as.Date(args[2]) else Sys.Date()
offset <- if (length(args) >= 3 && !is.na(as.numeric(args[3]))) as.numeric(args[3]) else 7
# Make project_dir available globally for parameters_project.R
assign("project_dir", project_dir, envir = .GlobalEnv)
# Load parameters_project.R (provides safe_log, date_list, setup_project_directories, etc.)
tryCatch({
source("r_app/parameters_project.R")
}, error = function(e) {
cat(sprintf("Error loading parameters_project.R: %s\n", e$message))
stop(e)
})
# Load CI extraction utilities
tryCatch({
source("r_app/20_ci_extraction_utils.R")
}, error = function(e) {
cat(sprintf("Error loading 20_ci_extraction_utils.R: %s\n", e$message))
stop(e)
})
# STEP 3: Now all utilities are loaded, proceed with script logic
safe_log(sprintf("=== Script 20: CI Extraction Per-Field ==="))
safe_log(sprintf("Project: %s | End Date: %s | Offset: %d days",
project_dir, format(end_date, "%Y-%m-%d"), offset))
# Set up directory paths from parameters
setup <- setup_project_directories(project_dir)
# Load field boundaries directly from field_boundaries_path in setup
tryCatch({
field_boundaries_sf <- st_read(setup$field_boundaries_path, quiet = TRUE)
safe_log(sprintf("Loaded %d field/sub_field polygons from %s", nrow(field_boundaries_sf), setup$field_boundaries_path))
}, error = function(e) {
safe_log(sprintf("Error loading field boundaries from %s: %s", setup$field_boundaries_path, e$message), "ERROR")
stop(e)
})
# Get list of dates to process
# If in migration mode, dates_to_process is provided by the pipeline runner
if (exists("dates_to_process") && !is.null(dates_to_process)) {
# Migration mode: Use provided list of dates (process ALL available dates)
dates_filter <- sort(dates_to_process)
safe_log(sprintf("Migration mode: Processing %d specified dates", length(dates_filter)))
} else {
# Normal mode: Use 7-day offset window
dates <- date_list(end_date, offset)
dates_filter <- dates$days_filter
safe_log(sprintf("Normal mode: Processing dates: %s to %s (%d dates)",
dates$start_date, dates$end_date, length(dates_filter)))
}
safe_log(sprintf("Input directory: %s", setup$field_tiles_dir))
safe_log(sprintf("Output TIF directory: %s", setup$field_tiles_ci_dir))
safe_log(sprintf("Output RDS directory: %s", setup$daily_ci_vals_dir))
# Process each field
if (!dir.exists(setup$field_tiles_dir)) {
safe_log(sprintf("Field tiles directory not found: %s", setup$field_tiles_dir), "ERROR")
stop("Script 10 output not found. Run Script 10 first.")
}
fields <- list.dirs(setup$field_tiles_dir, full.names = FALSE, recursive = FALSE)
fields <- fields[fields != ""] # Remove empty strings
if (length(fields) == 0) {
safe_log("No fields found in field_tiles directory", "WARNING")
return()
}
safe_log(sprintf("Found %d fields to process", length(fields)))
# DEBUG: Check what paths are available in setup
safe_log(sprintf("[DEBUG] Available setup paths: %s", paste(names(setup), collapse=", ")))
safe_log(sprintf("[DEBUG] field_tiles_ci_dir: %s", setup$field_tiles_ci_dir))
safe_log(sprintf("[DEBUG] daily_ci_vals_dir: %s", setup$daily_ci_vals_dir))
# Use daily_ci_vals_dir for per-field daily CI output
# Pre-create output subdirectories for all fields
for (field in fields) {
dir.create(file.path(setup$field_tiles_ci_dir, field), showWarnings = FALSE, recursive = TRUE)
if (!is.null(setup$daily_ci_vals_dir)) {
dir.create(file.path(setup$daily_ci_vals_dir, field), showWarnings = FALSE, recursive = TRUE)
}
}
# Process each DATE (load merged TIFF once, extract all fields from it)
total_success <- 0
total_error <- 0
for (date_str in dates_filter) {
# Load the MERGED TIFF (farm-wide) ONCE for this date
input_tif_merged <- file.path(setup$merged_tif_folder, sprintf("%s.tif", date_str))
if (!file.exists(input_tif_merged)) {
safe_log(sprintf(" %s: merged_tif not found (skipping)", date_str))
total_error <<- total_error + 1
next
}
tryCatch({
# Load 4-band TIFF ONCE
raster_4band <- terra::rast(input_tif_merged)
safe_log(sprintf(" %s: Loaded merged TIFF, processing %d fields...", date_str, length(fields)))
# Calculate CI from 4-band
ci_raster <- calc_ci_from_raster(raster_4band)
# Create 5-band (R, G, B, NIR, CI)
# Explicitly set band names after combining to ensure proper naming
five_band <- c(raster_4band, ci_raster)
names(five_band) <- c("Red", "Green", "Blue", "NIR", "CI")
# Now process all fields from this single merged TIFF
fields_processed_this_date <- 0
for (field in fields) {
field_ci_path <- file.path(setup$field_tiles_ci_dir, field)
field_daily_vals_path <- file.path(setup$daily_ci_vals_dir, field)
# Pre-create output directories
dir.create(field_ci_path, showWarnings = FALSE, recursive = TRUE)
dir.create(field_daily_vals_path, showWarnings = FALSE, recursive = TRUE)
output_tif <- file.path(field_ci_path, sprintf("%s.tif", date_str))
output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str))
# MODE 3: Skip if both outputs already exist
if (file.exists(output_tif) && file.exists(output_rds)) {
next
}
# MODE 2: Regeneration mode - RDS missing but CI TIFF exists
if (file.exists(output_tif) && !file.exists(output_rds)) {
tryCatch({
extract_rds_from_ci_tiff(output_tif, output_rds, field_boundaries_sf, field)
fields_processed_this_date <- fields_processed_this_date + 1
}, error = function(e) {
# Continue to next field
})
next
}
# MODE 1: Normal mode - crop 5-band TIFF to field boundary and save
tryCatch({
# Crop 5-band TIFF to field boundary
field_geom <- field_boundaries_sf %>% filter(field == !!field)
five_band_cropped <- terra::crop(five_band, field_geom, mask = TRUE)
# Save 5-band field TIFF
terra::writeRaster(five_band_cropped, output_tif, overwrite = TRUE)
# Extract CI statistics by sub_field (from cropped CI raster)
ci_cropped <- five_band_cropped[[5]] # 5th band is CI
ci_stats <- extract_ci_by_subfield(ci_cropped, field_boundaries_sf, field)
# Save RDS
if (!is.null(ci_stats) && nrow(ci_stats) > 0) {
saveRDS(ci_stats, output_rds)
}
fields_processed_this_date <- fields_processed_this_date + 1
}, error = function(e) {
# Error in individual field, continue to next
safe_log(sprintf(" Error processing field %s: %s", field, e$message), "WARNING")
})
}
# Increment success counter if at least one field succeeded
if (fields_processed_this_date > 0) {
total_success <<- total_success + 1
safe_log(sprintf(" %s: Processed %d fields", date_str, fields_processed_this_date))
}
}, error = function(e) {
total_error <<- total_error + 1
safe_log(sprintf(" %s: Error loading or processing merged TIFF - %s", date_str, e$message), "ERROR")
})
}
# Summary
safe_log(sprintf("\n=== Processing Complete ==="))
safe_log(sprintf("Successfully processed: %d", total_success))
safe_log(sprintf("Errors encountered: %d", total_error))
if (total_success > 0) {
safe_log("Output files created in:")
safe_log(sprintf(" TIFFs: %s", setup$field_tiles_ci_dir))
safe_log(sprintf(" RDS: %s", setup$daily_ci_vals_dir))
}
}
# Execute main if called from command line
if (sys.nframe() == 0) {
main()
}