SmartCane/r_app/20_ci_extraction_per_field.R
2026-01-29 21:10:24 +01:00

241 lines
9.2 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# CI_EXTRACTION_PER_FIELD.R
# =========================
# Script 20 (Refactored for Per-Field Architecture)
#
# This script reads per-field TIFFs from Script 10 output and:
# 1. Calculates Canopy Index (CI) from 4-band imagery (RGB + NIR)
# 2. Outputs 5-band TIFFs with CI as the 5th band to field_tiles_CI/{FIELD}/{DATE}.tif
# 3. Outputs per-field per-date RDS files to daily_vals/{FIELD}/{DATE}.rds
#
# Key differences from legacy Script 20:
# - Input: field_tiles/{FIELD}/{DATE}.tif (4-band, from Script 10)
# - Output: field_tiles_CI/{FIELD}/{DATE}.tif (5-band with CI)
# - Output: daily_vals/{FIELD}/{DATE}.rds (per-field CI statistics)
# - Directly extracts CI statistics per sub_field within each field
#
# Usage:
# Rscript 20_ci_extraction_per_field.R [project_dir] [end_date] [offset]
# Example: Rscript 20_ci_extraction_per_field.R angata 2026-01-02 7
suppressPackageStartupMessages({
library(sf)
library(terra)
library(tidyverse)
library(lubridate)
library(here)
})
# =============================================================================
# Load utility functions from 20_ci_extraction_utils.R
# =============================================================================
source("r_app/20_ci_extraction_utils.R")
# =============================================================================
# Main Processing
# =============================================================================
main <- function() {
# IMPORTANT: Set working directory to project root (smartcane/)
# This ensures here() functions resolve relative to /smartcane, not /smartcane/r_app
if (basename(getwd()) == "r_app") {
setwd("..")
}
# Parse command-line arguments
args <- commandArgs(trailingOnly = TRUE)
project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
end_date <- if (length(args) >= 2 && args[2] != "") as.Date(args[2]) else Sys.Date()
offset <- if (length(args) >= 3 && !is.na(as.numeric(args[3]))) as.numeric(args[3]) else 7
# IMPORTANT: Make project_dir available globally for parameters_project.R
assign("project_dir", project_dir, envir = .GlobalEnv)
safe_log(sprintf("=== Script 20: CI Extraction Per-Field ==="))
safe_log(sprintf("Project: %s | End Date: %s | Offset: %d days",
project_dir, format(end_date, "%Y-%m-%d"), offset))
# 1. Load parameters (includes field boundaries setup)
# ---------------------------------------------------
tryCatch({
source("r_app/parameters_project.R")
safe_log("Loaded parameters_project.R")
}, error = function(e) {
safe_log(sprintf("Error loading parameters: %s", e$message), "ERROR")
stop(e)
})
# 2. Set up directory paths from parameters FIRST (before using setup$...)
# -----------------------------------------------------------------------
setup <- setup_project_directories(project_dir)
# 3. Load field boundaries directly from field_boundaries_path in setup
# ------------------------------------------------------------------
tryCatch({
field_boundaries_sf <- st_read(setup$field_boundaries_path, quiet = TRUE)
safe_log(sprintf("Loaded %d field/sub_field polygons from %s", nrow(field_boundaries_sf), setup$field_boundaries_path))
}, error = function(e) {
safe_log(sprintf("Error loading field boundaries from %s: %s", setup$field_boundaries_path, e$message), "ERROR")
stop(e)
})
# 4. Get list of dates to process
dates <- date_list(end_date, offset)
safe_log(sprintf("Processing dates: %s to %s (%d dates)",
dates$start_date, dates$end_date, length(dates$days_filter)))
safe_log(sprintf("Input directory: %s", setup$field_tiles_dir))
safe_log(sprintf("Output TIF directory: %s", setup$field_tiles_ci_dir))
safe_log(sprintf("Output RDS directory: %s", setup$daily_vals_per_field_dir))
# 5. Process each field
# ----------------------
if (!dir.exists(setup$field_tiles_dir)) {
safe_log(sprintf("Field tiles directory not found: %s", setup$field_tiles_dir), "ERROR")
stop("Script 10 output not found. Run Script 10 first.")
}
fields <- list.dirs(setup$field_tiles_dir, full.names = FALSE, recursive = FALSE)
fields <- fields[fields != ""] # Remove empty strings
if (length(fields) == 0) {
safe_log("No fields found in field_tiles directory", "WARNING")
return()
}
safe_log(sprintf("Found %d fields to process", length(fields)))
# 6. Process each field
# ----------------------
total_success <- 0
total_error <- 0
ci_results_by_date <- list()
for (field in fields) {
safe_log(sprintf("\n--- Processing field: %s ---", field))
field_tiles_path <- file.path(field_tiles_dir, field)
field_ci_path <- file.path(field_tiles_ci_dir, field)
field_daily_vals_path <- file.path(setup$daily_vals_per_field_dir, field)
# Create output subdirectories for this field
dir.create(field_ci_path, showWarnings = FALSE, recursive = TRUE)
dir.create(field_daily_vals_path, showWarnings = FALSE, recursive = TRUE)
# 5a. Process each date for this field
# -----------------------------------
for (date_str in dates$days_filter) {
input_tif <- file.path(field_tiles_path, sprintf("%s.tif", date_str))
output_tif <- file.path(field_ci_path, sprintf("%s.tif", date_str))
output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str))
# Skip if both outputs already exist
if (file.exists(output_tif) && file.exists(output_rds)) {
safe_log(sprintf(" %s: Already processed (skipping)", date_str))
next
}
# Check if input TIFF exists
if (!file.exists(input_tif)) {
safe_log(sprintf(" %s: Input TIFF not found (skipping)", date_str))
next
}
tryCatch({
# Load 4-band TIFF
raster_4band <- terra::rast(input_tif)
# Calculate CI
ci_raster <- calc_ci_from_raster(raster_4band)
# Create 5-band TIFF (R, G, B, NIR, CI)
five_band <- c(raster_4band, ci_raster)
# Save 5-band TIFF
terra::writeRaster(five_band, output_tif, overwrite = TRUE)
# Extract CI statistics by sub_field
ci_stats <- extract_ci_by_subfield(ci_raster, field_boundaries_sf, field)
# Save RDS
if (!is.null(ci_stats) && nrow(ci_stats) > 0) {
saveRDS(ci_stats, output_rds)
safe_log(sprintf(" %s: ✓ Processed (%d sub-fields)", date_str, nrow(ci_stats)))
# Store for daily aggregation
ci_stats_with_date <- ci_stats %>% mutate(date = date_str)
key <- sprintf("%s_%s", field, date_str)
ci_results_by_date[[key]] <- ci_stats_with_date
} else {
safe_log(sprintf(" %s: ⚠ No CI data extracted", date_str))
}
total_success <- total_success + 1
}, error = function(e) {
safe_log(sprintf(" %s: ✗ Error - %s", date_str, e$message), "ERROR")
total_error <<- total_error + 1
})
}
}
# 7. Summary
# ----------
safe_log(sprintf("\n=== Processing Complete ==="))
safe_log(sprintf("Successfully processed: %d", total_success))
safe_log(sprintf("Errors encountered: %d", total_error))
if (total_success > 0) {
safe_log("Output files created in:")
safe_log(sprintf(" TIFFs: %s", setup$field_tiles_ci_dir))
safe_log(sprintf(" RDS: %s", setup$daily_vals_per_field_dir))
}
# 8. Aggregate per-field daily RDS files into combined_CI_data.rds
# ----------------------------------------------------------------
# This creates the wide-format (fields × dates) file that Script 30 and
# other downstream scripts expect for backward compatibility
safe_log("\n=== Aggregating Per-Field Daily RDS into combined_CI_data.rds ===")
tryCatch({
# Find all daily RDS files (recursively from daily_vals/{FIELD}/{DATE}.rds)
all_daily_files <- list.files(
path = setup$daily_vals_per_field_dir,
pattern = "\\.rds$",
full.names = TRUE,
recursive = TRUE
)
if (length(all_daily_files) == 0) {
safe_log("No daily RDS files found to aggregate", "WARNING")
} else {
safe_log(sprintf("Aggregating %d daily RDS files into combined_CI_data.rds", length(all_daily_files)))
# Read and combine all daily RDS files
combined_data <- all_daily_files %>%
purrr::map(readRDS) %>%
purrr::list_rbind() %>%
dplyr::group_by(sub_field)
# Create output directory if needed
dir.create(setup$cumulative_CI_vals_dir, showWarnings = FALSE, recursive = TRUE)
# Save combined data
combined_ci_path <- file.path(setup$cumulative_CI_vals_dir, "combined_CI_data.rds")
saveRDS(combined_data, combined_ci_path)
safe_log(sprintf("✓ Created combined_CI_data.rds with %d rows from %d files",
nrow(combined_data), length(all_daily_files)))
safe_log(sprintf(" Location: %s", combined_ci_path))
}
}, error = function(e) {
safe_log(sprintf("⚠ Error aggregating to combined_CI_data.rds: %s", e$message), "WARNING")
safe_log(" This is OK - Script 30 can still use per-field RDS files directly", "WARNING")
})
}
# Execute main if called from command line
if (sys.nframe() == 0) {
main()
}