- Implemented `weather_api_comparison.py` to compare daily precipitation from multiple weather APIs for Arnhem, Netherlands and Angata, Kenya. - Integrated fetching functions for various weather data sources including Open-Meteo, NASA POWER, OpenWeatherMap, and WeatherAPI.com. - Added plotting functions to visualize archive and forecast data, including cumulative precipitation and comparison against ERA5 reference. - Created `90_rainfall_utils.R` for R to fetch rainfall data and overlay it on CI plots, supporting multiple providers with a generic fetch wrapper. - Included spatial helpers for efficient API calls based on unique geographical tiles.
263 lines
10 KiB
R
263 lines
10 KiB
R
# CI_EXTRACTION_PER_FIELD.R
|
|
# =========================
|
|
# Script 20 (Refactored for Per-Field Architecture)
|
|
#
|
|
# This script reads per-field TIFFs from Script 10 output and:
|
|
# 1. Calculates Canopy Index (CI) from 4-band imagery (RGB + NIR)
|
|
# 2. Outputs 5-band TIFFs with CI as the 5th band to field_tiles_CI/{FIELD}/{DATE}.tif
|
|
# 3. Outputs per-field per-date RDS files to daily_vals/{FIELD}/{DATE}.rds
|
|
#
|
|
# Key differences from legacy Script 20:
|
|
# - Input: field_tiles/{FIELD}/{DATE}.tif (4-band, from Script 10)
|
|
# - Output: field_tiles_CI/{FIELD}/{DATE}.tif (5-band with CI)
|
|
# - Output: daily_vals/{FIELD}/{DATE}.rds (per-field CI statistics)
|
|
# - Directly extracts CI statistics per sub_field within each field
|
|
#
|
|
# Usage:
|
|
# Rscript 20_ci_extraction_per_field.R [project_dir] [end_date] [offset]
|
|
# Example: Rscript 20_ci_extraction_per_field.R angata 2026-01-02 7
|
|
|
|
suppressPackageStartupMessages({
|
|
library(sf)
|
|
library(terra)
|
|
library(tidyverse)
|
|
library(lubridate)
|
|
library(here)
|
|
})
|
|
|
|
# =============================================================================
|
|
# Main Processing
|
|
# =============================================================================
|
|
|
|
main <- function() {
|
|
# STEP 1: Set working directory to project root (smartcane/)
|
|
# This ensures all relative paths resolve correctly
|
|
if (basename(getwd()) == "r_app") {
|
|
setwd("..")
|
|
}
|
|
|
|
# STEP 2: SOURCE ALL UTILITY SCRIPTS (before any operations)
|
|
# Parse command-line arguments FIRST
|
|
args <- commandArgs(trailingOnly = TRUE)
|
|
project_dir <- if (length(args) >= 1 && args[1] != "") args[1] else "angata"
|
|
end_date <- if (length(args) >= 2 && args[2] != "") as.Date(args[2]) else Sys.Date()
|
|
offset <- if (length(args) >= 3 && !is.na(as.numeric(args[3]))) as.numeric(args[3]) else 7
|
|
|
|
# Make project_dir available globally for parameters_project.R
|
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
|
|
|
# Load parameters_project.R (provides safe_log, date_list, setup_project_directories, etc.)
|
|
tryCatch({
|
|
source("r_app/parameters_project.R")
|
|
}, error = function(e) {
|
|
cat(sprintf("Error loading parameters_project.R: %s\n", e$message))
|
|
stop(e)
|
|
})
|
|
|
|
# Load CI extraction utilities
|
|
tryCatch({
|
|
source("r_app/20_ci_extraction_utils.R")
|
|
}, error = function(e) {
|
|
cat(sprintf("Error loading 20_ci_extraction_utils.R: %s\n", e$message))
|
|
stop(e)
|
|
})
|
|
|
|
# STEP 3: Now all utilities are loaded, proceed with script logic
|
|
safe_log(sprintf("=== Script 20: CI Extraction Per-Field ==="))
|
|
safe_log(sprintf("Project: %s | End Date: %s | Offset: %d days",
|
|
project_dir, format(end_date, "%Y-%m-%d"), offset))
|
|
|
|
# Set up directory paths from parameters
|
|
setup <- setup_project_directories(project_dir)
|
|
|
|
# Load field boundaries directly from field_boundaries_path in setup
|
|
tryCatch({
|
|
field_boundaries_sf <- st_read(setup$field_boundaries_path, quiet = TRUE)
|
|
safe_log(sprintf("Loaded %d field/sub_field polygons from %s", nrow(field_boundaries_sf), setup$field_boundaries_path))
|
|
}, error = function(e) {
|
|
safe_log(sprintf("Error loading field boundaries from %s: %s", setup$field_boundaries_path, e$message), "ERROR")
|
|
stop(e)
|
|
})
|
|
|
|
# Get list of dates to process
|
|
# If in migration mode, dates_to_process is provided by the pipeline runner
|
|
if (exists("dates_to_process") && !is.null(dates_to_process)) {
|
|
# Migration mode: Use provided list of dates (process ALL available dates)
|
|
dates_filter <- sort(dates_to_process)
|
|
safe_log(sprintf("Migration mode: Processing %d specified dates", length(dates_filter)))
|
|
} else {
|
|
# Normal mode: Use 7-day offset window
|
|
dates <- date_list(end_date, offset)
|
|
dates_filter <- dates$days_filter
|
|
safe_log(sprintf("Normal mode: Processing dates: %s to %s (%d dates)",
|
|
dates$start_date, dates$end_date, length(dates_filter)))
|
|
}
|
|
|
|
safe_log(sprintf("Input directory: %s", setup$field_tiles_dir))
|
|
safe_log(sprintf("Output TIF directory: %s", setup$field_tiles_ci_dir))
|
|
safe_log(sprintf("Output RDS directory: %s", setup$daily_ci_vals_dir))
|
|
|
|
# Process each field
|
|
if (!dir.exists(setup$field_tiles_dir)) {
|
|
safe_log(sprintf("Field tiles directory not found: %s", setup$field_tiles_dir), "ERROR")
|
|
stop("Script 10 output not found. Run Script 10 first.")
|
|
}
|
|
|
|
fields <- list.dirs(setup$field_tiles_dir, full.names = FALSE, recursive = FALSE)
|
|
fields <- fields[fields != ""] # Remove empty strings
|
|
|
|
if (length(fields) == 0) {
|
|
safe_log("No fields found in field_tiles directory", "WARNING")
|
|
return()
|
|
}
|
|
|
|
safe_log(sprintf("Found %d fields to process", length(fields)))
|
|
|
|
# DEBUG: Check what paths are available in setup
|
|
safe_log(sprintf("[DEBUG] Available setup paths: %s", paste(names(setup), collapse=", ")))
|
|
safe_log(sprintf("[DEBUG] field_tiles_ci_dir: %s", setup$field_tiles_ci_dir))
|
|
safe_log(sprintf("[DEBUG] daily_ci_vals_dir: %s", setup$daily_ci_vals_dir))
|
|
|
|
# Use daily_ci_vals_dir for per-field daily CI output
|
|
# Pre-create output subdirectories for all fields
|
|
for (field in fields) {
|
|
dir.create(file.path(setup$field_tiles_ci_dir, field), showWarnings = FALSE, recursive = TRUE)
|
|
if (!is.null(setup$daily_ci_vals_dir)) {
|
|
dir.create(file.path(setup$daily_ci_vals_dir, field), showWarnings = FALSE, recursive = TRUE)
|
|
}
|
|
}
|
|
|
|
# Process each DATE (load merged TIFF once, extract all fields from it)
|
|
total_processed_dates <- 0
|
|
total_skipped_dates <- 0
|
|
total_already_complete_dates <- 0
|
|
total_error <- 0
|
|
|
|
for (date_str in dates_filter) {
|
|
input_tif_merged <- file.path(setup$merged_tif_folder, sprintf("%s.tif", date_str))
|
|
output_tifs <- file.path(setup$field_tiles_ci_dir, fields, sprintf("%s.tif", date_str))
|
|
output_rds <- file.path(setup$daily_ci_vals_dir, fields, sprintf("%s.rds", date_str))
|
|
names(output_tifs) <- fields
|
|
names(output_rds) <- fields
|
|
tif_exists <- file.exists(output_tifs)
|
|
rds_exists <- file.exists(output_rds)
|
|
fields_need_rds_only <- fields[tif_exists & !rds_exists]
|
|
fields_need_raster <- fields[!tif_exists]
|
|
|
|
if (length(fields_need_rds_only) == 0 && length(fields_need_raster) == 0) {
|
|
total_already_complete_dates <- total_already_complete_dates + 1
|
|
safe_log(sprintf(" %s: All field outputs already exist (skipping)", date_str))
|
|
next
|
|
}
|
|
|
|
fields_processed_this_date <- 0
|
|
rds_only_processed <- 0
|
|
raster_processed_this_date <- 0
|
|
|
|
if (length(fields_need_rds_only) > 0) {
|
|
for (field in fields_need_rds_only) {
|
|
tryCatch({
|
|
extract_rds_from_ci_tiff(output_tifs[[field]], output_rds[[field]], field_boundaries_sf, field)
|
|
fields_processed_this_date <- fields_processed_this_date + 1
|
|
rds_only_processed <- rds_only_processed + 1
|
|
}, error = function(e) {
|
|
safe_log(sprintf(" Error regenerating RDS for field %s: %s", field, e$message), "WARNING")
|
|
})
|
|
}
|
|
}
|
|
|
|
if (length(fields_need_raster) == 0) {
|
|
total_processed_dates <- total_processed_dates + 1
|
|
safe_log(sprintf(" %s: Regenerated %d RDS files from existing CI TIFFs", date_str, rds_only_processed))
|
|
next
|
|
}
|
|
|
|
if (!file.exists(input_tif_merged)) {
|
|
safe_log(sprintf(" %s: merged_tif not found (skipping)", date_str))
|
|
total_skipped_dates <- total_skipped_dates + 1
|
|
next
|
|
}
|
|
|
|
tryCatch({
|
|
# Load the merged TIFF only when at least one field still needs a CI TIFF.
|
|
raster_4band <- terra::rast(input_tif_merged)
|
|
safe_log(sprintf(" %s: Loaded merged TIFF, processing %d fields...", date_str, length(fields_need_raster)))
|
|
|
|
# Calculate CI from 4-band
|
|
ci_raster <- calc_ci_from_raster(raster_4band)
|
|
|
|
# Create 5-band (R, G, B, NIR, CI)
|
|
# Explicitly set band names after combining to ensure proper naming
|
|
five_band <- c(raster_4band, ci_raster)
|
|
names(five_band) <- c("Red", "Green", "Blue", "NIR", "CI")
|
|
|
|
# Now process only the fields that still need CI TIFF output for this date.
|
|
for (field in fields_need_raster) {
|
|
output_tif_path <- output_tifs[[field]]
|
|
output_rds_path <- output_rds[[field]]
|
|
|
|
tryCatch({
|
|
# Crop 5-band TIFF to field boundary
|
|
field_geom <- field_boundaries_sf %>% filter(field == !!field)
|
|
five_band_cropped <- terra::crop(five_band, field_geom, mask = TRUE)
|
|
|
|
# Save 5-band field TIFF
|
|
terra::writeRaster(five_band_cropped, output_tif_path, overwrite = TRUE)
|
|
|
|
# Extract CI statistics by sub_field (from cropped CI raster)
|
|
ci_cropped <- five_band_cropped[[5]] # 5th band is CI
|
|
ci_stats <- extract_ci_by_subfield(ci_cropped, field_boundaries_sf, field)
|
|
|
|
# Save RDS
|
|
if (!is.null(ci_stats) && nrow(ci_stats) > 0) {
|
|
saveRDS(ci_stats, output_rds_path)
|
|
}
|
|
|
|
fields_processed_this_date <- fields_processed_this_date + 1
|
|
raster_processed_this_date <- raster_processed_this_date + 1
|
|
|
|
}, error = function(e) {
|
|
# Error in individual field, continue to next
|
|
safe_log(sprintf(" Error processing field %s: %s", field, e$message), "WARNING")
|
|
})
|
|
}
|
|
|
|
if (fields_processed_this_date > 0) {
|
|
total_processed_dates <- total_processed_dates + 1
|
|
safe_log(sprintf(
|
|
" %s: Processed %d fields (%d CI TIFFs created, %d RDS-only regenerated)",
|
|
date_str,
|
|
fields_processed_this_date,
|
|
raster_processed_this_date,
|
|
rds_only_processed
|
|
))
|
|
} else {
|
|
total_error <- total_error + 1
|
|
safe_log(sprintf(" %s: No field outputs were created; see warnings above", date_str), "ERROR")
|
|
}
|
|
|
|
}, error = function(e) {
|
|
total_error <- total_error + 1
|
|
safe_log(sprintf(" %s: Error loading or processing merged TIFF - %s", date_str, e$message), "ERROR")
|
|
})
|
|
}
|
|
|
|
# Summary
|
|
safe_log(sprintf("\n=== Processing Complete ==="))
|
|
safe_log(sprintf("Dates processed: %d", total_processed_dates))
|
|
safe_log(sprintf("Dates skipped (missing merged_tif): %d", total_skipped_dates))
|
|
safe_log(sprintf("Dates already complete: %d", total_already_complete_dates))
|
|
safe_log(sprintf("Errors encountered: %d", total_error))
|
|
|
|
if (total_processed_dates > 0) {
|
|
safe_log("Output files created in:")
|
|
safe_log(sprintf(" TIFFs: %s", setup$field_tiles_ci_dir))
|
|
safe_log(sprintf(" RDS: %s", setup$daily_ci_vals_dir))
|
|
}
|
|
}
|
|
|
|
# Execute main if called from command line
|
|
if (sys.nframe() == 0) {
|
|
main()
|
|
}
|