SmartCane/r_app/20_ci_extraction.R

203 lines
7.3 KiB
R

# CI_EXTRACTION.R
# ==============
# This script processes satellite imagery to extract Canopy Index (CI) values for agricultural fields.
# It handles image processing, masking, and extraction of statistics by field/sub-field.
# Supports both 4-band and 8-band PlanetScope data with automatic band detection and cloud masking.
#
# Usage: Rscript 02_ci_extraction.R [end_date] [offset] [project_dir] [data_source]
# - end_date: End date for processing (YYYY-MM-DD format)
# - offset: Number of days to look back from end_date
# - project_dir: Project directory name (e.g., "angata", "aura", "chemba")
# - data_source: Data source directory - "merged_tif_8b" (default) or "merged_tif" (4-band) or "merged_final_tif"
# If tiles exist (daily_tiles_split/), they are used automatically
#
# Examples:
# # Angata 8-band data (with UDM cloud masking)
# & 'C:\Program Files\R\R-4.4.3\bin\x64\Rscript' r_app/02_ci_extraction.R 2026-01-02 7 angata merged_tif_8b
#
# # Aura 4-band data
# Rscript 02_ci_extraction.R 2025-11-26 7 aura merged_tif
#
# # Auto-detects and uses tiles if available:
# Rscript 02_ci_extraction.R 2026-01-02 7 angata (uses tiles if daily_tiles_split/ exists)
# 1. Load required packages
# -----------------------
suppressPackageStartupMessages({
library(sf)
library(terra)
library(tidyverse)
library(lubridate)
library(readxl)
library(here)
library(furrr)
library(future)
})
# 2. Process command line arguments
# ------------------------------
main <- function() {
# Capture command line arguments
args <- commandArgs(trailingOnly = TRUE)
# Process end_date argument
if (length(args) >= 1 && !is.na(args[1])) {
end_date <- as.Date(args[1])
if (is.na(end_date)) {
warning("Invalid end_date provided. Using default (current date).")
end_date <- Sys.Date()
#end_date <- "2023-10-01"
}
} else {
end_date <- Sys.Date()
#end_date <- "2023-10-01"
}
# Process offset argument
if (length(args) >= 2 && !is.na(args[2])) {
offset <- as.numeric(args[2])
if (is.na(offset) || offset <= 0) {
warning("Invalid offset provided. Using default (7 days).")
offset <- 7
}
} else {
offset <- 7
}
# Process project_dir argument
if (length(args) >= 3 && !is.na(args[3])) {
project_dir <- as.character(args[3])
} else if (exists("project_dir", envir = .GlobalEnv)) {
project_dir <- get("project_dir", envir = .GlobalEnv)
} else {
project_dir <- "angata" # Changed default from "aura" to "esa"
}
# Process data_source argument (optional, for specifying merged_tif_8b vs merged_tif vs merged_final_tif)
if (length(args) >= 4 && !is.na(args[4])) {
data_source <- as.character(args[4])
# Validate data_source is a recognized option
if (!data_source %in% c("merged_tif_8b", "merged_tif", "merged_final_tif")) {
warning(paste("Data source", data_source, "not in standard list. Using as-is."))
}
} else if (exists("data_source", envir = .GlobalEnv)) {
data_source <- get("data_source", envir = .GlobalEnv)
} else {
data_source <- "merged_tif_8b" # Default to 8-band (newer data with cloud masking)
}
# Make project_dir and data_source available globally
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("data_source", data_source, envir = .GlobalEnv)
cat(sprintf("CI Extraction: project=%s, end_date=%s, offset=%d days, data_source=%s\n",
project_dir, format(end_date, "%Y-%m-%d"), offset, data_source))
# Set flag to use pivot_2.geojson for ESA (extra fields for yield prediction)
ci_extraction_script <- TRUE
assign("ci_extraction_script", ci_extraction_script, envir = .GlobalEnv)
# 3. Initialize project configuration
# --------------------------------
new_project_question <- TRUE
cat("[DEBUG] Attempting to source r_app/parameters_project.R\n")
tryCatch({
source("r_app/parameters_project.R")
cat("[DEBUG] Successfully sourced r_app/parameters_project.R\n")
}, error = function(e) {
cat("[ERROR] Failed to source r_app/parameters_project.R:\n", e$message, "\n")
stop(e)
})
cat("[DEBUG] Attempting to source r_app/20_ci_extraction_utils.R\n")
tryCatch({
source("r_app/20_ci_extraction_utils.R")
cat("[DEBUG] Successfully sourced r_app/20_ci_extraction_utils.R\n")
}, error = function(e) {
cat("[ERROR] Failed to source r_app/20_ci_extraction_utils.R:\n", e$message, "\n")
stop(e)
})
# 4. Generate date list for processing
# ---------------------------------
dates <- date_list(end_date, 7)
log_message(paste("Processing data for week", dates$week, "of", dates$year))
# 5. Find and filter raster files by date - with grid size detection
# -----------------------------------
log_message("Searching for raster files")
# Check if tiles exist (Script 01 output) - detect grid size dynamically
tiles_split_base <- file.path("laravel_app", "storage", "app", project_dir, "daily_tiles_split")
# Detect grid size from daily_tiles_split folder structure
# Expected structure: daily_tiles_split/5x5/ or daily_tiles_split/10x10/ etc.
grid_size <- NA
if (dir.exists(tiles_split_base)) {
subfolders <- list.dirs(tiles_split_base, full.names = FALSE, recursive = FALSE)
# Look for grid size patterns like "5x5", "10x10", "20x20"
grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
if (length(grid_patterns) > 0) {
grid_size <- grid_patterns[1] # Use first grid size found
log_message(paste("Detected grid size:", grid_size))
}
}
# Construct tile folder path with grid size
if (!is.na(grid_size)) {
tile_folder <- file.path(tiles_split_base, grid_size)
} else {
tile_folder <- tiles_split_base
}
use_tiles <- dir.exists(tile_folder)
# Make grid_size available globally for other functions
assign("grid_size", grid_size, envir = .GlobalEnv)
tryCatch({
if (use_tiles) {
# Use tile-based processing
log_message(paste("Tile folder detected at", tile_folder))
log_message("Using tile-based CI extraction")
# Call the tile-based extraction function
process_ci_values_from_tiles(
dates = dates,
tile_folder = tile_folder,
field_boundaries = field_boundaries,
field_boundaries_sf = field_boundaries_sf,
daily_CI_vals_dir = daily_CI_vals_dir,
cumulative_CI_vals_dir = cumulative_CI_vals_dir,
merged_final_dir = merged_final,
grid_size = grid_size
)
} else {
# Use legacy full-extent processing
log_message("No tiles found. Using legacy full-extent approach")
# Use the existing utility function to find satellite images
existing_files <- find_satellite_images(planet_tif_folder, dates$days_filter)
log_message(paste("Found", length(existing_files), "raster files for processing"))
# Process raster files and create VRT
vrt_list <- process_satellite_images(existing_files, field_boundaries, merged_final, daily_vrt)
# Process and combine CI values
process_ci_values(dates, field_boundaries, merged_final,
field_boundaries_sf, daily_CI_vals_dir, cumulative_CI_vals_dir)
}
}, error = function(e) {
log_message(paste("Error in main processing:", e$message), level = "ERROR")
stop(e$message)
})
}
if (sys.nframe() == 0) {
main()
}