- Replace raster package with terra throughout the codebase - Update map visualizations with better layout and legends - Add descriptive headers to report sections - Improve map legend positioning and sizing - Enhance error handling for missing data - Remove redundant legends in field-specific visualizations - Optimize figure dimensions to prevent page overflow - Expand documentation of CI index and report components - Update package dependencies in packages.
427 lines
14 KiB
R
427 lines
14 KiB
R
# CI_EXTRACTION_UTILS.R
|
|
# =====================
|
|
# Utility functions for the SmartCane CI (Chlorophill Index) extraction workflow.
|
|
# These functions support date handling, raster processing, and data extraction.
|
|
|
|
#' Safe logging function that works whether log_message exists or not
|
|
#'
|
|
#' @param message The message to log
|
|
#' @param level The log level (default: "INFO")
|
|
#' @return NULL (used for side effects)
|
|
#'
|
|
safe_log <- function(message, level = "INFO") {
|
|
if (exists("log_message")) {
|
|
log_message(message, level)
|
|
} else {
|
|
if (level %in% c("ERROR", "WARNING")) {
|
|
warning(message)
|
|
} else {
|
|
message(message)
|
|
}
|
|
}
|
|
}
|
|
|
|
#' Generate a sequence of dates for processing
|
|
#'
|
|
#' @param end_date The end date for the sequence (Date object)
|
|
#' @param offset Number of days to look back from end_date
|
|
#' @return A list containing week number, year, and a sequence of dates for filtering
|
|
#'
|
|
date_list <- function(end_date, offset) {
|
|
# Input validation
|
|
if (!lubridate::is.Date(end_date)) {
|
|
end_date <- as.Date(end_date)
|
|
if (is.na(end_date)) {
|
|
stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.")
|
|
}
|
|
}
|
|
|
|
offset <- as.numeric(offset)
|
|
if (is.na(offset) || offset < 1) {
|
|
stop("Invalid offset provided. Expected a positive number.")
|
|
}
|
|
|
|
# Calculate date range
|
|
offset <- offset - 1 # Adjust offset to include end_date
|
|
start_date <- end_date - lubridate::days(offset)
|
|
|
|
# Extract week and year information
|
|
week <- lubridate::week(start_date)
|
|
year <- lubridate::year(start_date)
|
|
|
|
# Generate sequence of dates
|
|
days_filter <- seq(from = start_date, to = end_date, by = "day")
|
|
days_filter <- format(days_filter, "%Y-%m-%d") # Format for consistent filtering
|
|
|
|
# Log the date range
|
|
safe_log(paste("Date range generated from", start_date, "to", end_date))
|
|
|
|
return(list(
|
|
"week" = week,
|
|
"year" = year,
|
|
"days_filter" = days_filter,
|
|
"start_date" = start_date,
|
|
"end_date" = end_date
|
|
))
|
|
}
|
|
|
|
#' Create a Chlorophill Index (CI) mask from satellite imagery and crop to field boundaries
|
|
#'
|
|
#' @param file Path to the satellite image file
|
|
#' @param field_boundaries Field boundaries vector object
|
|
#' @param merged_final_dir Directory to save the processed raster
|
|
#' @return Processed raster object with CI band
|
|
#'
|
|
create_mask_and_crop <- function(file, field_boundaries, merged_final_dir) {
|
|
# Validate inputs
|
|
if (!file.exists(file)) {
|
|
stop(paste("File not found:", file))
|
|
}
|
|
|
|
if (is.null(field_boundaries)) {
|
|
stop("Field boundaries are required but were not provided")
|
|
}
|
|
|
|
# Establish file names for output
|
|
basename_no_ext <- tools::file_path_sans_ext(basename(file))
|
|
new_file <- here::here(merged_final_dir, paste0(basename_no_ext, ".tif"))
|
|
vrt_file <- here::here(daily_vrt, paste0(basename_no_ext, ".vrt"))
|
|
|
|
# Process with error handling
|
|
tryCatch({
|
|
# Log processing start
|
|
safe_log(paste("Processing", basename(file)))
|
|
|
|
# Load and prepare raster
|
|
loaded_raster <- terra::rast(file)
|
|
|
|
# Validate raster has necessary bands
|
|
if (terra::nlyr(loaded_raster) < 4) {
|
|
stop("Raster must have at least 4 bands (Red, Green, Blue, NIR)")
|
|
}
|
|
|
|
# Name bands for clarity
|
|
names(loaded_raster) <- c("Red", "Green", "Blue", "NIR")
|
|
|
|
# Calculate Canopy Index
|
|
CI <- loaded_raster$NIR / loaded_raster$Green - 1
|
|
|
|
# Add CI to raster and mask invalid values
|
|
loaded_raster$CI <- CI
|
|
loaded_raster <- terra::crop(loaded_raster, field_boundaries, mask = TRUE)
|
|
|
|
# Replace zeros with NA for better visualization and analysis
|
|
loaded_raster[loaded_raster == 0] <- NA
|
|
|
|
# Write output files
|
|
terra::writeRaster(loaded_raster, new_file, overwrite = TRUE)
|
|
terra::vrt(new_file, vrt_file, overwrite = TRUE)
|
|
|
|
# Check if the result has enough valid pixels
|
|
valid_pixels <- terra::global(loaded_raster$CI, "notNA", na.rm=TRUE)
|
|
|
|
# Log completion
|
|
safe_log(paste("Completed processing", basename(file),
|
|
"- Valid pixels:", valid_pixels[1,]))
|
|
|
|
return(loaded_raster)
|
|
|
|
}, error = function(e) {
|
|
err_msg <- paste("Error processing", basename(file), "-", e$message)
|
|
safe_log(err_msg, "ERROR")
|
|
return(NULL)
|
|
}, finally = {
|
|
# Clean up memory
|
|
gc()
|
|
})
|
|
}
|
|
|
|
#' Process a batch of satellite images and create VRT files
|
|
#'
|
|
#' @param files Vector of file paths to process
|
|
#' @param field_boundaries Field boundaries vector object for cropping
|
|
#' @param merged_final_dir Directory to save processed rasters
|
|
#' @param daily_vrt_dir Directory to save VRT files
|
|
#' @param min_valid_pixels Minimum number of valid pixels for a raster to be kept (default: 100)
|
|
#' @return List of valid VRT files created
|
|
#'
|
|
process_satellite_images <- function(files, field_boundaries, merged_final_dir, daily_vrt_dir, min_valid_pixels = 100) {
|
|
vrt_list <- list()
|
|
|
|
safe_log(paste("Starting batch processing of", length(files), "files"))
|
|
|
|
# Process each file
|
|
for (file in files) {
|
|
# Process each raster file
|
|
v_crop <- create_mask_and_crop(file, field_boundaries, merged_final_dir)
|
|
|
|
# Skip if processing failed
|
|
if (is.null(v_crop)) {
|
|
next
|
|
}
|
|
|
|
# Check if the raster has enough valid data
|
|
valid_data <- terra::global(v_crop, "notNA")
|
|
vrt_file <- here::here(daily_vrt_dir, paste0(tools::file_path_sans_ext(basename(file)), ".vrt"))
|
|
|
|
if (valid_data[1,] > min_valid_pixels) {
|
|
vrt_list[[vrt_file]] <- vrt_file
|
|
} else {
|
|
# Remove VRT files with insufficient data
|
|
if (file.exists(vrt_file)) {
|
|
file.remove(vrt_file)
|
|
}
|
|
safe_log(paste("Skipping", basename(file), "- insufficient valid data"), "WARNING")
|
|
}
|
|
|
|
# Clean up memory
|
|
rm(v_crop)
|
|
gc()
|
|
}
|
|
|
|
safe_log(paste("Completed processing", length(vrt_list), "raster files"))
|
|
|
|
return(vrt_list)
|
|
}
|
|
|
|
#' Find satellite image files filtered by date
|
|
#'
|
|
#' @param tif_folder Directory containing satellite imagery files
|
|
#' @param dates_filter Character vector of dates in YYYY-MM-DD format
|
|
#' @return Vector of file paths matching the date filter
|
|
#'
|
|
find_satellite_images <- function(tif_folder, dates_filter) {
|
|
# Find all raster files
|
|
raster_files <- list.files(tif_folder, full.names = TRUE, pattern = "\\.tif$")
|
|
|
|
if (length(raster_files) == 0) {
|
|
stop("No raster files found in directory: ", tif_folder)
|
|
}
|
|
|
|
# Filter files by dates
|
|
filtered_files <- purrr::map(dates_filter, ~ raster_files[grepl(pattern = .x, x = raster_files)]) %>%
|
|
purrr::compact() %>%
|
|
purrr::flatten_chr()
|
|
|
|
# Remove files that do not exist
|
|
existing_files <- filtered_files[file.exists(filtered_files)]
|
|
|
|
# Check if the list of existing files is empty
|
|
if (length(existing_files) == 0) {
|
|
stop("No files found matching the date filter: ", paste(dates_filter, collapse = ", "))
|
|
}
|
|
|
|
return(existing_files)
|
|
}
|
|
|
|
#' Extract date from file path
|
|
#'
|
|
#' @param file_path Path to the file
|
|
#' @return Extracted date in YYYY-MM-DD format
|
|
#'
|
|
date_extract <- function(file_path) {
|
|
date <- stringr::str_extract(file_path, "\\d{4}-\\d{2}-\\d{2}")
|
|
|
|
if (is.na(date)) {
|
|
warning(paste("Could not extract date from file path: ", file_path))
|
|
}
|
|
|
|
return(date)
|
|
}
|
|
|
|
#' Extract CI values from a raster for each field or subfield
|
|
#'
|
|
#' @param file Path to the raster file
|
|
#' @param field_geojson Field boundaries as SF object
|
|
#' @param quadrants Boolean indicating whether to extract by quadrants
|
|
#' @param save_dir Directory to save the extracted values
|
|
#' @return Path to the saved RDS file
|
|
#'
|
|
extract_rasters_daily <- function(file, field_geojson, quadrants = TRUE, save_dir) {
|
|
# Validate inputs
|
|
if (!file.exists(file)) {
|
|
stop(paste("File not found: ", file))
|
|
}
|
|
|
|
if (!inherits(field_geojson, "sf") && !inherits(field_geojson, "sfc")) {
|
|
field_geojson <- sf::st_as_sf(field_geojson)
|
|
}
|
|
|
|
# Extract date from file path
|
|
date <- date_extract(file)
|
|
if (is.na(date)) {
|
|
stop(paste("Could not extract date from file path:", file))
|
|
}
|
|
|
|
# Log extraction start
|
|
safe_log(paste("Extracting CI values for", date, "- Using quadrants:", quadrants))
|
|
|
|
# Process with error handling
|
|
tryCatch({
|
|
# Load raster
|
|
x <- terra::rast(file)
|
|
|
|
# Check if CI band exists
|
|
if (!"CI" %in% names(x)) {
|
|
stop("CI band not found in raster")
|
|
}
|
|
|
|
# Extract statistics
|
|
pivot_stats <- cbind(
|
|
field_geojson,
|
|
mean_CI = round(exactextractr::exact_extract(x$CI, field_geojson, fun = "mean"), 2)
|
|
) %>%
|
|
sf::st_drop_geometry() %>%
|
|
dplyr::rename("{date}" := mean_CI)
|
|
|
|
# Determine save path
|
|
save_suffix <- if (quadrants) {"quadrant"} else {"whole_field"}
|
|
save_path <- here::here(save_dir, paste0("extracted_", date, "_", save_suffix, ".rds"))
|
|
|
|
# Save extracted data
|
|
saveRDS(pivot_stats, save_path)
|
|
|
|
# Log success
|
|
safe_log(paste("Successfully extracted and saved CI values for", date))
|
|
|
|
return(save_path)
|
|
|
|
}, error = function(e) {
|
|
err_msg <- paste("Error extracting CI values for", date, "-", e$message)
|
|
safe_log(err_msg, "ERROR")
|
|
return(NULL)
|
|
})
|
|
}
|
|
|
|
#' Combine daily CI values into a single dataset
|
|
#'
|
|
#' @param daily_CI_vals_dir Directory containing daily CI values
|
|
#' @param output_file Path to save the combined dataset
|
|
#' @return Combined dataset as a tibble
|
|
#'
|
|
combine_ci_values <- function(daily_CI_vals_dir, output_file = NULL) {
|
|
# List all RDS files in the daily CI values directory
|
|
files <- list.files(path = daily_CI_vals_dir, pattern = "^extracted_.*\\.rds$", full.names = TRUE)
|
|
|
|
if (length(files) == 0) {
|
|
stop("No extracted CI values found in directory:", daily_CI_vals_dir)
|
|
}
|
|
|
|
# Log process start
|
|
safe_log(paste("Combining", length(files), "CI value files"))
|
|
|
|
# Load and combine all files
|
|
combined_data <- files %>%
|
|
purrr::map(readRDS) %>%
|
|
purrr::list_rbind() %>%
|
|
dplyr::group_by(sub_field)
|
|
|
|
# Save if output file is specified
|
|
if (!is.null(output_file)) {
|
|
saveRDS(combined_data, output_file)
|
|
safe_log(paste("Combined CI values saved to", output_file))
|
|
}
|
|
|
|
return(combined_data)
|
|
}
|
|
|
|
#' Update existing CI data with new values
|
|
#'
|
|
#' @param new_data New CI data to be added
|
|
#' @param existing_data_file Path to the existing data file
|
|
#' @return Updated combined dataset
|
|
#'
|
|
update_ci_data <- function(new_data, existing_data_file) {
|
|
if (!file.exists(existing_data_file)) {
|
|
warning(paste("Existing data file not found:", existing_data_file))
|
|
return(new_data)
|
|
}
|
|
|
|
# Load existing data
|
|
existing_data <- readRDS(existing_data_file)
|
|
|
|
# Combine data, handling duplicates by keeping the newer values
|
|
combined_data <- dplyr::bind_rows(new_data, existing_data) %>%
|
|
dplyr::distinct() %>%
|
|
dplyr::group_by(sub_field)
|
|
|
|
# Save updated data
|
|
saveRDS(combined_data, existing_data_file)
|
|
safe_log(paste("Updated CI data saved to", existing_data_file))
|
|
|
|
return(combined_data)
|
|
}
|
|
|
|
#' Process and combine CI values from raster files
|
|
#'
|
|
#' @param dates List of dates from date_list()
|
|
#' @param field_boundaries Field boundaries as vector object
|
|
#' @param merged_final_dir Directory with processed raster files
|
|
#' @param field_boundaries_sf Field boundaries as SF object
|
|
#' @param daily_CI_vals_dir Directory to save daily CI values
|
|
#' @param cumulative_CI_vals_dir Directory to save cumulative CI values
|
|
#' @return NULL (used for side effects)
|
|
#'
|
|
process_ci_values <- function(dates, field_boundaries, merged_final_dir,
|
|
field_boundaries_sf, daily_CI_vals_dir,
|
|
cumulative_CI_vals_dir) {
|
|
# Find processed raster files
|
|
raster_files <- list.files(merged_final_dir, full.names = TRUE, pattern = "\\.tif$")
|
|
|
|
# Define path for combined CI data
|
|
combined_ci_path <- here::here(cumulative_CI_vals_dir, "combined_CI_data.rds")
|
|
|
|
# Check if the combined CI data file exists
|
|
if (!file.exists(combined_ci_path)) {
|
|
# Process all available data if file doesn't exist
|
|
safe_log("combined_CI_data.rds does not exist. Creating new file with all available data.")
|
|
|
|
# Extract data from all raster files
|
|
purrr::walk(
|
|
raster_files,
|
|
extract_rasters_daily,
|
|
field_geojson = field_boundaries_sf,
|
|
quadrants = FALSE,
|
|
save_dir = daily_CI_vals_dir
|
|
)
|
|
|
|
# Combine all extracted data
|
|
pivot_stats <- combine_ci_values(daily_CI_vals_dir, combined_ci_path)
|
|
safe_log("All CI values extracted from historic images and saved.")
|
|
|
|
} else {
|
|
# Process only the latest data and add to existing file
|
|
safe_log("combined_CI_data.rds exists, adding the latest image data.")
|
|
|
|
# Filter files by dates
|
|
filtered_files <- purrr::map(dates$days_filter, ~ raster_files[grepl(pattern = .x, x = raster_files)]) %>%
|
|
purrr::compact() %>%
|
|
purrr::flatten_chr()
|
|
|
|
# Extract data for the new files
|
|
purrr::walk(
|
|
filtered_files,
|
|
extract_rasters_daily,
|
|
field_geojson = field_boundaries_sf,
|
|
quadrants = TRUE,
|
|
save_dir = daily_CI_vals_dir
|
|
)
|
|
|
|
# Filter extracted values files by the current date range
|
|
extracted_values <- list.files(daily_CI_vals_dir, full.names = TRUE)
|
|
extracted_values <- purrr::map(dates$days_filter, ~ extracted_values[grepl(pattern = .x, x = extracted_values)]) %>%
|
|
purrr::compact() %>%
|
|
purrr::flatten_chr()
|
|
|
|
# Combine new values
|
|
new_pivot_stats <- extracted_values %>%
|
|
purrr::map(readRDS) %>%
|
|
purrr::list_rbind() %>%
|
|
dplyr::group_by(sub_field)
|
|
|
|
# Update the combined data file
|
|
update_ci_data(new_pivot_stats, combined_ci_path)
|
|
safe_log("CI values from latest images added to combined_CI_data.rds")
|
|
}
|
|
}
|