# CI_EXTRACTION_UTILS.R # ===================== # Utility functions for the SmartCane CI (Chlorophill Index) extraction workflow. # These functions support date handling, raster processing, and data extraction. #' Safe logging function that works whether log_message exists or not #' #' @param message The message to log #' @param level The log level (default: "INFO") #' @return NULL (used for side effects) #' safe_log <- function(message, level = "INFO") { if (exists("log_message")) { log_message(message, level) } else { if (level %in% c("ERROR", "WARNING")) { warning(message) } else { message(message) } } } #' Generate a sequence of dates for processing #' #' @param end_date The end date for the sequence (Date object) #' @param offset Number of days to look back from end_date #' @return A list containing week number, year, and a sequence of dates for filtering #' date_list <- function(end_date, offset) { # Input validation if (!lubridate::is.Date(end_date)) { end_date <- as.Date(end_date) if (is.na(end_date)) { stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.") } } offset <- as.numeric(offset) if (is.na(offset) || offset < 1) { stop("Invalid offset provided. Expected a positive number.") } # Calculate date range offset <- offset - 1 # Adjust offset to include end_date start_date <- end_date - lubridate::days(offset) # Extract week and year information week <- lubridate::week(start_date) year <- lubridate::year(start_date) # Generate sequence of dates days_filter <- seq(from = start_date, to = end_date, by = "day") days_filter <- format(days_filter, "%Y-%m-%d") # Format for consistent filtering # Log the date range safe_log(paste("Date range generated from", start_date, "to", end_date)) return(list( "week" = week, "year" = year, "days_filter" = days_filter, "start_date" = start_date, "end_date" = end_date )) } #' Create a Chlorophill Index (CI) mask from satellite imagery and crop to field boundaries #' #' @param file Path to the satellite image file #' @param field_boundaries Field boundaries vector object #' @param merged_final_dir Directory to save the processed raster #' @return Processed raster object with CI band #' create_mask_and_crop <- function(file, field_boundaries, merged_final_dir) { # Validate inputs if (!file.exists(file)) { stop(paste("File not found:", file)) } if (is.null(field_boundaries)) { stop("Field boundaries are required but were not provided") } # Establish file names for output basename_no_ext <- tools::file_path_sans_ext(basename(file)) new_file <- here::here(merged_final_dir, paste0(basename_no_ext, ".tif")) vrt_file <- here::here(daily_vrt, paste0(basename_no_ext, ".vrt")) # Process with error handling tryCatch({ # Log processing start safe_log(paste("Processing", basename(file))) # Load and prepare raster loaded_raster <- terra::rast(file) # Validate raster has necessary bands if (terra::nlyr(loaded_raster) < 4) { stop("Raster must have at least 4 bands (Red, Green, Blue, NIR)") } # Name bands for clarity names(loaded_raster) <- c("Red", "Green", "Blue", "NIR") # Calculate Canopy Index CI <- loaded_raster$NIR / loaded_raster$Green - 1 # Add CI to raster and mask invalid values loaded_raster$CI <- CI loaded_raster <- terra::crop(loaded_raster, field_boundaries, mask = TRUE) # Replace zeros with NA for better visualization and analysis loaded_raster[loaded_raster == 0] <- NA # Write output files terra::writeRaster(loaded_raster, new_file, overwrite = TRUE) terra::vrt(new_file, vrt_file, overwrite = TRUE) # Check if the result has enough valid pixels valid_pixels <- terra::global(loaded_raster$CI, "notNA", na.rm=TRUE) # Log completion safe_log(paste("Completed processing", basename(file), "- Valid pixels:", valid_pixels[1,])) return(loaded_raster) }, error = function(e) { err_msg <- paste("Error processing", basename(file), "-", e$message) safe_log(err_msg, "ERROR") return(NULL) }, finally = { # Clean up memory gc() }) } #' Process a batch of satellite images and create VRT files #' #' @param files Vector of file paths to process #' @param field_boundaries Field boundaries vector object for cropping #' @param merged_final_dir Directory to save processed rasters #' @param daily_vrt_dir Directory to save VRT files #' @param min_valid_pixels Minimum number of valid pixels for a raster to be kept (default: 100) #' @return List of valid VRT files created #' process_satellite_images <- function(files, field_boundaries, merged_final_dir, daily_vrt_dir, min_valid_pixels = 100) { vrt_list <- list() safe_log(paste("Starting batch processing of", length(files), "files")) # Process each file for (file in files) { # Process each raster file v_crop <- create_mask_and_crop(file, field_boundaries, merged_final_dir) # Skip if processing failed if (is.null(v_crop)) { next } # Check if the raster has enough valid data valid_data <- terra::global(v_crop, "notNA") vrt_file <- here::here(daily_vrt_dir, paste0(tools::file_path_sans_ext(basename(file)), ".vrt")) if (valid_data[1,] > min_valid_pixels) { vrt_list[[vrt_file]] <- vrt_file } else { # Remove VRT files with insufficient data if (file.exists(vrt_file)) { file.remove(vrt_file) } safe_log(paste("Skipping", basename(file), "- insufficient valid data"), "WARNING") } # Clean up memory rm(v_crop) gc() } safe_log(paste("Completed processing", length(vrt_list), "raster files")) return(vrt_list) } #' Find satellite image files filtered by date #' #' @param tif_folder Directory containing satellite imagery files #' @param dates_filter Character vector of dates in YYYY-MM-DD format #' @return Vector of file paths matching the date filter #' find_satellite_images <- function(tif_folder, dates_filter) { # Find all raster files raster_files <- list.files(tif_folder, full.names = TRUE, pattern = "\\.tif$") if (length(raster_files) == 0) { stop("No raster files found in directory: ", tif_folder) } # Filter files by dates filtered_files <- purrr::map(dates_filter, ~ raster_files[grepl(pattern = .x, x = raster_files)]) %>% purrr::compact() %>% purrr::flatten_chr() # Remove files that do not exist existing_files <- filtered_files[file.exists(filtered_files)] # Check if the list of existing files is empty if (length(existing_files) == 0) { stop("No files found matching the date filter: ", paste(dates_filter, collapse = ", ")) } return(existing_files) } #' Extract date from file path #' #' @param file_path Path to the file #' @return Extracted date in YYYY-MM-DD format #' date_extract <- function(file_path) { date <- stringr::str_extract(file_path, "\\d{4}-\\d{2}-\\d{2}") if (is.na(date)) { warning(paste("Could not extract date from file path: ", file_path)) } return(date) } #' Extract CI values from a raster for each field or subfield #' #' @param file Path to the raster file #' @param field_geojson Field boundaries as SF object #' @param quadrants Boolean indicating whether to extract by quadrants #' @param save_dir Directory to save the extracted values #' @return Path to the saved RDS file #' extract_rasters_daily <- function(file, field_geojson, quadrants = TRUE, save_dir) { # Validate inputs if (!file.exists(file)) { stop(paste("File not found: ", file)) } if (!inherits(field_geojson, "sf") && !inherits(field_geojson, "sfc")) { field_geojson <- sf::st_as_sf(field_geojson) } # Extract date from file path date <- date_extract(file) if (is.na(date)) { stop(paste("Could not extract date from file path:", file)) } # Log extraction start safe_log(paste("Extracting CI values for", date, "- Using quadrants:", quadrants)) # Process with error handling tryCatch({ # Load raster x <- terra::rast(file) # Check if CI band exists if (!"CI" %in% names(x)) { stop("CI band not found in raster") } # Extract statistics pivot_stats <- cbind( field_geojson, mean_CI = round(exactextractr::exact_extract(x$CI, field_geojson, fun = "mean"), 2) ) %>% sf::st_drop_geometry() %>% dplyr::rename("{date}" := mean_CI) # Determine save path save_suffix <- if (quadrants) {"quadrant"} else {"whole_field"} save_path <- here::here(save_dir, paste0("extracted_", date, "_", save_suffix, ".rds")) # Save extracted data saveRDS(pivot_stats, save_path) # Log success safe_log(paste("Successfully extracted and saved CI values for", date)) return(save_path) }, error = function(e) { err_msg <- paste("Error extracting CI values for", date, "-", e$message) safe_log(err_msg, "ERROR") return(NULL) }) } #' Combine daily CI values into a single dataset #' #' @param daily_CI_vals_dir Directory containing daily CI values #' @param output_file Path to save the combined dataset #' @return Combined dataset as a tibble #' combine_ci_values <- function(daily_CI_vals_dir, output_file = NULL) { # List all RDS files in the daily CI values directory files <- list.files(path = daily_CI_vals_dir, pattern = "^extracted_.*\\.rds$", full.names = TRUE) if (length(files) == 0) { stop("No extracted CI values found in directory:", daily_CI_vals_dir) } # Log process start safe_log(paste("Combining", length(files), "CI value files")) # Load and combine all files combined_data <- files %>% purrr::map(readRDS) %>% purrr::list_rbind() %>% dplyr::group_by(sub_field) # Save if output file is specified if (!is.null(output_file)) { saveRDS(combined_data, output_file) safe_log(paste("Combined CI values saved to", output_file)) } return(combined_data) } #' Update existing CI data with new values #' #' @param new_data New CI data to be added #' @param existing_data_file Path to the existing data file #' @return Updated combined dataset #' update_ci_data <- function(new_data, existing_data_file) { if (!file.exists(existing_data_file)) { warning(paste("Existing data file not found:", existing_data_file)) return(new_data) } # Load existing data existing_data <- readRDS(existing_data_file) # Combine data, handling duplicates by keeping the newer values combined_data <- dplyr::bind_rows(new_data, existing_data) %>% dplyr::distinct() %>% dplyr::group_by(sub_field) # Save updated data saveRDS(combined_data, existing_data_file) safe_log(paste("Updated CI data saved to", existing_data_file)) return(combined_data) } #' Process and combine CI values from raster files #' #' @param dates List of dates from date_list() #' @param field_boundaries Field boundaries as vector object #' @param merged_final_dir Directory with processed raster files #' @param field_boundaries_sf Field boundaries as SF object #' @param daily_CI_vals_dir Directory to save daily CI values #' @param cumulative_CI_vals_dir Directory to save cumulative CI values #' @return NULL (used for side effects) #' process_ci_values <- function(dates, field_boundaries, merged_final_dir, field_boundaries_sf, daily_CI_vals_dir, cumulative_CI_vals_dir) { # Find processed raster files raster_files <- list.files(merged_final_dir, full.names = TRUE, pattern = "\\.tif$") # Define path for combined CI data combined_ci_path <- here::here(cumulative_CI_vals_dir, "combined_CI_data.rds") # Check if the combined CI data file exists if (!file.exists(combined_ci_path)) { # Process all available data if file doesn't exist safe_log("combined_CI_data.rds does not exist. Creating new file with all available data.") # Extract data from all raster files purrr::walk( raster_files, extract_rasters_daily, field_geojson = field_boundaries_sf, quadrants = FALSE, save_dir = daily_CI_vals_dir ) # Combine all extracted data pivot_stats <- combine_ci_values(daily_CI_vals_dir, combined_ci_path) safe_log("All CI values extracted from historic images and saved.") } else { # Process only the latest data and add to existing file safe_log("combined_CI_data.rds exists, adding the latest image data.") # Filter files by dates filtered_files <- purrr::map(dates$days_filter, ~ raster_files[grepl(pattern = .x, x = raster_files)]) %>% purrr::compact() %>% purrr::flatten_chr() # Extract data for the new files purrr::walk( filtered_files, extract_rasters_daily, field_geojson = field_boundaries_sf, quadrants = TRUE, save_dir = daily_CI_vals_dir ) # Filter extracted values files by the current date range extracted_values <- list.files(daily_CI_vals_dir, full.names = TRUE) extracted_values <- purrr::map(dates$days_filter, ~ extracted_values[grepl(pattern = .x, x = extracted_values)]) %>% purrr::compact() %>% purrr::flatten_chr() # Combine new values new_pivot_stats <- extracted_values %>% purrr::map(readRDS) %>% purrr::list_rbind() %>% dplyr::group_by(sub_field) # Update the combined data file update_ci_data(new_pivot_stats, combined_ci_path) safe_log("CI values from latest images added to combined_CI_data.rds") } }