# MOSAIC_CREATION_UTILS.R # ====================== # Utility functions for creating weekly mosaics from daily satellite imagery. # These functions support cloud cover assessment, date handling, and mosaic creation. #' Detect whether a project uses tile-based or single-file mosaic approach #' #' @param merged_final_tif_dir Directory containing merged_final_tif files #' @return List with has_tiles (logical), detected_tiles (vector), total_files (count) #' detect_mosaic_mode <- function(merged_final_tif_dir) { # Check if directory exists if (!dir.exists(merged_final_tif_dir)) { return(list(has_tiles = FALSE, detected_tiles = character(), total_files = 0)) } # List all .tif files in merged_final_tif tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE) if (length(tif_files) == 0) { return(list(has_tiles = FALSE, detected_tiles = character(), total_files = 0)) } # Check if ANY file matches tile naming pattern: *_XX.tif (where XX is 2 digits) # Tile pattern examples: 2025-11-27_00.tif, 2025-11-27_01.tif, week_50_2024_00.tif tile_pattern <- "_(\\d{2})\\.tif$" tile_files <- tif_files[grepl(tile_pattern, tif_files)] has_tiles <- length(tile_files) > 0 return(list( has_tiles = has_tiles, detected_tiles = tile_files, total_files = length(tif_files) )) } #' Safe logging function #' @param message The message to log #' @param level The log level (default: "INFO") #' @return NULL (used for side effects) #' safe_log <- function(message, level = "INFO") { if (exists("log_message")) { log_message(message, level) } else { if (level %in% c("ERROR", "WARNING")) { warning(message) } else { message(message) } } } #' Generate a sequence of dates for processing #' #' @param end_date The end date for the sequence (Date object) #' @param offset Number of days to look back from end_date #' @return A list containing week number, year, and a sequence of dates for filtering #' date_list <- function(end_date, offset) { # Input validation if (!lubridate::is.Date(end_date)) { end_date <- as.Date(end_date) if (is.na(end_date)) { stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.") } } offset <- as.numeric(offset) if (is.na(offset) || offset < 1) { stop("Invalid offset provided. Expected a positive number.") } # Calculate date range offset <- offset - 1 # Adjust offset to include end_date start_date <- end_date - lubridate::days(offset) # Extract week and year information week <- lubridate::isoweek(end_date) year <- lubridate::isoyear(end_date) # Generate sequence of dates days_filter <- seq(from = start_date, to = end_date, by = "day") days_filter <- format(days_filter, "%Y-%m-%d") # Format for consistent filtering # Log the date range safe_log(paste("Date range generated from", start_date, "to", end_date)) return(list( "week" = week, "year" = year, "days_filter" = days_filter, "start_date" = start_date, "end_date" = end_date )) } #' Create a weekly mosaic from available VRT files #' #' @param dates List from date_list() with date range info #' @param field_boundaries Field boundaries for image cropping #' @param daily_vrt_dir Directory containing VRT files #' @param merged_final_dir Directory with merged final rasters #' @param output_dir Output directory for weekly mosaics #' @param file_name_tif Output filename for the mosaic #' @param create_plots Whether to create visualization plots (default: TRUE) #' @return The file path of the saved mosaic #' create_weekly_mosaic <- function(dates, field_boundaries, daily_vrt_dir, merged_final_dir, output_dir, file_name_tif, create_plots = FALSE) { # Find VRT files for the specified date range vrt_list <- find_vrt_files(daily_vrt_dir, dates) # Find final raster files for fallback raster_files_final <- list.files(merged_final_dir, full.names = TRUE, pattern = "\\.tif$") # Process the mosaic if VRT files are available if (length(vrt_list) > 0) { safe_log("VRT list created, assessing cloud cover for mosaic creation") # Calculate aggregated cloud cover statistics (returns data frame for image selection) cloud_coverage_stats <- count_cloud_coverage(vrt_list, merged_final_dir, field_boundaries) # Create mosaic based on cloud cover assessment mosaic <- create_mosaic(raster_files_final, cloud_coverage_stats, field_boundaries) } else { safe_log("No VRT files available for the date range, creating empty mosaic with NA values", "WARNING") # Create empty mosaic if no files are available if (length(raster_files_final) == 0) { stop("No VRT files or final raster files available to create mosaic") } mosaic <- terra::rast(raster_files_final[1]) mosaic <- terra::setValues(mosaic, NA) mosaic <- terra::crop(mosaic, field_boundaries, mask = TRUE) names(mosaic) <- c("Red", "Green", "Blue", "NIR", "CI") } # Save the mosaic (without mask files to avoid breaking other scripts) file_path <- save_mosaic(mosaic, output_dir, file_name_tif, create_plots, save_mask = FALSE) safe_log(paste("Weekly mosaic processing completed for week", dates$week)) return(file_path) } #' Find VRT files within a date range #' #' @param vrt_directory Directory containing VRT files #' @param dates List from date_list() function containing days_filter #' @return Character vector of VRT file paths #' find_vrt_files <- function(vrt_directory, dates) { # Get all VRT files in directory vrt_files <- list.files(here::here(vrt_directory), full.names = TRUE) if (length(vrt_files) == 0) { warning("No VRT files found in directory: ", vrt_directory) return(character(0)) } # Filter files by dates vrt_list <- purrr::map(dates$days_filter, ~ vrt_files[grepl(pattern = .x, x = vrt_files)]) %>% purrr::compact() %>% purrr::flatten_chr() # Log results safe_log(paste("Found", length(vrt_list), "VRT files for the date range")) return(vrt_list) } #' Count missing pixels (clouds) in rasters - per field analysis using actual TIF files #' #' @param vrt_list List of VRT file paths (used to extract dates for TIF file lookup) #' @param merged_final_dir Directory containing the actual TIF files (e.g., merged_final_tif) #' @param field_boundaries Field boundaries (sf object) for calculating cloud cover over field areas only #' @return Data frame with aggregated cloud statistics for each TIF file (used for mosaic selection) #' count_cloud_coverage <- function(vrt_list, merged_final_dir = NULL, field_boundaries = NULL) { if (length(vrt_list) == 0) { warning("No VRT files provided for cloud coverage calculation") return(NULL) } tryCatch({ # Extract dates from VRT filenames to find corresponding TIF files # VRT filenames are like "merged2025-12-18.vrt", TIF filenames are like "2025-12-18.tif" tif_dates <- gsub(".*([0-9]{4}-[0-9]{2}-[0-9]{2}).*", "\\1", basename(vrt_list)) # Build list of actual TIF files to use tif_files <- paste0(here::here(merged_final_dir), "/", tif_dates, ".tif") # Check which TIF files exist tif_exist <- file.exists(tif_files) if (!any(tif_exist)) { warning("No TIF files found in directory: ", merged_final_dir) return(NULL) } tif_files <- tif_files[tif_exist] safe_log(paste("Found", length(tif_files), "TIF files for cloud coverage assessment")) # Initialize list to store aggregated results aggregated_results <- list() # Process each TIF file for (tif_idx in seq_along(tif_files)) { tif_file <- tif_files[tif_idx] tryCatch({ # Load the TIF file (typically has 5 bands: R, G, B, NIR, CI) current_raster <- terra::rast(tif_file) # Extract the CI band (last band) ci_band <- current_raster[[terra::nlyr(current_raster)]] # Create a unique field mask for THIS raster's extent # This handles cases where rasters have different extents due to missing data total_notna <- NA_real_ total_pixels <- NA_real_ if (!is.null(field_boundaries)) { tryCatch({ # Create mask specific to this raster's grid field_mask <- terra::rasterize(field_boundaries, ci_band, field = 1) # Count pixels within field boundaries (for this specific raster) total_pixels <- terra::global(field_mask, fun = "notNA")$notNA # Cloud coverage calculated only over field areas ci_field_masked <- terra::mask(ci_band, field_mask, maskvalue = NA) total_notna <- terra::global(ci_field_masked, fun = "notNA")$notNA }, error = function(e) { # If field mask creation fails, fall back to entire raster safe_log(paste("Could not create field mask for", basename(tif_file), ":", e$message), "WARNING") }) } # If field mask failed, use entire raster if (is.na(total_notna)) { total_notna <- terra::global(ci_band, fun = "notNA")$notNA total_pixels <- terra::ncell(ci_band) } # Calculate cloud coverage percentage (missing = clouds) missing_pct <- round(100 - ((total_notna / total_pixels) * 100)) aggregated_results[[tif_idx]] <- data.frame( filename = tif_file, notNA = total_notna, total_pixels = total_pixels, missing_pixels_percentage = missing_pct, thres_5perc = as.integer(missing_pct < 5), thres_40perc = as.integer(missing_pct < 45), stringsAsFactors = FALSE ) }, error = function(e) { safe_log(paste("Error processing TIF", basename(tif_file), ":", e$message), "WARNING") aggregated_results[[tif_idx]] <<- data.frame( filename = tif_file, notNA = NA_real_, total_pixels = NA_real_, missing_pixels_percentage = 100, thres_5perc = 0, thres_40perc = 0, stringsAsFactors = FALSE ) }) } # Combine all aggregated results aggregated_df <- if (length(aggregated_results) > 0) { do.call(rbind, aggregated_results) } else { data.frame() } # Log results safe_log(paste("Cloud coverage assessment completed for", length(vrt_list), "images")) # Return aggregated data only return(aggregated_df) }, error = function(e) { warning("Error in cloud coverage calculation: ", e$message) return(NULL) }) } #' Create a mosaic from merged_final_tif files based on cloud coverage #' #' @param tif_files List of processed TIF files (5 bands: R, G, B, NIR, CI) #' @param cloud_coverage_stats Cloud coverage statistics from count_cloud_coverage() #' @param field_boundaries Field boundaries for masking (optional) #' @return A SpatRaster object with 5 bands (Red, Green, Blue, NIR, CI) #' create_mosaic <- function(tif_files, cloud_coverage_stats, field_boundaries = NULL) { # If no TIF files, return NULL if (length(tif_files) == 0) { safe_log("No TIF files available for mosaic creation", "ERROR") return(NULL) } # Validate cloud coverage stats mosaic_type <- "Unknown" # Track what type of mosaic is being created if (is.null(cloud_coverage_stats) || nrow(cloud_coverage_stats) == 0) { safe_log("No cloud coverage statistics available, using all files", "WARNING") rasters_to_use <- tif_files mosaic_type <- paste("all", length(tif_files), "available images") } else { # Determine best rasters to use based on cloud coverage thresholds # Count how many images meet each threshold num_5perc <- sum(cloud_coverage_stats$thres_5perc, na.rm = TRUE) num_40perc <- sum(cloud_coverage_stats$thres_40perc, na.rm = TRUE) if (num_5perc > 1) { # Multiple images with <5% cloud coverage safe_log(paste("Creating max composite from", num_5perc, "cloud-free images (<5% clouds)")) mosaic_type <- paste(num_5perc, "cloud-free images (<5% clouds)") best_coverage <- which(cloud_coverage_stats$thres_5perc > 0) } else if (num_5perc == 1) { # Single image with <5% cloud coverage safe_log("Using single cloud-free image (<5% clouds)") mosaic_type <- "single cloud-free image (<5% clouds)" best_coverage <- which(cloud_coverage_stats$thres_5perc > 0) } else if (num_40perc > 1) { # Multiple images with <40% cloud coverage safe_log(paste("Creating max composite from", num_40perc, "partially cloudy images (<40% clouds)"), "WARNING") mosaic_type <- paste(num_40perc, "partially cloudy images (<40% clouds)") best_coverage <- which(cloud_coverage_stats$thres_40perc > 0) } else if (num_40perc == 1) { # Single image with <40% cloud coverage safe_log("Using single partially cloudy image (<40% clouds)", "WARNING") mosaic_type <- "single partially cloudy image (<40% clouds)" best_coverage <- which(cloud_coverage_stats$thres_40perc > 0) } else { # No cloud-free images available safe_log("No cloud-free images available, using all images", "WARNING") mosaic_type <- paste("all", nrow(cloud_coverage_stats), "available images") best_coverage <- seq_len(nrow(cloud_coverage_stats)) } # Get filenames of best-coverage images # Match by extracting tile ID from both cloud stats and TIF filenames rasters_to_use <- character() for (idx in best_coverage) { # Extract tile ID from cloud_coverage_stats filename (e.g., "tile_18" → 18) cc_filename <- cloud_coverage_stats$filename[idx] cc_tile_id <- gsub(".*_([0-9]+).*", "\\1", cc_filename) # Find matching TIF file by matching tile ID matching_tif <- NULL for (tif_file in tif_files) { tif_tile_id <- gsub(".*_([0-9]+)\\.tif", "\\1", basename(tif_file)) if (tif_tile_id == cc_tile_id) { matching_tif <- tif_file break } } if (!is.null(matching_tif)) { rasters_to_use <- c(rasters_to_use, matching_tif) } } if (length(rasters_to_use) == 0) { safe_log("Could not match cloud coverage stats to TIF files, using all files", "WARNING") rasters_to_use <- tif_files mosaic_type <- paste("all", length(tif_files), "available images") } } # Load and mosaic the selected rasters if (length(rasters_to_use) == 1) { # Single file - just load it safe_log(paste("Using single image for mosaic:", basename(rasters_to_use))) mosaic <- terra::rast(rasters_to_use[1]) } else { # Multiple files - merge handles different extents/grids automatically safe_log(paste("Creating mosaic from", length(rasters_to_use), "images")) # Load all rasters with error handling - only keep successful loads all_rasters <- Filter(Negate(is.null), lapply(rasters_to_use, function(f) { tryCatch({ terra::rast(f) }, error = function(e) { safe_log(paste("Warning: Could not load", basename(f), ":", e$message), "WARNING") NULL # Return NULL on error, will be filtered out }) })) # Check what we loaded safe_log(paste("Loaded", length(all_rasters), "valid rasters from", length(rasters_to_use), "files")) if (length(all_rasters) == 0) { safe_log("No valid rasters to merge", "WARNING") return(NULL) } # Merge all rasters (handles different extents and grids automatically) if (length(all_rasters) == 1) { mosaic <- all_rasters[[1]] safe_log("Using single raster after filtering") } else { # Create max composite: take maximum value at each pixel across all dates # This skips clouds (low/zero CI values) in favor of clear pixels from other dates mosaic <- tryCatch({ safe_log(paste("Creating max composite from", length(all_rasters), "images to fill clouds")) # Get extent from field boundaries if available, otherwise use raster intersection if (!is.null(field_boundaries)) { crop_extent <- terra::ext(field_boundaries) safe_log("Using field boundaries extent for consistent area across all dates") } else { # Fallback: use intersection of all raster extents crop_extent <- terra::ext(all_rasters[[1]]) for (i in 2:length(all_rasters)) { crop_extent <- terra::intersect(crop_extent, terra::ext(all_rasters[[i]])) } safe_log("Using raster intersection extent") } # Crop all rasters to common extent cropped_rasters <- lapply(all_rasters, function(r) { terra::crop(r, crop_extent) }) # Resample all cropped rasters to match the first one's grid # This handles pixel grid misalignment from Python's dynamic extent adjustment reference_grid <- cropped_rasters[[1]] safe_log("Resampling rasters to common grid for stacking") aligned_rasters <- lapply(cropped_rasters, function(r) { if (identical(terra::ext(r), terra::ext(reference_grid)) && identical(terra::res(r), terra::res(reference_grid))) { return(r) # Already aligned } terra::resample(r, reference_grid, method = "near") }) # Create max composite using mosaic on aligned rasters # Resample ensures all rasters have matching grids (no resolution mismatch) raster_collection <- terra::sprc(aligned_rasters) max_mosaic <- terra::mosaic(raster_collection, fun = "max") max_mosaic }, error = function(e) { safe_log(paste("Max composite creation failed:", e$message), "WARNING") safe_log("Using first raster only as fallback") all_rasters[[1]] }) safe_log(paste("Max composite created - taking clearest pixel at each location")) } # Ensure we have exactly the required bands: Red, Green, Blue, NIR, CI required_bands <- c("Red", "Green", "Blue", "NIR", "CI") available_bands <- names(mosaic) # Check if all required bands are present if (!all(required_bands %in% available_bands)) { safe_log(paste("Warning: Not all required bands found. Available:", paste(available_bands, collapse = ", ")), "WARNING") } # Select only the required bands in the correct order if (all(required_bands %in% available_bands)) { mosaic <- mosaic[[required_bands]] safe_log("Selected Red, Green, Blue, NIR, CI bands") } else { safe_log(paste("Warning: mosaic has", terra::nlyr(mosaic), "bands, expected 5 (R, G, B, NIR, CI)"), "WARNING") if (terra::nlyr(mosaic) > 5) { # Keep only first 5 bands as fallback mosaic <- terra::subset(mosaic, 1:5) safe_log("Keeping only first 5 bands as fallback") } } } # Crop/mask to field boundaries if provided if (!is.null(field_boundaries)) { tryCatch({ mosaic <- terra::crop(mosaic, field_boundaries, mask = TRUE) safe_log("Mosaic cropped to field boundaries") }, error = function(e) { safe_log(paste("Could not crop to field boundaries:", e$message), "WARNING") # Return uncropped mosaic }) } # Log final mosaic summary safe_log(paste("✓ Mosaic created from", mosaic_type, "-", terra::nlyr(mosaic), "bands,", nrow(mosaic), "x", ncol(mosaic), "pixels")) return(mosaic) } #' Save a mosaic raster to disk #' #' @param mosaic_raster A SpatRaster object to save #' @param output_dir Directory to save the output #' @param file_name Filename for the output raster #' @param plot_result Whether to create visualizations (default: FALSE) #' @param save_mask Whether to save cloud masks separately (default: FALSE) #' @return The file path of the saved raster #' save_mosaic <- function(mosaic_raster, output_dir, file_name, plot_result = FALSE, save_mask = FALSE) { # Validate input if (is.null(mosaic_raster)) { stop("No mosaic raster provided to save") } # Create output directory if it doesn't exist dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) # Create full file path file_path <- here::here(output_dir, file_name) # Get cloud mask if it exists cloud_mask <- attr(mosaic_raster, "cloud_mask") # Save raster terra::writeRaster(mosaic_raster, file_path, overwrite = TRUE) # Save cloud mask if available and requested if (!is.null(cloud_mask) && save_mask) { # Create mask filename by adding _mask before extension mask_file_name <- gsub("\\.(tif|TIF)$", "_mask.\\1", file_name) mask_file_path <- here::here(output_dir, mask_file_name) # Save the mask terra::writeRaster(cloud_mask, mask_file_path, overwrite = TRUE) safe_log(paste("Cloud/shadow mask saved to:", mask_file_path)) } else if (!is.null(cloud_mask)) { safe_log("Cloud mask available but not saved (save_mask = FALSE)") } # Create plots if requested if (plot_result) { # Plot the CI band if ("CI" %in% names(mosaic_raster)) { terra::plot(mosaic_raster$CI, main = paste("CI map", file_name)) } # Plot RGB image if (all(c("Red", "Green", "Blue") %in% names(mosaic_raster))) { terra::plotRGB(mosaic_raster, main = paste("RGB map", file_name)) } # Plot cloud mask if available if (!is.null(cloud_mask)) { terra::plot(cloud_mask, main = paste("Cloud/shadow mask", file_name), col = c("red", "green")) } # If we have both RGB and cloud mask, create a side-by-side comparison if (all(c("Red", "Green", "Blue") %in% names(mosaic_raster)) && !is.null(cloud_mask)) { old_par <- par(mfrow = c(1, 2)) terra::plotRGB(mosaic_raster, main = "RGB Image") # Create a colored mask for visualization (red = cloud/shadow, green = clear) mask_plot <- cloud_mask terra::plot(mask_plot, main = "Cloud/Shadow Mask", col = c("red", "green")) par(old_par) } } # Log save completion safe_log(paste("Mosaic saved to:", file_path)) return(file_path) } #' Create weekly mosaic from pre-split tiles with MAX aggregation #' #' This function processes tiles created by Script 01 and processed by Script 02. #' For each of the 25 tiles independently: #' 1. Collects that tile from all dates in the range #' 2. Calculates cloud coverage per date #' 3. Uses create_mosaic logic to select best dates (cloud-clean preferred) #' 4. Creates MAX composite for that tile #' 5. Saves to weekly_tile_max/tile_XX.tif #' #' Input: merged_final_tif/[DATE]/[TILE_01.tif, TILE_02.tif, ..., TILE_25.tif] #' Output: weekly_tile_max/tile_01.tif through tile_25.tif (25 weekly MAX tiles) #' #' @param dates List from date_list() containing days_filter vector #' @param merged_final_dir Directory containing processed tiles (merged_final_tif) #' @param tile_output_dir Directory to save weekly MAX tiles (weekly_tile_max) #' @param field_boundaries Field boundaries for cloud coverage calculation (optional) #' @return List of paths to created tile files #' create_weekly_mosaic_from_tiles <- function(dates, merged_final_dir, tile_output_dir, field_boundaries = NULL) { safe_log("Starting per-tile mosaic creation with cloud-based date selection...") # Create output directory if needed dir.create(tile_output_dir, recursive = TRUE, showWarnings = FALSE) # Step 1: Discover all tiles from all dates and group by tile ID tile_groups <- list() # Structure: tile_groups$tile_01 = list of files for that tile across dates for (date in dates$days_filter) { date_dir <- file.path(merged_final_dir, date) if (!dir.exists(date_dir)) { safe_log(paste(" Skipping date:", date, "- directory not found"), "WARNING") next } tile_files <- list.files(date_dir, pattern = "\\.tif$", full.names = TRUE) if (length(tile_files) == 0) { safe_log(paste(" No tiles found for date:", date), "WARNING") next } # Extract tile ID from each filename (e.g., "2026-01-02_01.tif" → tile ID is "01") for (tile_file in tile_files) { # Extract tile number from filename tile_basename <- basename(tile_file) tile_id <- gsub(".*_([0-9]+)\\.tif", "\\1", tile_basename) if (!tile_id %in% names(tile_groups)) { tile_groups[[tile_id]] <- list() } tile_groups[[tile_id]][[length(tile_groups[[tile_id]]) + 1]] <- tile_file } } if (length(tile_groups) == 0) { stop("No tiles found in date range") } safe_log(paste("Found", length(tile_groups), "unique tiles across all dates")) # Step 2: Process each tile independently created_tiles <- character() for (tile_id in names(tile_groups)) { tile_files_for_this_id <- unlist(tile_groups[[tile_id]]) safe_log(paste("Processing tile", tile_id, "with", length(tile_files_for_this_id), "dates")) # Step 2a: Calculate cloud coverage for this tile across all dates cloud_stats_this_tile <- tryCatch({ count_cloud_coverage_for_tile( tile_files = tile_files_for_this_id, field_boundaries = field_boundaries ) }, error = function(e) { safe_log(paste(" Error calculating cloud coverage for tile", tile_id, "-", e$message), "WARNING") NULL }) if (is.null(cloud_stats_this_tile) || nrow(cloud_stats_this_tile) == 0) { safe_log(paste(" No valid cloud stats for tile", tile_id, "- using all available dates"), "WARNING") cloud_stats_this_tile <- NULL } # Step 2b: Create MAX mosaic for this tile using create_mosaic logic tile_mosaic <- tryCatch({ create_mosaic( tif_files = tile_files_for_this_id, cloud_coverage_stats = cloud_stats_this_tile, field_boundaries = NULL # Don't crop individual tiles ) }, error = function(e) { safe_log(paste(" Error creating mosaic for tile", tile_id, "-", e$message), "WARNING") NULL }) if (is.null(tile_mosaic)) { safe_log(paste(" Failed to create mosaic for tile", tile_id, "- skipping"), "WARNING") next } # Step 2c: Save this tile's weekly MAX mosaic # Filename format: week_WW_YYYY_TT.tif (e.g., week_02_2026_01.tif for week 2, 2026, tile 1) tile_filename <- paste0("week_", sprintf("%02d", dates$week), "_", dates$year, "_", sprintf("%02d", as.integer(tile_id)), ".tif") tile_output_path <- file.path(tile_output_dir, tile_filename) tryCatch({ terra::writeRaster(tile_mosaic, tile_output_path, overwrite = TRUE) safe_log(paste(" ✓ Saved tile", tile_id, "to:", tile_filename)) created_tiles <- c(created_tiles, tile_output_path) }, error = function(e) { safe_log(paste(" Error saving tile", tile_id, "-", e$message), "WARNING") }) } safe_log(paste("✓ Created", length(created_tiles), "weekly MAX tiles in", tile_output_dir)) return(created_tiles) } #' Calculate cloud coverage for a single tile across multiple dates #' #' Helper function for per-tile cloud assessment. #' Takes tile files from different dates and calculates cloud coverage for each. #' Cloud coverage is calculated ONLY within field boundaries, so total_pixels #' varies per tile based on which fields are present in that tile area. #' #' @param tile_files Character vector of tile file paths from different dates #' @param field_boundaries Field boundaries for analysis (required for per-field counting) #' @return Data frame with cloud stats for each date/tile #' count_cloud_coverage_for_tile <- function(tile_files, field_boundaries = NULL) { if (length(tile_files) == 0) { warning("No tile files provided for cloud coverage calculation") return(NULL) } aggregated_results <- list() for (idx in seq_along(tile_files)) { tile_file <- tile_files[idx] tryCatch({ # Load the tile current_raster <- terra::rast(tile_file) # Extract the CI band (last band in 5-band output) ci_band <- current_raster[[terra::nlyr(current_raster)]] # Calculate cloud coverage within field boundaries if (!is.null(field_boundaries)) { # Create a reference raster template (same extent/resolution as ci_band, but independent of its data) # This ensures field_mask shows the potential field area even if ci_band is entirely cloudy (all NA) ref_template <- terra::rast(terra::ext(ci_band), resolution = terra::res(ci_band), crs = terra::crs(ci_band)) terra::values(ref_template) <- 1 # Fill entire raster with 1s # Crop and mask to field boundaries: keeps 1 inside fields, NA outside # This is independent of ci_band's data - represents the potential field area field_mask <- terra::crop(ref_template, field_boundaries, mask = TRUE) # Count total potential field pixels from the mask (independent of clouds) total_pixels <- terra::global(field_mask, fun = "notNA")$notNA # Now crop and mask CI band to field boundaries to count actual valid (non-cloud) pixels ci_masked <- terra::crop(ci_band, field_boundaries, mask = TRUE) total_notna <- terra::global(ci_masked, fun = "notNA")$notNA } else { # If no field boundaries provided, use entire tile total_notna <- terra::global(ci_band, fun = "notNA")$notNA total_pixels <- terra::ncell(ci_band) } # Cloud coverage percentage (missing = clouds) missing_pct <- round(100 - ((total_notna / total_pixels) * 100)) aggregated_results[[idx]] <- data.frame( filename = paste0("tile_", sprintf("%02d", as.integer(gsub(".*_([0-9]+)\\.tif", "\\1", basename(tile_file))))), notNA = total_notna, total_pixels = total_pixels, missing_pixels_percentage = missing_pct, thres_5perc = as.integer(missing_pct < 5), thres_40perc = as.integer(missing_pct < 45), stringsAsFactors = FALSE ) }, error = function(e) { safe_log(paste("Error processing tile:", basename(tile_file), "-", e$message), "WARNING") aggregated_results[[idx]] <<- data.frame( filename = tile_file, notNA = NA_real_, total_pixels = NA_real_, missing_pixels_percentage = 100, thres_5perc = 0, thres_40perc = 0, stringsAsFactors = FALSE ) }) } # Combine results aggregated_df <- if (length(aggregated_results) > 0) { do.call(rbind, aggregated_results) } else { data.frame() } return(aggregated_df) }