```r # Cloud detection analysis script # Load necessary libraries library(terra) library(exactextractr) library(sf) library(dplyr) library(ggplot2) library(tidyr) library(reshape2) # Define file paths (these should be set to your actual file locations) classes_file <- "path/to/classes.geojson" rasters_dir <- "path/to/rasters" diagnostic_dir <- "path/to/diagnostics" # Helper function for logging safe_log <- function(message, level = "INFO") { timestamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S") cat(paste0("[", timestamp, "] [", level, "] ", message, "\n")) } # Main processing block # Load classification polygons safe_log(paste("Loading classification polygons from:", classes_file)) classifications <- sf::st_read(classes_file, quiet = TRUE) # Ensure the CRS is set (assuming WGS84 here, adjust if necessary) safe_log("No CRS found for the classifications. Setting to WGS84 (EPSG:4326).", "WARNING") sf::st_crs(classifications) <- 4326 # List all raster files in the directory raster_files <- list.files(rasters_dir, pattern = "\\.tif$", full.names = TRUE) # Create a named vector for extraction_rasters based on base names extraction_rasters <- setNames(raster_files, tools::file_path_sans_ext(basename(raster_files))) # Create a stack of all rasters extraction_stack <- terra::rast(extraction_rasters) # User-provided simplified extraction for mean statistics per polygon safe_log("Extracting mean statistics per polygon using exactextractr...") all_stats <- cbind( classifications, round(exactextractr::exact_extract(extraction_stack, classifications, fun = "mean", progress = FALSE), 2) ) %>% sf::st_drop_geometry() # Ensures all_stats is a data frame # Ensure 'class_name' column exists, if not, use 'class' as 'class_name' all_stats$class_name <- all_stats$class # Save the extracted statistics to a CSV file stats_file <- file.path(diagnostic_dir, "polygon_mean_spectral_stats.csv") write.csv(all_stats, stats_file, row.names = FALSE) safe_log(paste("Saved mean spectral statistics per polygon to:", stats_file)) # Calculate optimized thresholds for cloud/shadow detection threshold_results <- data.frame( parameter = character(), best_threshold = numeric(), direction = character(), target_class = character(), vs_class = character(), accuracy = numeric(), stringsAsFactors = FALSE ) class_pairs <- list( c("cloud", "crop"), c("cloud", "bare_soil_dry"), c("cloud", "bare_soil_wet"), c("shadow_over_crop", "crop"), c("shadow_over_bare_soil", "bare_soil_dry"), c("shadow_over_bare_soil", "bare_soil_wet") ) cloud_detection_params_for_threshold <- intersect( c("mean.brightness", "mean.very_bright_pixels", "mean.blue_dominant", "mean.low_ndvi", "mean.green_dominant_nir", "mean.high_ndwi", "mean.blue_ratio", "mean.ndvi"), colnames(all_stats) ) shadow_detection_params_for_threshold <- intersect( c("mean.brightness", "mean.dark_pixels", "mean.very_dark_pixels", "mean.low_nir", "mean.shadow_ndvi", "mean.low_red_to_blue", "mean.high_blue_to_nir_ratio", "mean.blue_nir_ratio_raw", "mean.red_blue_ratio_raw"), colnames(all_stats) ) for (pair in class_pairs) { target_class <- pair[1] vs_class <- pair[2] params_to_check <- c(cloud_detection_params_for_threshold, shadow_detection_params_for_threshold) for (param in params_to_check) { target_values <- all_stats[all_stats$class_name == target_class, param] vs_values <- all_stats[all_stats$class_name == vs_class, param] target_values <- target_values[!is.na(target_values)] vs_values <- vs_values[!is.na(vs_values)] # Only proceed if both groups have at least one value if (length(target_values) > 0 && length(vs_values) > 0) { target_mean <- mean(target_values) target_sd <- sd(target_values) vs_mean <- mean(vs_values) vs_sd <- sd(vs_values) target_sd[is.na(target_sd)] <- 0 vs_sd[is.na(vs_sd)] <- 0 direction <- ifelse(target_mean > vs_mean, ">", "<") all_values <- c(target_values, vs_values) min_val <- min(all_values) max_val <- max(all_values) # Only proceed if min and max are finite and not equal if (is.finite(min_val) && is.finite(max_val) && min_val != max_val) { potential_thresholds <- seq(min_val, max_val, length.out = 20) best_accuracy <- -1 best_threshold <- ifelse(direction == ">", min(potential_thresholds), max(potential_thresholds)) for (threshold in potential_thresholds) { if (direction == ">") { correct_target <- sum(target_values > threshold) correct_vs <- sum(vs_values <= threshold) } else { correct_target <- sum(target_values < threshold) correct_vs <- sum(vs_values >= threshold) } accuracy <- (correct_target + correct_vs) / (length(target_values) + length(vs_values)) if (accuracy > best_accuracy) { best_accuracy <- accuracy best_threshold <- threshold } } threshold_results <- rbind(threshold_results, data.frame( parameter = param, best_threshold = best_threshold, direction = direction, target_class = target_class, vs_class = vs_class, accuracy = best_accuracy, stringsAsFactors = FALSE )) } } } } thresholds_file <- file.path(diagnostic_dir, "optimal_thresholds.csv") write.csv(threshold_results, thresholds_file, row.names = FALSE) safe_log(paste("Saved optimal threshold recommendations to:", thresholds_file)) # Fix: get plot_measure_cols by matching raster base names to all_stats columns with 'mean.' prefix plot_measure_cols <- intersect(names(extraction_rasters), gsub('^mean\\.', '', colnames(all_stats))) plot_data <- reshape2::melt( all_stats, id.vars = c("class", "class_name"), measure.vars = paste0("mean.", plot_measure_cols), variable.name = "parameter", value.name = "value" ) # Remove 'mean.' prefix from parameter column for clarity plot_data$parameter <- sub("^mean\\.", "", plot_data$parameter) plots_dir <- file.path(diagnostic_dir, "class_plots") dir.create(plots_dir, showWarnings = FALSE, recursive = TRUE) key_params_for_plot_list <- c("brightness", "ndvi", "blue_ratio", "ndwi", "blue_nir_ratio_raw", "red_blue_ratio_raw") key_params_to_plot <- intersect(key_params_for_plot_list, plot_measure_cols) for (param_to_plot in key_params_to_plot) { param_data_subset <- plot_data[plot_data$parameter == param_to_plot, ] p <- ggplot2::ggplot(param_data_subset, ggplot2::aes(x = class_name, y = value, fill = class_name)) + ggplot2::geom_boxplot() + ggplot2::theme_minimal() + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) + ggplot2::labs( title = paste("Distribution of", param_to_plot, "by Land Cover Class"), x = "Class", y = param_to_plot, fill = "Class" ) plot_file <- file.path(plots_dir, paste0("boxplot_", param_to_plot, ".png")) ggplot2::ggsave(plot_file, p, width = 10, height = 6, dpi = 150) } summary_params_for_plot_list <- c("brightness", "ndvi", "blue_nir_ratio_raw", "red_blue_ratio_raw") summary_params_to_plot <- intersect(summary_params_for_plot_list, plot_measure_cols) summary_data_subset <- plot_data[plot_data$parameter %in% summary_params_to_plot,] p_summary <- ggplot2::ggplot(summary_data_subset, ggplot2::aes(x = class_name, y = value, fill = class_name)) + ggplot2::geom_boxplot() + ggplot2::facet_wrap(~parameter, scales = "free_y") + ggplot2::theme_minimal() + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1), strip.text = ggplot2::element_text(size = 8)) + ggplot2::labs( title = "Summary of Key Spectral Parameters by Land Cover Class", x = "Class", y = "Value", fill = "Class" ) summary_file <- file.path(plots_dir, "spectral_parameters_summary.png") ggplot2::ggsave(summary_file, p_summary, width = 12, height = 8, dpi = 150) safe_log(paste("Generated spectral parameter plots in:", plots_dir)) safe_log("Cloud detection analysis script finished.") ```