SmartCane/r_app/experiments/delete_cloud_exploratoin.R
Timon 6efcc8cfec Fix CI report pipeline: update tmap v4 syntax, add continuous color scales, fix formatting
- Updated all CI maps to use tm_scale_continuous() for proper tmap v4 compatibility
- Added fixed color scale limits (1-8 for CI, -3 to +3 for differences) for consistent field comparison
- Fixed YAML header formatting issues in CI_report_dashboard_planet.Rmd
- Positioned RGB map before CI overview map as requested
- Removed all obsolete use_breaks parameter references
- Enhanced error handling and logging throughout the pipeline
- Added new experimental analysis scripts and improvements to mosaic creation
2025-06-19 20:37:20 +02:00

191 lines
8 KiB
R

```r
# Cloud detection analysis script
# Load necessary libraries
library(terra)
library(exactextractr)
library(sf)
library(dplyr)
library(ggplot2)
library(tidyr)
library(reshape2)
# Define file paths (these should be set to your actual file locations)
classes_file <- "path/to/classes.geojson"
rasters_dir <- "path/to/rasters"
diagnostic_dir <- "path/to/diagnostics"
# Helper function for logging
safe_log <- function(message, level = "INFO") {
timestamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
cat(paste0("[", timestamp, "] [", level, "] ", message, "\n"))
}
# Main processing block
# Load classification polygons
safe_log(paste("Loading classification polygons from:", classes_file))
classifications <- sf::st_read(classes_file, quiet = TRUE)
# Ensure the CRS is set (assuming WGS84 here, adjust if necessary)
safe_log("No CRS found for the classifications. Setting to WGS84 (EPSG:4326).", "WARNING")
sf::st_crs(classifications) <- 4326
# List all raster files in the directory
raster_files <- list.files(rasters_dir, pattern = "\\.tif$", full.names = TRUE)
# Create a named vector for extraction_rasters based on base names
extraction_rasters <- setNames(raster_files, tools::file_path_sans_ext(basename(raster_files)))
# Create a stack of all rasters
extraction_stack <- terra::rast(extraction_rasters)
# User-provided simplified extraction for mean statistics per polygon
safe_log("Extracting mean statistics per polygon using exactextractr...")
all_stats <- cbind(
classifications,
round(exactextractr::exact_extract(extraction_stack, classifications, fun = "mean", progress = FALSE), 2)
) %>%
sf::st_drop_geometry() # Ensures all_stats is a data frame
# Ensure 'class_name' column exists, if not, use 'class' as 'class_name'
all_stats$class_name <- all_stats$class
# Save the extracted statistics to a CSV file
stats_file <- file.path(diagnostic_dir, "polygon_mean_spectral_stats.csv")
write.csv(all_stats, stats_file, row.names = FALSE)
safe_log(paste("Saved mean spectral statistics per polygon to:", stats_file))
# Calculate optimized thresholds for cloud/shadow detection
threshold_results <- data.frame(
parameter = character(),
best_threshold = numeric(),
direction = character(),
target_class = character(),
vs_class = character(),
accuracy = numeric(),
stringsAsFactors = FALSE
)
class_pairs <- list(
c("cloud", "crop"),
c("cloud", "bare_soil_dry"),
c("cloud", "bare_soil_wet"),
c("shadow_over_crop", "crop"),
c("shadow_over_bare_soil", "bare_soil_dry"),
c("shadow_over_bare_soil", "bare_soil_wet")
)
cloud_detection_params_for_threshold <- intersect(
c("mean.brightness", "mean.very_bright_pixels", "mean.blue_dominant", "mean.low_ndvi", "mean.green_dominant_nir", "mean.high_ndwi", "mean.blue_ratio", "mean.ndvi"),
colnames(all_stats)
)
shadow_detection_params_for_threshold <- intersect(
c("mean.brightness", "mean.dark_pixels", "mean.very_dark_pixels", "mean.low_nir", "mean.shadow_ndvi", "mean.low_red_to_blue", "mean.high_blue_to_nir_ratio", "mean.blue_nir_ratio_raw", "mean.red_blue_ratio_raw"),
colnames(all_stats)
)
for (pair in class_pairs) {
target_class <- pair[1]
vs_class <- pair[2]
params_to_check <- c(cloud_detection_params_for_threshold, shadow_detection_params_for_threshold)
for (param in params_to_check) {
target_values <- all_stats[all_stats$class_name == target_class, param]
vs_values <- all_stats[all_stats$class_name == vs_class, param]
target_values <- target_values[!is.na(target_values)]
vs_values <- vs_values[!is.na(vs_values)]
# Only proceed if both groups have at least one value
if (length(target_values) > 0 && length(vs_values) > 0) {
target_mean <- mean(target_values)
target_sd <- sd(target_values)
vs_mean <- mean(vs_values)
vs_sd <- sd(vs_values)
target_sd[is.na(target_sd)] <- 0
vs_sd[is.na(vs_sd)] <- 0
direction <- ifelse(target_mean > vs_mean, ">", "<")
all_values <- c(target_values, vs_values)
min_val <- min(all_values)
max_val <- max(all_values)
# Only proceed if min and max are finite and not equal
if (is.finite(min_val) && is.finite(max_val) && min_val != max_val) {
potential_thresholds <- seq(min_val, max_val, length.out = 20)
best_accuracy <- -1
best_threshold <- ifelse(direction == ">", min(potential_thresholds), max(potential_thresholds))
for (threshold in potential_thresholds) {
if (direction == ">") {
correct_target <- sum(target_values > threshold)
correct_vs <- sum(vs_values <= threshold)
} else {
correct_target <- sum(target_values < threshold)
correct_vs <- sum(vs_values >= threshold)
}
accuracy <- (correct_target + correct_vs) / (length(target_values) + length(vs_values))
if (accuracy > best_accuracy) {
best_accuracy <- accuracy
best_threshold <- threshold
}
}
threshold_results <- rbind(threshold_results, data.frame(
parameter = param,
best_threshold = best_threshold,
direction = direction,
target_class = target_class,
vs_class = vs_class,
accuracy = best_accuracy,
stringsAsFactors = FALSE
))
}
}
}
}
thresholds_file <- file.path(diagnostic_dir, "optimal_thresholds.csv")
write.csv(threshold_results, thresholds_file, row.names = FALSE)
safe_log(paste("Saved optimal threshold recommendations to:", thresholds_file))
# Fix: get plot_measure_cols by matching raster base names to all_stats columns with 'mean.' prefix
plot_measure_cols <- intersect(names(extraction_rasters), gsub('^mean\\.', '', colnames(all_stats)))
plot_data <- reshape2::melt(
all_stats,
id.vars = c("class", "class_name"),
measure.vars = paste0("mean.", plot_measure_cols),
variable.name = "parameter",
value.name = "value"
)
# Remove 'mean.' prefix from parameter column for clarity
plot_data$parameter <- sub("^mean\\.", "", plot_data$parameter)
plots_dir <- file.path(diagnostic_dir, "class_plots")
dir.create(plots_dir, showWarnings = FALSE, recursive = TRUE)
key_params_for_plot_list <- c("brightness", "ndvi", "blue_ratio", "ndwi",
"blue_nir_ratio_raw", "red_blue_ratio_raw")
key_params_to_plot <- intersect(key_params_for_plot_list, plot_measure_cols)
for (param_to_plot in key_params_to_plot) {
param_data_subset <- plot_data[plot_data$parameter == param_to_plot, ]
p <- ggplot2::ggplot(param_data_subset, ggplot2::aes(x = class_name, y = value, fill = class_name)) +
ggplot2::geom_boxplot() +
ggplot2::theme_minimal() +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
ggplot2::labs(
title = paste("Distribution of", param_to_plot, "by Land Cover Class"),
x = "Class",
y = param_to_plot,
fill = "Class"
)
plot_file <- file.path(plots_dir, paste0("boxplot_", param_to_plot, ".png"))
ggplot2::ggsave(plot_file, p, width = 10, height = 6, dpi = 150)
}
summary_params_for_plot_list <- c("brightness", "ndvi",
"blue_nir_ratio_raw", "red_blue_ratio_raw")
summary_params_to_plot <- intersect(summary_params_for_plot_list, plot_measure_cols)
summary_data_subset <- plot_data[plot_data$parameter %in% summary_params_to_plot,]
p_summary <- ggplot2::ggplot(summary_data_subset, ggplot2::aes(x = class_name, y = value, fill = class_name)) +
ggplot2::geom_boxplot() +
ggplot2::facet_wrap(~parameter, scales = "free_y") +
ggplot2::theme_minimal() +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
strip.text = ggplot2::element_text(size = 8)) +
ggplot2::labs(
title = "Summary of Key Spectral Parameters by Land Cover Class",
x = "Class",
y = "Value",
fill = "Class"
)
summary_file <- file.path(plots_dir, "spectral_parameters_summary.png")
ggplot2::ggsave(summary_file, p_summary, width = 12, height = 8, dpi = 150)
safe_log(paste("Generated spectral parameter plots in:", plots_dir))
safe_log("Cloud detection analysis script finished.")
```