#' DEBUG_REMOVE_DATE_TIFFS.R
#' ==============================================================================
#' PURPOSE:
#'   Remove all TIFFs (and the matching daily_vals .rds files) of a specific
#'   date OR date range from multiple storage folders.
#'   Useful for debugging/re-running parts of the pipeline without full re-download.
#'
#'   Files are looked up under laravel_app/storage/app/[project], resolved
#'   relative to the current working directory.
#'
#' USAGE:
#'   Rscript DEBUG_remove_date_tiffs.R [project] [date] [options]
#'   Rscript DEBUG_remove_date_tiffs.R [project] --start-date [START] --end-date [END] [options]
#'
#' SINGLE DATE EXAMPLES:
#'   # Remove 2026-02-08 from all folders (WITH CONFIRMATION).
#'   # The confirmation prompt only works in an interactive R session; when run
#'   # via Rscript the script exits with an error unless --no-confirm or
#'   # --dry-run is given.
#'   Rscript DEBUG_remove_date_tiffs.R angata 2026-02-08
#'
#'   # Remove from all folders without confirmation
#'   Rscript DEBUG_remove_date_tiffs.R angata 2026-02-08 --no-confirm
#'
#'   # Dry run - show what WOULD be deleted without deleting
#'   Rscript DEBUG_remove_date_tiffs.R angata 2026-02-08 --dry-run
#'
#' DATE RANGE EXAMPLES:
#'   # Remove all dates from Nov 1, 2025 to Feb 11, 2026 (both ends inclusive)
#'   Rscript DEBUG_remove_date_tiffs.R aura --start-date 2025-11-01 --end-date 2026-02-11 --no-confirm
#'
#'   # Dry run for date range
#'   Rscript DEBUG_remove_date_tiffs.R aura --start-date 2025-11-01 --end-date 2026-02-11 --dry-run
#'
#' OPTIONS:
#'   --dry-run              Preview deletions without actually deleting
#'   --no-confirm           Delete without confirmation
#'   --skip-merged          Skip merged_tif folder
#'   --skip-field-tiles     Skip field_tiles folder
#'   --skip-field-tiles-ci  Skip field_tiles_CI folder
#'   --skip-daily-vals      Skip daily_vals folder
#'
#' ==============================================================================

# ==============================================================================
# CONFIGURATION - TOGGLE WHICH FOLDERS TO DELETE FROM (DEFAULT: ALL)
# ==============================================================================

# Per-folder switches. TRUE means files for the requested date(s) are removed
# from that folder; FALSE leaves the folder untouched. The --skip-* command-line
# flags handled in main() override these defaults for a single run.
DELETE_FROM_MERGED_TIF     <- TRUE  # merged_tif/{DATE}.tif
DELETE_FROM_FIELD_TILES    <- TRUE  # field_tiles/{FIELD}/{DATE}.tif
DELETE_FROM_FIELD_TILES_CI <- TRUE  # field_tiles_CI/{FIELD}/{DATE}.tif
DELETE_FROM_DAILY_VALS     <- TRUE  # Data/extracted_ci/daily_vals/{SUBDIR}/{DATE}.rds

# Safety defaults (overridable with --dry-run / --no-confirm).
DRY_RUN              <- FALSE  # TRUE: only report what would be deleted
REQUIRE_CONFIRMATION <- TRUE   # FALSE: delete without asking first

# ==============================================================================
# HELPERS
# ==============================================================================

#' Print the command-line usage text to stdout.
print_usage <- function() {
  cat(
    "Usage:\n",
    " Single date: Rscript DEBUG_remove_date_tiffs.R [project] [date] [options]\n",
    " Date range: Rscript DEBUG_remove_date_tiffs.R [project] --start-date [START] --end-date [END] [options]\n\n",
    "Examples:\n",
    " Rscript DEBUG_remove_date_tiffs.R angata 2026-02-08\n",
    " Rscript DEBUG_remove_date_tiffs.R aura --start-date 2025-11-01 --end-date 2026-02-11 --no-confirm\n\n",
    "Options:\n",
    " --dry-run Preview deletions without actually deleting\n",
    " --no-confirm Delete without confirmation\n",
    " --skip-merged Skip merged_tif folder\n",
    " --skip-field-tiles Skip field_tiles folder\n",
    " --skip-field-tiles-ci Skip field_tiles_CI folder\n",
    " --skip-daily-vals Skip daily_vals folder\n\n",
    sep = ""
  )
}

#' Print an error message to stdout and terminate R with exit status 1.
#'
#' @param ... Character pieces, concatenated without a separator.
exit_with_error <- function(...) {
  cat(..., sep = "")
  quit(status = 1)
}

#' Parse a YYYY-MM-DD command-line value into a Date.
#'
#' `as.Date(x, format = )` ignores trailing characters, so the overall shape is
#' checked first; otherwise a typo such as "2026-02-081" would be accepted as
#' 2026-02-08.
#'
#' @param value Character scalar from the command line.
#' @return A `Date` of length 1; `NA` when `value` is not a valid date.
parse_cli_date <- function(value) {
  well_formed <- length(value) == 1L && !is.na(value) &&
    grepl("^[0-9]{4}-[0-9]{1,2}-[0-9]{1,2}$", value)
  if (!well_formed) {
    return(as.Date(NA))
  }
  tryCatch(
    as.Date(value, format = "%Y-%m-%d"),
    error = function(e) as.Date(NA)
  )
}

#' List the immediate sub-directories of `parent_dir`.
#'
#' @param parent_dir Directory path.
#' @return Character vector of full paths; empty when `parent_dir` is missing.
list_child_dirs <- function(parent_dir) {
  if (!dir.exists(parent_dir)) {
    return(character(0))
  }
  list.dirs(parent_dir, full.names = TRUE, recursive = FALSE)
}

#' Keep only the paths that currently exist on disk.
#'
#' @param paths Character vector of candidate file paths (may be empty).
#' @return The subset of `paths` for which `file.exists()` is TRUE.
existing_files <- function(paths) {
  paths[file.exists(paths)]
}

# ==============================================================================
# MAIN FUNCTION
# ==============================================================================

#' Script entry point: parse the command line, confirm, and delete dated files.
#'
#' Reads `commandArgs(trailingOnly = TRUE)` as
#' `[project] [date] [options]` or
#' `[project] --start-date [START] --end-date [END] [options]`.
#' Options may appear anywhere after the project name. The module-level
#' toggles (DELETE_FROM_*, DRY_RUN, REQUIRE_CONFIRMATION) provide the defaults
#' and are copied into locals rather than modified.
#'
#' Does not return: always ends with `quit()`. Exit status is 1 for invalid
#' arguments, for a required confirmation that cannot be prompted, or when at
#' least one file could not be deleted; otherwise 0.
main <- function() {
  args <- commandArgs(trailingOnly = TRUE)

  if (length(args) < 2) {
    cat("\n[ERROR] Missing arguments\n")
    print_usage()
    quit(status = 1)
  }

  project <- args[1]

  # Local copies of the defaults; command-line flags override them below.
  dry_run <- DRY_RUN
  require_confirmation <- REQUIRE_CONFIRMATION
  delete_merged <- DELETE_FROM_MERGED_TIF
  delete_field_tiles <- DELETE_FROM_FIELD_TILES
  delete_field_tiles_ci <- DELETE_FROM_FIELD_TILES_CI
  delete_daily_vals <- DELETE_FROM_DAILY_VALS

  # ---------------------------------------------------------------------------
  # Date arguments: range mode needs BOTH flags, otherwise args[2] is the date
  # ---------------------------------------------------------------------------
  date_flags <- c("--start-date", "--end-date")
  start_idx <- which(args == "--start-date")
  end_idx <- which(args == "--end-date")

  date_str <- NULL
  start_date_str <- NULL
  end_date_str <- NULL

  if (length(start_idx) > 0 && length(end_idx) > 0) {
    if (length(start_idx) > 1 || length(end_idx) > 1) {
      exit_with_error(
        "\n[ERROR] --start-date and --end-date may each be given only once\n"
      )
    }
    if (start_idx + 1 > length(args) || end_idx + 1 > length(args)) {
      exit_with_error(
        "\n[ERROR] --start-date and --end-date require date values\n"
      )
    }
    start_date_str <- args[start_idx + 1]
    end_date_str <- args[end_idx + 1]
    consumed <- c(1, start_idx, start_idx + 1, end_idx, end_idx + 1)
  } else {
    if (startsWith(args[2], "--")) {
      exit_with_error(
        "\n[ERROR] Missing date argument. Either provide:\n",
        " - A single date: Rscript ... [project] 2026-02-08\n",
        " - Or --start-date and --end-date flags\n\n"
      )
    }
    date_str <- args[2]
    consumed <- c(1, 2)
  }

  # ---------------------------------------------------------------------------
  # Options: every remaining argument, wherever it appears, must be known.
  # Silently ignoring a misplaced or misspelled --dry-run would turn a preview
  # into a real deletion, so anything unexpected aborts the run.
  # ---------------------------------------------------------------------------
  option_args <- args[setdiff(seq_along(args), consumed)]
  option_args <- option_args[nzchar(option_args)]

  for (arg in option_args) {
    if (arg == "--dry-run") {
      dry_run <- TRUE
    } else if (arg == "--no-confirm") {
      require_confirmation <- FALSE
    } else if (arg == "--skip-merged") {
      delete_merged <- FALSE
    } else if (arg == "--skip-field-tiles") {
      delete_field_tiles <- FALSE
    } else if (arg == "--skip-field-tiles-ci") {
      delete_field_tiles_ci <- FALSE
    } else if (arg == "--skip-daily-vals") {
      delete_daily_vals <- FALSE
    } else if (arg %in% date_flags) {
      exit_with_error(
        "\n[ERROR] --start-date and --end-date must be used together\n"
      )
    } else {
      exit_with_error(
        sprintf("\n[ERROR] Unrecognized argument: %s\n", arg),
        "Run the script without arguments to see the usage.\n"
      )
    }
  }

  # ---------------------------------------------------------------------------
  # Validate and convert dates
  # ---------------------------------------------------------------------------
  if (!is.null(date_str)) {
    date_obj <- parse_cli_date(date_str)
    if (is.na(date_obj)) {
      exit_with_error(sprintf(
        "[ERROR] Invalid date format: %s (expected YYYY-MM-DD)\n", date_str
      ))
    }
    dates_to_process <- date_obj
  } else {
    start_date_obj <- parse_cli_date(start_date_str)
    if (is.na(start_date_obj)) {
      exit_with_error(sprintf(
        "[ERROR] Invalid start date format: %s (expected YYYY-MM-DD)\n",
        start_date_str
      ))
    }

    end_date_obj <- parse_cli_date(end_date_str)
    if (is.na(end_date_obj)) {
      exit_with_error(sprintf(
        "[ERROR] Invalid end date format: %s (expected YYYY-MM-DD)\n",
        end_date_str
      ))
    }

    if (start_date_obj > end_date_obj) {
      exit_with_error(sprintf(
        "[ERROR] Start date (%s) is after end date (%s)\n",
        start_date_str, end_date_str
      ))
    }

    # Inclusive daily sequence from start to end.
    dates_to_process <- seq(start_date_obj, end_date_obj, by = "1 day")
  }

  # ===========================================================================
  # PROCESS DATES
  # ===========================================================================

  total_dates <- length(dates_to_process)
  total_files_deleted <- 0
  total_errors <- 0

  cat("\n")
  cat(strrep("=", 70), "\n")
  cat("DELETE TIFFS - SUMMARY\n")
  cat(strrep("=", 70), "\n")
  cat(sprintf("Project: %s\n", project))

  if (total_dates == 1) {
    cat(sprintf("Date: %s\n", format(dates_to_process[1], "%Y-%m-%d")))
  } else {
    cat(sprintf(
      "Date range: %s to %s (%d dates)\n",
      format(dates_to_process[1], "%Y-%m-%d"),
      format(dates_to_process[total_dates], "%Y-%m-%d"),
      total_dates
    ))
  }

  cat(sprintf("Dry run: %s\n", if (dry_run) "YES" else "NO"))
  cat("\n")

  # The storage root is relative to the working directory, so a typo in the
  # project name or a wrong working directory would otherwise just report
  # zero deletions with no explanation.
  base_path <- file.path("laravel_app", "storage", "app", project)
  if (!dir.exists(base_path)) {
    cat(sprintf(
      "[WARNING] Project folder not found: %s (working directory: %s)\n\n",
      base_path, getwd()
    ))
  }

  # Confirm before proceeding
  if (require_confirmation && !dry_run) {
    cat("⚠️ This will PERMANENTLY DELETE files from the above date(s)!\n")
    cat("Use --no-confirm flag to skip this prompt\n")

    # readline() can only prompt in an interactive session.
    if (interactive()) {
      response <- readline(
        prompt = "Type 'yes' to confirm, or anything else to cancel: "
      )

      if (tolower(response) != "yes") {
        cat("[CANCELLED] No files deleted\n")
        cat(strrep("=", 70), "\n\n")
        quit(status = 0)
      }
    } else {
      cat("\n[ERROR] Non-interactive mode detected (running via Rscript)\n")
      cat("Cannot prompt for confirmation. Use --no-confirm flag to proceed\n\n")
      cat(strrep("=", 70), "\n\n")
      quit(status = 1)
    }
  }

  # ===========================================================================
  # LOOP THROUGH DATES AND DELETE
  # ===========================================================================

  cat("Processing...\n\n")

  # The folder layout does not depend on the date, so list it once up front.
  merged_dir <- file.path(base_path, "merged_tif")
  field_tile_dirs <- list_child_dirs(file.path(base_path, "field_tiles"))
  field_tile_ci_dirs <- list_child_dirs(file.path(base_path, "field_tiles_CI"))
  daily_val_dirs <- list_child_dirs(
    file.path(base_path, "Data", "extracted_ci", "daily_vals")
  )

  for (date_idx in seq_along(dates_to_process)) {
    date_str <- format(dates_to_process[date_idx], "%Y-%m-%d")
    tif_name <- paste0(date_str, ".tif")
    rds_name <- paste0(date_str, ".rds")

    # Existing files for this date across all enabled folders:
    #   merged_tif/{DATE}.tif
    #   field_tiles/{FIELD}/{DATE}.tif
    #   field_tiles_CI/{FIELD}/{DATE}.tif
    #   Data/extracted_ci/daily_vals/{SUBDIR}/{DATE}.rds
    targets <- c(
      character(0),
      if (delete_merged) {
        existing_files(file.path(merged_dir, tif_name))
      },
      if (delete_field_tiles) {
        existing_files(file.path(field_tile_dirs, tif_name))
      },
      if (delete_field_tiles_ci) {
        existing_files(file.path(field_tile_ci_dirs, tif_name))
      },
      if (delete_daily_vals) {
        existing_files(file.path(daily_val_dirs, rds_name))
      }
    )

    if (dry_run) {
      deleted_count <- length(targets)
      error_count <- 0
    } else {
      removed <- vapply(
        targets,
        function(path) {
          isTRUE(tryCatch(file.remove(path), error = function(e) FALSE))
        },
        logical(1),
        USE.NAMES = FALSE
      )
      deleted_count <- sum(removed)
      error_count <- sum(!removed)

      # Name the files that could not be removed so they can be followed up.
      for (failed_path in targets[!removed]) {
        cat(sprintf("[ERROR] Could not delete: %s\n", failed_path))
      }
    }

    total_files_deleted <- total_files_deleted + deleted_count
    total_errors <- total_errors + error_count

    # Progress indicator (every 5 dates or on last date)
    if (date_idx %% 5 == 0 || date_idx == total_dates) {
      if (dry_run) {
        cat(sprintf(
          "[%d/%d] %s: Would delete %d files\n",
          date_idx, total_dates, date_str, deleted_count
        ))
      } else {
        cat(sprintf(
          "[%d/%d] %s: Deleted %d files\n",
          date_idx, total_dates, date_str, deleted_count
        ))
      }
    }
  }

  # ===========================================================================
  # FINAL SUMMARY
  # ===========================================================================

  cat("\n")
  cat(strrep("=", 70), "\n")
  cat("SUMMARY\n")
  cat(strrep("=", 70), "\n")
  cat(sprintf("Dates processed: %d\n", total_dates))

  if (dry_run) {
    cat(sprintf("Files that would be deleted: %d\n", total_files_deleted))
  } else {
    cat(sprintf("Total files deleted: %d\n", total_files_deleted))
    if (total_errors > 0) {
      cat(sprintf("Errors: %d\n", total_errors))
    }
  }

  cat(strrep("=", 70), "\n\n")

  # Report failed deletions through the exit status as well as the log.
  quit(status = if (total_errors > 0) 1 else 0)
}

# ==============================================================================
# EXECUTE
# ==============================================================================

# Run main() only when this code is evaluated at top level (frame number 0),
# i.e. not from inside another function call.
if (sys.nframe() == 0L) main()