# SmartCane/r_app/DEBUG_remove_date_tiffs.R (R script)

#' DEBUG_REMOVE_DATE_TIFFS.R
#' ==============================================================================
#' PURPOSE:
#' Remove all per-date files (TIFFs in merged_tif, field_tiles and field_tiles_CI,
#' plus daily_vals .rds files) for a specific date OR date range from multiple storage folders.
#' Useful for debugging/re-running parts of the pipeline without full re-download.
#'
#' USAGE:
#' Rscript DEBUG_remove_date_tiffs.R [project] [date] [options]
#' Rscript DEBUG_remove_date_tiffs.R [project] --start-date [START] --end-date [END] [options]
#'
#' SINGLE DATE EXAMPLES:
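#' # Remove 2026-02-08 from all folders (WITH CONFIRMATION)
#' # NOTE: the confirmation prompt needs an interactive R session; when launched
#' # via Rscript the script exits with an error unless --no-confirm or --dry-run is given.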
#' Rscript DEBUG_remove_date_tiffs.R angata 2026-02-08
#'
#' # Remove from all folders without confirmation
#' Rscript DEBUG_remove_date_tiffs.R angata 2026-02-08 --no-confirm
#'
#' # Dry run - show what WOULD be deleted without deleting
#' Rscript DEBUG_remove_date_tiffs.R angata 2026-02-08 --dry-run
#'
#' DATE RANGE EXAMPLES:
#' # Remove all dates from Nov 1, 2025 to Feb 11, 2026
#' Rscript DEBUG_remove_date_tiffs.R aura --start-date 2025-11-01 --end-date 2026-02-11 --no-confirm
#'
#' # Dry run for date range
#' Rscript DEBUG_remove_date_tiffs.R aura --start-date 2025-11-01 --end-date 2026-02-11 --dry-run
#'
#' OPTIONS:
#' --dry-run Preview deletions without actually deleting
#' --no-confirm Delete without confirmation
#' --skip-merged Skip merged_tif folder
#' --skip-field-tiles Skip field_tiles folder
#' --skip-field-tiles-ci Skip field_tiles_CI folder
#' --skip-daily-vals Skip daily_vals folder
#'
#' ==============================================================================
# ==============================================================================
# DEFAULT SETTINGS
# ==============================================================================

# --- Safety switches ----------------------------------------------------------
# TRUE  -> only report what would be removed, never touch the files
DRY_RUN <- FALSE
# TRUE  -> ask the user before deleting; FALSE -> delete straight away
REQUIRE_CONFIRMATION <- TRUE

# --- Folder toggles (all enabled by default) ----------------------------------
# Flip any of these to FALSE to leave the corresponding folder untouched.
DELETE_FROM_MERGED_TIF <- TRUE
DELETE_FROM_FIELD_TILES <- TRUE
DELETE_FROM_FIELD_TILES_CI <- TRUE
DELETE_FROM_DAILY_VALS <- TRUE
# ==============================================================================
# MAIN FUNCTION
# ==============================================================================
#' Entry point: parse the command line, find the per-date files, delete them.
#'
#' Reads `commandArgs(trailingOnly = TRUE)` in one of two shapes:
#'   [project] [date] [options]
#'   [project] --start-date [START] --end-date [END] [options]
#' Options may appear anywhere after the project name. Recognised options flip
#' the script-level settings (DRY_RUN, REQUIRE_CONFIRMATION, DELETE_FROM_*).
#' Any unrecognised argument aborts the run, so that a mistyped safety flag
#' (e.g. "--dryrun") can never end in a real deletion.
#'
#' Files considered for each date, relative to laravel_app/storage/app/[project]:
#'   merged_tif/{DATE}.tif
#'   field_tiles/{FIELD}/{DATE}.tif
#'   field_tiles_CI/{FIELD}/{DATE}.tif
#'   Data/extracted_ci/daily_vals/{SUBDIR}/{DATE}.rds
#'
#' @return Never returns: the R process is always ended with `quit()`
#'   (status 0 after processing or user cancellation, status 1 on invalid
#'   arguments or when confirmation is required in a non-interactive session).
main <- function() {
  # Print a message and terminate with a failure exit status.
  fail <- function(...) {
    cat(..., sep = "")
    quit(status = 1)
  }

  # Strictly parse a YYYY-MM-DD string. `label` ("", "start " or "end ") only
  # customises the error message.
  parse_date_arg <- function(text, label) {
    parsed <- as.Date(NA)
    # as.Date(format = ) ignores trailing characters and accepts un-padded
    # fields, so the exact shape is verified before converting.
    if (!is.na(text) && grepl("^[0-9]{4}-[0-9]{2}-[0-9]{2}$", text)) {
      parsed <- as.Date(text, format = "%Y-%m-%d")
    }
    if (is.na(parsed)) {
      fail(sprintf("[ERROR] Invalid %sdate format: %s (expected YYYY-MM-DD)\n", label, text))
    }
    parsed
  }

  # Immediate sub-directories of `path`, or an empty vector if it is missing.
  list_subdirs <- function(path) {
    if (!dir.exists(path)) {
      return(character(0))
    }
    list.dirs(path, full.names = TRUE, recursive = FALSE)
  }

  # Delete one file; report the reason and return FALSE when it fails.
  remove_file <- function(path) {
    tryCatch(
      isTRUE(file.remove(path)),
      # file.remove() signals a warning (after the failed attempt) that
      # already names the file and the OS reason.
      warning = function(w) {
        cat(sprintf("  [FAILED] %s\n", conditionMessage(w)))
        FALSE
      },
      error = function(e) {
        cat(sprintf("  [FAILED] %s: %s\n", path, conditionMessage(e)))
        FALSE
      }
    )
  }

  # ===========================================================================
  # PARSE ARGUMENTS
  # ===========================================================================
  args <- commandArgs(trailingOnly = TRUE)
  if (length(args) < 2) {
    cat("\n[ERROR] Missing arguments\n")
    cat("Usage:\n")
    cat(" Single date: Rscript DEBUG_remove_date_tiffs.R [project] [date] [options]\n")
    cat(" Date range: Rscript DEBUG_remove_date_tiffs.R [project] --start-date [START] --end-date [END] [options]\n\n")
    cat("Examples:\n")
    cat(" Rscript DEBUG_remove_date_tiffs.R angata 2026-02-08\n")
    cat(" Rscript DEBUG_remove_date_tiffs.R aura --start-date 2025-11-01 --end-date 2026-02-11 --no-confirm\n\n")
    cat("Options:\n")
    cat(" --dry-run Preview deletions without actually deleting\n")
    cat(" --no-confirm Delete without confirmation\n")
    cat(" --skip-merged Skip merged_tif folder\n")
    cat(" --skip-field-tiles Skip field_tiles folder\n")
    cat(" --skip-field-tiles-ci Skip field_tiles_CI folder\n")
    cat(" --skip-daily-vals Skip daily_vals folder\n\n")
    quit(status = 1)
  }

  project <- args[1]
  if (startsWith(project, "--")) {
    fail(sprintf("\n[ERROR] First argument must be the project name, got option: %s\n\n", project))
  }

  # Decide between single-date and date-range mode.
  start_idx <- which(args == "--start-date")
  end_idx <- which(args == "--end-date")
  if (length(start_idx) > 1 || length(end_idx) > 1) {
    fail("\n[ERROR] --start-date and --end-date may each be given only once\n")
  }
  if (length(start_idx) != length(end_idx)) {
    fail("\n[ERROR] --start-date and --end-date must be used together\n")
  }
  range_mode <- length(start_idx) == 1

  date_str <- NULL
  start_date_str <- NULL
  end_date_str <- NULL
  if (range_mode) {
    if (start_idx == length(args) || end_idx == length(args)) {
      fail("\n[ERROR] --start-date and --end-date require date values\n")
    }
    start_date_str <- args[start_idx + 1]
    end_date_str <- args[end_idx + 1]
    consumed <- c(1, start_idx, start_idx + 1, end_idx, end_idx + 1)
  } else {
    if (startsWith(args[2], "--")) {
      cat("\n[ERROR] Missing date argument. Either provide:\n")
      cat(" - A single date: Rscript ... [project] 2026-02-08\n")
      cat(" - Or --start-date and --end-date flags\n\n")
      quit(status = 1)
    }
    date_str <- args[2]
    consumed <- c(1, 2)
  }

  # Everything that is neither the project nor a date (flag) is an option.
  # Scanning ALL remaining positions means a flag placed right after the
  # project name in range mode is honoured as well.
  for (opt in args[-consumed]) {
    if (is.na(opt) || !nzchar(opt)) {
      next
    }
    # The settings are script-level variables, hence the `<<-` assignments.
    switch(opt,
      "--dry-run" = {
        DRY_RUN <<- TRUE
      },
      "--no-confirm" = {
        REQUIRE_CONFIRMATION <<- FALSE
      },
      "--skip-merged" = {
        DELETE_FROM_MERGED_TIF <<- FALSE
      },
      "--skip-field-tiles" = {
        DELETE_FROM_FIELD_TILES <<- FALSE
      },
      "--skip-field-tiles-ci" = {
        DELETE_FROM_FIELD_TILES_CI <<- FALSE
      },
      "--skip-daily-vals" = {
        DELETE_FROM_DAILY_VALS <<- FALSE
      },
      fail(
        sprintf("\n[ERROR] Unrecognized argument: %s\n", opt),
        "Valid options: --dry-run, --no-confirm, --skip-merged, ",
        "--skip-field-tiles, --skip-field-tiles-ci, --skip-daily-vals\n\n"
      )
    )
  }

  # ===========================================================================
  # VALIDATE DATES
  # ===========================================================================
  if (range_mode) {
    start_date_obj <- parse_date_arg(start_date_str, "start ")
    end_date_obj <- parse_date_arg(end_date_str, "end ")
    if (start_date_obj > end_date_obj) {
      fail(sprintf("[ERROR] Start date (%s) is after end date (%s)\n", start_date_str, end_date_str))
    }
    dates_to_process <- seq(start_date_obj, end_date_obj, by = "1 day")
  } else {
    dates_to_process <- parse_date_arg(date_str, "")
  }

  # ===========================================================================
  # PROCESS DATES
  # ===========================================================================
  total_dates <- length(dates_to_process)
  total_files_deleted <- 0
  total_errors <- 0
  base_path <- file.path("laravel_app", "storage", "app", project)

  cat("\n")
  cat(strrep("=", 70), "\n")
  cat("DELETE TIFFS - SUMMARY\n")
  cat(strrep("=", 70), "\n")
  cat(sprintf("Project: %s\n", project))
  if (total_dates == 1) {
    cat(sprintf("Date: %s\n", format(dates_to_process[1], "%Y-%m-%d")))
  } else {
    cat(sprintf("Date range: %s to %s (%d dates)\n",
                format(dates_to_process[1], "%Y-%m-%d"),
                format(dates_to_process[total_dates], "%Y-%m-%d"),
                total_dates))
  }
  cat(sprintf("Dry run: %s\n", if (DRY_RUN) "YES" else "NO"))
  if (!dir.exists(base_path)) {
    # A mistyped project or wrong working directory would otherwise just look
    # like "0 files deleted".
    cat(sprintf("[WARNING] Project folder not found: %s (working directory: %s)\n",
                base_path, getwd()))
  }
  cat("\n")

  # Confirm before proceeding
  if (REQUIRE_CONFIRMATION && !DRY_RUN) {
    cat("⚠️ This will PERMANENTLY DELETE files from the above date(s)!\n")
    cat("Use --no-confirm flag to skip this prompt\n")
    if (interactive()) {
      response <- readline(prompt = "Type 'yes' to confirm, or anything else to cancel: ")
      if (tolower(response) != "yes") {
        cat("[CANCELLED] No files deleted\n")
        cat(strrep("=", 70), "\n\n")
        quit(status = 0)
      }
    } else {
      # readline() cannot prompt under Rscript, so refuse instead of deleting.
      cat("\n[ERROR] Non-interactive mode detected (running via Rscript)\n")
      cat("Cannot prompt for confirmation. Use --no-confirm flag to proceed\n\n")
      cat(strrep("=", 70), "\n\n")
      quit(status = 1)
    }
  }

  # ===========================================================================
  # LOOP THROUGH DATES AND DELETE
  # ===========================================================================
  cat("Processing...\n\n")

  # The per-field / per-subdir folder lists do not depend on the date, so they
  # are collected once instead of once per date.
  field_tile_dirs <- character(0)
  field_tile_ci_dirs <- character(0)
  daily_val_dirs <- character(0)
  if (DELETE_FROM_FIELD_TILES) {
    field_tile_dirs <- list_subdirs(file.path(base_path, "field_tiles"))
  }
  if (DELETE_FROM_FIELD_TILES_CI) {
    field_tile_ci_dirs <- list_subdirs(file.path(base_path, "field_tiles_CI"))
  }
  if (DELETE_FROM_DAILY_VALS) {
    daily_val_dirs <- list_subdirs(file.path(base_path, "Data", "extracted_ci", "daily_vals"))
  }

  for (date_idx in seq_along(dates_to_process)) {
    date_str <- format(dates_to_process[date_idx], "%Y-%m-%d")
    tif_name <- paste0(date_str, ".tif")
    rds_name <- paste0(date_str, ".rds")

    # file.path() yields an empty vector when a folder list is empty, so
    # disabled or missing folders simply contribute nothing.
    candidates <- c(
      if (DELETE_FROM_MERGED_TIF) file.path(base_path, "merged_tif", tif_name),
      file.path(field_tile_dirs, tif_name),
      file.path(field_tile_ci_dirs, tif_name),
      file.path(daily_val_dirs, rds_name)
    )
    targets <- candidates[file.exists(candidates)]

    if (DRY_RUN) {
      deleted_count <- length(targets)
      error_count <- 0
    } else {
      removed <- vapply(targets, remove_file, logical(1), USE.NAMES = FALSE)
      deleted_count <- sum(removed)
      error_count <- length(removed) - deleted_count
    }
    total_files_deleted <- total_files_deleted + deleted_count
    total_errors <- total_errors + error_count

    # Progress indicator (every 5 dates or on last date)
    if (total_dates == 1 || date_idx %% 5 == 0 || date_idx == total_dates) {
      if (DRY_RUN) {
        cat(sprintf("[%d/%d] %s: Would delete %d files\n", date_idx, total_dates, date_str, deleted_count))
      } else {
        cat(sprintf("[%d/%d] %s: Deleted %d files\n", date_idx, total_dates, date_str, deleted_count))
      }
    }
  }

  # ===========================================================================
  # FINAL SUMMARY
  # ===========================================================================
  cat("\n")
  cat(strrep("=", 70), "\n")
  cat("SUMMARY\n")
  cat(strrep("=", 70), "\n")
  cat(sprintf("Dates processed: %d\n", total_dates))
  if (DRY_RUN) {
    cat(sprintf("Files that would be deleted: %d\n", total_files_deleted))
  } else {
    cat(sprintf("Total files deleted: %d\n", total_files_deleted))
    if (total_errors > 0) {
      cat(sprintf("Errors: %d\n", total_errors))
    }
  }
  cat(strrep("=", 70), "\n\n")
  quit(status = 0)
}
# ==============================================================================
# SCRIPT ENTRY POINT
# ==============================================================================
# Start main() only when this code is evaluated with no function call on the
# stack (sys.nframe() is 0), i.e. not from inside another function.
if (sys.nframe() == 0L) main()