# R script to analyze image dates and missing weeks
#
# Scans a folder of .tif images whose file names are dates, then:
#   * plots the number of complete/incomplete images per ISO week and year,
#   * writes a per-year availability table to CSV,
#   * prints the missing weeks per year, and
#   * prints the longest streak of weeks that only have incomplete images.
#
# NOTE(review): file names are assumed to look like "YYYY-MM-DD.tif" -- confirm.
library(dplyr)
library(lubridate)
library(ggplot2)

# Helpers ----

# Length of the longest run of consecutive integers in `weeks` (0 if empty).
longest_consecutive_run <- function(weeks) {
  if (length(weeks) == 0) {
    return(0L)
  }
  w <- sort(unique(weeks))
  # A new run starts wherever the gap to the previous value is not exactly 1;
  # cumsum() turns those break points into a run id for every element, so the
  # first element of each run is counted as part of that run.
  run_id <- cumsum(c(TRUE, diff(w) != 1))
  max(tabulate(run_id))
}

# Number of ISO weeks (52 or 53) in ISO year `y` (vectorised).
# 28 December always falls in the last ISO week of its year.
iso_weeks_in_year <- function(y) {
  isoweek(make_date(y, 12L, 28L))
}

# Settings ----

# Set folder path
folder <- "laravel_app/storage/app/esa/merged_final_tif"

# Files at or above this size (in KB) are labelled "Complete".
min_complete_size_kb <- 9000

# Extract dates and file sizes ----

files <- list.files(folder, pattern = "\\.tif$", full.names = FALSE)

# An explicit format yields NA for names that are not dates instead of
# aborting the whole script.
dates <- as.Date(sub("\\.tif$", "", files), format = "%Y-%m-%d")

not_a_date <- is.na(dates)
if (any(not_a_date)) {
  warning(
    "Ignoring files whose names are not dates: ",
    paste(files[not_a_date], collapse = ", "),
    call. = FALSE
  )
  files <- files[!not_a_date]
  dates <- dates[!not_a_date]
}
if (length(files) == 0) {
  stop("No date-named .tif files found in ", folder, call. = FALSE)
}

sizes_kb <- file.info(file.path(folder, files))$size / 1024

# ISO weeks belong to ISO years, so pair isoweek() with isoyear(); the
# calendar year would misfile dates around New Year (e.g. 2024-12-30 is
# ISO week 1 of 2025).
df <- data.frame(date = dates, size_kb = sizes_kb, file = files) %>%
  mutate(
    year = isoyear(date),
    week = isoweek(date),
    completeness = ifelse(
      size_kb >= min_complete_size_kb, "Complete", "Incomplete"
    )
  )

# Get all years in data
years <- sort(unique(df$year))

current_year <- isoyear(Sys.Date())
current_week <- isoweek(Sys.Date())

# Weeks that are expected to have images in year `y`: for the current year
# only the weeks up to today, for any other year every ISO week of that year.
expected_weeks <- function(y) {
  if (y == current_year) {
    seq_len(current_week)
  } else {
    seq_len(iso_weeks_in_year(y))
  }
}

# Plot: complete/incomplete images per week and year ----

max_week <- max(iso_weeks_in_year(years))

# Every year/week/completeness combination, so that weeks without images show
# up as zero; week 53 is kept only for years that actually have it.
plot_weeks <- expand.grid(
  year = years,
  week = seq_len(max_week),
  completeness = c("Complete", "Incomplete"),
  stringsAsFactors = FALSE
)
plot_weeks <- plot_weeks[
  plot_weeks$week <= iso_weeks_in_year(plot_weeks$year), ,
  drop = FALSE
]

image_counts <- count(df, year, week, completeness, name = "n_images")
plot_weeks <- plot_weeks %>%
  left_join(image_counts, by = c("year", "week", "completeness")) %>%
  mutate(n_images = coalesce(n_images, 0L))

# X = week, Y = number of images, fill = completeness, one facet per year
# (stacked bar chart)
gg <- ggplot(plot_weeks, aes(x = week, y = n_images, fill = completeness)) +
  geom_col(position = "stack") +
  facet_wrap(~ year, ncol = 1) +
  scale_x_continuous(breaks = seq_len(max_week)) +
  scale_y_continuous(breaks = seq(0, max(plot_weeks$n_images))) +
  labs(
    x = "Week number",
    y = "Number of images",
    fill = "Completeness",
    title = "Complete vs Incomplete Images per Week (by Year)"
  ) +
  theme_minimal()

ggsave("images_per_week_by_year_stacked.png", gg, width = 12, height = 10)
cat("Plot saved as images_per_week_by_year_stacked.png\n")

# Per-year availability table ----

year_summaries <- lapply(years, function(y) {
  all_weeks <- expected_weeks(y)
  weeks_with_images <- unique(df$week[df$year == y])
  weeks_missing <- setdiff(all_weeks, weeks_with_images)
  n_weeks_missing <- length(weeks_missing)
  n_images <- sum(df$year == y)

  # Average over the expected weeks that have at least one image.
  n_weeks_covered <- length(all_weeks) - n_weeks_missing
  avg_images_per_week <- if (n_weeks_covered > 0) {
    n_images / n_weeks_covered
  } else {
    NA_real_
  }

  list(
    weeks_missing = weeks_missing,
    row = data.frame(
      year = y,
      n_images = n_images,
      n_weeks_missing = n_weeks_missing,
      max_consec_weeks_missing = longest_consecutive_run(weeks_missing),
      avg_images_per_week = round(avg_images_per_week, 2)
    )
  )
})
names(year_summaries) <- as.character(years)

output <- bind_rows(lapply(year_summaries, function(s) s$row))

# Only years that actually miss weeks are listed below.
missing_weeks_list <- Filter(
  function(w) length(w) > 0,
  lapply(year_summaries, function(s) s$weeks_missing)
)

# Write to CSV
print(output)
write.csv(output, file = "image_availability_by_year.csv", row.names = FALSE)

# Print missing weeks for years with missing data
for (y in names(missing_weeks_list)) {
  cat(sprintf(
    "Year %s missing weeks: %s\n",
    y, paste(missing_weeks_list[[y]], collapse = ", ")
  ))
}

# Calculate and print max consecutive weeks with only incomplete data per year
cat("\nMax consecutive weeks with only incomplete images per year:\n")
for (y in years) {
  in_year <- df$year == y
  weeks_with_any_image <- unique(df$week[in_year])
  weeks_with_complete <- unique(
    df$week[which(in_year & df$completeness == "Complete")]
  )
  # Weeks that have at least one image but no complete one; weeks without any
  # image are "missing", not "incomplete", and are excluded here.
  weeks_incomplete <- setdiff(weeks_with_any_image, weeks_with_complete)
  max_consec_incomplete <- longest_consecutive_run(weeks_incomplete)
  cat(sprintf("Year %d: %d\n", y, max_consec_incomplete))
}