Major Changes: - NEW: Scripts 09 & 10 for KPI calculation and enhanced reporting - NEW: Shell script wrappers (01-10) for easier execution - NEW: R packages flextable and officer for enhanced Word reports - NEW: DEPLOYMENT_README.md with complete deployment guide - RENAMED: Numbered R scripts (02, 03, 04) for clarity - REMOVED: Old package management scripts (using renv only) - UPDATED: Workflow now uses scripts 09->10 instead of 05 Files Changed: 90+ files New Packages: flextable, officer New Scripts: 09_run_calculate_kpis.sh, 10_run_kpi_report.sh Documentation: DEPLOYMENT_README.md, EMAIL_TO_ADMIN.txt See DEPLOYMENT_README.md for full deployment instructions.
137 lines
4.5 KiB
R
137 lines
4.5 KiB
R
# R script to analyze image dates and missing weeks
|
|
library(dplyr)
|
|
library(lubridate)
|
|
library(ggplot2)
|
|
|
|
# Set folder path
folder <- "laravel_app/storage/app/esa/merged_final_tif"
files <- list.files(folder, pattern = "\\.tif$", full.names = FALSE)

# Extract dates and file sizes.
# The date is parsed from the file name with its ".tif" extension removed.
# NOTE(review): assumes file stems look like YYYY-MM-DD -- confirm against
# the contents of the folder.
dates <- as.Date(sub("\\.tif$", "", files))
sizes_kb <- file.info(file.path(folder, files))$size / 1024

# One row per image. `year` is the ISO week-based year so that it always
# belongs to the same calendar system as `week` (e.g. 30 Dec can be ISO week 1
# of the following year). Files smaller than 9000 KB are flagged "Incomplete".
df <- data.frame(date = dates, size_kb = sizes_kb, file = files) %>%
  mutate(
    year = isoyear(date),
    week = isoweek(date),
    completeness = ifelse(size_kb >= 9000, "Complete", "Incomplete")
  )

# Get all years in data
years <- sort(unique(df$year))

# Prepare output table: starts empty, one row per year is appended by the
# per-year loop further down.
output <- data.frame(
  year = integer(),
  n_images = integer(),
  n_weeks_missing = integer(),
  max_consec_weeks_missing = integer(),
  avg_images_per_week = numeric(),
  stringsAsFactors = FALSE
)

# Missing week numbers keyed by year (as character); only years that have at
# least one missing week get an entry.
missing_weeks_list <- list()

# Use the ISO week-based year: it is compared together with the ISO week of
# today, and around New Year the calendar year and the ISO year differ
# (e.g. 30 Dec can already be ISO week 1 of the next year).
current_year <- as.integer(isoyear(Sys.Date()))

# Weekly image counts for plotting: one row for every combination of year,
# week number (1-52) and completeness class, so that combinations without any
# image are present with a count of zero.
plot_weeks <- expand.grid(
  year = years,
  week = 1:52,
  completeness = c("Complete", "Incomplete")
)
plot_weeks$n_images <- 0

# Number of images in `df` that match the year, week and completeness class
# of a single grid row.
count_images_for_row <- function(row_idx) {
  cell <- plot_weeks[row_idx, ]
  sum(
    df$year == cell$year &
      df$week == cell$week &
      df$completeness == cell$completeness
  )
}

grid_rows <- seq_len(nrow(plot_weeks))
plot_weeks$n_images[grid_rows] <- vapply(
  grid_rows,
  count_images_for_row,
  numeric(1)
)

# Plot: stacked bar chart with X = week, Y = number of images and
# fill = completeness, drawn as one facet row per year.
weekly_aes <- aes(x = week, y = n_images, fill = completeness)
y_breaks <- 0:max(plot_weeks$n_images)

gg <- ggplot(plot_weeks, weekly_aes)
gg <- gg + geom_col(position = "stack")
gg <- gg + facet_wrap(~ year, ncol = 1)
gg <- gg + scale_x_continuous(breaks = 1:52)
gg <- gg + scale_y_continuous(breaks = y_breaks)
gg <- gg + labs(
  x = "Week number",
  y = "Number of images",
  fill = "Completeness",
  title = "Complete vs Incomplete Images per Week (by Year)"
)
gg <- gg + theme_minimal()

# Write the chart to the working directory and report where it went
ggsave("images_per_week_by_year_stacked.png", gg, width = 12, height = 10)
cat("Plot saved as images_per_week_by_year_stacked.png\n")

current_week <- isoweek(Sys.Date())

# Per-year availability summary. For every year this computes the number of
# images, the number of weeks without any image, the longest streak of
# consecutive missing weeks and the average number of images per week that has
# at least one image. Rows are collected in a list and bound to `output` once
# after the loop; missing week numbers are stored in `missing_weeks_list`.
year_rows <- vector("list", length(years))

for (k in seq_along(years)) {
  y <- years[k]

  # For current year, only consider weeks up to today; for past years, 1:52.
  # NOTE(review): ISO years can contain a week 53, which is never checked
  # here -- confirm whether that is intended.
  if (y == current_year) {
    all_weeks <- seq_len(current_week)
  } else {
    all_weeks <- 1:52
  }

  weeks_with_images <- unique(df$week[df$year == y])
  weeks_missing <- setdiff(all_weeks, weeks_with_images)
  n_weeks_missing <- length(weeks_missing)
  n_images <- sum(df$year == y)

  # Average over the weeks that actually have images; NA when there are none
  n_weeks_covered <- length(all_weeks) - n_weeks_missing
  if (n_weeks_covered > 0) {
    avg_images_per_week <- n_images / n_weeks_covered
  } else {
    avg_images_per_week <- NA_real_
  }

  # Find longest run of consecutive missing weeks. A new run starts wherever
  # the step to the previous missing week is not exactly 1; numbering the runs
  # with cumsum() and counting the members of each run gives the run lengths.
  if (n_weeks_missing == 0) {
    max_consec <- 0
  } else {
    gap_weeks <- sort(weeks_missing)
    run_id <- cumsum(c(TRUE, diff(gap_weeks) != 1))
    max_consec <- max(tabulate(run_id))
  }

  year_rows[[k]] <- data.frame(
    year = y,
    n_images = n_images,
    n_weeks_missing = n_weeks_missing,
    max_consec_weeks_missing = max_consec,
    avg_images_per_week = round(avg_images_per_week, 2)
  )

  if (n_weeks_missing > 0) {
    missing_weeks_list[[as.character(y)]] <- weeks_missing
  }
}

# Bind all yearly rows onto the (initially empty) output table in one step
output <- do.call(rbind, c(list(output), year_rows))

# Show the per-year summary on the console, then save it as CSV in the
# working directory.
print(output)
write.csv(output, file = "image_availability_by_year.csv", row.names = FALSE)

# Print missing weeks for years with missing data: one line per year that has
# an entry in missing_weeks_list.
missing_week_lines <- vapply(
  names(missing_weeks_list),
  function(yr) {
    sprintf(
      "Year %s missing weeks: %s\n",
      yr,
      paste(missing_weeks_list[[yr]], collapse = ", ")
    )
  },
  character(1)
)
cat(missing_week_lines, sep = "")

# Calculate and print max consecutive weeks with only incomplete data per year
cat("\nMax consecutive weeks with only incomplete images per year:\n")

for (y in years) {
  # Weeks (from the 1-52 plotting grid) of this year without a single
  # complete image
  in_year <- plot_weeks$year == y
  no_complete <- plot_weeks$completeness == "Complete" &
    plot_weeks$n_images == 0
  weeks_without_complete <- plot_weeks$week[in_year & no_complete]

  # Only keep weeks that actually have at least one image (i.e., not missing
  # entirely); what remains are weeks whose images are all incomplete.
  weeks_with_any_image <- unique(df$week[df$year == y])
  weeks_incomplete <- intersect(weeks_without_complete, weeks_with_any_image)

  # Longest streak of consecutive incomplete-only weeks. A new streak starts
  # wherever the step to the previous week is not exactly 1; numbering the
  # streaks with cumsum() and counting their members gives the streak lengths.
  if (length(weeks_incomplete) == 0) {
    max_consec_incomplete <- 0
  } else {
    streak_weeks <- sort(weeks_incomplete)
    streak_id <- cumsum(c(TRUE, diff(streak_weeks) != 1))
    max_consec_incomplete <- max(tabulate(streak_id))
  }

  cat(sprintf("Year %d: %d\n", y, max_consec_incomplete))
}