SmartCane/analyze_image_availability.R
Timon d5fd4bb463 Add KPI reporting system and deployment documentation
Major Changes:
- NEW: Scripts 09 & 10 for KPI calculation and enhanced reporting
- NEW: Shell script wrappers (01-10) for easier execution
- NEW: R packages flextable and officer for enhanced Word reports
- NEW: DEPLOYMENT_README.md with complete deployment guide
- RENAMED: Numbered R scripts (02, 03, 04) for clarity
- REMOVED: Old package management scripts (using renv only)
- UPDATED: Workflow now uses scripts 09->10 instead of 05

Files Changed: 90+ files
New Packages: flextable, officer
New Scripts: 09_run_calculate_kpis.sh, 10_run_kpi_report.sh
Documentation: DEPLOYMENT_README.md, EMAIL_TO_ADMIN.txt

See DEPLOYMENT_README.md for full deployment instructions.
2025-10-14 11:49:30 +02:00

137 lines
4.5 KiB
R

# R script to analyze image dates and missing weeks
library(dplyr)
library(lubridate)
library(ggplot2)
# Set folder path
folder <- "laravel_app/storage/app/esa/merged_final_tif"
files <- list.files(folder, pattern = "\\.tif$", full.names = FALSE)

# Files at or above this size (in KB) are labelled "Complete"
min_complete_size_kb <- 9000

# Extract dates and file sizes.
# The file name without its ".tif" suffix is parsed as a YYYY-MM-DD date.
# The dot is escaped so only a literal ".tif" is stripped, and the explicit
# format makes unparseable names yield NA instead of aborting the script.
dates <- as.Date(sub("\\.tif$", "", files), format = "%Y-%m-%d")
sizes_kb <- file.info(file.path(folder, files))$size / 1024

if (anyNA(dates)) {
  warning(
    "Skipping files whose names are not YYYY-MM-DD dates: ",
    paste(files[is.na(dates)], collapse = ", "),
    call. = FALSE
  )
}

# One row per image. `year` is the ISO week-based year so that it always
# pairs correctly with the ISO week number (e.g. 2024-12-30 is week 1 of
# ISO year 2025, not week 1 of 2024).
df <- data.frame(date = dates, size_kb = sizes_kb, file = files) %>%
  filter(!is.na(date)) %>%
  mutate(
    year = isoyear(date),
    week = isoweek(date),
    completeness = ifelse(
      size_kb >= min_complete_size_kb, "Complete", "Incomplete"
    )
  )
# Get all years in data (sorted ascending; sort() also drops NA)
years <- sort(unique(df$year))

# Prepare output table: one row per year is appended further down.
# Declaring the columns up front fixes their names and types even when
# no year ends up being added.
output <- data.frame(
  year = integer(),
  n_images = integer(),
  n_weeks_missing = integer(),
  max_consec_weeks_missing = integer(),
  avg_images_per_week = numeric(),
  stringsAsFactors = FALSE
)

# Missing week numbers per year, keyed by the year as a string
missing_weeks_list <- list()

# Use the ISO week-based year so it agrees with the ISO week numbers used
# elsewhere in this script; around New Year the calendar year and the ISO
# year can differ (e.g. 2027-01-01 belongs to ISO week 53 of 2026).
current_year <- as.integer(isoyear(Sys.Date()))
# For plotting: count complete/incomplete images per week/year.
# ISO years can contain a week 53, so the week axis is extended beyond 52
# whenever the data contains such a week (otherwise those images would be
# silently left out of the counts).
week_range <- seq_len(max(52, df$week, na.rm = TRUE))
plot_weeks <- expand.grid(
  year = years,
  week = week_range,
  completeness = c("Complete", "Incomplete")
)

# Vectorised count: map every image onto its row of the grid and tally the
# hits. Images that match no grid row (NA from match()) are ignored by
# tabulate(), and an empty grid simply yields an empty count vector.
grid_key <- paste(plot_weeks$year, plot_weeks$week, plot_weeks$completeness)
image_key <- paste(df$year, df$week, df$completeness)
plot_weeks$n_images <- tabulate(
  match(image_key, grid_key),
  nbins = nrow(plot_weeks)
)

# Plot: X = week, Y = number of images, fill = completeness,
# one facet per year (stacked bar chart)
if (nrow(df) > 0) {
  gg <- ggplot(plot_weeks, aes(x = week, y = n_images, fill = completeness)) +
    geom_col(position = "stack") +
    facet_wrap(~ year, ncol = 1) +
    scale_x_continuous(breaks = week_range) +
    scale_y_continuous(breaks = seq(0, max(plot_weeks$n_images))) +
    labs(
      x = "Week number", y = "Number of images", fill = "Completeness",
      title = "Complete vs Incomplete Images per Week (by Year)"
    ) +
    theme_minimal()
  ggsave("images_per_week_by_year_stacked.png", gg, width = 12, height = 10)
  cat("Plot saved as images_per_week_by_year_stacked.png\n")
} else {
  # Nothing to draw: an empty grid would break the axis breaks and facets
  cat("No images found; plot skipped\n")
}
# ---- Per-year availability summary ----
# For every year in the data: number of images, number of weeks without any
# image, the longest streak of consecutive missing weeks, and the average
# number of images per week that has at least one image.
current_week <- isoweek(Sys.Date())

# Collect one row per year and bind once at the end instead of growing the
# data frame inside the loop.
yearly_rows <- vector("list", length(years))

for (i in seq_along(years)) {
  y <- years[i]

  # For the current year only weeks up to today are expected. For past years
  # use the real number of ISO weeks (52 or 53): 28 December always falls in
  # the last ISO week of its year.
  if (y == current_year) {
    n_weeks_expected <- current_week
  } else {
    n_weeks_expected <- isoweek(as.Date(paste0(y, "-12-28")))
  }
  all_weeks <- seq_len(n_weeks_expected)

  in_year <- !is.na(df$year) & df$year == y
  weeks_with_images <- unique(df$week[in_year])
  weeks_missing <- setdiff(all_weeks, weeks_with_images)
  n_weeks_missing <- length(weeks_missing)
  n_images <- sum(in_year)

  # Average over the weeks that actually have images; NA when there are none
  n_weeks_covered <- n_weeks_expected - n_weeks_missing
  if (n_weeks_covered > 0) {
    avg_images_per_week <- n_images / n_weeks_covered
  } else {
    avg_images_per_week <- NA_real_
  }

  # Find longest run of consecutive missing weeks.
  # A new run starts wherever the gap to the previous missing week is not 1;
  # cumsum() turns those starts into run ids and tabulate() gives each run's
  # length. (Counting only the TRUEs of `diff == 1` misses the first week of
  # every run after the first one.)
  if (n_weeks_missing == 0) {
    max_consec <- 0L
  } else {
    run_id <- cumsum(c(TRUE, diff(sort(weeks_missing)) != 1))
    max_consec <- max(tabulate(run_id))
  }

  yearly_rows[[i]] <- data.frame(
    year = y,
    n_images = n_images,
    n_weeks_missing = n_weeks_missing,
    max_consec_weeks_missing = max_consec,
    avg_images_per_week = round(avg_images_per_week, 2)
  )

  if (n_weeks_missing > 0) {
    missing_weeks_list[[as.character(y)]] <- weeks_missing
  }
}

# rbind() ignores the NULL produced when there are no years at all, leaving
# the empty, typed `output` table untouched in that case.
output <- rbind(output, do.call(rbind, yearly_rows))
# Show the per-year summary on the console and persist it as a CSV file
print(output)
write.csv(output, file = "image_availability_by_year.csv", row.names = FALSE)

# For each year that has gaps, list the week numbers without any image
for (y in names(missing_weeks_list)) {
  week_numbers <- paste(missing_weeks_list[[y]], collapse = ", ")
  cat(sprintf("Year %s missing weeks: %s\n", y, week_numbers))
}
# Calculate and print max consecutive weeks with only incomplete data per year
cat("\nMax consecutive weeks with only incomplete images per year:\n")
for (y in years) {
  in_year <- !is.na(df$year) & df$year == y

  # Weeks that have at least one image (i.e., not missing entirely)
  weeks_with_any_image <- unique(df$week[in_year])
  # Weeks that have at least one complete image
  weeks_with_complete <- unique(
    df$week[in_year & df$completeness %in% "Complete"]
  )
  # Weeks where all images are incomplete (no complete images).
  # Derived directly from the image table so every week present in the data
  # is considered, including ISO week 53.
  weeks_incomplete <- sort(setdiff(weeks_with_any_image, weeks_with_complete))

  if (length(weeks_incomplete) == 0) {
    max_consec_incomplete <- 0L
  } else {
    # A new run starts wherever the gap to the previous week is not 1;
    # cumsum() labels the runs and tabulate() returns each run's length, so
    # every run is measured in full (not just the first one).
    run_id <- cumsum(c(TRUE, diff(weeks_incomplete) != 1))
    max_consec_incomplete <- max(tabulate(run_id))
  }
  cat(sprintf("Year %d: %d\n", y, max_consec_incomplete))
}