# Required packages. The library() calls are left commented out as in the
# original; presumably the packages are attached elsewhere -- TODO confirm.
# library(ggplot2)
# library(dplyr)
# library(lubridate)  # isoweek() / isoyear(), referenced below via lubridate::

# Cloud-availability analysis ----
# Every "YYYY-MM-DD.tif" file found in `merged_final` (defined outside this
# section) is treated as one cloud-free acquisition day; every other day in
# the covered date range is treated as cloudy.

# List the rasters. The pattern is a regular expression, so the dot is
# escaped and the extension anchored; otherwise sidecar files such as
# "2020-01-01.tif.aux.xml" would be picked up as well.
raster_files_NEW <- list.files(
  merged_final,
  full.names = TRUE,
  pattern = "\\.tif$"
)

# Extract the dates from raster_files_NEW (file names must look like
# "YYYY-MM-DD.tif"). Names that do not match are skipped with a warning
# instead of being passed unparsed to as.Date().
date_pattern <- "^([0-9]{4}-[0-9]{2}-[0-9]{2})\\.tif$"
raster_names <- basename(raster_files_NEW)
has_date <- grepl(date_pattern, raster_names)
if (!all(has_date)) {
  warning(
    "Ignoring ", sum(!has_date), " file(s) not named 'YYYY-MM-DD.tif'",
    call. = FALSE
  )
}
no_cloud_dates <- as.Date(
  sub(date_pattern, "\\1", raster_names[has_date]),
  format = "%Y-%m-%d"
)
# Impossible dates (e.g. "2020-13-45") parse to NA; drop them.
no_cloud_dates <- no_cloud_dates[!is.na(no_cloud_dates)]
if (length(no_cloud_dates) == 0) {
  stop("No dated .tif files found in: ", merged_final, call. = FALSE)
}

# Generate a full sequence of dates in the range
start_date <- min(no_cloud_dates)
end_date <- max(no_cloud_dates)
all_dates <- seq(start_date, end_date, by = "day")

# Create a data frame marking no clouds (1) and clouds (0)
cloud_data <- data.frame(
  date = all_dates,
  cloud_status = ifelse(all_dates %in% no_cloud_dates, 1, 0)
)

# Plot the daily status
ggplot(cloud_data, aes(x = date, y = cloud_status)) +
  geom_point() +
  labs(x = "Date", y = "Cloud Status (1 = No Cloud, 0 = Cloud)") +
  scale_y_continuous(breaks = c(0, 1)) +
  theme_minimal()

# Same plot with month labels on the x-axis
ggplot(cloud_data, aes(x = date, y = cloud_status)) +
  geom_point() +
  scale_x_date(date_labels = "%b", date_breaks = "1 month") +
  labs(x = "Month", y = "Cloud Status (1 = No Cloud, 0 = Cloud)") +
  scale_y_continuous(breaks = c(0, 1)) +
  theme_minimal()

# Group data by ISO year and ISO week.
# isoweek() must be paired with isoyear(), not the calendar year: days such
# as 2024-12-30 belong to ISO week 1 of 2025, and pairing them with
# year() == 2024 would merge them into week 1 of 2024 (a "week" with more
# than 7 days).
# NOTE: from here on `cloud_data` holds the weekly summary, not daily rows.
cloud_data <- cloud_data %>%
  mutate(
    week = lubridate::isoweek(date),
    year = lubridate::isoyear(date)
  ) %>%
  group_by(year, week) %>%
  summarise(
    no_cloud_days = sum(cloud_status == 1),
    cloud_days = sum(cloud_status == 0),
    .groups = "drop"
  )

# 1. Show how many weeks per year have no images (clouds for all 7 days)
weeks_no_images <- cloud_data %>%
  filter(cloud_days == 7)

# Plot weeks with no images
ggplot(weeks_no_images, aes(x = week, y = year)) +
  geom_tile(fill = "red") +
  labs(
    x = "Week",
    y = "Year",
    title = "Weeks with No Images (Full Cloud Cover)"
  ) +
  theme_minimal()

# 2. Determine when most clouds are present (cloud_days > no_cloud_days)
weeks_most_clouds <- cloud_data %>%
  filter(cloud_days > no_cloud_days)

# Plot when most clouds are present
ggplot(weeks_most_clouds, aes(x = week, y = year)) +
  geom_tile(fill = "blue") +
  labs(x = "Week", y = "Year", title = "Weeks with Most Clouds") +
  theme_minimal()

# Group weeks by number of cloud days and count how many weeks had 0-7
# cloud days
weeks_by_cloud_days <- cloud_data %>%
  count(cloud_days, name = "week_count")

# Display the summary
print(weeks_by_cloud_days)

# Optional: Plot the results to visualise how many weeks had 0-7 cloud days
ggplot(weeks_by_cloud_days, aes(x = cloud_days, y = week_count)) +
  geom_col(fill = "skyblue") +
  labs(
    x = "Number of Cloud Days (per week)",
    y = "Number of Weeks",
    title = "Distribution of Cloud Days per Week"
  ) +
  theme_minimal()

# Count how many weeks had 0-7 cloud-free days.
# Use the counted `no_cloud_days` column rather than `7 - cloud_days`: the
# first and last weeks of the range can be partial, and the subtraction
# would count the days outside the range as cloud-free.
weeks_by_no_cloud_days <- cloud_data %>%
  count(no_cloud_days, name = "week_count")

# Plot the results to visualise how many weeks had 0-7 cloud-free days
ggplot(weeks_by_no_cloud_days, aes(x = no_cloud_days, y = week_count)) +
  geom_col(fill = "#00A799") +
  # Add the count of weeks on top of bars
  geom_text(aes(label = week_count), vjust = -0.5, size = 4) +
  labs(
    x = "Number of Cloud-Free Days (per week)",
    y = "Number of Weeks",
    title = "Distribution of Cloud-Free Days per Week"
  ) +
  theme_minimal()