# SmartCane/r_app/2_CI_data_prep.R
# Martin Folkerts c222314950 wip
# 2024-03-05 09:15:46 +01:00
#
# 360 lines
# 14 KiB
# R

# Activate the renv environment.
renv::activate('~/smartCane/r_app')
renv::restore()
library(here)
library(sf)
library(terra)
library(tidyverse)
library(lubridate)
library(exactextractr)
library(CIprep)
# Collect all command line arguments passed after the script name.
args <- commandArgs(trailingOnly = TRUE)

# At least one argument is required.
if (length(args) == 0) {
  stop("Geen argumenten doorgegeven aan het script")
}

# First argument: weeks_ago, which must parse as a number.
weeks_ago <- as.numeric(args[1])
if (is.na(weeks_ago)) {
  stop("Het argument weeks_ago is geen geldig getal")
}

# Second argument: project_dir, the project folder name used to build the
# storage paths below.
# A missing second argument gives args[2] == NA, and as.character(NA) is
# NA_character_, which still satisfies is.character(). Test for a missing or
# empty value explicitly so the script does not continue with a path that
# ends in "NA".
project_dir <- as.character(args[2])
if (length(args) < 2 || is.na(project_dir) || !nzchar(project_dir)) {
  stop("Het argument project_dir is geen geldige string")
}
# Root of this project's files inside the Laravel storage folder.
laravel_storage_dir <- here("../laravel_app/storage/app", project_dir)

# Input and output locations used throughout the script.
planet_tif_folder <- here(laravel_storage_dir, "merged_tif")
merged_final <- here(laravel_storage_dir, "merged_final_tif")
data_dir <- here(laravel_storage_dir, "Data")
extracted_CI_dir <- here(data_dir, "extracted_ci")
daily_CI_vals_dir <- here(extracted_CI_dir, "daily_vals")
cumulative_CI_vals_dir <- here(extracted_CI_dir, "cumulative_vals")
weekly_CI_mosaic <- here(laravel_storage_dir, "weekly_mosaic")
daily_vrt <- here(data_dir, "vrt")
harvest_dir <- here(data_dir, "HarvestData")

# Create the output directories.
# recursive = TRUE creates missing parents too (a plain dir.create() fails
# with only a warning in that case). showWarnings = FALSE avoids the
# "already exists" warning on every re-run.
output_dirs <- c(
  here(extracted_CI_dir),
  here(daily_CI_vals_dir),
  here(cumulative_CI_vals_dir),
  here(weekly_CI_mosaic),
  here(daily_vrt),
  merged_final
)
walk(output_dirs, function(dir_path) {
  dir.create(dir_path, recursive = TRUE, showWarnings = FALSE)
})
# Creating weekly mosaic ----
# date_list() is not defined in this file (presumably from CIprep). This
# script reads $days_filter, $week and $year from its result.
dates <- date_list(weeks_ago)

# Load the pivot geojson, keep the pivot and pivot_quadrant attributes and
# convert to a terra vector.
# NOTE(review): this reads the geojson from the project root, while the later
# `pivot_sf` load reads it from data_dir. Confirm which location is intended.
pivot_sf_q <- st_read(here("pivot_20210625.geojson")) %>%
  dplyr::select(pivot, pivot_quadrant) %>%
  vect()

# `pattern` is a regular expression: the anchored, escaped form matches only
# names ending in ".tif", so sidecar files such as "*.tif.aux.xml" are not
# passed to rast() later on.
raster_files <- list.files(
  planet_tif_folder,
  full.names = TRUE,
  pattern = "\\.tif$"
)
head(raster_files)

# Keep only the rasters whose path matches one of the day patterns.
filtered_files <- map(
  dates$days_filter,
  ~ raster_files[grepl(pattern = .x, x = raster_files)]
) %>%
  compact() %>%
  flatten_chr()
#rasters_masked <- map(filtered_files, mask_raster, fields = pivot_sf_q) %>% set_names(filtered_files)
# rasters_masked <- list() # Creëer een lege lijst om de resultaten op te slaan
# for (i in seq_along(filtered_files[1])) {
# file_name <- filtered_files[i]
# result <- mask_raster(file_name, fields = pivot_sf_q)
# rasters_masked[[file_name]] <- result
# }
#' Compute the CI layer of one raster file, clip it to the fields and save it.
#'
#' CI is calculated as layer 2 divided by layer 1, minus 1. The result is
#' cropped and masked to `pivot_sf_q`, written as a GeoTIFF into
#' `merged_final`, and a VRT pointing at that GeoTIFF is written into
#' `daily_vrt`. Both directories are read from the calling environment.
#'
#' @param file Path of the raster file to process.
#' @param pivot_sf_q Field polygons used for cropping and masking.
#' @return The cropped and masked CI raster.
create_mask_and_crop <- function(file, pivot_sf_q) {
  message("starting ", file)
  bands <- rast(file)
  message("raster loaded")

  # The commented-out original suggests layer 1 = green and layer 2 = nir.
  # TODO confirm the band order of the input rasters.
  ci_layer <- bands[[2]] / bands[[1]] - 1
  message("CI calculated")

  ci_layer <- terra::crop(ci_layer, pivot_sf_q, mask = TRUE)

  # Output names reuse the input file name without its extension.
  stem <- tools::file_path_sans_ext(basename(file))

  new_file <- here(merged_final, paste0(stem, ".tif"))
  writeRaster(ci_layer, new_file, overwrite = TRUE)

  vrt_file <- here(daily_vrt, paste0(stem, ".vrt"))
  terra::vrt(new_file, vrt_file, overwrite = TRUE)

  ci_layer
}
# rasters_masked <- map(filtered_files, create_mask_and_crop, pivot_sf_q)
# list_global <- list()
# Minimum number of non-NA pixels a cropped raster must have to be used.
min_valid_pixels <- 10000

# Process every raster of the week and collect the VRT paths of the rasters
# that hold enough data.
vrt_list <- list()
for (file in filtered_files) {
  v_crop <- create_mask_and_crop(file, pivot_sf_q)
  empty_or_full <- global(v_crop, "notNA")
  vrt_file <- here(
    daily_vrt,
    paste0(tools::file_path_sans_ext(basename(file)), ".vrt")
  )

  if (empty_or_full[1, ] > min_valid_pixels) {
    vrt_list[[vrt_file]] <- vrt_file
  } else {
    # Too few valid pixels: drop the VRT so it is not picked up later.
    file.remove(vrt_file)
  }

  message(file, " processed")
  gc()
}

vrt_list <- vrt_list %>% flatten_chr()

# Without this guard the next step calls rast() on a missing path and fails
# with an error that does not explain the cause.
if (length(vrt_list) == 0) {
  stop(
    "No raster with more than ", min_valid_pixels,
    " valid pixels found for the requested week",
    call. = FALSE
  )
}
#testing writing raster
# for(i in seq_along(filtered_files)){
# message("starting ", i)
# x <- mask_raster(filtered_files[i], fields = pivot_sf_q)
# writeRaster(x, filtered_files[i], overwrite=TRUE)
# message("writing ", i)
# }
# rasters_masked[sapply(rasters_masked, is.null)] <- NULL
# rasters_masked <- setNames(list_global, map_chr(names(list_global), date_extract))
# })
# Reference pixel count inside the field polygons: take layer 1 of the first
# usable raster, set every cell to 1, crop and mask to the polygons, then
# count the cells that are not NA.
total_pix_area <- global(
  crop(
    setValues(terra::subset(rast(vrt_list[1]), 1), 1),
    pivot_sf_q,
    mask = TRUE
  ),
  fun = "notNA"
)
# total_pix_area <- rast(filtered_files[1]) %>% subset(1) %>% crop(pivot_sf_q, mask = TRUE) %>% freq(., usenames = TRUE)
# list_global
# rast(list_global[1])[[5]] %>% plot()
# vrt_files <- list.files(here(data_dir, "vrt"),full.names = T)
# vrt_days_filter <- tools::file_path_sans_ext(basename(list_global))
# vrt_list <- map(vrt_days_filter, ~ vrt_files[grepl(pattern = .x, x = vrt_files)]) %>%
# compact() %>%
# flatten_chr()
# Stack layer 1 of every usable daily VRT into one multi-layer raster.
layer_5_list <- rast(
  purrr::map(vrt_list, function(vrt_path) {
    terra::subset(rast(vrt_path[1]), 1)
  })
)

# Per raster: count the non-NA pixels and derive the share of missing pixels
# relative to the reference count, plus two 0/1 threshold flags.
# NOTE(review): `thres_40perc` compares against 45, not 40, while the column
# name and the log messages further down say 40%. Confirm which is intended.
missing_pixels_count <- mutate(
  global(layer_5_list, fun = "notNA"),
  total_pixels = total_pix_area$notNA,
  missing_pixels_percentage = round(100 - ((notNA / total_pix_area$notNA) * 100)),
  thres_5perc = as.integer(missing_pixels_percentage < 5),
  thres_40perc = as.integer(missing_pixels_percentage < 45)
)
# cloud_perc_list <- freq(layer_5_list, usenames = TRUE) %>%
# mutate(cloud_perc = (100 -((count/sum(total_pix_area$notNA))*100)),
# cloud_thres_5perc = as.integer(cloud_perc < 5),
# cloud_thres_40perc = as.integer(cloud_perc < 40)) %>%
# rename(Date = layer) %>% select(-value, -count)
# Positions of the rasters that carry the highest value of each 0/1 flag.
index_5perc <- with(
  missing_pixels_count,
  which(thres_5perc == max(thres_5perc))
)
index_40perc <- with(
  missing_pixels_count,
  which(thres_40perc == max(thres_40perc))
)

## Create mosaic
# Number of rasters passing each threshold.
n_below_5 <- sum(missing_pixels_count$thres_5perc)
n_below_40 <- sum(missing_pixels_count$thres_40perc)

# Max-value composite of several rasters given by their paths.
max_composite <- function(vrt_paths) {
  mosaic(sprc(vrt_paths), fun = "max")
}

# Prefer rasters under the 5% threshold, then fall back to the looser
# threshold, and finally to an all-NA raster.
if (n_below_5 > 1) {
  message("More than 1 raster without clouds (<5%), max composite made")
  x <- max_composite(vrt_list[index_5perc])
} else if (n_below_5 == 1) {
  message("Only 1 raster without clouds (<5%)")
  x <- rast(vrt_list[index_5perc[1]])
} else if (n_below_40 > 1) {
  message("More than 1 image contains clouds, composite made of <40% cloud cover images")
  x <- max_composite(vrt_list[index_40perc])
} else if (n_below_40 == 1) {
  message("Only 1 image available but contains clouds")
  x <- rast(vrt_list[index_40perc[1]])
} else {
  message("No cloud free images available")
  x <- setValues(rast(vrt_list[1]), NA)
}
names(x) <- "CI"
# Plot the weekly CI mosaic, then write it to the weekly mosaic folder as
# week_<two-digit week>_<year>.tif.
plot(x[["CI"]], main = paste("CI map", dates$week))
#plotRGB(x, main = paste("RGB image week", dates$week))
mosaic_file_name <- paste0(
  "week_", sprintf("%02d", dates$week), "_", dates$year, ".tif"
)
file_path_tif <- here(weekly_CI_mosaic, mosaic_file_name)
writeRaster(x, file_path_tif, overwrite = TRUE)
message("Raster written/made at: ", file_path_tif)
# Extracting CI
#' Extract the mean CI per field from one daily raster and save it as RDS.
#'
#' The mean is computed with exactextractr, rounded to 2 decimals and stored
#' in a column named after the date returned by `date_extract(file)`. The
#' table (without geometry) is saved to
#' `extracted_<date>_<quadrant|whole_field>.rds` inside `save_dir`.
#'
#' @param file Path of the CI raster.
#' @param field_geojson Field polygons, converted with `sf::st_as_sf()`.
#' @param quadrants If TRUE the file name suffix is "quadrant", otherwise
#'   "whole_field". Only the file name depends on it.
#' @param save_dir Directory that receives the RDS file.
#' @return The value of `saveRDS()`, called for its side effect.
extract_rasters_daily <- function(file, field_geojson, quadrants = TRUE, save_dir) {
  fields_sf <- sf::st_as_sf(field_geojson)
  ci_raster <- rast(file[1])
  date <- date_extract(file)

  # One mean value per polygon.
  field_means <- exactextractr::exact_extract(ci_raster, fields_sf, fun = "mean")

  pivot_stats <- cbind(fields_sf, mean_CI = round(field_means, 2))
  pivot_stats <- st_drop_geometry(pivot_stats)
  # Name the value column after the raster's date.
  pivot_stats <- rename(pivot_stats, "{date}" := mean_CI)

  if (quadrants) {
    save_suffix <- "quadrant"
  } else {
    save_suffix <- "whole_field"
  }
  save_path <- here(save_dir, paste0("extracted_", date, "_", save_suffix, ".rds"))
  saveRDS(pivot_stats, save_path)
}
# pivot_sf_q <- st_read(here("..", "Data", "pivot_20210625.geojson")) %>% dplyr::select(pivot, pivot_quadrant) %>% vect()
# Whole-pivot polygons: the quadrant polygons merged per pivot.
# NOTE(review): `pivot_sf` is not used in the visible part of this script;
# the extraction below works on the quadrant polygons in `pivot_sf_q`.
pivot_sf <- st_read(here(data_dir, "pivot_20210625.geojson")) %>%
  dplyr::select(pivot, pivot_quadrant) %>%
  group_by(pivot) %>%
  summarise() %>%
  vect()
message("pivot loaded")

# `pattern` is a regular expression: anchor and escape it so only files
# ending in ".tif" are listed and sidecar files are skipped.
raster_files_NEW <- list.files(
  merged_final,
  full.names = TRUE,
  pattern = "\\.tif$"
)

# Keep only the CI rasters whose path matches one of the day patterns.
filtered_files <- map(
  dates$days_filter,
  ~ raster_files_NEW[grepl(pattern = .x, x = raster_files_NEW)]
) %>%
  compact() %>%
  flatten_chr()

# Write one RDS of mean CI per quadrant for every daily raster. `save_dir`
# is passed by name so the call does not depend on positional matching.
walk(
  filtered_files,
  extract_rasters_daily,
  field_geojson = pivot_sf_q,
  quadrants = TRUE,
  save_dir = daily_CI_vals_dir
)
message("after walk")
# Pivots that are kept from the harvest data.
selected_pivots <- c(
  "1.1", "1.2", "1.3", "1.4", "1.6", "1.7", "1.8", "1.9", "1.10", "1.11",
  "1.12", "1.13", "1.14", "1.16", "1.17", "1.18",
  "2.1", "2.2", "2.3", "2.4", "2.5",
  "3.1", "3.2", "3.3",
  "4.1", "4.2", "4.3", "4.4", "4.5", "4.6",
  "5.1", "5.2", "5.3", "5.4",
  "6.1", "6.2",
  "DL1.1", "DL1.3"
)

# Load the harvest data and keep only the rows of the selected pivots.
pivots_dates0 <- filter(
  readRDS(here(harvest_dir, "harvest_data_new")),
  pivot %in% !!selected_pivots
)
# pivots_dates_long <- pivots_dates0 %>%
# select(c("pivot_quadrant", "season_start_2021", "season_end_2021", "season_start_2022", "season_end_2022", "season_start_2023", "season_end_2023")) %>%
# pivot_longer(cols = c("season_start_2021", "season_end_2021", "season_start_2022", "season_end_2022", "season_start_2023", "season_end_2023")) %>%
# separate(pivot_quadrant, into = c("name", "Year"), sep = "\\.")
# Reshape the season start/end columns into one row per field and year, with
# the columns Field, Year, Season_start and Season_end.
harvest_columns <- c(
  "pivot_quadrant",
  "season_start_2021", "season_end_2021",
  "season_start_2022", "season_end_2022",
  "season_start_2023", "season_end_2023",
  "season_start_2024", "season_end_2024"
)

harvesting_data <- select(pivots_dates0, all_of(harvest_columns))

# Long format: one row per field and season column.
harvesting_data <- pivot_longer(
  harvesting_data,
  cols = starts_with("season"),
  names_to = "Year",
  values_to = "value"
)

# Split e.g. "season_start_2021" into name = "season_start", Year = "2021".
harvesting_data <- separate(
  harvesting_data,
  Year,
  into = c("name", "Year"),
  sep = "(?<=season_start|season_end)\\_",
  remove = FALSE
)

# Title-case the name so the widened columns become Season_start/Season_end.
harvesting_data <- mutate(harvesting_data, name = str_to_title(name))
harvesting_data <- pivot_wider(
  harvesting_data,
  names_from = name,
  values_from = value
)
harvesting_data <- rename(harvesting_data, Field = pivot_quadrant)
# extracted_values <- list.files("C:\\Users\\timon\\Resilience BV\\4002 CMD App - General\\4002 CMD Team\\4002 TechnicalData\\04 WP2 technical\\DetectingSpotsR\\EcoFarm\\planet\\extracted",
# pattern ="_quadrant", full.names = TRUE)
# All daily extraction files written so far.
extracted_values <- list.files(here(daily_CI_vals_dir), full.names = TRUE)
#get CI values for this week only
#extracted_values <- map(dates$days_filter, ~ extracted_values[grepl(pattern = .x, x = extracted_values)]) %>%
# compact() %>%
# flatten_chr()

# Combine them into one data frame: stack the daily tables, then collapse to
# one row per quadrant by taking the first non-NA value of every column.
pivot_stats <- summarise(
  group_by(
    list_rbind(map(extracted_values, readRDS)),
    pivot_quadrant
  ),
  across(everything(), function(column) first(na.omit(column)))
)
#saveRDS(pivot_stats, here(cumulative_CI_vals_dir,"combined_CI_data.rds")) #used to save the rest of the data into one file
#load historic CI data and update it with last week of CI data
# Historic CI table, without the rows that have no quadrant id.
combined_CI_data <- drop_na(
  readRDS(here(cumulative_CI_vals_dir, "combined_CI_data.rds")),
  pivot_quadrant
)

# Put the freshly extracted values on top of the historic table.
pivot_stats2 <- bind_rows(pivot_stats, combined_CI_data)
# pivot_stats2 <- purrr::map(list.files(here(daily_CI_vals_dir), full.names = TRUE, pattern = "quadrant"), readRDS) %>% list_rbind() %>% group_by(pivot_quadrant) %>%
# summarise(across(everything(), ~ first(na.omit(.))))

# Quadrants that appear in the filtered harvest data.
pivots_data_present <- unique(pivots_dates0[["pivot_quadrant"]])
quadrant_list <- pivots_data_present
# Gather the data into long format for easier calculation and visualisation:
# one row per quadrant and date column.
pivot_stats_long <- tidyr::gather(
  pivot_stats2, "Date", value, -pivot_quadrant, -pivot
)

# The last 8 characters of the former column name are parsed as a
# year-month-day date. right() is not defined in this file (presumably from
# CIprep).
pivot_stats_long <- mutate(
  pivot_stats_long,
  Date = lubridate::ymd(right(Date, 8))
)

# Drop rows without a value or a parsable date, make the value numeric, and
# remove rows that contain an infinite value in any column.
pivot_stats_long <- drop_na(pivot_stats_long, c("value", "Date"))
pivot_stats_long <- mutate(pivot_stats_long, value = as.numeric(value))
pivot_stats_long <- filter(
  pivot_stats_long,
  if_all(everything(), ~ !is.infinite(.x))
)
pivot_stats_long <- rename(pivot_stats_long, Field = pivot_quadrant)
# For every season, keep the harvest rows that have a date for that season
# and list the quadrants they cover.
# NOTE(review): 2022 is selected on `season_end_2022`, while the other
# seasons are selected on `season_start_<year>`. Confirm this is intended.

# 2021
pivots_dates_Data_2021 <- filter(pivots_dates0, !is.na(season_start_2021))
pivot_select_model_Data_2021 <- unique(pivots_dates_Data_2021[["pivot_quadrant"]])

# 2022
pivots_dates_Data_2022 <- filter(pivots_dates0, !is.na(season_end_2022))
pivot_select_model_Data_2022 <- unique(pivots_dates_Data_2022[["pivot_quadrant"]])

# 2023
pivots_dates_Data_2023 <- filter(pivots_dates0, !is.na(season_start_2023))
pivot_select_model_Data_2023 <- unique(pivots_dates_Data_2023[["pivot_quadrant"]])

# 2024
pivots_dates_Data_2024 <- filter(pivots_dates0, !is.na(season_start_2024))
pivot_select_model_Data_2024 <- unique(pivots_dates_Data_2024[["pivot_quadrant"]])
## Extracting the correct CI values
# Run extract_CI_data() (not defined in this file, presumably from CIprep)
# for every field of one season and stack the per-field results.
extract_season_ci <- function(field_ids, season) {
  per_field <- map(field_ids, function(field_id) {
    extract_CI_data(
      field_id,
      harvesting_data = harvesting_data,
      field_CI_data = pivot_stats_long,
      season = season
    )
  })
  list_rbind(per_field)
}

# Season 2021 is currently not extracted; only its progress message remains.
#Data_2021 <- map(pivot_select_model_Data_2021, ~ extract_CI_data(.x, harvesting_data = harvesting_data, field_CI_data = pivot_stats_long, season = 2021)) %>% list_rbind()
message("2021")
Data_2022 <- extract_season_ci(pivot_select_model_Data_2022, 2022)
message("2022")
Data_2023 <- extract_season_ci(pivot_select_model_Data_2023, 2023)
message("2023")
Data_2024 <- extract_season_ci(pivot_select_model_Data_2024, 2024)
message("2024")

CI_all <- rbind(Data_2022, Data_2023, Data_2024)
message("CI_all created")
#CI_all <- Data_2023

# Per `model` group: the change in FitData from the previous row, and the
# running total of FitData. The result stays grouped by `model`.
CI_all <- mutate(
  group_by(CI_all, model),
  CI_per_day = FitData - lag(FitData),
  cumulative_CI = cumsum(FitData)
)
message("CI_all cumulative")
head(CI_all)
message("show head")

saveRDS(
  CI_all,
  here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")
)
message("rds saved")