Adjusted all functions and cleaned up scripts

This commit is contained in:
Timon 2024-08-29 16:35:59 +02:00
parent ad2f41482f
commit 611bc17017
9 changed files with 227 additions and 961 deletions

View file

@ -1,195 +0,0 @@
renv::activate()
renv::restore()
# Download the Excel sheet with planting dates and prepare local storage dirs.
library(googledrive)
library(here)
library(tidyverse)
library(lubridate)
library(readxl)
# Create the storage directory tree; showWarnings = FALSE silences the
# "already exists" warning on repeat runs (dir.create is otherwise a no-op).
storage_dir <- here("../laravel_app/storage/app")
data_dir <- here(storage_dir, "Data")
harvest_dir <- here(data_dir, "HarvestData")
dir.create(file.path(data_dir), showWarnings = FALSE)
dir.create(file.path(harvest_dir), showWarnings = FALSE)
week <- ymd(Sys.Date())
# paste0() instead of paste(): the default sep = " " produced file names like
# "Data 2024-08-29 .xlsx" with embedded spaces.
file_name <- here(harvest_dir, paste0("Data", week, ".xlsx"))
# yield_file_path <- here("Data", "HarvestData", paste("Current - Pivots planting date and harvesting data.xlsx"))
# Remove a stale copy of today's download, if any, before re-downloading.
if (file.exists(file_name)) {
file.remove(file_name)
print("File deleted")
}
#options(gargle_oauth_email = "henkpoldergraaf@gmail.com")
#folder_url <- "https://docs.google.com/spreadsheets/d/1KPOPHvlzTBDvgFCYQetss0yoafeZeoNW/edit?rtpof=true#gid=1683410275"
#folder <- drive_get(as_id(folder_url))
#drive_download(folder, file_name, overwrite = T)
# NOTE(review): "harevsting" is misspelled, but the path must match the file
# that actually exists on disk — confirm before renaming either side.
excel_file_name <- here(storage_dir,"harvesting_data", "Current - Pivots planting date and harevsting data.xlsx")
# library(googlesheets4)
# read_sheet(folder_url)
#get dates in same column
# Read the planting-date / harvest workbook. col_types is positional: each
# season's block keeps date, age, weight and yield columns and "skip" drops
# the unused ones; columns are then renamed by position, not by header.
dates <- read_excel(excel_file_name,
skip = 2,
col_types = c("text", "numeric", "text", "date", "numeric", "numeric", "numeric",
"date", "numeric", "skip", "skip", "numeric", "numeric", "numeric","skip", #2020 harvesting data
"date", "numeric", "skip", "skip", "numeric", "numeric", "numeric","skip", #2021 harvesting data
"date", "numeric", "skip", "skip", "numeric", "skip", "numeric","numeric", "skip", #2022 harvesting data
"date", "numeric", "skip", "skip", "skip", "skip", "skip", "skip", "skip", "skip", "skip", "skip", "skip", "numeric", "numeric","numeric","skip", #2023 harvesting data
"skip", "skip", "skip", "skip", "skip")) %>% #empty columns
rename(pivot_quadrant = 1,
area = 2,
variety = 3,
planting_date = 4,
Age = 5,
ratoon = 6,
Year_replanted = 7,
Harvesting_date_2020 = 8,
Harvesting_age_2020 = 9,
MT_weight_2020 = 10,
Tcha_2020 = 11,
Tchm_2020 = 12,
Harvesting_date_2021 = 13,
Harvesting_age_2021 = 14,
MT_weight_2021 = 15,
Tcha_2021 = 16,
Tchm_2021 = 17,
Harvesting_date_2022 = 18,
Harvesting_age_2022 = 19,
MT_weight_2022 = 20,
Tcha_2022 = 21,
Tchm_2022 = 22,
Harvesting_date_2023 = 23,
Harvesting_age_2023 = 24,
MT_weight_2023 = 25,
Tcha_2023 = 26,
Tchm_2023 = 27,
) %>%
# slice(-1) %>% #select(-age) %>%
# filter(pivot_quadrant != "Total") %>% #drop_na(pivot_quadrant) %>%
# Normalise date columns and drop workbook summary rows.
# NOTE(review): Harvesting_date_2023 is never passed through ymd() like the
# 2020-2022 columns — confirm whether that is intentional.
mutate(planting_date = ymd(planting_date ),
Harvesting_date_2020 = ymd(Harvesting_date_2020),
Harvesting_date_2021 = ymd(Harvesting_date_2021),
Harvesting_date_2022= ymd(Harvesting_date_2022),
Age = round(Age,0)) %>% filter(pivot_quadrant != "Total/Average")%>%
filter(pivot_quadrant != "Total")
#copy each row and add ABCD
# Expand each pivot row into four quadrant rows (suffix A-D), then normalise
# the irregular workbook labels to canonical quadrant names. Duplicate targets
# produced by the mapping collapse later via unique().
quadrants <- dates %>% slice(rep(1:n(), each=4)) %>%
group_by(pivot_quadrant) %>%
mutate(pivot_quadrant = paste0(pivot_quadrant, c("A", "B", "C", "D"))) %>%
filter(pivot_quadrant != "P1.8D" & pivot_quadrant != "P1.8 Q.DA"& pivot_quadrant != "P1.8 Q.DB"& pivot_quadrant != "P1.8 Q.DC") %>%
# Manual label fix-ups: workbook rows cover half-pivots ("P1.3 AB") or odd
# naming ("P1.8 Q.D"); map each synthetic A-D suffix onto the real quadrant.
# NOTE(review): "P1.3 ABA" maps to "P1.3A" (keeps the leading P) while its
# siblings map to "1.3B" etc. — harmless because str_replace below strips the
# first "P", but confirm it is not a typo.
mutate(pivot_quadrant = case_when(pivot_quadrant == "P1.3 ABA" ~ "P1.3A",
pivot_quadrant == "P1.3 ABB" ~ "1.3B",
pivot_quadrant == "P1.3 ABC" ~ "1.3A",
pivot_quadrant == "P1.3 ABD" ~ "1.3A",
pivot_quadrant == "P1.3 CDA" ~ "1.3C",
pivot_quadrant == "P1.3 CDB" ~ "1.3D",
pivot_quadrant == "P1.3 CDC" ~ "1.3C",
pivot_quadrant == "P1.3 CDD" ~ "1.3C",
pivot_quadrant == "P1.8 Q.DD" ~ "1.8D",
pivot_quadrant == "P1.9.ABA" ~ "1.9A",
pivot_quadrant == "P1.9.ABB" ~ "1.9B",
pivot_quadrant == "P1.9.ABC" ~ "1.9A",
pivot_quadrant == "P1.9.ABD" ~ "1.9A",
pivot_quadrant == "P1.9.CDA" ~ "1.9C",
pivot_quadrant == "P1.9.CDB" ~ "1.9D",
pivot_quadrant == "P1.9.CDC" ~ "1.9C",
pivot_quadrant == "P1.9.CDD" ~ "1.9C",
pivot_quadrant == "P2.3AA" ~ "2.3A",
pivot_quadrant == "P2.3AB" ~ "2.3A",
pivot_quadrant == "P2.3AC" ~ "2.3A",
pivot_quadrant == "P2.3AD" ~ "2.3A",
pivot_quadrant == "P2.3DA" ~ "2.3D",
pivot_quadrant == "P2.3DB" ~ "2.3D",
pivot_quadrant == "P2.3DC" ~ "2.3D",
pivot_quadrant == "P2.3DD" ~ "2.3D",
pivot_quadrant == "P2.3BCA" ~ "2.3B",
pivot_quadrant == "P2.3BCB" ~ "2.3B",
pivot_quadrant == "P2.3BCC" ~ "2.3C",
pivot_quadrant == "P2.3BCD" ~ "2.3C",
pivot_quadrant == "P2.5 ABA" ~ "2.5A",
pivot_quadrant == "P2.5 ABB" ~ "2.5B",
pivot_quadrant == "P2.5 ABC" ~ "2.5A",
pivot_quadrant == "P2.5 ABD" ~ "2.5A",
pivot_quadrant == "P2.5 CDA" ~ "2.5C",
pivot_quadrant == "P2.5 CDB" ~ "2.5D",
pivot_quadrant == "P2.5 CDC" ~ "2.5C",
pivot_quadrant == "P2.5 CDD" ~ "2.5C",
pivot_quadrant == "P3.1ABA" ~ "3.1A",
pivot_quadrant == "P3.1ABB" ~ "3.1B",
pivot_quadrant == "P3.1ABC" ~ "3.1A",
pivot_quadrant == "P3.1ABD" ~ "3.1A",
pivot_quadrant == "P3.1CDA" ~ "3.1C",
pivot_quadrant == "P3.1CDB" ~ "3.1D",
pivot_quadrant == "P3.1CDC" ~ "3.1C",
pivot_quadrant == "P3.1CDD" ~ "3.1C",
pivot_quadrant == "P3.2 ABA" ~ "3.2A",
pivot_quadrant == "P3.2 ABB" ~ "3.2B",
pivot_quadrant == "P3.2 ABC" ~ "3.2A",
pivot_quadrant == "P3.2 ABD" ~ "3.2A",
pivot_quadrant == "P3.2 CDA" ~ "3.2C",
pivot_quadrant == "P3.2 CDB" ~ "3.2D",
pivot_quadrant == "P3.2 CDC" ~ "3.2C",
pivot_quadrant == "P3.2 CDD" ~ "3.2C",
pivot_quadrant == "DL 1.3A" ~ "DL1.3",
pivot_quadrant == "DL 1.3B" ~ "DL1.3",
pivot_quadrant == "DL 1.3C" ~ "DL1.3",
pivot_quadrant == "DL 1.3D" ~ "DL1.3",
pivot_quadrant == "DL 1.1A" ~ "DL1.1",
pivot_quadrant == "DL 1.1B" ~ "DL1.1",
pivot_quadrant == "DL 1.1C" ~ "DL1.1",
pivot_quadrant == "DL 1.1D" ~ "DL1.1",
TRUE ~ pivot_quadrant) ) %>% unique() %>%
# str_replace only touches the FIRST match: strips the leading "P" prefix.
mutate_at("pivot_quadrant", str_replace, "P", "") %>%
mutate(pivot = pivot_quadrant) %>%
# Drop the first A-D character to get the pivot id. For "DL1.1"/"DL1.3" this
# eats the "D" of "DL" (giving "L1.1"/"L1.3"), which the case_when restores.
mutate_at("pivot",str_replace, "[ABCD]", "") %>%
mutate(pivot = case_when(pivot == "L1.1"~"DL1.1",
pivot == "L1.3" ~"DL1.3",
TRUE ~ pivot))
# Derive per-season growing windows. Season start is either the previous
# harvest date or (when unknown) the harvest date minus age-in-months * 30
# days; season end is the harvest date, or today for the open season.
quadrants2 <- quadrants %>%
mutate(
season_start_2021 = case_when(!is.na(Harvesting_date_2021) ~ Harvesting_date_2021 - (Harvesting_age_2021 * 30)) ,
season_end_2021 = case_when(!is.na(Harvesting_date_2021) ~ Harvesting_date_2021),
season_start_2022 = case_when(is.na(Harvesting_date_2021) & !is.na(Harvesting_date_2022) ~ Harvesting_date_2022 - (Harvesting_age_2022 * 30),
!is.na(Harvesting_date_2021) & !is.na(Harvesting_date_2022) ~ Harvesting_date_2021
),
season_end_2022 = case_when(!is.na(Harvesting_date_2022) & !is.na(season_start_2022) ~ Harvesting_date_2022),
# 2023 start: planting date for new plantings (ratoon 0), otherwise the 2022
# harvest; the second case_when then overrides when 2022 data is missing.
season_start_2023 = case_when(ratoon == 0 ~ planting_date,
TRUE ~ Harvesting_date_2022),
season_start_2023 = case_when(is.na(Harvesting_date_2022) ~ Harvesting_date_2021,
TRUE ~ season_start_2023),
season_end_2023 = case_when(!is.na(Harvesting_date_2023) ~ Harvesting_date_2023,
TRUE ~ ymd(Sys.Date())),
season_start_2024 = case_when(!is.na(Harvesting_date_2023) ~ Harvesting_date_2023,
TRUE ~ NA),
season_end_2024 = case_when(!is.na(season_start_2024) ~ ymd(Sys.Date())),
)
# Persist for downstream CI-extraction scripts (read back as harvest_data_new).
saveRDS(quadrants2, here(harvest_dir, "harvest_data_new"))

View file

@ -7,7 +7,6 @@ library(lubridate)
library(exactextractr)
library(readxl)
# Capture all command line arguments
args <- commandArgs(trailingOnly = TRUE)
@ -54,38 +53,31 @@ weekly_CI_mosaic <- here(laravel_storage_dir, "weekly_mosaic")
daily_vrt <- here(data_dir, "vrt")
harvest_dir <- here(data_dir, "HarvestData")
source(here("r_app/parameters_project.R"))
source(here("r_app/ci_extraction_utils.R"))
# source(here("r_app/mosaic_creation_utils.R"))
source("parameters_project.R")
source("ci_extraction_utils.R")
source("mosaic_creation_utils.R")
# source("mosaic_creation_utils.R")
dir.create(here(laravel_storage_dir))
dir.create(here(data_dir))
dir.create(here(extracted_CI_dir))
dir.create(here(daily_CI_vals_dir))
dir.create(here(cumulative_CI_vals_dir))
dir.create(here(weekly_CI_mosaic))
dir.create(here(daily_vrt))
dir.create(merged_final)
dir.create(harvest_dir)
# dir.create(here(laravel_storage_dir))
# dir.create(here(data_dir))
# dir.create(here(extracted_CI_dir))
# dir.create(here(daily_CI_vals_dir))
# dir.create(here(cumulative_CI_vals_dir))
# dir.create(here(weekly_CI_mosaic))
# dir.create(here(daily_vrt))
# dir.create(merged_final)
# dir.create(harvest_dir)
# end_date <- lubridate::dmy("20-6-2024")
end_date <- lubridate::dmy("28-08-2024")
week <- week(end_date)
#weeks_ago = 0
# Creating weekly mosaic
#dates <- date_list(weeks_ago)
dates <- date_list(end_date, offset)
print(dates)
#load pivot geojson
# pivot_sf_q <- st_read(here(data_dir, "pivot.geojson")) %>% dplyr::select(pivot, pivot_quadrant) %>% vect()
# raster_files <- list.files(planet_tif_folder,full.names = T, pattern = ".tif")
# filtered_files <- map(dates$days_filter, ~ raster_files[grepl(pattern = .x, x = raster_files)]) %>%
# compact() %>%
# flatten_chr()
# head(filtered_files)
raster_files <- list.files(planet_tif_folder,full.names = T, pattern = ".tif")
@ -93,39 +85,13 @@ filtered_files <- map(dates$days_filter, ~ raster_files[grepl(pattern = .x, x =
compact() %>%
flatten_chr()
head(filtered_files)
# filtered_files <- raster_files #for first CI extraction
# create_mask_and_crop <- function(file, field_boundaries) {
# message("starting ", file)
# CI <- rast(file)
# # names(CI) <- c("green","nir")
# message("raster loaded")
#
# CI <- CI[[2]]/CI[[1]]-1
# # CI <- CI$nir/CI$green-1
# message("CI calculated")
# CI <- terra::crop(CI, field_boundaries, mask = TRUE) #%>% CI_func()
#
# new_file <- here(merged_final, paste0(tools::file_path_sans_ext(basename(file)), ".tif"))
# writeRaster(CI, new_file, overwrite = TRUE)
#
# vrt_file <- here(daily_vrt, paste0(tools::file_path_sans_ext(basename(file)), ".vrt"))
# terra::vrt(new_file, vrt_file, overwrite = TRUE)
#
# return(CI)
# }
vrt_list <- list()
# for (file in raster_files) {
# v_crop <- create_mask_and_crop(file, field_boundaries)
# message(file, " processed")
# gc()
# }
for (file in filtered_files) {
v_crop <- create_mask_and_crop(file, field_boundaries)
v_crop <- create_mask_and_crop(file, field_boundaries, merged_final)
emtpy_or_full <- global(v_crop, "notNA")
vrt_file <- here(daily_vrt, paste0(tools::file_path_sans_ext(basename(file)), ".vrt"))
@ -140,63 +106,59 @@ for (file in filtered_files) {
gc()
}
# Extracting CI
# pivot_sf_q <- st_read(here("..", "Data", "pivot_20210625.geojson")) %>% dplyr::select(pivot, pivot_quadrant) %>% vect()
# pivot_sf <- st_read(here(data_dir, "pivot.geojson")) %>% dplyr::select(pivot, pivot_quadrant) %>% group_by(pivot) %>% summarise() %>% vect()
# message("pivot loaded")
raster_files_NEW <- list.files(merged_final,full.names = T, pattern = ".tif")
# pivots_dates0 <- readRDS(here(harvest_dir, "harvest_data_new")) %>% filter(
# pivot %in% c("1.1", "1.2", "1.3", "1.4", "1.6", "1.7", "1.8", "1.9", "1.10", "1.11", "1.12", "1.13",
# "1.14" , "1.16" , "1.17" , "1.18" ,"2.1", "2.2", "2.3" , "2.4", "2.5", "3.1", "3.2", "3.3",
# "4.1", "4.2", "4.3", "4.4", "4.5", "4.6", "5.1" ,"5.2", "5.3", "5.4", "6.1", "6.2", "DL1.1", "DL1.3")
# )
# harvesting_data <- pivots_dates0 %>%
# select(c("pivot_quadrant", "season_start_2021", "season_end_2021", "season_start_2022", "season_end_2022", "season_start_2023", "season_end_2023", "season_start_2024", "season_end_2024")) %>%
# pivot_longer(cols = starts_with("season"), names_to = "Year", values_to = "value") %>%
# separate(Year, into = c("name", "Year"), sep = "(?<=season_start|season_end)\\_", remove = FALSE) %>%
# mutate(name = str_to_title(name)) %>%
# pivot_wider(names_from = name, values_from = value) %>%
# rename(field = pivot_quadrant)
#If run for the firsttime, it will extract all data since the start and put into a table.rds. otherwise it will only add on to the existing table.
# Define the path to the file
file_path <- here(cumulative_CI_vals_dir,"combined_CI_data.rds")
file_path <- here(cumulative_CI_vals_dir, "combined_CI_data.rds")
# Check if the file exists
if (!file.exists(file_path)) {
# Create the file with columns "column1" and "column2"
data <- data.frame(sub_field=NA, field=NA)
saveRDS(data, file_path)
# File does not exist, create it with all available data
print("combined_CI_data.rds does not exist. Preparing combined_CI_data.rds file for all available images.")
# Extract data from all raster files
walk(raster_files_NEW, extract_rasters_daily, field_geojson = field_boundaries, quadrants = FALSE, daily_CI_vals_dir)
# Combine all extracted data
extracted_values <- list.files(here(daily_CI_vals_dir), full.names = TRUE)
pivot_stats <- extracted_values %>%
map(readRDS) %>% list_rbind() %>%
group_by(sub_field)
# Save the combined data to the file
saveRDS(pivot_stats, file_path)
print("All CI values extracted from all historic images and saved to combined_CI_data.rds.")
} else {
# File exists, add new data
print("combined_CI_data.rds exists, adding the latest image data to the table.")
# Filter and process the latest data
filtered_files <- map(dates$days_filter, ~ raster_files_NEW[grepl(pattern = .x, x = raster_files_NEW)]) %>%
compact() %>%
flatten_chr()
walk(filtered_files, extract_rasters_daily, field_geojson = field_boundaries, quadrants = TRUE, daily_CI_vals_dir)
# Extract new values
extracted_values <- list.files(daily_CI_vals_dir, full.names = TRUE)
extracted_values <- map(dates$days_filter, ~ extracted_values[grepl(pattern = .x, x = extracted_values)]) %>%
compact() %>%
flatten_chr()
pivot_stats <- extracted_values %>%
map(readRDS) %>% list_rbind() %>%
group_by(sub_field)
# Load existing data and append new data
combined_CI_data <- readRDS(file_path)
pivot_stats2 <- bind_rows(pivot_stats, combined_CI_data)
# Save the updated combined data
saveRDS(pivot_stats2, file_path)
print("All CI values extracted from the latest images and added to combined_CI_data.rds.")
}
print("combined_CI_data.rds exists, adding the latest image data to the table.")
filtered_files <- map(dates$days_filter, ~ raster_files_NEW[grepl(pattern = .x, x = raster_files_NEW)]) %>%
compact() %>%
flatten_chr()
walk(filtered_files, extract_rasters_daily, field_geojson= field_boundaries, quadrants = TRUE, daily_CI_vals_dir)
extracted_values <- list.files(daily_CI_vals_dir, full.names = TRUE)
extracted_values <- map(dates$days_filter, ~ extracted_values[grepl(pattern = .x, x = extracted_values)]) %>%
compact() %>%
flatten_chr()
pivot_stats <- extracted_values %>%
map(readRDS) %>% list_rbind() %>%
group_by(sub_field)
combined_CI_data <- readRDS(here(cumulative_CI_vals_dir,"combined_CI_data.rds")) #%>% drop_na(pivot_quadrant)
head(combined_CI_data)
pivot_stats2 <- bind_rows(pivot_stats, combined_CI_data)
# pivot_stats2 <- combined_CI_data
print("All CI values extracted from latest image.")
saveRDS(pivot_stats2, here(cumulative_CI_vals_dir,"combined_CI_data.rds")) #used to save the rest of the data into one file

View file

@ -1,6 +1,5 @@
# Utils for ci extraction
date_list <- function(end_date, offset, week){
date_list <- function(end_date, offset){
offset <- as.numeric(offset) - 1
end_date <- as.Date(end_date)
start_date <- end_date - lubridate::days(offset)
@ -12,44 +11,26 @@ date_list <- function(end_date, offset, week){
return(list("week" = week, "year" = year, "days_filter" = days_filter))
}
# date_list <- function(weeks_in_the_paste){
# week <- week(Sys.Date()- weeks(weeks_in_the_paste) )
# year <- year(Sys.Date()- weeks(weeks_in_the_paste) )
# days_filter <- Sys.Date() - weeks(weeks_in_the_paste) - days(0:6)
#
# return(c("week" = week,
# "year" = year,
# "days_filter" = list(days_filter)))
#
# }
# Append a CI band (NIR / green - 1) to a 5-band raster and name all layers.
# When drop_layers is TRUE, return only the CI layer; otherwise the full stack.
CI_func <- function(x, drop_layers = FALSE){
  ci_band <- x[[4]] / x[[2]] - 1
  add(x) <- ci_band
  names(x) <- c("red", "green", "blue","nir", "cloud" ,"CI")
  if (drop_layers) {
    x$CI
  } else {
    x
  }
}
mask_raster <- function(raster, fields){
# x <- rast(filtered_files[1])
x <- rast(raster)
emtpy_or_full <- global(x, sum)
create_mask_and_crop <- function(file, field_boundaries, merged_final_dir) {
message("starting ", file)
loaded_raster <- rast(file)
names(loaded_raster) <- c("Red", "Green", "Blue", "NIR")
message("raster loaded")
if(emtpy_or_full[1,] >= 2000000){
names(x) <- c("red", "green", "blue","nir", "cloud")
cloud <- x$cloud
cloud[cloud == 0 ] <- NA
x_masked <- mask(x, cloud, inverse = T) %>% crop(.,fields, mask = TRUE )
x_masked <- x_masked %>% CI_func()
message(raster, " masked")
return(x_masked)
}
CI <- loaded_raster$NIR / loaded_raster$Green - 1
loaded_raster$CI <- CI
message("CI calculated")
loaded_raster <- terra::crop(loaded_raster, field_boundaries, mask = TRUE) #%>% CI_func()
loaded_raster[loaded_raster == 0] <- NA
new_file <- here(merged_final_dir, paste0(tools::file_path_sans_ext(basename(file)), ".tif"))
writeRaster(loaded_raster, new_file, overwrite = TRUE)
vrt_file <- here(daily_vrt, paste0(tools::file_path_sans_ext(basename(file)), ".vrt"))
terra::vrt(new_file, vrt_file, overwrite = TRUE)
return(loaded_raster)
}
date_extract <- function(file_path) {
@ -57,228 +38,16 @@ date_extract <- function(file_path) {
}
# Extract the mean CI per field polygon from one raster file and save the
# result as an .rds named after the image date.
# file: path to a raster with a CI layer; field_geojson: field polygons
# (coercible via sf::st_as_sf); quadrants: only affects the file-name suffix;
# save_dir: output directory. Side effect: writes one .rds, returns nothing.
extract_rasters_daily <- function(file, field_geojson, quadrants = TRUE, save_dir) {
# x <- rast(filtered_files[1])%>% CI_func(drop_layers = TRUE)
# date <- date_extract(filtered_files[1])
# field_geojson <- sf::st_as_sf(pivot_sf_q)
field_geojson <- sf::st_as_sf(field_geojson)
x <- rast(file[1])
date <- date_extract(file)
# Mean CI per polygon; the glue := renames the value column to the date string
# so each daily file contributes one dated column when later combined.
pivot_stats <- cbind(field_geojson, mean_CI = round(exactextractr::exact_extract(x$CI, field_geojson, fun = "mean"), 2)) %>%
st_drop_geometry() %>% rename("{date}" := mean_CI)
save_suffix <- if (quadrants){"quadrant"} else {"whole_field"}
save_path <- here(save_dir, paste0("extracted_", date, "_", save_suffix, ".rds"))
saveRDS(pivot_stats, save_path)
}
# Return the last num_char characters of each element of text (vectorized).
# When num_char exceeds nchar(text), the whole string is returned because
# substr clamps a start position below 1.
right <- function(text, num_char) {
  substr(text, nchar(text) - (num_char-1), nchar(text))
}
# Build a daily CI time series for one field and season: linearly interpolate
# the observed CI values over the full date range, clip to the field's
# harvest window, and tag with day-of-year and a model label.
# NOTE(review): no guard for empty filtered data — approxfun/seq.Date will
# error when no CI rows match, and if filtered_harvesting_data has more than
# one row the season_start/season_end comparison recycles; confirm callers
# pass a single field with data (a later version of this function adds guards).
extract_CI_data <- function(field_names, harvesting_data, field_CI_data, season) {
# field_names = "1.2A"
# harvesting_data = harvesting_data
# field_CI_data = pivot_stats_long
# season= 2021
filtered_harvesting_data <- harvesting_data %>%
filter(year == season, field %in% field_names)
filtered_field_CI_data <- field_CI_data %>%
filter(field %in% field_names)
# CI <- map_df(field_names, ~ {
# Interpolate CI linearly over every calendar day in the observation range.
ApproxFun <- approxfun(x = filtered_field_CI_data$Date, y = filtered_field_CI_data$value)
Dates <- seq.Date(ymd(min(filtered_field_CI_data$Date)), ymd(max(filtered_field_CI_data$Date)), by = 1)
LinearFit <- ApproxFun(Dates)
CI <- data.frame(Date = Dates, FitData = LinearFit) %>%
left_join(., filtered_field_CI_data, by = "Date") %>%
filter(Date > filtered_harvesting_data$season_start & Date < filtered_harvesting_data$season_end) %>%
mutate(DOY = seq(1, n(), 1),
model = paste0("Data", season, " : ", field_names),
season = season,
field = field_names)
# }) #%>%
#{if (length(field_names) > 0) message("Done!")}
return(CI)
}
#
# load_fields <- function(geojson_path) {
# field_geojson <- st_read(geojson_path) %>%
# select(pivot, pivot_quadrant) %>%
# vect()
# return(field_geojson)
# }
#
# load_harvest_data <- function(havest_data_path){
# harvest_data <- readRDS(havest_data_path)
# return(harvest_data)
# }
#
# load_rasters <- function(raster_path, dates) {
# raster_files <- list.files(raster_path, full.names = TRUE, pattern = ".tif")
# filtered_files <- map(dates$days_filter, ~ raster_files[grepl(pattern = .x, x = raster_files)]) %>%
# compact() %>%
# flatten_chr()
#
# return(filtered_files)
# }
#
# mask_and_set_names <- function(filtered_files, fields) {
# rasters_masked <- map(filtered_files, mask_raster, fields = fields) %>% set_names(filtered_files)
# rasters_masked[sapply(rasters_masked, is.null)] <- NULL
# rasters_masked <- setNames(rasters_masked, map_chr(names(rasters_masked), date_extract))
#
# return(rasters_masked)
# }
#
# calculate_total_pix_area <- function(filtered_files, fields_geojson) {
# # total_pix_area <- rast(filtered_files[1]) %>%
# # subset(1) %>%
# # crop(fields_geojson, mask = TRUE)%>%
# # global(.data, fun = "notNA")
# total_pix_area <- rast(filtered_files[1]) %>% subset(1) %>% crop(fields_geojson, mask = TRUE) %>% freq(., usenames = TRUE)
#
# return(total_pix_area)
# }
#
# cloud_layer_extract <- function(rasters_masked){
# cloud_layer_rast <- map(rasters_masked, function(spatraster) {
# spatraster[[5]]
# }) %>% rast()
#
# return(cloud_layer_rast)
# }
#
# calculate_cloud_coverage <- function(cloud_layer_rast, total_pix_area) {
# cloud_perc_list <- freq(cloud_layer_rast, usenames = TRUE) %>%
# mutate(cloud_perc = (100 -((count/total_pix_area$count)*100)),
# cloud_thres_5perc = as.integer(cloud_perc < 5),
# cloud_thres_40perc = as.integer(cloud_perc < 40)) %>%
# rename(Date = layer) %>% select(-value, -count)
#
# cloud_index_5perc <- which(cloud_perc_list$cloud_thres_5perc == max(cloud_perc_list$cloud_thres_5perc))
# cloud_index_40perc <- which(cloud_perc_list$cloud_thres_40perc == max(cloud_perc_list$cloud_thres_40perc))
#
# return(list(cloud_perc_list = cloud_perc_list, cloud_index_5perc = cloud_index_5perc, cloud_index_40perc = cloud_index_40perc))
# }
#
# process_cloud_coverage <- function(cloud_coverage, rasters_masked) {
# if (sum(cloud_coverage$cloud_perc_list$cloud_thres_5perc) > 1) {
# message("More than 1 raster without clouds (<5%), max mosaic made ")
#
# cloudy_rasters_list <- rasters_masked[cloud_coverage$cloud_index_5perc]
# rsrc <- sprc(cloudy_rasters_list)
# x <- mosaic(rsrc)
# names(x) <- c("red", "green", "blue", "nir", "cloud", "CI")
#
# } else if (sum(cloud_coverage$cloud_perc_list$cloud_thres_5perc) == 1) {
# message("Only 1 raster without clouds (<5%)")
#
# x <- rast(rasters_masked[cloud_coverage$cloud_index_5perc[1]])
# names(x) <- c("red", "green", "blue", "nir", "cloud", "CI")
#
# } else if (sum(cloud_coverage$cloud_perc_list$cloud_thres_40perc) > 1) {
# message("More than 1 image contains clouds, composite made of <40% cloud cover images")
#
# cloudy_rasters_list <- rasters_masked[cloud_coverage$cloud_index_40perc]
# rsrc <- sprc(cloudy_rasters_list)
# x <- mosaic(rsrc)
# names(x) <- c("red", "green", "blue", "nir", "cloud", "CI")
#
# } else if (sum(cloud_coverage$cloud_perc_list$cloud_thres_40perc) == 0) {
# message("No cloud free images available")
# x <- rast(rasters_masked[1])
# x[x] <- NA
# names(x) <- c("red", "green", "blue", "nir", "cloud", "CI")
# }
#
# return(x)
# }
# extract_rasters_daily_func <- function(daily_vals_dir, filtered_files, fields_geojson) {
# extracted_files <- walk(filtered_files, extract_rasters_daily, field_geojson = fields_geojson, quadrants = TRUE, daily_vals_dir)
# return(extracted_files)
# }
# CI_load <- function(daily_vals_dir, grouping_variable){
# extracted_values <- list.files(here(daily_vals_dir), full.names = TRUE)
#
# field_CI_values <- extracted_values %>%
# map_dfr(readRDS) %>%
# group_by(.data[[grouping_variable]]) %>%
# summarise(across(everything(), ~ first(na.omit(.))))
# return(field_CI_values)
# }
#
# CI_long <- function(field_CI_values, pivot_long_cols){
# field_CI_long <- field_CI_values %>%
# gather("Date", value, -pivot_long_cols) %>%
# mutate(Date = right(Date, 8),
# Date = ymd(Date)
# ) %>%
# drop_na(c("value","Date")) %>%
# mutate(value = as.numeric(value))%>%
# filter_all(all_vars(!is.infinite(.)))%>%
# rename(field = pivot_quadrant)
#
# return(field_CI_long)
# }
#
# process_year_data <- function(year, harvest_data, field_CI_long) {
# pivots_dates_year <- harvest_data %>% na.omit() %>% filter(year == year)
# pivot_select_model_year <- unique(pivots_dates_year$field)
#
# data <- map_dfr(pivot_select_model_year, ~ extract_CI_data(.x, harvest_data, field_CI_long, season = year))
#
# return(data)
# }
#functions for CI_data_prep
# create_mask_and_crop <- function(file, pivot_sf_q) {
# # file <- filtered_files[5]
# message("starting ", file)
# loaded_raster <- rast(file)
# names(loaded_raster) <- c("Red", "Green", "Blue", "NIR")
# # names(CI) <- c("green","nir")
# message("raster loaded")
#
# # CI <- CI[[2]] / CI[[1]] - 1
# CI <- loaded_raster$NIR / loaded_raster$Green - 1
#
# loaded_raster$CI <- CI
# # CI <- CI$nir/CI$green-1
# message("CI calculated")
# loaded_raster <- terra::crop(loaded_raster, pivot_sf_q, mask = TRUE) #%>% CI_func()
#
# loaded_raster[loaded_raster == 0] <- NA
# # names(v_crop) <- c("red", "green", "blue","nir", "cloud" ,"CI")
# # v_crop$CI <- v_crop$CI - 1
# new_file <- here(merged_final, paste0(tools::file_path_sans_ext(basename(file)), ".tif"))
# writeRaster(loaded_raster, new_file, overwrite = TRUE)
#
# vrt_file <- here(daily_vrt, paste0(tools::file_path_sans_ext(basename(file)), ".vrt"))
# terra::vrt(new_file, vrt_file, overwrite = TRUE)
#
# # v_crop <- mask_raster(v, pivot_sf_q)
# return(loaded_raster)
# }
# extract_rasters_daily <- function(file, field_geojson, quadrants = TRUE, save_dir) {
# # x <- rast(filtered_files[1])%>% CI_func(drop_layers = TRUE)
# # date <- date_extract(filtered_files[1])
# # field_geojson <- sf::st_as_sf(pivot_sf_q)
# field_geojson <- sf::st_as_sf(field_geojson)
# x <- rast(file[1])
# date <- date_extract(file)
# pivot_stats <- cbind(field_geojson, mean_CI = round(exactextractr::exact_extract(x$CI, field_geojson, fun = "mean"), 2)) %>%
# st_drop_geometry() %>% rename("{date}" := mean_CI)
# save_suffix <- if (quadrants){"quadrant"} else {"whole_field"}
# save_path <- here(save_dir, paste0("extracted_", date, "_", save_suffix, ".rds"))
# saveRDS(pivot_stats, save_path)
# }

View file

@ -5,7 +5,7 @@ library(terra)
library(tidyverse)
library(lubridate)
library(exactextractr)
library(readxl)
# library(readxl)
# Capture all command line arguments
@ -37,10 +37,13 @@ weekly_CI_mosaic <- here(laravel_storage_dir, "weekly_mosaic")
daily_vrt <- here(data_dir, "vrt")
harvest_dir <- here(data_dir, "HarvestData")
source(here("r_app/parameters_project.R"))
source(here("r_app/ci_extraction_utils.R"))
# source(here("r_app/mosaic_creation_utils.R"))
source("parameters_project.R")
source("ci_extraction_utils.R")
pivot_stats2 <- readRDS(here(cumulative_CI_vals_dir,"combined_CI_data.rds")) %>%
ungroup() %>%
group_by(field, sub_field) %>%
@ -61,76 +64,74 @@ pivot_stats_long <- pivot_stats2 %>%
# sub_field = field) %>%
unique()
# #2021
# pivot_select_model_Data_2021 <- harvesting_data %>% filter(Year == 2021) %>% pull(field)
#
# pivot_select_model_Data_2022 <- harvesting_data %>% filter(Year == 2022) %>% pull(field)
# pivot_select_model_Data_2023 <- harvesting_data %>% filter(year == 2023) %>% filter(!is.na(season_start)) %>% pull(sub_field)
pivot_select_model_Data_2024 <- harvesting_data %>% filter(year == 2024)%>% filter(!is.na(season_start)) %>% pull(sub_field)
message(pivot_select_model_Data_2024)
# pivots_dates_Data_2022 <- pivots_dates0 %>% filter(!is.na(season_end_2022))
# pivot_select_model_Data_2022 <- unique(pivots_dates_Data_2022$pivot_quadrant )
#
# pivots_dates_Data_2023 <- pivots_dates0 %>% filter(!is.na(season_start_2023))
# pivot_select_model_Data_2023 <- unique(pivots_dates_Data_2023$pivot_quadrant)
#
# pivots_dates_Data_2024 <- pivots_dates0 %>% filter(!is.na(season_start_2024))
# pivot_select_model_Data_2024 <- unique(pivots_dates_Data_2024$pivot_quadrant)
years <- harvesting_data %>%
filter(!is.na(season_start)) %>%
distinct(year) %>%
pull(year)
# Build a daily CI time series for one sub-field and season: linearly
# interpolate observed CI over the full date range, clip to the harvest
# window, and tag with DOY and a model label. Returns an empty data.frame
# when no CI data exists or nothing falls inside the season window.
# (Deduplicated: the previous text carried a doubled empty-check, a doubled
# data.frame() line and a doubled filter() line — leftover diff residue.)
extract_CI_data <- function(field_names, harvesting_data, field_CI_data, season) {
  # Filter harvesting data for the given season and field names
  filtered_harvesting_data <- harvesting_data %>%
    filter(year == season, sub_field %in% field_names)
  # Filter field CI data for the given field names
  filtered_field_CI_data <- field_CI_data %>%
    filter(sub_field %in% field_names)
  # Return an empty data frame if no CI data is found
  if (nrow(filtered_field_CI_data) == 0) {
    return(data.frame())
  }
  # Create a linear interpolation function for the CI data
  ApproxFun <- approxfun(x = filtered_field_CI_data$Date, y = filtered_field_CI_data$value)
  Dates <- seq.Date(ymd(min(filtered_field_CI_data$Date)), ymd(max(filtered_field_CI_data$Date)), by = 1)
  LinearFit <- ApproxFun(Dates)
  # Combine interpolated data with the original CI data and clip to the
  # field's season window.
  CI <- data.frame(Date = Dates, FitData = LinearFit) %>%
    left_join(., filtered_field_CI_data, by = "Date") %>%
    filter(Date > filtered_harvesting_data$season_start & Date < filtered_harvesting_data$season_end)
  # If CI is empty after filtering, return an empty dataframe
  if (nrow(CI) == 0) {
    return(data.frame())
  }
  # Add additional columns if data exists.
  # NOTE(review): the diff residue showed both `sub_field =` and `subField =`
  # here; `sub_field` matches the rest of the pipeline — confirm downstream.
  CI <- CI %>%
    mutate(DOY = seq_len(n()),
           model = paste0("Data", season, " : ", field_names),
           season = season,
           sub_field = field_names)
  return(CI)
}
## Extracting the correct CI values
# Data_2021 <- map(pivot_select_model_Data_2021, ~ extract_CI_data(.x, harvesting_data = harvesting_data, field_CI_data = pivot_stats_long, season = 2021)) %>% list_rbind()
# message('2021')
# Data_2022 <- map(pivot_select_model_Data_2022, ~ extract_CI_data(.x, harvesting_data = harvesting_data, field_CI_data = pivot_stats_long, season = 2022)) %>% list_rbind()
# message('2022')
# Data_2023 <- map(pivot_select_model_Data_2023, ~ extract_CI_data(.x, harvesting_data = harvesting_data, field_CI_data = pivot_stats_long, season = 2023)) %>% list_rbind()
# message('2023')
head(harvesting_data)
Data_2024 <- map(pivot_select_model_Data_2024, ~ extract_CI_data(.x, harvesting_data = harvesting_data, field_CI_data = pivot_stats_long, season = 2024)) %>% list_rbind()
message('2024')
head(Data_2024)
#CI_all <- rbind(Data_2023, Data_2024)
CI_all <- Data_2024
message('CI_all created')
#CI_all <- Data_2023
# For every harvest year, extract the daily CI series of each sub-field that
# was harvested that year AND has CI observations, then row-bind everything.
CI_all <- map_df(years, function(yr) {
# yr = 2021
message(yr)
# Get the fields harvested in this year
sub_fields <- harvesting_data %>%
filter(year == yr) %>%
filter(!is.na(season_start)) %>%
pull(sub_field)
# Filter sub_fields to only include those with value data in pivot_stats_long
valid_sub_fields <- sub_fields %>%
keep(~ any(pivot_stats_long$sub_field == .x))
# Extract data for each valid field
map(valid_sub_fields, ~ extract_CI_data(.x, harvesting_data = harvesting_data, field_CI_data = pivot_stats_long, season = yr)) %>%
list_rbind()
})
# Per-model daily CI change and cumulative CI. (Deduplicated: this identical
# grouped mutate appeared twice back-to-back — diff residue; the second
# application recomputed the same values, so removing it is behavior-safe.)
CI_all <- CI_all %>% group_by(model) %>% mutate(CI_per_day = FitData - lag(FitData),
                                                cumulative_CI = cumsum(FitData))

View file

@ -11,7 +11,7 @@ library(terra)
library(tidyverse)
library(lubridate)
# library(exactextractr)
library(readxl)
# library(readxl)
# Functions for the CI_prep package
@ -62,21 +62,24 @@ weekly_CI_mosaic <- here(laravel_storage_dir, "weekly_mosaic")
daily_vrt <- here(data_dir, "vrt")
harvest_dir <- here(data_dir, "HarvestData")
source(here("r_app/parameters_project.R"))
source(here("r_app/mosaic_creation_utils.R"))
source("parameters_project.R")
source("mosaic_creation_utils.R")
dir.create(here(laravel_storage_dir))
dir.create(here(reports_dir))
dir.create(here(data_dir))
dir.create(here(extracted_CI_dir))
dir.create(here(daily_CI_vals_dir))
dir.create(here(cumulative_CI_vals_dir))
dir.create(here(weekly_CI_mosaic))
dir.create(here(daily_vrt))
dir.create(merged_final)
dir.create(harvest_dir)
# dir.create(here(laravel_storage_dir))
# dir.create(here(reports_dir))
# dir.create(here(data_dir))
# dir.create(here(extracted_CI_dir))
# dir.create(here(daily_CI_vals_dir))
# dir.create(here(cumulative_CI_vals_dir))
# dir.create(here(weekly_CI_mosaic))
# dir.create(here(daily_vrt))
# dir.create(merged_final)
# dir.create(harvest_dir)
# end_date <- lubridate::dmy("20-6-2024")
# end_date <- lubridate::dmy("28-8-2024")
week <- week(end_date)
@ -101,78 +104,81 @@ vrt_list <- map(dates$days_filter, ~ vrt_files[grepl(pattern = .x, x = vrt_file
compact() %>%
flatten_chr()
# ---------------------------------------------------------------------------
# Weekly CI mosaic creation.
# NOTE(review): this span is diff residue — many statements appear twice
# (first the unformatted pre-commit line, then the styler-formatted
# post-commit line, e.g. the two `if (length(vrt_list) > 0)` headers and the
# two `total_pix_area <-` assignments), and as pasted the braces do not
# balance. When reconciling, keep only the formatted copy of each pair.
# ---------------------------------------------------------------------------
# All finished GeoTIFFs; used as a template for the empty fallback mosaic.
raster_files_final <- list.files(merged_final,full.names = T, pattern = ".tif")
if (length(vrt_list) > 0 ){
raster_files_final <- list.files(merged_final,full.names = T, pattern = ".tif")
if (length(vrt_list) > 0) {
print("vrt list made, preparing mosaic creation by counting cloud cover")
# Reference pixel count: band 1 of the first VRT, set to all-ones and cropped
# to the field boundaries, so `notNA` counts the total in-field pixels.
total_pix_area <- rast(vrt_list[1]) %>% terra::subset(1) %>% setValues(1) %>%
total_pix_area <-
rast(vrt_list[1]) %>% terra::subset(1) %>% setValues(1) %>%
crop(field_boundaries, mask = TRUE) %>%
global(., fun="notNA") #%>%
global(., fun = "notNA") #%>%
# Band 1 of every daily VRT stacked into one multi-layer raster.
layer_5_list <- purrr::map(vrt_list, function(vrt_list) {
rast(vrt_list[1]) %>% terra::subset(1)
}) %>% rast()
# Per-image missing-pixel (cloud) percentage relative to the reference count.
missing_pixels_count <- layer_5_list %>% global(., fun="notNA") %>%
missing_pixels_count <-
layer_5_list %>% global(., fun = "notNA") %>%
mutate(
total_pixels = total_pix_area$notNA,
missing_pixels_percentage = round(100 -((notNA/total_pix_area$notNA)*100)),
missing_pixels_percentage = round(100 - ((
notNA / total_pix_area$notNA
) * 100)),
thres_5perc = as.integer(missing_pixels_percentage < 5),
# NOTE(review): named "40perc" but the cutoff is < 45 — confirm intended.
thres_40perc = as.integer(missing_pixels_percentage < 45)
)
# NOTE(review): leftover debug output ("hier" is Dutch for "here").
message('hier')
message(missing_pixels_count)
# Indices of the images at each cloud-cover threshold.
index_5perc <- which(missing_pixels_count$thres_5perc == max(missing_pixels_count$thres_5perc) )
index_5perc <- which(missing_pixels_count$thres_5perc == max(missing_pixels_count$thres_5perc))
index_40perc <- which(missing_pixels_count$thres_40perc == max(missing_pixels_count$thres_40perc))
## Create mosaic
# Preference order: max-composite of all <5%-cloud images; single <5% image;
# max-composite of <40%-cloud images; single cloudy image; else combine all.
if(sum(missing_pixels_count$thres_5perc)>1){
if (sum(missing_pixels_count$thres_5perc) > 1) {
message("More than 1 raster without clouds (<5%), max composite made")
cloudy_rasters_list <- vrt_list[index_5perc]
rsrc <- sprc(cloudy_rasters_list)
x <- terra::mosaic(rsrc, fun = "max")
names(x) <- c("Red", "Green", "Blue", "NIR", "CI")
}else if(sum(missing_pixels_count$thres_5perc)==1){
} else if (sum(missing_pixels_count$thres_5perc) == 1) {
message("Only 1 raster without clouds (<5%)")
x <- rast(vrt_list[index_5perc[1]])
names(x) <- c("Red", "Green", "Blue", "NIR", "CI")
}else if(sum(missing_pixels_count$thres_40perc)>1){
} else if (sum(missing_pixels_count$thres_40perc) > 1) {
message("More than 1 image contains clouds, composite made of <40% cloud cover images")
cloudy_rasters_list <- vrt_list[index_40perc]
rsrc <- sprc(cloudy_rasters_list)
x <- mosaic(rsrc, fun = "max")
names(x) <- c("Red", "Green", "Blue", "NIR", "CI")
}else if(sum(missing_pixels_count$thres_40perc)==1){
} else if (sum(missing_pixels_count$thres_40perc) == 1) {
message("Only 1 image available but contains clouds")
x <- rast(vrt_list[index_40perc[1]])
names(x) <- c("Red", "Green", "Blue", "NIR", "CI")
}else if(sum(missing_pixels_count$thres_40perc)==0){
} else if (sum(missing_pixels_count$thres_40perc) == 0) {
message("No cloud free images available, all images combined")
rsrc <- sprc(vrt_list)
x <- mosaic(rsrc, fun = "max")
names(x) <- c("Red", "Green", "Blue", "NIR", "CI")
}
} else{
# No imagery this week: produce an all-zero mosaic from an earlier GeoTIFF as
# a template so downstream steps still receive a raster of the right shape.
message("No images available this week, empty mosaic created")
x <- rast(raster_files_final[1]) %>% setValues(0) %>%
crop(field_boundaries, mask = TRUE)
names(x) <- c("Red", "Green", "Blue", "NIR", "CI")
}
plot(x$CI, main = paste("CI map ", dates$week))

View file

@ -11,207 +11,3 @@ date_list <- function(end_date, offset){
return(list("week" = week, "year" = year, "days_filter" = days_filter))
}
# date_list <- function(weeks_in_the_paste){
# week <- week(Sys.Date()- weeks(weeks_in_the_paste) )
# year <- year(Sys.Date()- weeks(weeks_in_the_paste) )
# days_filter <- Sys.Date() - weeks(weeks_in_the_paste) - days(0:6)
#
# return(c("week" = week,
# "year" = year,
# "days_filter" = list(days_filter)))
#
# }
# Load a 4-band image, derive a chlorophyll index (NIR / Green - 1), clip it
# to the field boundaries, persist it as GeoTIFF plus VRT, and return the
# processed raster. Relies on the script-level directories `merged_final`
# (GeoTIFF output) and `daily_vrt` (VRT output).
#
# @param file Path to a raster file with bands Red, Green, Blue, NIR.
# @param field_boundaries Geometry used to crop/mask the image.
# @return The cropped raster with a CI layer appended.
create_mask_and_crop <- function(file, field_boundaries) {
  message("starting ", file)
  img <- rast(file)
  names(img) <- c("Red", "Green", "Blue", "NIR")
  message("raster loaded")
  # Chlorophyll index from the NIR and Green bands.
  img$CI <- img$NIR / img$Green - 1
  message("CI calculated")
  img <- terra::crop(img, field_boundaries, mask = TRUE)
  # Zero-valued pixels are treated as nodata.
  img[img == 0] <- NA
  stem <- tools::file_path_sans_ext(basename(file))
  tif_path <- here(merged_final, paste0(stem, ".tif"))
  writeRaster(img, tif_path, overwrite = TRUE)
  vrt_path <- here(daily_vrt, paste0(stem, ".vrt"))
  terra::vrt(tif_path, vrt_path, overwrite = TRUE)
  img
}
# Append a chlorophyll-index layer (band 4 / band 2 - 1, i.e. nir / green - 1)
# to a 5-band raster and name all layers.
#
# @param x Raster with layers red, green, blue, nir, cloud (in that order).
# @param drop_layers When TRUE, return only the new CI layer.
# @return The 6-layer stack, or just the CI layer when drop_layers = TRUE.
CI_func <- function(x, drop_layers = FALSE) {
  ci_layer <- x[[4]] / x[[2]] - 1
  add(x) <- ci_layer
  names(x) <- c("red", "green", "blue", "nir", "cloud", "CI")
  if (drop_layers) {
    x$CI
  } else {
    x
  }
}
# Cloud-mask a raster file and clip it to the field boundaries.
#
# Band 5 is the cloud flag: 0 = clear, non-zero = cloud. Cloud pixels are
# removed via an inverse mask, the result is cropped to `fields`, and a CI
# layer is appended via CI_func().
#
# Images whose pixel sum falls below the threshold are treated as (nearly)
# empty and skipped: the function returns NULL, which callers filter out.
#
# @param raster Path to a 5-band raster file (red, green, blue, nir, cloud).
# @param fields Field-boundary geometry used to crop/mask.
# @return Masked, cropped raster with CI layer, or NULL when skipped.
mask_raster <- function(raster, fields) {
  x <- rast(raster)
  # BUG FIX: was `emtpy_or_full <- global(x, sum)` — with any NA cells the sum
  # is NA and the `if` below errored ("missing value where TRUE/FALSE
  # needed"). na.rm = TRUE plus isTRUE() makes the emptiness check total.
  empty_or_full <- global(x, sum, na.rm = TRUE)
  # NOTE(review): 2000000 is a magic threshold — confirm against typical tile
  # size before tuning.
  if (isTRUE(empty_or_full[1, ] >= 2000000)) {
    names(x) <- c("red", "green", "blue", "nir", "cloud")
    cloud <- x$cloud
    cloud[cloud == 0] <- NA                 # keep only cloud pixels in mask
    x_masked <- mask(x, cloud, inverse = TRUE) %>%  # drop cloudy pixels
      crop(., fields, mask = TRUE)
    x_masked <- x_masked %>% CI_func()      # append CI layer
    message(raster, " masked")
    return(x_masked)
  }
  # Explicit NULL for skipped rasters (was an implicit invisible NULL).
  NULL
}
# Extract the first ISO-format date (YYYY-MM-DD) from a file path.
# Vectorized over `file_path`; returns NA where no date is present
# (str_extract semantics).
date_extract <- function(file_path) {
str_extract(file_path, "\\d{4}-\\d{2}-\\d{2}")
}
# Return the last `num_char` characters of `text` (vectorized over `text`).
#
# When `num_char` exceeds nchar(text), the whole string is returned, because
# substr() clamps a non-positive start index to 1.
#
# @param text Character vector.
# @param num_char Number of trailing characters to keep.
# @return Character vector of trailing substrings, same length as `text`.
right <- function(text, num_char) {
  # Idiom fix: `<-` instead of `=` for the top-level assignment; logic is
  # unchanged from the original.
  substr(text, nchar(text) - (num_char - 1), nchar(text))
}
# load_fields <- function(geojson_path) {
# field_geojson <- st_read(geojson_path) %>%
# select(pivot, pivot_quadrant) %>%
# vect()
# return(field_geojson)
# }
#
# load_harvest_data <- function(havest_data_path){
# harvest_data <- readRDS(havest_data_path)
# return(harvest_data)
# }
# load_rasters <- function(raster_path, dates) {
# raster_files <- list.files(raster_path, full.names = TRUE, pattern = ".tif")
# filtered_files <- map(dates$days_filter, ~ raster_files[grepl(pattern = .x, x = raster_files)]) %>%
# compact() %>%
# flatten_chr()
#
# return(filtered_files)
# }
# mask_and_set_names <- function(filtered_files, fields) {
# rasters_masked <- map(filtered_files, mask_raster, fields = fields) %>% set_names(filtered_files)
# rasters_masked[sapply(rasters_masked, is.null)] <- NULL
# rasters_masked <- setNames(rasters_masked, map_chr(names(rasters_masked), date_extract))
#
# return(rasters_masked)
# }
# calculate_total_pix_area <- function(filtered_files, fields_geojson) {
# # total_pix_area <- rast(filtered_files[1]) %>%
# # subset(1) %>%
# # crop(fields_geojson, mask = TRUE)%>%
# # global(.data, fun = "notNA")
# total_pix_area <- rast(filtered_files[1]) %>% subset(1) %>% crop(fields_geojson, mask = TRUE) %>% freq(., usenames = TRUE)
#
# return(total_pix_area)
# }
# cloud_layer_extract <- function(rasters_masked){
# cloud_layer_rast <- map(rasters_masked, function(spatraster) {
# spatraster[[5]]
# }) %>% rast()
#
# return(cloud_layer_rast)
# }
# calculate_cloud_coverage <- function(cloud_layer_rast, total_pix_area) {
# cloud_perc_list <- freq(cloud_layer_rast, usenames = TRUE) %>%
# mutate(cloud_perc = (100 -((count/total_pix_area$count)*100)),
# cloud_thres_5perc = as.integer(cloud_perc < 5),
# cloud_thres_40perc = as.integer(cloud_perc < 40)) %>%
# rename(Date = layer) %>% select(-value, -count)
#
# cloud_index_5perc <- which(cloud_perc_list$cloud_thres_5perc == max(cloud_perc_list$cloud_thres_5perc))
# cloud_index_40perc <- which(cloud_perc_list$cloud_thres_40perc == max(cloud_perc_list$cloud_thres_40perc))
#
# return(list(cloud_perc_list = cloud_perc_list, cloud_index_5perc = cloud_index_5perc, cloud_index_40perc = cloud_index_40perc))
# }
# Build the weekly mosaic from cloud-masked rasters, preferring the least
# cloudy inputs: composite of <5%-cloud images first, then a single <5%
# image, then <40%-cloud images, then an all-NA fallback.
#
# @param cloud_coverage List holding `cloud_perc_list` (per-image cloud stats
#   with the integer flags cloud_thres_5perc / cloud_thres_40perc) and the
#   index vectors `cloud_index_5perc` / `cloud_index_40perc`.
# @param rasters_masked Collection of masked rasters aligned with the rows of
#   `cloud_perc_list`.
# @return A raster with layers red, green, blue, nir, cloud, CI.
process_cloud_coverage <- function(cloud_coverage, rasters_masked) {
  n_clear <- sum(cloud_coverage$cloud_perc_list$cloud_thres_5perc)
  n_usable <- sum(cloud_coverage$cloud_perc_list$cloud_thres_40perc)
  if (n_clear > 1) {
    message("More than 1 raster without clouds (<5%), max mosaic made ")
    cloudy_rasters_list <- rasters_masked[cloud_coverage$cloud_index_5perc]
    rsrc <- sprc(cloudy_rasters_list)
    x <- mosaic(rsrc)
    names(x) <- c("red", "green", "blue", "nir", "cloud", "CI")
  } else if (n_clear == 1) {
    message("Only 1 raster without clouds (<5%)")
    x <- rast(rasters_masked[cloud_coverage$cloud_index_5perc[1]])
    names(x) <- c("red", "green", "blue", "nir", "cloud", "CI")
  } else if (n_usable > 1) {
    message("More than 1 image contains clouds, composite made of <40% cloud cover images")
    cloudy_rasters_list <- rasters_masked[cloud_coverage$cloud_index_40perc]
    rsrc <- sprc(cloudy_rasters_list)
    x <- mosaic(rsrc)
    names(x) <- c("red", "green", "blue", "nir", "cloud", "CI")
  } else if (n_usable == 1) {
    # BUG FIX: this branch was missing. With exactly one <40%-cloud image and
    # no <5% images, none of the original branches assigned `x` and
    # `return(x)` raised "object 'x' not found". Mirrors the single-image
    # branch of the inline mosaic script elsewhere in this file.
    message("Only 1 image available but contains clouds")
    x <- rast(rasters_masked[cloud_coverage$cloud_index_40perc[1]])
    names(x) <- c("red", "green", "blue", "nir", "cloud", "CI")
  } else {
    message("No cloud free images available")
    x <- rast(rasters_masked[1])
    # NOTE(review): `x[x] <- NA` indexes the raster by its own values; if the
    # intent is "blank every cell", `x[] <- NA` looks more likely — confirm.
    x[x] <- NA
    names(x) <- c("red", "green", "blue", "nir", "cloud", "CI")
  }
  x
}
#functions for CI_data_prep
# create_mask_and_crop <- function(file, pivot_sf_q) {
# # file <- filtered_files[5]
# message("starting ", file)
# loaded_raster <- rast(file)
# names(loaded_raster) <- c("Red", "Green", "Blue", "NIR")
# # names(CI) <- c("green","nir")
# message("raster loaded")
#
# # CI <- CI[[2]] / CI[[1]] - 1
# CI <- loaded_raster$NIR / loaded_raster$Green - 1
#
# loaded_raster$CI <- CI
# # CI <- CI$nir/CI$green-1
# message("CI calculated")
# loaded_raster <- terra::crop(loaded_raster, pivot_sf_q, mask = TRUE) #%>% CI_func()
#
# loaded_raster[loaded_raster == 0] <- NA
# # names(v_crop) <- c("red", "green", "blue","nir", "cloud" ,"CI")
# # v_crop$CI <- v_crop$CI - 1
# new_file <- here(merged_final, paste0(tools::file_path_sans_ext(basename(file)), ".tif"))
# writeRaster(loaded_raster, new_file, overwrite = TRUE)
#
# vrt_file <- here(daily_vrt, paste0(tools::file_path_sans_ext(basename(file)), ".vrt"))
# terra::vrt(new_file, vrt_file, overwrite = TRUE)
#
# # v_crop <- mask_raster(v, pivot_sf_q)
# return(loaded_raster)
# }

View file

@ -1,111 +1,38 @@
library('readxl')
#chemba
# Load field boundaries and harvesting data per project.
# NOTE(review): each branch below contains diff residue — old (removed) and
# new (added) assignments pasted together without +/- markers. Symptoms:
# field_boundaries_sf / harvesting_data are assigned twice in the chemba
# branch, and the filter( opened for pivots_dates0 is never closed before the
# next assignment, so this span is not valid R as pasted. Keep only the new
# (generic harvest.xlsx-based) half of each pair when reconciling.
if(project_dir == "chemba1"){
message("Yield data for Chemba")
# --- old (pre-commit) boundary loading with hard-coded estate pivots ---
field_boundaries_sf <- st_read(here(data_dir, "pivot.geojson")) %>% dplyr::select(pivot, pivot_quadrant)%>%
mutate(sub_area = case_when(pivot %in% c("1.1", "1.2", "1.3", "1.4", "1.6", "1.7", "1.8", "1.9", "1.10", "1.11", "1.12", "1.13", "1.14" , "1.16" , "1.17" , "1.18" , "6.1", "6.2", "DL1.1", "DL1.3") ~ "estate",
TRUE ~ "Cooperative"))
names(field_boundaries_sf) <- c("field", "sub_field", "geometry", "sub_area")
field_boundaries <- field_boundaries_sf %>% vect()
names(field_boundaries) <- c("field", "sub_field", "sub_area")
# field_boundaries_merged <- st_read(here(data_dir, "pivot.geojson")) %>% dplyr::select(pivot, pivot_quadrant) %>% group_by(pivot) %>% summarise() %>% vect()
joined_spans <-st_read(here(data_dir, "span.geojson")) %>% st_transform(crs(field_boundaries_sf))
names(joined_spans) <- c("field", "area", "radius", "spans", "span", "geometry")
# NOTE(review): this filter( call is never closed — removed diff half.
pivots_dates0 <- readRDS(here(harvest_dir, "harvest_data_new")) %>% filter(
pivot %in% c("1.1", "1.2", "1.3", "1.4", "1.6", "1.7", "1.8", "1.9", "1.10", "1.11", "1.12", "1.13",
"1.14" , "1.16" , "1.17" , "1.18" ,"2.1", "2.2", "2.3" , "2.4", "2.5", "3.1", "3.2", "3.3",
"4.1", "4.2", "4.3", "4.4", "4.5", "4.6", "5.1" ,"5.2", "5.3", "5.4", "6.1", "6.2", "DL1.1", "DL1.3")
# --- new (post-commit) generic loading from pivot.geojson + harvest.xlsx ---
field_boundaries_sf <- st_read(here(data_dir, "pivot.geojson"))
names(field_boundaries_sf) <- c("field", "sub_field", "geometry")
field_boundaries <- field_boundaries_sf %>% terra::vect()
harvesting_data <- read_excel(here(data_dir, "harvest.xlsx")) %>%
dplyr::select(
c(
"field",
"sub_field",
"year",
"season_start",
"season_end",
"age",
"sub_area",
"tonnage_ha"
)
) %>%
mutate(
field = as.character(field),
sub_field = as.character(sub_field),
year = as.numeric(year),
season_start = as.Date(season_start),
season_end = as.Date(season_end),
age = as.numeric(age),
sub_area = as.character(sub_area),
tonnage_ha = as.numeric(tonnage_ha)
) %>%
mutate(
# Ongoing seasons are clipped to today; age recomputed in whole weeks.
season_end = case_when(season_end > Sys.Date() ~ Sys.Date(),
TRUE ~ season_end),
age = round(as.numeric(season_end - season_start) / 7, 0)
)
# --- old season-column reshaping (superseded, depends on pivots_dates0) ---
harvesting_data <- pivots_dates0 %>%
dplyr::select(c("pivot_quadrant", "pivot", "season_start_2021", "season_end_2021", "season_start_2022",
"season_end_2022", "season_start_2023", "season_end_2023", "season_start_2024", "season_end_2024", "Age")) %>%
pivot_longer(cols = starts_with("season"), names_to = "Year", values_to = "value") %>%
separate(Year, into = c("name", "Year"), sep = "(?<=season_start|season_end)\\_", remove = FALSE) %>%
mutate(name = str_to_title(name)) %>%
pivot_wider(names_from = name, values_from = value) %>%
rename(Field = pivot,
subField = pivot_quadrant)
} else if (project_dir == "xinavane"){
library(readxl)
message("Yield data for Xinavane")
field_boundaries_sf <- st_read(here(data_dir, "pivot.geojson")) %>% dplyr::select(-Pivot)
names(field_boundaries_sf) <- c("field", "sub_field", "geometry")
field_boundaries <- field_boundaries_sf %>% vect()
# NOTE(review): selects `Field` after names were lowercased to "field" —
# likely stale casing from the removed diff half; confirm.
joined_spans <- field_boundaries_sf %>% dplyr::select(Field)
pivots_dates0 <- read_excel(here(harvest_dir, "Yield data.xlsx"),
skip = 3,
col_types = c("numeric", "text", "skip", "text", "numeric", "numeric", "numeric", "numeric", "date",
"numeric", "skip", "numeric")) %>%
rename(
year = 1,
field = 2,
sub_area = 3,
hectares = 4,
tons = 5,
tcha = 6,
tchy = 7,
season_end = 8,
age = 9,
ratoon = 10
) %>%
# NOTE(review): mixed casings (Season_end vs season_end, Field vs field)
# suggest old/new diff halves merged — reconcile before running.
mutate(Season_end = ymd(season_end),
Season_start = as.Date(season_end - (duration(months = age))),
subField = Field) #don't forget to add 2022 as a year for the 'curent' season
# Shift the historical yield workbook forward six years to stand in for the
# current seasons (presumably placeholder data — TODO confirm).
pivots_dates0 <- pivots_dates0 %>%
mutate(year = year + 6)
# Add 6 years to Season_end column
pivots_dates0 <- pivots_dates0 %>%
mutate(season_end = season_end + years(6))
# Add 6 years to Season_start column
pivots_dates0 <- pivots_dates0 %>%
mutate(season_start = season_start + years(6))
harvesting_data <- pivots_dates0 %>% dplyr::select(c("field","sub_field", "year", "season_start","season_end", "age" , "sub_area", "tcha"))
} else {
# Default project: generic pivot.geojson boundaries + harvest.xlsx yields.
field_boundaries_sf <- st_read(here(data_dir, "pivot.geojson"))
head(field_boundaries_sf)
names(field_boundaries_sf) <- c("field", "sub_field", "geometry")
field_boundaries <- field_boundaries_sf %>% terra::vect()
harvesting_data <- read_excel(here(data_dir, "harvest.xlsx")) %>%
dplyr::select(c("field", "sub_field", "year", "season_start", "season_end", "age", "sub_area", "tonnage_ha")) %>%
mutate(field = as.character(field),
sub_field = as.character(sub_field),
year = as.numeric(year),
season_start = as.Date(season_start),
season_end = as.Date(season_end),
age = as.numeric(age),
sub_area = as.character(sub_area),
tonnage_ha = as.numeric(tonnage_ha)
) %>%
mutate(season_end = case_when(
# Clip ongoing seasons to today; recompute age in whole weeks.
season_end > Sys.Date() ~ Sys.Date(),
TRUE ~ season_end),
age = round(as.numeric(season_end - season_start)/7,0))
}