added CI graphs to pdf

2023-12-12 16:37:47 +01:00 · 2023-12-12 16:37:47 +01:00 · bd4ac224ad
parent 36d74ed2af
commit bd4ac224ad
6 changed files with 143 additions and 118 deletions
--- a/data.xlsx
+++ b/data.xlsx
--- a/r_app/1_harvest_data_EcoFarm_v2.R
+++ b/r_app/1_harvest_data_EcoFarm_v2.R
@ -8,7 +8,7 @@ library(lubridate)
 library(readxl)

 #create directory
-storage_dir <- here("laravel_app/storage/app")
+storage_dir <- here("../laravel_app/storage/app")
 data_dir <- here(storage_dir, "Data")
 harvest_dir <-  here(data_dir, "HarvestData")

@ -41,39 +41,50 @@ excel_file_name = here(storage_dir,"harvesting_data", "Current - Pivots planting
 #get dates in same column

 dates <- read_excel(excel_file_name,
-                    skip=1,
+                    skip = 2,
                    col_types = c("text", "numeric", "text", "date", "numeric", "numeric", "numeric",
                                  "date", "numeric", "skip", "skip", "numeric", "numeric", "numeric","skip", #2020 harvesting data
                                  "date", "numeric", "skip", "skip", "numeric", "numeric", "numeric","skip", #2021 harvesting data
-                                  "date", "numeric", "skip", "skip", "numeric", "skip", "numeric","skip", #2022 harvesting data
-                                  "date", "numeric", "skip", "skip", "skip", "skip", "skip", "skip", "skip", "skip", "skip",  "numeric", "numeric","numeric","skip", #2023
-                                  "skip", "skip", "skip", "skip", "skip", "skip","skip", "skip")) %>% #empty columns
-  rename(pivot_quadrant = PIVOT,
-         planting_date = `Date planted`,
-         Age = `Age (WEEK)`,
-         Year_replanted = `Year replanted`,
-         Harvesting_date_2020 = `2020 Harvest data`,
-         Harvesting_age_2020 = `...9`,
-         MT_weight_2020 = `...10`,
-         Tcha_2020 = `...11`,
-         Tchm_2020 = `...12`,
-         Harvesting_date_2021 = `2021/2022 Harvest data`,
-         Harvesting_age_2021 = `...14`,
-         MT_weight_2021 = `...15`,
-         Tcha_2021 = `...16`,
-         Tchm_2021 = `...17`,
-         Harvesting_date_2022 = `2022/2023 Harvest data`,
-         Harvesting_age_2022 = `...19`,
-         MT_weight_2022 = `...20`,
-         Tcha_2022 = `...21`) %>%
+                                  "date", "numeric", "skip", "skip", "numeric", "skip", "numeric","numeric", "skip", #2022 harvesting data
+                                  "date", "numeric", "skip", "skip", "skip", "skip", "skip", "skip", "skip", "skip", "skip", "skip", "skip",  "numeric", "numeric","numeric","skip", #2023 harvesting data
+                                  "skip", "skip", "skip", "skip", "skip")) %>% #empty columns
+  rename(pivot_quadrant = 1,
+         area = 2,
+         variety = 3,
+         planting_date = 4,
+         Age = 5,
+         ratoon = 6,
+         Year_replanted = 7,
+         Harvesting_date_2020 = 8,
+         Harvesting_age_2020 = 9,
+         MT_weight_2020 = 10,
+         Tcha_2020 = 11,
+         Tchm_2020 = 12,
+         Harvesting_date_2021 = 13,
+         Harvesting_age_2021 = 14,
+         MT_weight_2021 = 15,
+         Tcha_2021 = 16,
+         Tchm_2021 = 17,
+         Harvesting_date_2022 = 18,
+         Harvesting_age_2022 = 19,
+         MT_weight_2022 = 20,
+         Tcha_2022 = 21,
+         Tchm_2022 = 22,
+         Harvesting_date_2023 = 23,
+         Harvesting_age_2023 = 24,
+         MT_weight_2023 = 25,
+         Tcha_2023 = 26,
+         Tchm_2023 = 27,
+  ) %>%
  
-  slice(-1) %>% #select(-age) %>%
-  filter(pivot_quadrant != "Total") %>% #drop_na(pivot_quadrant) %>%
+  # slice(-1) %>% #select(-age) %>%
+  # filter(pivot_quadrant != "Total") %>% #drop_na(pivot_quadrant) %>%
  mutate(planting_date          = ymd(planting_date         ),
         Harvesting_date_2020 = ymd(Harvesting_date_2020),
         Harvesting_date_2021 = ymd(Harvesting_date_2021),
         Harvesting_date_2022= ymd(Harvesting_date_2022),
-         Age = round(Age,0)) %>% filter(pivot_quadrant != "Total/Average")
+         Age = round(Age,0)) %>% filter(pivot_quadrant != "Total/Average")%>%
+  filter(pivot_quadrant != "Total")

 #copy each row and add ABCD
 quadrants <- dates %>% slice(rep(1:n(), each=4)) %>%
@ -168,12 +179,17 @@ quadrants2 <- quadrants %>%
    ),
    season_end_2022 = case_when(!is.na(Harvesting_date_2022) & !is.na(season_start_2022) ~ Harvesting_date_2022),
    
-    season_start_2023 = case_when(Ratoons == 0  ~  planting_date,
+    season_start_2023 = case_when(ratoon == 0  ~  planting_date,
                                  TRUE ~ Harvesting_date_2022),
    season_start_2023 = case_when(is.na(Harvesting_date_2022) ~  Harvesting_date_2021,
                                  TRUE ~ season_start_2023),
-    season_end_2023 = case_when(!is.na(season_start_2023) ~ ymd(Sys.Date()))
    
+    season_end_2023 = case_when(!is.na(Harvesting_date_2023) ~ Harvesting_date_2023,
+                                TRUE ~  ymd(Sys.Date())),
+    
+    season_start_2024 = case_when(!is.na(Harvesting_date_2023) ~ Harvesting_date_2023,
+                                  TRUE ~ NA),
+    season_end_2024 = case_when(!is.na(season_start_2024) ~ ymd(Sys.Date())),
  )

 saveRDS(quadrants2, here(harvest_dir, "harvest_data_new"))
--- a/r_app/2_CI_data_prep.R
+++ b/r_app/2_CI_data_prep.R
@ -38,7 +38,7 @@ dir.create(merged_final)


 # Creating weekly mosaic
-dates <- date_list(2)
+dates <- date_list(0)

 #load pivot geojson
 pivot_sf_q <- st_read(here( "pivot_20210625.geojson")) %>% dplyr::select(pivot, pivot_quadrant) %>% vect()
@ -252,7 +252,7 @@ pivots_dates0 <- readRDS(here(harvest_dir, "harvest_data_new")) %>% filter(
 #   separate(pivot_quadrant, into = c("name", "Year"), sep = "\\.")

 harvesting_data <- pivots_dates0 %>%
-  select(c("pivot_quadrant", "season_start_2021", "season_end_2021", "season_start_2022", "season_end_2022", "season_start_2023", "season_end_2023")) %>%
+  select(c("pivot_quadrant", "season_start_2021", "season_end_2021", "season_start_2022", "season_end_2022", "season_start_2023", "season_end_2023", "season_start_2024", "season_end_2024")) %>%
  pivot_longer(cols = starts_with("season"), names_to = "Year", values_to = "value") %>%
  separate(Year, into = c("name", "Year"), sep = "(?<=season_start|season_end)\\_", remove = FALSE) %>%
  mutate(name = str_to_title(name)) %>%
@ -273,12 +273,14 @@ pivot_stats <- extracted_values %>%
  summarise(across(everything(), ~ first(na.omit(.))))


- saveRDS(pivot_stats, here(cumulative_CI_vals_dir,"combined_CI_data.rds")) #used to save the rest of the data into one file
+ #saveRDS(pivot_stats, here(cumulative_CI_vals_dir,"combined_CI_data.rds")) #used to save the rest of the data into one file


 #load historic CI data and update it with last week of CI data
 combined_CI_data <- readRDS(here(cumulative_CI_vals_dir,"combined_CI_data.rds")) %>% drop_na(pivot_quadrant)
 pivot_stats2 <- bind_rows(pivot_stats, combined_CI_data)
+# pivot_stats2 <- purrr::map(list.files(here(daily_CI_vals_dir), full.names = TRUE, pattern = "quadrant"), readRDS) %>% list_rbind() %>% group_by(pivot_quadrant) %>%
+#   summarise(across(everything(), ~ first(na.omit(.))))

 pivots_data_present <- unique(pivots_dates0$pivot_quadrant)
 quadrant_list <- pivots_data_present
@ -305,23 +307,30 @@ pivot_select_model_Data_2022 <- unique(pivots_dates_Data_2022$pivot_quadrant )
 pivots_dates_Data_2023 <- pivots_dates0 %>% filter(!is.na(season_start_2023))
 pivot_select_model_Data_2023 <- unique(pivots_dates_Data_2023$pivot_quadrant)
 # #2024
-#pivots_dates_Data_2024 <- pivots_dates0 %>% filter(!is.na(season_start_2024))
-#pivot_select_model_Data_2024 <- unique(pivots_dates_Data_2024$pivot_quadrant)
+pivots_dates_Data_2024 <- pivots_dates0 %>% filter(!is.na(season_start_2024))
+pivot_select_model_Data_2024 <- unique(pivots_dates_Data_2024$pivot_quadrant)

 ## Extracting the correct CI values
 #Data_2021 <- map(pivot_select_model_Data_2021, ~ extract_CI_data(.x, harvesting_data = harvesting_data, field_CI_data = pivot_stats_long, season = 2021)) %>% list_rbind()
-#Data_2022 <- map(pivot_select_model_Data_2022, ~ extract_CI_data(.x, harvesting_data = harvesting_data, field_CI_data = pivot_stats_long, season = 2022)) %>% list_rbind()
+message('2021')
+Data_2022 <- map(pivot_select_model_Data_2022, ~ extract_CI_data(.x, harvesting_data = harvesting_data, field_CI_data = pivot_stats_long, season = 2022)) %>% list_rbind()
+message('2022')
 Data_2023 <- map(pivot_select_model_Data_2023, ~ extract_CI_data(.x, harvesting_data = harvesting_data, field_CI_data = pivot_stats_long, season = 2023)) %>% list_rbind()
-#Data_2024 <- map_dfr(pivot_select_model_Data_2024, ~ extract_CI_data(.x, harvesting_data = harvesting_data, field_CI_data = pivot_stats_long, season = 2024))
+message('2023')
+Data_2024 <- map(pivot_select_model_Data_2024, ~ extract_CI_data(.x, harvesting_data = harvesting_data, field_CI_data = pivot_stats_long, season = 2024)) %>% list_rbind()
+message('2024')


-#CI_all <- rbind(Data_2021, Data_2022, Data_2023)
-CI_all <-  Data_2023
+CI_all <- rbind(Data_2022, Data_2023, Data_2024)
+message('CI_all created')
+#CI_all <-  Data_2023

 CI_all <- CI_all %>% group_by(model) %>% mutate(CI_per_day = FitData - lag(FitData),
                                                cumulative_CI = cumsum(FitData))
+message('CI_all cumulative')

 head(CI_all)
-
+message('show head')

 saveRDS(CI_all, here(cumulative_CI_vals_dir,"All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
+message('rds saved')
--- a/r_app/CI_report_dashboard_planet.Rmd
+++ b/r_app/CI_report_dashboard_planet.Rmd
@ -282,7 +282,7 @@ cum_ci_plot <- function(pivotName){

  g <- ggplot() +
          facet_wrap(~season, scales = "free_x") +
-          geom_line(data= data_ci2 %>% filter(season %in% unique_seasons), aes(Date, mean_rolling10, col = pivot_quadrant)) +
+          geom_line(data= data_ci2 %>% filter(season %in% unique_seasons), aes(Date, mean_rolling10, col = Field)) +
          geom_line(data= perfect_pivot, aes(Date , mean_rolling10, col = "Model CI (p5.1 Data 2022, \n date x axis is fictive)"), lty="11",size=1) +
          labs(title = paste("14 day rolling MEAN CI rate - Pivot ", pivotName))+
          # scale_y_continuous(limits=c(0.5,3), breaks = seq(0.5, 3, 0.5))+
@ -359,7 +359,7 @@ pivots <- AllPivots_merged %>% filter(pivot != "1.17")

 for(i in pivots$pivot) {
     ci_plot(i)
-     #cum_ci_plot(i)
+     cum_ci_plot(i)
    }

 # lapply(pivots, function(pivot) {
@ -436,7 +436,7 @@ CI_and_yield <- left_join(CI_quadrant , pivots_dates0, by = c("pivot", "pivot_qu
  mutate(CI_per_day = cumulative_CI/ DOY)

 set.seed(20)
-CI_and_yield_split <- initial_split(CI_and_yield, prop = 0.75, weight = pivot_quadrant)
+CI_and_yield_split <- initial_split(CI_and_yield, prop = 0.75, strata = pivot_quadrant)
 CI_and_yield_test <- training(CI_and_yield_split)
 CI_and_yield_validation <- testing(CI_and_yield_split)

--- a/r_app/Rplots.pdf
+++ b/r_app/Rplots.pdf
--- a/runcane.sh
+++ b/runcane.sh
@ -14,7 +14,7 @@
 # Kopieer de excel file met harvesting data en maak directory aan indien nodig
 cd r_app
 #Rscript 1_harvest_data_EcoFarm_v2.R
-#Rscript 2_CI_data_prep.R
+Rscript 2_CI_data_prep.R
 #
 ## Runnen van Rmd bestand
 # -e betekent "evalueren" en -i specificeert de input file