SmartCane/r_app/CI_report_dashboard_planet.Rmd

---
params:
  ref: "word-styles-reference-var1.docx"
  output_file: CI_report.docx
  report_date: "2024-04-18"
  data_dir: "Sony"
output:
 # html_document:
 #   toc: yes
 #   df_print: paged
  word_document:
    reference_docx: !expr file.path("word-styles-reference-var1.docx")
    toc: yes
editor_options:
  chunk_output_type: console
---

```{r setup, include=FALSE}
#set de filename van de output
# knitr::opts_chunk$set(echo = TRUE)
# output_file <- params$output_file
report_date <- params$report_date
#
#
# # Activeer de renv omgeving
# renv::activate()
# renv::deactivate()
# Optioneel: Herstel de omgeving als dat nodig is
# Je kunt dit commentaar geven als je het normaal niet wilt uitvoeren
# renv::restore()
```

```{r libraries, message=FALSE, warning=FALSE, include=FALSE}
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
library(here)
library(sf)
library(tidyverse)
library(tmap)
library(lubridate)
library(exactextractr)
library(zoo)
library(raster)
library(terra)
library(rsample)
library(caret)
library(randomForest)
library(CAST)


```

```{r directories, message=FALSE, warning=FALSE, include=FALSE}
project_dir <- params$data_dir
# laravel_storage_dir <- here("laravel_app/storage/app/",params$data_dir)
laravel_storage_dir <- here("laravel_app/storage/app",project_dir)

data_dir <- here(laravel_storage_dir, "Data")
# message('DATA_DIR',data_dir)
extracted_CI_dir <- here(data_dir, "extracted_ci")
daily_CI_vals_dir <- here(extracted_CI_dir, "daily_vals")
cumulative_CI_vals_dir <- here(extracted_CI_dir, "cumulative_vals")
harvest_dir <-  here(data_dir, "HarvestData")

weekly_CI_mosaic <- here(laravel_storage_dir, "weekly_mosaic")

source(here("r_app", "parameters_project.R"))
# s2_dir <- "C:/Users/timon/Resilience BV/4002 CMD App - General/4002 CMD Team/4002 TechnicalData/04 WP2 technical/python/chemba_S2/"
```


```{r week, message=FALSE, warning=FALSE, include=FALSE}
today <- as.character(report_date)
week <- week(today)
# week <- 25
# today = "2024-06-21"


#today = as.character(Sys.Date())
#week = lubridate::week(Sys.time())
## week = 26
#title_var <- paste0("CI dashboard week ", week, " - all pivots dashboard using 3x3 meter resolution")
subtitle_var <- paste("Report generated on", Sys.Date())

today_minus_1 <- as.character(ymd(today) - 7)
today_minus_2 <- as.character(ymd(today) - 14)
today_minus_3 <- as.character(ymd(today) - 21)

week <- sprintf("%02d", week)
week_minus_1 <- sprintf("%02d", week(today_minus_1))
week_minus_2 <- sprintf("%02d", week(today_minus_2))
week_minus_3 <- sprintf("%02d", week(today_minus_3))

year = year(today)
year_1 = year(today_minus_1)
year_2 = year(today_minus_2)
year_3 = year(today_minus_3)

```

`r subtitle_var`

\pagebreak
# Explanation of the maps

This PDF-dashboard shows the status of your fields on a weekly basis. We will show this in different ways:

1)	The first way is with a general overview of field heterogeneity using ‘variation’ – a higher number indicates more differences between plants in the same field.
2)	The second map shows a normal image of the latest week in colour, of the demo fields.
3)	Then come the maps per field, which show the status for three weeks ago, two weeks ago, last week, and this week, as well as a percentage difference map between last week and this week. The percentage difference maps shows the relative difference in growth over the last week, with positive numbers showing growth, and negative numbers showing decline.
4)  Below the maps are graphs that show how each pivot quadrant is doing, measured through the chlorophyll index.

```{r data, message=TRUE, warning=TRUE, include=FALSE}
# get latest CI index

# remove_pivots <- c("1.1", "1.12", "1.8", "1.9", "1.11", "1.14")
CI_quadrant <- readRDS(here(cumulative_CI_vals_dir,"All_pivots_Cumulative_CI_quadrant_year_v2.rds"))# %>%
 # rename(pivot_quadrant = field)

path_to_week_current = here(weekly_CI_mosaic, paste0("week_",week, "_", year, ".tif"))
path_to_week_minus_1 = here(weekly_CI_mosaic, paste0("week_",week_minus_1, "_", year_1, ".tif"))
path_to_week_minus_2 = here(weekly_CI_mosaic, paste0("week_",week_minus_2, "_", year_2, ".tif"))
path_to_week_minus_3 = here(weekly_CI_mosaic, paste0("week_",week_minus_3, "_", year_3, ".tif"))

print("required mosaic paths")
print(path_to_week_current)
print(path_to_week_minus_1)
print(path_to_week_minus_2)
print(path_to_week_minus_3)

CI    <- brick(path_to_week_current) %>% subset("CI")
CI_m1 <- brick(path_to_week_minus_1) %>% subset("CI")
CI_m2 <- brick(path_to_week_minus_2) %>% subset("CI")
CI_m3 <- brick(path_to_week_minus_3) %>% subset("CI")


# last_week_dif_raster <- ((CI - CI_m1) / CI_m1) * 100
last_week_dif_raster_abs <- (CI - CI_m1)
```
```{r data_129, message=TRUE, warning=TRUE, include=FALSE}
three_week_dif_raster_abs <- (CI - CI_m3)
```
```{r data_132, message=TRUE, warning=TRUE, include=FALSE}

# AllPivots0 <-st_read(here(data_dir_project, "pivot.geojson"))

# AllPivots0$pivot  <- factor(AllPivots0$pivot, levels = c("1.1", "1.2", "1.3", "1.4", "1.6", "1.7", "1.8", "1.9", "1.10", "1.11", "1.12", "1.13", "1.14" , "1.16" , "1.17" , "1.18" ,"2.1", "2.2", "2.3" , "2.4", "2.5", "3.1", "3.2", "3.3", "4.1", "4.2", "4.3", "4.4", "4.5", "4.6", "5.1" ,"5.2", "5.3", "5.4", "6.1", "6.2", "DL1.1", "DL1.3"))
AllPivots0 <- field_boundaries_sf

# joined_spans <-st_read(here(data_dir_project, "span.geojson")) %>% st_transform(crs(AllPivots0))

# pivots_dates <- readRDS(here(harvest_dir, "harvest_data_new"))
# pivots_dates$pivot  <- factor(pivots_dates$pivot, levels = c("1.1", "1.2", "1.3", "1.4", "1.6", "1.7", "1.8", "1.9", "1.10", "1.11", "1.12", "1.13", "1.14" , "1.16" , "1.17" , "1.18" ,"2.1", "2.2", "2.3" , "2.4", "2.5", "3.1", "3.2", "3.3", "4.1", "4.2", "4.3", "4.4", "4.5", "4.6", "5.1" ,"5.2", "5.3", "5.4", "6.1", "6.2", "DL1.1", "DL1.3"))

#AllPivots <- merge(AllPivots0, harvesting_data, by = c("field", "sub_field")) #%>%
#rename(field = pivot, sub_field = pivot_quadrant) #%>% select(-pivot.y)
#head(AllPivots)

#AllPivots_merged <- AllPivots %>% #dplyr::select(field, sub_field, sub_area) %>% unique() %>%
#   group_by(field) %>% summarise(sub_area = first(sub_area))

#AllPivots_merged <- st_transform(AllPivots_merged, crs = proj4string(CI))

#pivot_names <- unique(CI_quadrant$field)

```


```{r eval=FALSE, include=FALSE}
pivot_stats_q <- cbind(AllPivots0, round(exact_extract(CI, AllPivots0, c("coefficient_of_variation", "mean"), default_value = -9999),2)) %>%
  st_drop_geometry() %>% as_tibble()

hetero_pivots0 <- merge(AllPivots, pivot_stats_q %>% dplyr::select(-hectares, -radius, -pivot), by = "pivot_quadrant")

hetero_pivots <- hetero_pivots0 %>% #dplyr::filter(variable %in% "CV") %>%
  mutate(class = case_when(coefficient_of_variation       <= 0.001  ~ "Missing data",
                           coefficient_of_variation       >= 0.002 & coefficient_of_variation      < 0.1  ~ "Homogeneous",
                           coefficient_of_variation      >= 0.1 & coefficient_of_variation       < 0.2~ "Somewhat Homogeneous",
                           coefficient_of_variation       >= 0.2 & coefficient_of_variation       < 0.4 ~ "Somewhat Heterogeneous",
                           coefficient_of_variation      >= 0.4  ~ "Heterogeneous")) %>%
  mutate(class = as.factor(class))
# hetero_pivots %>% filter(pivot == "1.3")
hetero_pivots$class <- factor(hetero_pivots$class, levels = c("Missing data",  "Homogeneous","Somewhat Homogeneous", "Somewhat Heterogeneous", "Heterogeneous" ))
# hetero_pivots %>% select(pivot_quadrant, class, Age, coefficient_of_variation   , mean) %>% view()

# hetero_pivots %>% filter(class == "Somewhat Heterogeneous" ) %>% select(pivot_quadrant, class, Age, coefficient_of_variation   , mean)


Mypal <- c('#dcdcdc', '#008000','#8db600','#FFC300','#F22222')

hetero_plot <- function(data){
  # map <-
    tm_shape(data) + tm_polygons(col = "class", palette=Mypal) +
  tm_text("pivot_quadrant", size = 1/2) +
  tm_layout(main.title=paste0("Homogeneity of pivot quadrants, week ", week,  " 2022"),main.title.position = "center")+
  tm_compass(position = c("center", "top")) +
  tm_scale_bar(position = c("center", "top"))

  # print(map, width = 20, units = "cm")

}

```
\newpage
```{r eval=FALSE, fig.height=7, fig.width=10, message=FALSE, warning=FALSE, include=FALSE}
hetero_plot(hetero_pivots)
```


```{r function, message=FALSE, warning=FALSE, include=FALSE}

subchunkify <- function(g, fig_height=7, fig_width=5) {
  g_deparsed <- paste0(deparse(
    function() {g}
  ), collapse = '')

  sub_chunk <- paste0("
`","``{r sub_chunk_", floor(runif(1) * 10000), ", fig.height=", fig_height, ", fig.width=", fig_width, ", echo=FALSE}",
  "\n(",
    g_deparsed
    , ")()",
  "\n`","``
  ")

  cat(knitr::knit(text = knitr::knit_expand(text = sub_chunk), quiet = TRUE))
}

create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week, age){
  tm_shape(pivot_raster, unit = "m")+
  tm_raster(breaks = c(0,0.5,1,2,3,4,5,6,7,Inf), palette = "RdYlGn",legend.is.portrait = legend_is_portrait ,midpoint = NA) +
  tm_layout(main.title = paste0("\nMax CI week ", week,"\n",  age, " weeks old"),
            main.title.size = 0.7, legend.show = show_legend) # +
  # tm_shape(pivot_shape) +
  # tm_borders(lwd = 3) +  tm_text("sub_field", size = 1/2) +
  # tm_shape(pivot_spans) + tm_borders(lwd = 0.5, alpha=0.5)
}

create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week_1, week_2, age){
  tm_shape(pivot_raster, unit = "m")+
  tm_raster(breaks = c(-3,-2,-1,0,1,2, 3), palette = "RdYlGn",legend.is.portrait = legend_is_portrait ,midpoint = 0, title = "CI difference") +
  tm_layout(main.title =  paste0("CI change week ", week_1, "- week ",week_2, "\n", age," weeks old"),
            main.title.size = 0.7, legend.show = show_legend) # +
 # tm_shape(pivot_shape) +
 # tm_borders(lwd = 3) +  tm_text("sub_field", size = 1/2) +
 # tm_shape(pivot_spans) + tm_borders(lwd = 0.5, alpha=0.5)
}

ci_plot <- function(pivotName){
   # pivotName = "1.1"
   pivotShape <- AllPivots0 %>% terra::subset(field %in% pivotName)  %>% st_transform(crs(CI))
   age <- harvesting_data %>% dplyr::filter(field %in% pivotName) %>% dplyr::select(age) %>% unique()

   AllPivots2 <- AllPivots0 %>% dplyr::filter(field %in% pivotName)

   singlePivot <- CI %>% crop(., pivotShape) %>% mask(., pivotShape)

   singlePivot_m1 <- CI_m1 %>% crop(., pivotShape) %>% mask(., pivotShape)
   singlePivot_m2 <- CI_m2 %>% crop(., pivotShape) %>% mask(., pivotShape)
   # singlePivot_m3 <- CI_m3 %>% crop(., pivotShape) %>% mask(., pivotShape)

   abs_CI_last_week <- last_week_dif_raster_abs %>% crop(., pivotShape) %>% mask(., pivotShape)
   abs_CI_three_week <- three_week_dif_raster_abs %>% crop(., pivotShape) %>% mask(., pivotShape)

   planting_date <- harvesting_data %>% dplyr::filter(field %in% pivotName) %>% ungroup()  %>% dplyr::select(season_start)  %>% unique()

   joined_spans2 <- AllPivots0 %>% st_transform(crs(pivotShape))  %>%  dplyr::filter(field %in% pivotName) #%>% unique() %>% st_crop(., pivotShape)

   CImap_m2 <- create_CI_map(singlePivot_m2, AllPivots2,  joined_spans2, show_legend= T, legend_is_portrait = T, week = week_minus_2, age = age  -2)
   CImap_m1 <- create_CI_map(singlePivot_m1, AllPivots2,  joined_spans2, show_legend= F, legend_is_portrait = F, week = week_minus_1, age = age  -1)
   CImap <- create_CI_map(singlePivot, AllPivots2,  joined_spans2, show_legend= F, legend_is_portrait = F, week = week, age = age )


   CI_max_abs_last_week <- create_CI_diff_map(abs_CI_last_week,AllPivots2,  joined_spans2,  show_legend = T, legend_is_portrait = T, week_1 = week, week_2 = week_minus_1, age = age)
   CI_max_abs_three_week <- create_CI_diff_map(abs_CI_three_week, AllPivots2,  joined_spans2, show_legend = T, legend_is_portrait = T, week_1 = week, week_2 = week_minus_3, age = age)

   tst <- tmap_arrange(CImap_m2, CImap_m1, CImap,CI_max_abs_last_week, CI_max_abs_three_week, nrow = 1)

   cat(paste("## Pivot", pivotName, "-", age, "weeks after planting/harvest", "\n"))
   # cat("\n")
   # cat('<h2> Pivot', pivotName, '- week', week, '-', age$Age, 'weeks after planting/harvest <h2>')
 # cat(paste("# Pivot",pivots$pivot[i],"\n"))
   print(tst)

 }

cum_ci_plot <- function(pivotName){
  # pivotName <- "4042903"
  # Generate a sequence of dates for the last 12 months
  end_date <- Sys.Date()
  start_date <- end_date %m-% months(11) # 11 months ago from end_date
  date_seq <- seq.Date(from = start_date, to = end_date, by = "month")
  midpoint_date <- start_date + (end_date - start_date) / 2
  # Create the empty plot
  g <- ggplot() +
    scale_x_date(limits = c(start_date, end_date), date_breaks = "1 month", date_labels = "%m-%Y") +
    scale_y_continuous(limits = c(0, 4)) +
    labs(title = paste("14 day rolling MEAN CI rate - field ", pivotName),
         x = "Date", y = "CI Rate") +
    theme(axis.text.x = element_text(angle = 60, hjust = 1),
          legend.justification = c(1, 0), legend.position = c(1, 0),
          legend.title = element_text(size = 8),
          legend.text = element_text(size = 8)) +
    annotate("text", x = midpoint_date, y = 2, label = "No data available", size = 6, hjust = 0.5)
  subchunkify(g, 3.2, 10)
}


cum_ci_plot2 <- function(pivotName){

  # pivotName = "1.1"
  data_ci <- CI_quadrant %>% filter(field == pivotName)
  data_ci2 <- data_ci  %>% mutate(CI_rate = cumulative_CI/DOY,
                                  week = week(Date))%>% group_by(field) %>%
    mutate(mean_rolling10 = rollapplyr(CI_rate  , width = 10, FUN = mean, partial = TRUE)) #%>%

  date_preperation_perfect_pivot <- data_ci2 %>% group_by(season)  %>% summarise(min_date = min(Date),
                                                                                max_date = max(Date),
                                                                                days = max_date - min_date)

  unique_seasons <- unique(date_preperation_perfect_pivot$season)


  g <- ggplot(data= data_ci2)  +
          geom_line( aes(Date, mean_rolling10, col = sub_field)) +
          # geom_line(data= perfect_pivot, aes(Date , mean_rolling10, col = "Model CI (p5.1 Data 2022, \n date x axis is fictive)"), lty="11",size=1) +
          labs(title = paste("14 day rolling MEAN CI rate - Pivot ", pivotName))+
          # scale_y_continuous(limits=c(0.5,3), breaks = seq(0.5, 3, 0.5))+
          scale_x_date(date_breaks = "1 month", date_labels =  "%m-%Y") +
          theme(axis.text.x = element_text(angle = 60, hjust = 1),
                          legend.justification=c(1,0), legend.position = c(1, 0),
                          legend.title = element_text(size = 8),
                          legend.text = element_text(size = 8)) +
          guides(color = guide_legend(nrow = 2, byrow = TRUE))
  subchunkify(g, 3.2, 10)

}

```

```{r eval=FALSE, fig.height=7.2, fig.width=10, message=FALSE, warning=FALSE, include=FALSE}
 RGB_raster <- list.files(paste0(s2_dir,week),full.names = T, pattern = ".tiff", recursive = TRUE)[1] #use pattern = '.tif$' or something else if you have multiple files in this folder


RGB_raster    <- brick(RGB_raster)
# RGB_raster    <- brick(here("planet", paste0("week_",week, ".tif")))
tm_shape(RGB_raster, unit = "m") + tm_rgb(r=1, g=2, b=3, max.value = 255) +
  tm_layout(main.title = paste0("RGB image of the fields - week ", week),
            main.title.position =  'center') +
    tm_scale_bar(position = c("right", "top"), text.color = "white") +

   tm_compass(position = c("right", "top"), text.color = "white") +
  tm_shape(AllPivots0)+ tm_borders( col = "white") +
    tm_text("pivot_quadrant", size = .6, col = "white")

```
\newpage

```{r ci_overzicht_kaart, echo=FALSE, fig.height=7.3, fig.width=9, message=FALSE, warning=FALSE}
tm_shape(CI, unit = "m")+
    tm_raster(breaks = c(0,0.5,1,2,3,4,5,6,7,Inf), palette = "RdYlGn", midpoint = NA,legend.is.portrait = F) +
  tm_layout(legend.outside = TRUE,legend.outside.position = "bottom",legend.show = T, main.title = "Overview all fields (CI)")+
   tm_scale_bar(position = c("right", "top"), text.color = "black") +

   tm_compass(position = c("right", "top"), text.color = "black") +
  tm_shape(AllPivots0)+ tm_borders( col = "black") +
    tm_text("sub_field", size = .6, col = "black")

```
\newpage

```{r ci_diff_kaart, echo=FALSE, fig.height=7.3, fig.width=9, message=FALSE, warning=FALSE}

 tm_shape(last_week_dif_raster_abs, unit = "m")+
     tm_raster(breaks = c(-3,-2,-1,0,1,2, 3), palette = "RdYlGn", midpoint = NA,legend.is.portrait = F) +
   tm_layout(legend.outside = TRUE,legend.outside.position = "bottom",legend.show = T, main.title = "Overview all fields - CI difference")+
    tm_scale_bar(position = c("right", "top"), text.color = "black") +

    tm_compass(position = c("right", "top"), text.color = "black") +
   tm_shape(AllPivots0)+ tm_borders( col = "black") +
     tm_text("sub_field", size = .6, col = "black")

```

\newpage
```{r plots_ci_estate, eval=TRUE, fig.height=3.8, fig.width=10, message=FALSE,echo=FALSE, warning=FALSE, include=TRUE, results='asis'}
# # pivots <- AllPivots_merged %>% filter(pivot != c("1.1", "1.17"))
# pivots_estate <- AllPivots_merged  %>% filter(field %in% c("6.2")) #, "1.2", "1.3", "1.4")) #, "1.6", "1.7", "1.8", "1.9", "1.10", "1.11", "1.12", "1.13", "1.14" , "1.16" , "1.17" , "1.18" , "6.1", "6.2", "DL1.1", "DL1.3")) %>% filter(pivot != "1.17")


# pivots <- AllPivots_merged %>% filter(pivot != c("1.1", "1.17"))
# pivots_estate <- AllPivots_merged %>% filter(pivot %in% c("1.1", "1.2", "1.7")) %>% filter(pivot != "1.17")


walk(AllPivots0$field, ~ {
 # cat("# Hello!!!")
  cat("\n")  # Add an empty line for better spacing
  ci_plot(.x)
 cat("\n")
 # cum_ci_plot(.x)
#  cat("\n")
   cum_ci_plot2(.x)
})
```

```{r looping_over_sub_area,  echo=FALSE, fig.height=3.8, fig.width=10, message=FALSE, warning=FALSE, results='asis', eval=FALSE}
pivots_grouped <- AllPivots0 # %>%
#  group_by(sub_area) %>%
#  arrange(sub_area)  # Optional: arrange the groups alphabetically by sub_area

# Iterate over each subgroup
for (subgroup in unique(pivots_grouped$sub_area)) {
cat("# HELLO!!!")
print(" PRINT")
#  cat("\n")
#  cat("# Subgroup: ", subgroup, "\n")  # Add a title for the subgroup
  subset_data <- filter(pivots_grouped, sub_area == subgroup)
#  cat("\\pagebreak")
  walk(subset_data$field, ~ {
  #  cat("\n")  # Add an empty line for better spacing
    ci_plot(.x)
  #  cat("\n")
    cum_ci_plot(.x)
  #  cat("\n")
  })
}
```

```{r eval=FALSE, fig.height=10, fig.width=14, include=FALSE}
CI_all2 <- readRDS(here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_whole_pivot_year.rds")) %>%
  mutate(#line = substr(pivot, 1 , 1),
         season = as.factor(season))

pivots_dates <- readRDS(here(harvest_dir, "harvest_data_new")) #%>%

pvt_age <- pivots_dates %>% ungroup() %>% select(pivot, Age) %>% unique()
CI_all2 <- left_join(CI_all2, pvt_age , by = "pivot") %>% mutate(month =  plyr::round_any((Age/4),2)) %>%
  mutate(month = case_when(month > 16 ~ 18,
                           TRUE ~month)) %>%
  group_by(pivot) %>% filter(Age == max(Age)) %>% ungroup()

CI_all2$season <- ordered(CI_all2$season, levels = c("Data_2021", "Data_2022"))
# CI_all2 <- CI_all2 %>% mutate(season_order = as.integer(season))
latest_model <- CI_all2 %>% group_by(pivot) %>% filter(season =="Data_2022")
# latest_model <- CI_all2 %>% group_by(pivot) %>% arrange(season, desc(DOY)) %>% slice(1)

# CI_all2 <- CI_all %>% group_by(pivot, DOY ) %>% mutate(pivot_cumulative_CI = mean(cumulative_CI))
# label_data <- CI_all2 %>% group_by(pivot) %>% arrange(season, desc(cumulative_CI)) %>% slice(1)  %>% mutate(label = paste(pivot, " - ", round(cumulative_CI)))
label_data <- latest_model %>% arrange(season, desc(cumulative_CI)) %>% slice(1)  %>% mutate(label = paste(pivot, " - ", round(cumulative_CI)))

max_day <- label_data %>% group_by(pivot) %>% summarise(max_day = max(DOY))

ggplot(data= CI_all2%>% filter(season =="Data_2022"), aes(DOY, cumulative_CI, col = pivot)) +
           facet_wrap(~month) +
           geom_line() +
           # scale_y_continuous(breaks = seq(0, max(label_data$cumulative_CI) + 100, by = 100)) +
           labs(title = "Cumulative CI values over the year per pivot, split per 2-month age (rounded down)", x = "Days after harvest/planting (scale is per 2 weeks)", y = "Cumulative CI value",
                color = "Line") +
           geom_label_repel(data = label_data %>% filter(model %in% latest_model$model), aes(DOY, cumulative_CI, label = label), box.padding = 1,
                            # ylim = c(1300, NA),
                            max.overlaps = Inf
                                # segment.linetype = 4,

    # segment.curvature = -1e-20,
    # arrow = arrow(length = unit(0.015, "npc"))
    ) +
  theme(legend.position="right", legend.text = element_text(size=8), legend.title = element_text(size = 8),
        plot.title = element_text(size=19)) +
  # geom_smooth()+
  guides(fill = guide_legend(nrow=2,byrow=TRUE)) +
  scale_y_continuous(breaks=seq(0,max(label_data$cumulative_CI),100)) +
  scale_x_continuous(breaks=seq(0,max(max_day$max_day),30)) + theme(axis.text.x = element_text(angle = 90)) +
  labs(x = "Weeks")+
  theme(legend.position = "none")

```
# Yield prediction
The below table shows estimates of the biomass if you would harvest them now.
```{r eval=FALSE, message=FALSE, warning=FALSE, include=FALSE}
CI_quadrant <-  readRDS(here(cumulative_CI_vals_dir,"All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>%
  rename( pivot_quadrant = field)#All_pivots_Cumulative_CI.rds
ggplot(CI_quadrant %>% filter(pivot %in% "1.11")) +
  geom_line(aes(DOY, cumulative_CI, col = as.factor(season))) +
  facet_wrap(~pivot_quadrant)

pivots_dates0 <- pivots_dates0 %>% ungroup() %>% unique() %>%
  dplyr::select(field, sub_field, Tcha_2021, Tcha_2022 )  %>%
  pivot_longer(cols = c("Tcha_2021", "Tcha_2022"), names_to = "Tcha_Year", values_to = "Tcha") %>%
  filter(Tcha > 50) %>%
  mutate(season = as.integer(str_extract(Tcha_Year, "\\d+")))

CI_and_yield <- left_join(CI_quadrant , pivots_dates0, by = c("pivot", "pivot_quadrant", "season")) %>% filter(!is.na(Tcha)) %>%
  group_by(pivot_quadrant, season) %>% slice(which.max(DOY)) %>%
  dplyr::select(pivot, pivot_quadrant, Tcha_Year, Tcha, cumulative_CI, DOY, season) %>%
  mutate(CI_per_day = cumulative_CI/ DOY)

ggplot(CI_and_yield) +
  geom_point(aes(Tcha, CI_per_day, col = Tcha_Year ))


set.seed(20)
CI_and_yield_split <- initial_split(CI_and_yield, prop = 0.75, strata = pivot_quadrant)
CI_and_yield_test <- training(CI_and_yield_split)
CI_and_yield_validation <- testing(CI_and_yield_split)


predictors <- c(  "cumulative_CI" , "DOY" ,"CI_per_day"  )
response <- "Tcha"
CI_and_yield_test <- as.data.frame(CI_and_yield_test)

ctrl <- trainControl(method="cv",
                     savePredictions = TRUE,
                     allowParallel= TRUE,
                     number = 5,
                     verboseIter = TRUE)

set.seed(202)
model_ffs_rf <- ffs( CI_and_yield_test[,predictors],
                     CI_and_yield_test[,response],
                     method="rf" ,
                     trControl=ctrl,
                     importance=TRUE,
                     withinSE = TRUE,
                     tuneLength = 5,
                     na.rm = TRUE
)

pred_ffs_rf <-
  predict(model_ffs_rf, newdata = CI_and_yield_validation) %>%  as.data.frame() %>% rename(predicted_Tcha = ".") %>% mutate(
    pivot_quadrant = CI_and_yield_validation$pivot_quadrant,
    pivot = CI_and_yield_validation$pivot,
    Age_days = CI_and_yield_validation$DOY,
    total_CI = round(CI_and_yield_validation$cumulative_CI, 0),
    predicted_Tcha = round(predicted_Tcha, 0),
    season = CI_and_yield_validation$season
  )  %>%  dplyr::select(pivot , pivot_quadrant,  Age_days, total_CI, predicted_Tcha, season) %>%
  left_join(., CI_and_yield_validation, by = c("pivot",  "pivot_quadrant", "season")) %>%
  filter(Age_days > 250)


prediction_2023 <- CI_quadrant  %>% filter(season == "2023")  %>% group_by(pivot_quadrant) %>% slice(which.max(DOY))%>%
  mutate(CI_per_day = cumulative_CI/ DOY)

pred_rf_2023 <- predict(model_ffs_rf, newdata=prediction_2023) %>%
  as.data.frame() %>% rename(predicted_Tcha_2023 = ".") %>% mutate(pivot_quadrant = prediction_2023$pivot_quadrant,
                                                                   pivot = prediction_2023$pivot,
                                                                   Age_days = prediction_2023$DOY,
                                                                   total_CI = round(prediction_2023$cumulative_CI,0),
                                                                   predicted_Tcha_2023 = round(predicted_Tcha_2023, 0)) %>%
  filter(Age_days > 300) %>%
  dplyr::select(pivot ,pivot_quadrant,  Age_days, total_CI, predicted_Tcha_2023)%>%
  mutate(CI_per_day = round(total_CI/ Age_days, 1))


```

```{r yield_plaatjes, eval=FALSE, include=FALSE}
ggplot(pred_ffs_rf, aes(y = predicted_Tcha  , x = Tcha , col = pivot )) +
  geom_point()  +geom_abline() +
  scale_x_continuous(limits = c(50, 160))+
  scale_y_continuous(limits = c(50, 160)) +
  labs(title = "Model trained and tested on historical results - RF")

ggplot(pred_rf_2023, aes(total_CI , predicted_Tcha_2023 , col = pivot )) +
  geom_point() + labs(title = "2023 data (still to be harvested) - fields over 300 days old")

knitr::kable(pred_rf_2023)
```