ggplot2::labs(title = "Model Performance: \nPredicted vs Actual Tonnage/ha", x = "Actual tonnage/ha (Tcha)", y = "Predicted tonnage/ha (Tcha)") + ggplot2::theme_minimal() } if (nrow(pred_rf_current_season) > 0) { # Plot predicted yields by age ggplot2::ggplot(pred_rf_current_season, ggplot2::aes(x = Age_days, y = predicted_Tcha)) + ggplot2::geom_point(size = 2, alpha = 0.6) + ggplot2::labs(title = "Predicted Yields for Fields Over 300 Days \nOld Yet to Be Harvested", x = "Age (days)", y = "Predicted tonnage/ha (Tcha)") + ggplot2::scale_y_continuous(limits = c(0, 200)) + ggplot2::theme_minimal() # Display prediction table knitr::kable(pred_rf_current_season, digits = 0, caption = "Predicted Tonnage/ha for Fields Over 300 Days Old") } else { cat("No fields over 300 days old without harvest data available for yield prediction.") } }, error = function(e) { safe_log(paste("Error in yield prediction visualization:", e$message), "ERROR") cat("Error generating yield prediction visualizations. See log for details.") }) # Load and prepare yield prediction data with error handling tryCatch({ # Load CI quadrant data and fill missing values CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>% dplyr::group_by(model) %>% tidyr::fill(field, sub_field, .direction = "downup") %>% dplyr::ungroup() # Check if tonnage_ha is empty if (all(is.na(harvesting_data$tonnage_ha))) { safe_log("Lacking historic harvest data, please provide for yield prediction calculation", "WARNING") knitr::knit_exit() # Exit the chunk if tonnage_ha is empty } # Rename year column to season for consistency harvesting_data <- harvesting_data %>% dplyr::rename(season = year) # Join CI and yield data CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data, by = c("field", "sub_field", "season")) %>% dplyr::group_by(sub_field, season) %>% dplyr::slice(which.max(DOY)) %>% dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>% dplyr::mutate(CI_per_day = cumulative_CI / DOY) # Define predictors and response variables predictors <- c("cumulative_CI", "DOY", "CI_per_day") response <- "tonnage_ha" # Prepare test and validation datasets CI_and_yield_test <- CI_and_yield %>% as.data.frame() %>% dplyr::filter(!is.na(tonnage_ha)) CI_and_yield_validation <- CI_and_yield_test # Prepare prediction dataset (fields without harvest data) prediction_yields <- CI_and_yield %>% as.data.frame() %>% dplyr::filter(is.na(tonnage_ha)) # Configure model training parameters ctrl <- caret::trainControl( method = "cv", savePredictions = TRUE, allowParallel = TRUE, number = 5, verboseIter = TRUE ) # Train the model with feature selection set.seed(202) # For reproducibility model_ffs_rf <- CAST::ffs( CI_and_yield_test[, predictors], CI_and_yield_test[, response], method = "rf", trControl = ctrl, importance = TRUE, withinSE = TRUE, tuneLength = 5, na.rm = TRUE ) # Function to prepare predictions with consistent naming and formatting prepare_predictions <- function(predictions, newdata) { return(predictions %>% as.data.frame() %>% dplyr::rename(predicted_Tcha = ".") %>% dplyr::mutate( sub_field = newdata$sub_field, field = newdata$field, Age_days = newdata$DOY, total_CI = round(newdata$cumulative_CI, 0), predicted_Tcha = round(predicted_Tcha, 0), season = newdata$season ) %>% dplyr::select(field, sub_field, Age_days, total_CI, predicted_Tcha, season) %>% dplyr::left_join(., newdata, by = c("field", "sub_field", "season")) ) } # Predict yields for the validation dataset pred_ffs_rf <- prepare_predictions(stats::predict(model_ffs_rf, newdata = CI_and_yield_validation), CI_and_yield_validation) # Predict yields for the current season (focus on mature fields over 300 days) pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>% dplyr::filter(Age_days > 1) %>% dplyr::mutate(CI_per_day = round(total_CI / Age_days, 1)) safe_log("Successfully completed yield prediction calculations") }, error = function(e) { safe_log(paste("Error in yield prediction:", e$message), "ERROR") # Create empty dataframes to prevent errors in subsequent chunks pred_ffs_rf <- data.frame() pred_rf_current_season <- data.frame() }) # Load and prepare yield prediction data with error handling tryCatch({ # Load CI quadrant data and fill missing values CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>% dplyr::group_by(model) %>% tidyr::fill(field, sub_field, .direction = "downup") %>% dplyr::ungroup() # Check if tonnage_ha is empty if (all(is.na(harvesting_data$tonnage_ha))) { safe_log("Lacking historic harvest data, please provide for yield prediction calculation", "WARNING") knitr::knit_exit() # Exit the chunk if tonnage_ha is empty } # Rename year column to season for consistency harvesting_data <- harvesting_data %>% dplyr::rename(season = year) # Join CI and yield data CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data, by = c("field", "sub_field", "season")) %>% dplyr::group_by(sub_field, season) %>% dplyr::slice(which.max(DOY)) %>% dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>% dplyr::mutate(CI_per_day = cumulative_CI / DOY) # Define predictors and response variables predictors <- c("cumulative_CI", "DOY", "CI_per_day") response <- "tonnage_ha" # Prepare test and validation datasets CI_and_yield_test <- CI_and_yield %>% as.data.frame() %>% dplyr::filter(!is.na(tonnage_ha)) CI_and_yield_validation <- CI_and_yield_test # Prepare prediction dataset (fields without harvest data) prediction_yields <- CI_and_yield %>% as.data.frame() %>% dplyr::filter(is.na(tonnage_ha)) # Configure model training parameters ctrl <- caret::trainControl( method = "cv", savePredictions = TRUE, allowParallel = TRUE, number = 5, verboseIter = TRUE ) # Train the model with feature selection set.seed(202) # For reproducibility model_ffs_rf <- CAST::ffs( CI_and_yield_test[, predictors], CI_and_yield_test[, response], method = "rf", trControl = ctrl, importance = TRUE, withinSE = TRUE, tuneLength = 5, na.rm = TRUE ) # Function to prepare predictions with consistent naming and formatting prepare_predictions <- function(predictions, newdata) { return(predictions %>% as.data.frame() %>% dplyr::rename(predicted_Tcha = ".") %>% dplyr::mutate( sub_field = newdata$sub_field, field = newdata$field, Age_days = newdata$DOY, total_CI = round(newdata$cumulative_CI, 0), predicted_Tcha = round(predicted_Tcha, 0), season = newdata$season ) %>% dplyr::select(field, sub_field, Age_days, total_CI, predicted_Tcha, season) %>% dplyr::left_join(., newdata, by = c("field", "sub_field", "season")) ) } # Predict yields for the validation dataset pred_ffs_rf <- prepare_predictions(stats::predict(model_ffs_rf, newdata = CI_and_yield_validation), CI_and_yield_validation) # Predict yields for the current season (focus on mature fields over 300 days) pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>% dplyr::filter(Age_days > 1) %>% dplyr::mutate(CI_per_day = round(total_CI / Age_days, 1)) safe_log("Successfully completed yield prediction calculations") }, error = function(e) { safe_log(paste("Error in yield prediction:", e$message), "ERROR") # Create empty dataframes to prevent errors in subsequent chunks pred_ffs_rf <- data.frame() pred_rf_current_season <- data.frame() }) # Load CI quadrant data and fill missing values CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>% dplyr::group_by(model) %>% tidyr::fill(field, sub_field, .direction = "downup") %>% dplyr::ungroup() # Check if tonnage_ha is empty if (all(is.na(harvesting_data$tonnage_ha))) { safe_log("Lacking historic harvest data, please provide for yield prediction calculation", "WARNING") knitr::knit_exit() # Exit the chunk if tonnage_ha is empty } # Rename year column to season for consistency harvesting_data <- harvesting_data %>% dplyr::rename(season = year) # Join CI and yield data CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data, by = c("field", "sub_field", "season")) %>% dplyr::group_by(sub_field, season) %>% dplyr::slice(which.max(DOY)) %>% dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>% dplyr::mutate(CI_per_day = cumulative_CI / DOY) # Define predictors and response variables predictors <- c("cumulative_CI", "DOY", "CI_per_day") response <- "tonnage_ha" # Prepare test and validation datasets CI_and_yield_test <- CI_and_yield %>% as.data.frame() %>% dplyr::filter(!is.na(tonnage_ha)) CI_and_yield_validation <- CI_and_yield_test # Prepare prediction dataset (fields without harvest data) prediction_yields <- CI_and_yield %>% as.data.frame() %>% dplyr::filter(is.na(tonnage_ha)) # Configure model training parameters ctrl <- caret::trainControl( method = "cv", savePredictions = TRUE, allowParallel = TRUE, number = 5, verboseIter = TRUE ) # Train the model with feature selection set.seed(202) # For reproducibility model_ffs_rf <- CAST::ffs( CI_and_yield_test[, predictors], CI_and_yield_test[, response], method = "rf", trControl = ctrl, importance = TRUE, withinSE = TRUE, tuneLength = 5, na.rm = TRUE ) # Function to prepare predictions with consistent naming and formatting prepare_predictions <- function(predictions, newdata) { return(predictions %>% as.data.frame() %>% dplyr::rename(predicted_Tcha = ".") %>% dplyr::mutate( sub_field = newdata$sub_field, field = newdata$field, Age_days = newdata$DOY, total_CI = round(newdata$cumulative_CI, 0), predicted_Tcha = round(predicted_Tcha, 0), season = newdata$season ) %>% dplyr::select(field, sub_field, Age_days, total_CI, predicted_Tcha, season) %>% dplyr::left_join(., newdata, by = c("field", "sub_field", "season")) ) } # Predict yields for the validation dataset pred_ffs_rf <- prepare_predictions(stats::predict(model_ffs_rf, newdata = CI_and_yield_validation), CI_and_yield_validation) # Predict yields for the current season (focus on mature fields over 300 days) pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>% dplyr::filter(Age_days > 300) %>% dplyr::mutate(CI_per_day = round(total_CI / Age_days, 1)) # Predict yields for the current season (focus on mature fields over 300 days) pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>% dplyr::filter(Age_days > 1) %>% dplyr::mutate(CI_per_day = round(total_CI / Age_days, 1)) safe_log("Successfully completed yield prediction calculations") # Load and prepare yield prediction data with error handling tryCatch({ # Load CI quadrant data and fill missing values CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>% dplyr::group_by(model) %>% tidyr::fill(field, sub_field, .direction = "downup") %>% dplyr::ungroup() # Check if tonnage_ha is empty if (all(is.na(harvesting_data$tonnage_ha))) { safe_log("Lacking historic harvest data, please provide for yield prediction calculation", "WARNING") knitr::knit_exit() # Exit the chunk if tonnage_ha is empty } # Rename year column to season for consistency harvesting_data <- harvesting_data %>% dplyr::rename(season = year) # Join CI and yield data CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data, by = c("field", "sub_field", "season")) %>% dplyr::group_by(sub_field, season) %>% dplyr::slice(which.max(DOY)) %>% dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>% dplyr::mutate(CI_per_day = cumulative_CI / DOY) # Define predictors and response variables predictors <- c("cumulative_CI", "DOY", "CI_per_day") response <- "tonnage_ha" # Prepare test and validation datasets CI_and_yield_test <- CI_and_yield %>% as.data.frame() %>% dplyr::filter(!is.na(tonnage_ha)) CI_and_yield_validation <- CI_and_yield_test # Prepare prediction dataset (fields without harvest data) prediction_yields <- CI_and_yield %>% as.data.frame() %>% dplyr::filter(is.na(tonnage_ha)) # Configure model training parameters ctrl <- caret::trainControl( method = "cv", savePredictions = TRUE, allowParallel = TRUE, number = 5, verboseIter = TRUE ) # Train the model with feature selection set.seed(202) # For reproducibility model_ffs_rf <- CAST::ffs( CI_and_yield_test[, predictors], CI_and_yield_test[, response], method = "rf", trControl = ctrl, importance = TRUE, withinSE = TRUE, tuneLength = 5, na.rm = TRUE ) # Function to prepare predictions with consistent naming and formatting prepare_predictions <- function(predictions, newdata) { return(predictions %>% as.data.frame() %>% dplyr::rename(predicted_Tcha = ".") %>% dplyr::mutate( sub_field = newdata$sub_field, field = newdata$field, Age_days = newdata$DOY, total_CI = round(newdata$cumulative_CI, 0), predicted_Tcha = round(predicted_Tcha, 0), season = newdata$season ) %>% dplyr::select(field, sub_field, Age_days, total_CI, predicted_Tcha, season) %>% dplyr::left_join(., newdata, by = c("field", "sub_field", "season")) ) } # Predict yields for the validation dataset pred_ffs_rf <- prepare_predictions(stats::predict(model_ffs_rf, newdata = CI_and_yield_validation), CI_and_yield_validation) # Predict yields for the current season (focus on mature fields over 300 days) pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>% dplyr::filter(Age_days > 1) %>% dplyr::mutate(CI_per_day = round(total_CI / Age_days, 1)) safe_log("Successfully completed yield prediction calculations") }, error = function(e) { safe_log(paste("Error in yield prediction:", e$message), "ERROR") # Create empty dataframes to prevent errors in subsequent chunks pred_ffs_rf <- data.frame() pred_rf_current_season <- data.frame() }) # Display yield prediction visualizations with error handling tryCatch({ if (nrow(pred_ffs_rf) > 0) { # Plot model performance (predicted vs actual) ggplot2::ggplot(pred_ffs_rf, ggplot2::aes(y = predicted_Tcha, x = tonnage_ha)) + ggplot2::geom_point(size = 2, alpha = 0.6) + ggplot2::geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") + ggplot2::scale_x_continuous(limits = c(0, 200)) + ggplot2::scale_y_continuous(limits = c(0, 200)) + ggplot2::labs(title = "Model Performance: \nPredicted vs Actual Tonnage/ha", x = "Actual tonnage/ha (Tcha)", y = "Predicted tonnage/ha (Tcha)") + ggplot2::theme_minimal() } if (nrow(pred_rf_current_season) > 0) { # Plot predicted yields by age ggplot2::ggplot(pred_rf_current_season, ggplot2::aes(x = Age_days, y = predicted_Tcha)) + ggplot2::geom_point(size = 2, alpha = 0.6) + ggplot2::labs(title = "Predicted Yields for Fields Over 300 Days \nOld Yet to Be Harvested", x = "Age (days)", y = "Predicted tonnage/ha (Tcha)") + ggplot2::scale_y_continuous(limits = c(0, 200)) + ggplot2::theme_minimal() # Display prediction table knitr::kable(pred_rf_current_season, digits = 0, caption = "Predicted Tonnage/ha for Fields Over 300 Days Old") } else { cat("No fields over 300 days old without harvest data available for yield prediction.") } }, error = function(e) { safe_log(paste("Error in yield prediction visualization:", e$message), "ERROR") cat("Error generating yield prediction visualizations. See log for details.") }) AllPivots_merged$field[1:10] CI path_to_week_current terra::rast(path_to_week_current) map <- tmap::tm_shape(last_week_dif_raster_abs, unit = "m") # Add raster layer with continuous spectrum (centered at 0 for difference maps) map <- map + tmap::tm_raster(col.scale = tm_scale_continuous(values = "RdYlGn", midpoint = 0), col.legend = tm_legend(title = "Chlorophyll Index (CI) Change", orientation = "landscape", position = tm_pos_out("center", "bottom"))) # Complete the map with layout and other elements map <- map + tmap::tm_scalebar(position = c("right", "bottom"), text.color = "black") + tmap::tm_compass(position = c("right", "bottom"), text.color = "black") + tmap::tm_shape(AllPivots0) + tmap::tm_borders(col = "black") + tmap::tm_text("sub_field", size = 0.6, col = "black") # Print the map print(map) # Get versions of specific packages you're using packages <- c("here", "sf", "terra", "exactextractr", "tidyverse", "tmap", "lubridate", "magrittr", "dplyr", "readr", "readxl", "knitr", "rmarkdown", "officedown", "officer") package_versions <- sapply(packages, function(x) as.character(packageVersion(x))) sessionInfo() # Chunk 1: setup_parameters # Set up basic report parameters from input values report_date <- params$report_date mail_day <- params$mail_day borders <- params$borders # Environment setup notes (commented out) # # Activeer de renv omgeving # renv::activate() # renv::deactivate() # # Optioneel: Herstel de omgeving als dat nodig is # # Je kunt dit commentaar geven als je het normaal niet wilt uitvoeren # renv::restore() # Chunk 2: load_libraries # Configure knitr options knitr::opts_chunk$set(warning = FALSE, message = FALSE) # Load all packages at once with suppressPackageStartupMessages suppressPackageStartupMessages({ library(here) library(sf) library(terra) library(exactextractr) library(tidyverse) library(tmap) library(lubridate) library(zoo) library(rsample) library(caret) library(randomForest) library(CAST) }) # Load custom utility functions tryCatch({ source("report_utils.R") }, error = function(e) { message(paste("Error loading report_utils.R:", e$message)) # Try alternative path if the first one fails tryCatch({ source(here::here("r_app", "report_utils.R")) }, error = function(e) { stop("Could not load report_utils.R from either location: ", e$message) }) }) # Chunk 3: initialize_project_config # Set the project directory from parameters project_dir <- params$data_dir # Source project parameters with error handling tryCatch({ source(here::here("r_app", "parameters_project.R")) }, error = function(e) { stop("Error loading parameters_project.R: ", e$message) }) # Log initial configuration safe_log("Starting the R Markdown script") safe_log(paste("mail_day params:", params$mail_day)) safe_log(paste("report_date params:", params$report_date)) safe_log(paste("mail_day variable:", mail_day)) # Chunk 4: calculate_dates_and_weeks # Set locale for consistent date formatting Sys.setlocale("LC_TIME", "C") # Initialize date variables from parameters today <- as.character(report_date) mail_day_as_character <- as.character(mail_day) # Calculate week days report_date_as_week_day <- weekdays(lubridate::ymd(today)) days_of_week <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday") # Calculate initial week number week <- lubridate::week(today) safe_log(paste("Initial week calculation:", week, "today:", today)) # Calculate previous dates for comparisons today_minus_1 <- as.character(lubridate::ymd(today) - 7) today_minus_2 <- as.character(lubridate::ymd(today) - 14) today_minus_3 <- as.character(lubridate::ymd(today) - 21) # Log the weekday calculations for debugging safe_log(paste("Report date weekday:", report_date_as_week_day)) safe_log(paste("Weekday index:", which(days_of_week == report_date_as_week_day))) safe_log(paste("Mail day:", mail_day_as_character)) safe_log(paste("Mail day index:", which(days_of_week == mail_day_as_character))) # Adjust week calculation based on mail day if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_day_as_character)) { safe_log("Adjusting weeks because of mail day") week <- lubridate::week(today) + 1 today_minus_1 <- as.character(lubridate::ymd(today)) today_minus_2 <- as.character(lubridate::ymd(today) - 7) today_minus_3 <- as.character(lubridate::ymd(today) - 14) } # Calculate week numbers for previous weeks week_minus_1 <- week - 1 week_minus_2 <- week - 2 week_minus_3 <- week - 3 # Format current week with leading zeros week <- sprintf("%02d", week) # Get years for each date year <- lubridate::year(today) year_1 <- lubridate::year(today_minus_1) year_2 <- lubridate::year(today_minus_2) year_3 <- lubridate::year(today_minus_3) sessionInfo() source("r_app/extract_current_versions.R") source("r_app/package_manager.R") source("r_app/package_manager.R") source("r_app/package_manager.R") source("r_app/package_manager.R") source("r_app/package_manager.R") source("r_app/package_manager.R")