SmartCane/.Rhistory

513 lines
20 KiB
R

ggplot2::labs(title = "Model Performance: \nPredicted vs Actual Tonnage/ha",
x = "Actual tonnage/ha (Tcha)",
y = "Predicted tonnage/ha (Tcha)") +
ggplot2::theme_minimal()
}
if (nrow(pred_rf_current_season) > 0) {
# Plot predicted yields by age
ggplot2::ggplot(pred_rf_current_season, ggplot2::aes(x = Age_days, y = predicted_Tcha)) +
ggplot2::geom_point(size = 2, alpha = 0.6) +
ggplot2::labs(title = "Predicted Yields for Fields Over 300 Days \nOld Yet to Be Harvested",
x = "Age (days)",
y = "Predicted tonnage/ha (Tcha)") +
ggplot2::scale_y_continuous(limits = c(0, 200)) +
ggplot2::theme_minimal()
# Display prediction table
knitr::kable(pred_rf_current_season,
digits = 0,
caption = "Predicted Tonnage/ha for Fields Over 300 Days Old")
} else {
cat("No fields over 300 days old without harvest data available for yield prediction.")
}
}, error = function(e) {
safe_log(paste("Error in yield prediction visualization:", e$message), "ERROR")
cat("Error generating yield prediction visualizations. See log for details.")
})
# Load and prepare yield prediction data with error handling
tryCatch({
# Load CI quadrant data and fill missing values
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>%
dplyr::group_by(model) %>%
tidyr::fill(field, sub_field, .direction = "downup") %>%
dplyr::ungroup()
# Check if tonnage_ha is empty
if (all(is.na(harvesting_data$tonnage_ha))) {
safe_log("Lacking historic harvest data, please provide for yield prediction calculation", "WARNING")
knitr::knit_exit() # Exit the chunk if tonnage_ha is empty
}
# Rename year column to season for consistency
harvesting_data <- harvesting_data %>% dplyr::rename(season = year)
# Join CI and yield data
CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data, by = c("field", "sub_field", "season")) %>%
dplyr::group_by(sub_field, season) %>%
dplyr::slice(which.max(DOY)) %>%
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>%
dplyr::mutate(CI_per_day = cumulative_CI / DOY)
# Define predictors and response variables
predictors <- c("cumulative_CI", "DOY", "CI_per_day")
response <- "tonnage_ha"
# Prepare test and validation datasets
CI_and_yield_test <- CI_and_yield %>%
as.data.frame() %>%
dplyr::filter(!is.na(tonnage_ha))
CI_and_yield_validation <- CI_and_yield_test
# Prepare prediction dataset (fields without harvest data)
prediction_yields <- CI_and_yield %>%
as.data.frame() %>%
dplyr::filter(is.na(tonnage_ha))
# Configure model training parameters
ctrl <- caret::trainControl(
method = "cv",
savePredictions = TRUE,
allowParallel = TRUE,
number = 5,
verboseIter = TRUE
)
# Train the model with feature selection
set.seed(202) # For reproducibility
model_ffs_rf <- CAST::ffs(
CI_and_yield_test[, predictors],
CI_and_yield_test[, response],
method = "rf",
trControl = ctrl,
importance = TRUE,
withinSE = TRUE,
tuneLength = 5,
na.rm = TRUE
)
# Function to prepare predictions with consistent naming and formatting
prepare_predictions <- function(predictions, newdata) {
return(predictions %>%
as.data.frame() %>%
dplyr::rename(predicted_Tcha = ".") %>%
dplyr::mutate(
sub_field = newdata$sub_field,
field = newdata$field,
Age_days = newdata$DOY,
total_CI = round(newdata$cumulative_CI, 0),
predicted_Tcha = round(predicted_Tcha, 0),
season = newdata$season
) %>%
dplyr::select(field, sub_field, Age_days, total_CI, predicted_Tcha, season) %>%
dplyr::left_join(., newdata, by = c("field", "sub_field", "season"))
)
}
# Predict yields for the validation dataset
pred_ffs_rf <- prepare_predictions(stats::predict(model_ffs_rf, newdata = CI_and_yield_validation), CI_and_yield_validation)
# Predict yields for the current season (focus on mature fields over 300 days)
pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>%
dplyr::filter(Age_days > 1) %>%
dplyr::mutate(CI_per_day = round(total_CI / Age_days, 1))
safe_log("Successfully completed yield prediction calculations")
}, error = function(e) {
safe_log(paste("Error in yield prediction:", e$message), "ERROR")
# Create empty dataframes to prevent errors in subsequent chunks
pred_ffs_rf <- data.frame()
pred_rf_current_season <- data.frame()
})
# Load and prepare yield prediction data with error handling
tryCatch({
# Load CI quadrant data and fill missing values
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>%
dplyr::group_by(model) %>%
tidyr::fill(field, sub_field, .direction = "downup") %>%
dplyr::ungroup()
# Check if tonnage_ha is empty
if (all(is.na(harvesting_data$tonnage_ha))) {
safe_log("Lacking historic harvest data, please provide for yield prediction calculation", "WARNING")
knitr::knit_exit() # Exit the chunk if tonnage_ha is empty
}
# Rename year column to season for consistency
harvesting_data <- harvesting_data %>% dplyr::rename(season = year)
# Join CI and yield data
CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data, by = c("field", "sub_field", "season")) %>%
dplyr::group_by(sub_field, season) %>%
dplyr::slice(which.max(DOY)) %>%
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>%
dplyr::mutate(CI_per_day = cumulative_CI / DOY)
# Define predictors and response variables
predictors <- c("cumulative_CI", "DOY", "CI_per_day")
response <- "tonnage_ha"
# Prepare test and validation datasets
CI_and_yield_test <- CI_and_yield %>%
as.data.frame() %>%
dplyr::filter(!is.na(tonnage_ha))
CI_and_yield_validation <- CI_and_yield_test
# Prepare prediction dataset (fields without harvest data)
prediction_yields <- CI_and_yield %>%
as.data.frame() %>%
dplyr::filter(is.na(tonnage_ha))
# Configure model training parameters
ctrl <- caret::trainControl(
method = "cv",
savePredictions = TRUE,
allowParallel = TRUE,
number = 5,
verboseIter = TRUE
)
# Train the model with feature selection
set.seed(202) # For reproducibility
model_ffs_rf <- CAST::ffs(
CI_and_yield_test[, predictors],
CI_and_yield_test[, response],
method = "rf",
trControl = ctrl,
importance = TRUE,
withinSE = TRUE,
tuneLength = 5,
na.rm = TRUE
)
# Function to prepare predictions with consistent naming and formatting
prepare_predictions <- function(predictions, newdata) {
return(predictions %>%
as.data.frame() %>%
dplyr::rename(predicted_Tcha = ".") %>%
dplyr::mutate(
sub_field = newdata$sub_field,
field = newdata$field,
Age_days = newdata$DOY,
total_CI = round(newdata$cumulative_CI, 0),
predicted_Tcha = round(predicted_Tcha, 0),
season = newdata$season
) %>%
dplyr::select(field, sub_field, Age_days, total_CI, predicted_Tcha, season) %>%
dplyr::left_join(., newdata, by = c("field", "sub_field", "season"))
)
}
# Predict yields for the validation dataset
pred_ffs_rf <- prepare_predictions(stats::predict(model_ffs_rf, newdata = CI_and_yield_validation), CI_and_yield_validation)
# Predict yields for the current season (focus on mature fields over 300 days)
pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>%
dplyr::filter(Age_days > 1) %>%
dplyr::mutate(CI_per_day = round(total_CI / Age_days, 1))
safe_log("Successfully completed yield prediction calculations")
}, error = function(e) {
safe_log(paste("Error in yield prediction:", e$message), "ERROR")
# Create empty dataframes to prevent errors in subsequent chunks
pred_ffs_rf <- data.frame()
pred_rf_current_season <- data.frame()
})
# Load CI quadrant data and fill missing values
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>%
dplyr::group_by(model) %>%
tidyr::fill(field, sub_field, .direction = "downup") %>%
dplyr::ungroup()
# Check if tonnage_ha is empty
if (all(is.na(harvesting_data$tonnage_ha))) {
safe_log("Lacking historic harvest data, please provide for yield prediction calculation", "WARNING")
knitr::knit_exit() # Exit the chunk if tonnage_ha is empty
}
# Rename year column to season for consistency
harvesting_data <- harvesting_data %>% dplyr::rename(season = year)
# Join CI and yield data
CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data, by = c("field", "sub_field", "season")) %>%
dplyr::group_by(sub_field, season) %>%
dplyr::slice(which.max(DOY)) %>%
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>%
dplyr::mutate(CI_per_day = cumulative_CI / DOY)
# Define predictors and response variables
predictors <- c("cumulative_CI", "DOY", "CI_per_day")
response <- "tonnage_ha"
# Prepare test and validation datasets
CI_and_yield_test <- CI_and_yield %>%
as.data.frame() %>%
dplyr::filter(!is.na(tonnage_ha))
CI_and_yield_validation <- CI_and_yield_test
# Prepare prediction dataset (fields without harvest data)
prediction_yields <- CI_and_yield %>%
as.data.frame() %>%
dplyr::filter(is.na(tonnage_ha))
# Configure model training parameters
ctrl <- caret::trainControl(
method = "cv",
savePredictions = TRUE,
allowParallel = TRUE,
number = 5,
verboseIter = TRUE
)
# Train the model with feature selection
set.seed(202) # For reproducibility
model_ffs_rf <- CAST::ffs(
CI_and_yield_test[, predictors],
CI_and_yield_test[, response],
method = "rf",
trControl = ctrl,
importance = TRUE,
withinSE = TRUE,
tuneLength = 5,
na.rm = TRUE
)
# Function to prepare predictions with consistent naming and formatting
prepare_predictions <- function(predictions, newdata) {
return(predictions %>%
as.data.frame() %>%
dplyr::rename(predicted_Tcha = ".") %>%
dplyr::mutate(
sub_field = newdata$sub_field,
field = newdata$field,
Age_days = newdata$DOY,
total_CI = round(newdata$cumulative_CI, 0),
predicted_Tcha = round(predicted_Tcha, 0),
season = newdata$season
) %>%
dplyr::select(field, sub_field, Age_days, total_CI, predicted_Tcha, season) %>%
dplyr::left_join(., newdata, by = c("field", "sub_field", "season"))
)
}
# Predict yields for the validation dataset
pred_ffs_rf <- prepare_predictions(stats::predict(model_ffs_rf, newdata = CI_and_yield_validation), CI_and_yield_validation)
# Predict yields for the current season (focus on mature fields over 300 days)
pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>%
dplyr::filter(Age_days > 300) %>%
dplyr::mutate(CI_per_day = round(total_CI / Age_days, 1))
# Predict yields for the current season (focus on mature fields over 300 days)
pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>%
dplyr::filter(Age_days > 1) %>%
dplyr::mutate(CI_per_day = round(total_CI / Age_days, 1))
safe_log("Successfully completed yield prediction calculations")
# Load and prepare yield prediction data with error handling
tryCatch({
# Load CI quadrant data and fill missing values
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>%
dplyr::group_by(model) %>%
tidyr::fill(field, sub_field, .direction = "downup") %>%
dplyr::ungroup()
# Check if tonnage_ha is empty
if (all(is.na(harvesting_data$tonnage_ha))) {
safe_log("Lacking historic harvest data, please provide for yield prediction calculation", "WARNING")
knitr::knit_exit() # Exit the chunk if tonnage_ha is empty
}
# Rename year column to season for consistency
harvesting_data <- harvesting_data %>% dplyr::rename(season = year)
# Join CI and yield data
CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data, by = c("field", "sub_field", "season")) %>%
dplyr::group_by(sub_field, season) %>%
dplyr::slice(which.max(DOY)) %>%
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>%
dplyr::mutate(CI_per_day = cumulative_CI / DOY)
# Define predictors and response variables
predictors <- c("cumulative_CI", "DOY", "CI_per_day")
response <- "tonnage_ha"
# Prepare test and validation datasets
CI_and_yield_test <- CI_and_yield %>%
as.data.frame() %>%
dplyr::filter(!is.na(tonnage_ha))
CI_and_yield_validation <- CI_and_yield_test
# Prepare prediction dataset (fields without harvest data)
prediction_yields <- CI_and_yield %>%
as.data.frame() %>%
dplyr::filter(is.na(tonnage_ha))
# Configure model training parameters
ctrl <- caret::trainControl(
method = "cv",
savePredictions = TRUE,
allowParallel = TRUE,
number = 5,
verboseIter = TRUE
)
# Train the model with feature selection
set.seed(202) # For reproducibility
model_ffs_rf <- CAST::ffs(
CI_and_yield_test[, predictors],
CI_and_yield_test[, response],
method = "rf",
trControl = ctrl,
importance = TRUE,
withinSE = TRUE,
tuneLength = 5,
na.rm = TRUE
)
# Function to prepare predictions with consistent naming and formatting
prepare_predictions <- function(predictions, newdata) {
return(predictions %>%
as.data.frame() %>%
dplyr::rename(predicted_Tcha = ".") %>%
dplyr::mutate(
sub_field = newdata$sub_field,
field = newdata$field,
Age_days = newdata$DOY,
total_CI = round(newdata$cumulative_CI, 0),
predicted_Tcha = round(predicted_Tcha, 0),
season = newdata$season
) %>%
dplyr::select(field, sub_field, Age_days, total_CI, predicted_Tcha, season) %>%
dplyr::left_join(., newdata, by = c("field", "sub_field", "season"))
)
}
# Predict yields for the validation dataset
pred_ffs_rf <- prepare_predictions(stats::predict(model_ffs_rf, newdata = CI_and_yield_validation), CI_and_yield_validation)
# Predict yields for the current season (focus on mature fields over 300 days)
pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>%
dplyr::filter(Age_days > 1) %>%
dplyr::mutate(CI_per_day = round(total_CI / Age_days, 1))
safe_log("Successfully completed yield prediction calculations")
}, error = function(e) {
safe_log(paste("Error in yield prediction:", e$message), "ERROR")
# Create empty dataframes to prevent errors in subsequent chunks
pred_ffs_rf <- data.frame()
pred_rf_current_season <- data.frame()
})
# Display yield prediction visualizations with error handling
tryCatch({
if (nrow(pred_ffs_rf) > 0) {
# Plot model performance (predicted vs actual)
ggplot2::ggplot(pred_ffs_rf, ggplot2::aes(y = predicted_Tcha, x = tonnage_ha)) +
ggplot2::geom_point(size = 2, alpha = 0.6) +
ggplot2::geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") +
ggplot2::scale_x_continuous(limits = c(0, 200)) +
ggplot2::scale_y_continuous(limits = c(0, 200)) +
ggplot2::labs(title = "Model Performance: \nPredicted vs Actual Tonnage/ha",
x = "Actual tonnage/ha (Tcha)",
y = "Predicted tonnage/ha (Tcha)") +
ggplot2::theme_minimal()
}
if (nrow(pred_rf_current_season) > 0) {
# Plot predicted yields by age
ggplot2::ggplot(pred_rf_current_season, ggplot2::aes(x = Age_days, y = predicted_Tcha)) +
ggplot2::geom_point(size = 2, alpha = 0.6) +
ggplot2::labs(title = "Predicted Yields for Fields Over 300 Days \nOld Yet to Be Harvested",
x = "Age (days)",
y = "Predicted tonnage/ha (Tcha)") +
ggplot2::scale_y_continuous(limits = c(0, 200)) +
ggplot2::theme_minimal()
# Display prediction table
knitr::kable(pred_rf_current_season,
digits = 0,
caption = "Predicted Tonnage/ha for Fields Over 300 Days Old")
} else {
cat("No fields over 300 days old without harvest data available for yield prediction.")
}
}, error = function(e) {
safe_log(paste("Error in yield prediction visualization:", e$message), "ERROR")
cat("Error generating yield prediction visualizations. See log for details.")
})
AllPivots_merged$field[1:10]
CI
path_to_week_current
terra::rast(path_to_week_current)
map <- tmap::tm_shape(last_week_dif_raster_abs, unit = "m") # Add raster layer with continuous spectrum (centered at 0 for difference maps)
map <- map + tmap::tm_raster(col.scale = tm_scale_continuous(values = "RdYlGn",
midpoint = 0),
col.legend = tm_legend(title = "Chlorophyll Index (CI) Change",
orientation = "landscape",
position = tm_pos_out("center", "bottom")))
# Complete the map with layout and other elements
map <- map +
tmap::tm_scalebar(position = c("right", "bottom"), text.color = "black") +
tmap::tm_compass(position = c("right", "bottom"), text.color = "black") +
tmap::tm_shape(AllPivots0) +
tmap::tm_borders(col = "black") +
tmap::tm_text("sub_field", size = 0.6, col = "black")
# Print the map
print(map)
# Get versions of specific packages you're using
packages <- c("here", "sf", "terra", "exactextractr", "tidyverse",
"tmap", "lubridate", "magrittr", "dplyr", "readr",
"readxl", "knitr", "rmarkdown", "officedown", "officer")
package_versions <- sapply(packages, function(x) as.character(packageVersion(x)))
sessionInfo()
# Chunk 1: setup_parameters
# Set up basic report parameters from input values
report_date <- params$report_date
mail_day <- params$mail_day
borders <- params$borders
# Environment setup notes (commented out)
# # Activeer de renv omgeving
# renv::activate()
# renv::deactivate()
# # Optioneel: Herstel de omgeving als dat nodig is
# # Je kunt dit commentaar geven als je het normaal niet wilt uitvoeren
# renv::restore()
# Chunk 2: load_libraries
# Configure knitr options
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Load all packages at once with suppressPackageStartupMessages
suppressPackageStartupMessages({
library(here)
library(sf)
library(terra)
library(exactextractr)
library(tidyverse)
library(tmap)
library(lubridate)
library(zoo)
library(rsample)
library(caret)
library(randomForest)
library(CAST)
})
# Load custom utility functions
tryCatch({
source("report_utils.R")
}, error = function(e) {
message(paste("Error loading report_utils.R:", e$message))
# Try alternative path if the first one fails
tryCatch({
source(here::here("r_app", "report_utils.R"))
}, error = function(e) {
stop("Could not load report_utils.R from either location: ", e$message)
})
})
# Chunk 3: initialize_project_config
# Set the project directory from parameters
project_dir <- params$data_dir
# Source project parameters with error handling
tryCatch({
source(here::here("r_app", "parameters_project.R"))
}, error = function(e) {
stop("Error loading parameters_project.R: ", e$message)
})
# Log initial configuration
safe_log("Starting the R Markdown script")
safe_log(paste("mail_day params:", params$mail_day))
safe_log(paste("report_date params:", params$report_date))
safe_log(paste("mail_day variable:", mail_day))
# Chunk 4: calculate_dates_and_weeks
# Set locale for consistent date formatting
Sys.setlocale("LC_TIME", "C")
# Initialize date variables from parameters
today <- as.character(report_date)
mail_day_as_character <- as.character(mail_day)
# Calculate week days
report_date_as_week_day <- weekdays(lubridate::ymd(today))
days_of_week <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
# Calculate initial week number
week <- lubridate::week(today)
safe_log(paste("Initial week calculation:", week, "today:", today))
# Calculate previous dates for comparisons
today_minus_1 <- as.character(lubridate::ymd(today) - 7)
today_minus_2 <- as.character(lubridate::ymd(today) - 14)
today_minus_3 <- as.character(lubridate::ymd(today) - 21)
# Log the weekday calculations for debugging
safe_log(paste("Report date weekday:", report_date_as_week_day))
safe_log(paste("Weekday index:", which(days_of_week == report_date_as_week_day)))
safe_log(paste("Mail day:", mail_day_as_character))
safe_log(paste("Mail day index:", which(days_of_week == mail_day_as_character)))
# Adjust week calculation based on mail day
if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_day_as_character)) {
safe_log("Adjusting weeks because of mail day")
week <- lubridate::week(today) + 1
today_minus_1 <- as.character(lubridate::ymd(today))
today_minus_2 <- as.character(lubridate::ymd(today) - 7)
today_minus_3 <- as.character(lubridate::ymd(today) - 14)
}
# Calculate week numbers for previous weeks
week_minus_1 <- week - 1
week_minus_2 <- week - 2
week_minus_3 <- week - 3
# Format current week with leading zeros
week <- sprintf("%02d", week)
# Get years for each date
year <- lubridate::year(today)
year_1 <- lubridate::year(today_minus_1)
year_2 <- lubridate::year(today_minus_2)
year_3 <- lubridate::year(today_minus_3)
sessionInfo()
source("r_app/extract_current_versions.R")
source("r_app/package_manager.R")
source("r_app/package_manager.R")
source("r_app/package_manager.R")
source("r_app/package_manager.R")
source("r_app/package_manager.R")
source("r_app/package_manager.R")