SmartCane/r_app/12_temporal_yield_forecasting.R
2026-01-06 14:17:37 +01:00

688 lines
25 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# 12_TEMPORAL_YIELD_FORECASTING.R
# ==================================
# Progressive yield forecasting using full CI time series
# Predicts yield at multiple time points (DOY 300, 330, 360, 390) to show
# how forecast accuracy improves as the season progresses
#
# Key differences from 11_yield_prediction_comparison.R:
# - Uses FULL CI time series, not just final cumulative value
# - Creates sequential features (lagged values, rolling statistics)
# - Trains separate models for each forecast horizon
# - Visualizes prediction improvement over time
#
# Usage: Rscript 12_temporal_yield_forecasting.R [project_dir]
# 1. Load required libraries
# -------------------------
suppressPackageStartupMessages({
library(here)
library(sf)
library(dplyr)
library(tidyr)
library(lubridate)
library(readr)
library(readxl)
library(caret)
library(CAST) # For ffs (Forward Feature Selection)
library(randomForest)
library(ggplot2)
library(gridExtra)
library(purrr)
})
# 2. Helper Functions
# -----------------
#' Print a timestamped log line to stdout
#'
#' @param message Text to log.
#' @param level Severity label prefixed to the message (default "INFO").
safe_log <- function(message, level = "INFO") {
  now <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
  cat(paste0("[", now, "] ", level, ": ", message, "\n"))
}
#' Calculate temporal features from CI time series
#'
#' Summarises one field-season's cumulative-CI trajectory up to a given day
#' of year (DOY) into a single-row data frame of model predictors.
#'
#' @param ci_data Data frame with at least `DOY` and `cumulative_CI` columns
#'   (the time series of CI values for a field).
#' @param target_doy The DOY up to (and including) which features are computed.
#' @return A one-row data.frame of features, or NULL when fewer than 5
#'   observations exist at or before `target_doy`.
calculate_temporal_features <- function(ci_data, target_doy) {
  # Restrict to observations at or before the forecast horizon, in time order
  obs <- ci_data[ci_data$DOY <= target_doy, , drop = FALSE]
  obs <- obs[order(obs$DOY), , drop = FALSE]
  n_obs <- nrow(obs)
  if (n_obs < 5) {
    return(NULL) # Not enough data to characterise the trajectory
  }
  latest_ci <- obs$cumulative_CI[n_obs]
  # Recent growth over the last 30 DAYS. The previous implementation used the
  # last 30 ROWS (nrow >= 30 and index n - 30), which is only correct when
  # observations are exactly daily; here we anchor on DOY instead.
  baseline <- obs$cumulative_CI[obs$DOY <= target_doy - 30]
  recent_ci_30d <- if (length(baseline) > 0) {
    latest_ci - baseline[length(baseline)]
  } else {
    NA_real_
  }
  # Linear growth rate: slope of cumulative CI vs DOY (needs > 2 points).
  # unname() strips the "DOY" name from the coefficient.
  growth_rate <- if (n_obs > 2) {
    unname(coef(lm(cumulative_CI ~ DOY, data = obs))[2])
  } else {
    NA_real_
  }
  data.frame(
    # Current state
    current_CI = latest_ci,
    current_DOY = target_doy,
    days_observed = n_obs, # number of observations, not calendar days
    # Growth metrics
    total_CI_accumulated = latest_ci,
    avg_CI_per_day = latest_ci / target_doy,
    # Recent growth (last 30 days)
    recent_CI_30d = recent_ci_30d,
    # Growth trajectory
    CI_growth_rate = growth_rate,
    # Early season performance: sum of the cumulative-CI values observed in
    # the first 150 days (a running-total sum, kept for model compatibility)
    early_season_CI = sum(obs$cumulative_CI[obs$DOY <= 150]),
    # Growth stability: spread of step-to-step increments
    CI_variability = sd(diff(obs$cumulative_CI), na.rm = TRUE)
  )
}
#' Scatter plot of predicted vs actual yield for one forecast horizon
#'
#' @param predicted Numeric vector of model predictions (t/ha).
#' @param actual Numeric vector of observed TCH (t/ha), same length.
#' @param forecast_doy DOY at which the forecast was issued (shown in title).
#' @param metrics List with RMSE, MAE, R2 and n (shown in the subtitle).
#' @param title_suffix Optional text appended to the plot title.
#' @return A ggplot object, or NULL when no complete prediction/actual pairs exist.
create_forecast_plot <- function(predicted, actual, forecast_doy, metrics, title_suffix = "") {
  # Keep only rows where both values are present
  keep <- !is.na(predicted) & !is.na(actual)
  plot_data <- data.frame(Predicted = predicted[keep], Actual = actual[keep])
  if (nrow(plot_data) == 0) return(NULL)
  # Shared axis limits so the 1:1 reference line is comparable across panels
  axis_range <- range(c(plot_data$Predicted, plot_data$Actual))
  ggplot(plot_data, aes(x = Actual, y = Predicted)) +
    geom_point(alpha = 0.6, size = 2.5, color = "#2E86AB") +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red", linewidth = 1) +
    geom_smooth(method = "lm", se = TRUE, color = "#A23B72", fill = "#A23B72", alpha = 0.2) +
    coord_fixed(xlim = axis_range, ylim = axis_range) +
    labs(
      title = sprintf("Forecast at DOY %d%s", forecast_doy, title_suffix),
      subtitle = sprintf("RMSE: %.2f t/ha | MAE: %.2f t/ha | R²: %.3f | n: %d",
                         metrics$RMSE, metrics$MAE, metrics$R2, metrics$n),
      x = "Actual TCH (t/ha)",
      y = "Predicted TCH (t/ha)"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(face = "bold", size = 10),
      plot.subtitle = element_text(size = 9, color = "gray40"),
      axis.title = element_text(size = 10),
      axis.text = element_text(size = 9),
      panel.grid.minor = element_blank(),
      panel.border = element_rect(color = "gray80", fill = NA, linewidth = 1)
    )
}
#' Calculate model performance metrics
#'
#' Computes RMSE, MAE and squared Pearson correlation between predictions
#' and observations, ignoring pairs where either value is NA.
#'
#' @param predicted Numeric vector of predictions.
#' @param actual Numeric vector of observed values (same length).
#' @return List with rounded RMSE, MAE, R2 and the number of pairs used (n).
calculate_metrics <- function(predicted, actual) {
  valid_idx <- !is.na(predicted) & !is.na(actual)
  predicted <- predicted[valid_idx]
  actual <- actual[valid_idx]
  if (length(predicted) == 0) {
    return(list(RMSE = NA, MAE = NA, R2 = NA, n = 0))
  }
  rmse <- sqrt(mean((predicted - actual)^2))
  mae <- mean(abs(predicted - actual))
  # cor() errors outright with fewer than 2 pairs and warns (returning NA)
  # on zero-variance input, so guard instead of letting the pipeline abort.
  r2 <- if (length(predicted) >= 2 && sd(predicted) > 0 && sd(actual) > 0) {
    cor(predicted, actual)^2
  } else {
    NA_real_
  }
  return(list(
    RMSE = round(rmse, 2),
    MAE = round(mae, 2),
    R2 = round(r2, 3),
    n = length(predicted)
  ))
}
# 3. Main Function
# --------------
#' Run the full temporal yield forecasting pipeline for one project:
#' load yield + CI data, build per-horizon features, train random-forest
#' models (all variables vs forward feature selection), then plot and save
#' comparison results under <reports_dir>/yield_prediction.
main <- function() {
  # Process command line arguments: optional project directory (default "esa")
  args <- commandArgs(trailingOnly = TRUE)
  if (length(args) >= 1 && !is.na(args[1])) {
    project_dir <- as.character(args[1])
  } else {
    project_dir <- "esa" # Default project
  }
  # parameters_project.R (sourced below) expects project_dir in the global env
  assign("project_dir", project_dir, envir = .GlobalEnv)
  safe_log("=== TEMPORAL YIELD FORECASTING ===")
  safe_log(paste("Project:", project_dir))
  # Load project configuration (defines cumulative_CI_vals_dir, reports_dir, ...)
  tryCatch({
    source(here("r_app", "parameters_project.R"))
  }, error = function(e) {
    stop("Error loading parameters_project.R: ", e$message)
  })
  # 4. Load yield data
  # ----------------
  yield_excel_path <- file.path(
    "laravel_app", "storage", "app", project_dir, "Data",
    paste0(project_dir, "_yield_data.xlsx")
  )
  safe_log("Loading yield data...")
  sheet_names <- readxl::excel_sheets(here(yield_excel_path))
  yield_data_list <- lapply(sheet_names, function(sheet_name) {
    # Sheet names encode the season (e.g. "2021-22"): when two year tokens are
    # present take the second (harvest year), expanding 2-digit years to 20xx.
    year_matches <- regmatches(sheet_name, gregexpr("[0-9]{4}|[0-9]{2}", sheet_name))[[1]]
    if (length(year_matches) >= 2) {
      second_year <- year_matches[2]
      if (nchar(second_year) == 2) {
        year_value <- as.numeric(paste0("20", second_year))
      } else {
        year_value <- as.numeric(second_year)
      }
    } else if (length(year_matches) == 1) {
      year_value <- as.numeric(year_matches[1])
    } else {
      year_value <- NA
    }
    data <- readxl::read_excel(here(yield_excel_path), sheet = sheet_name)
    data$season <- year_value
    data
  })
  yield_data <- dplyr::bind_rows(yield_data_list) %>%
    dplyr::rename(sub_field = Field) %>%
    dplyr::select(sub_field, season, TCH, Ratoon, Cane_Variety, Irrig_Type) %>%
    dplyr::rename(tonnage_ha = TCH) %>%
    dplyr::filter(!is.na(tonnage_ha))
  safe_log(sprintf("Loaded %d yield records", nrow(yield_data)))
  # 5. Load CI time series data
  # -------------------------
  safe_log("Loading CI time series...")
  CI_data <- readRDS(here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>%
    dplyr::group_by(model) %>%
    tidyr::fill(field, sub_field, .direction = "downup") %>%
    dplyr::ungroup() %>%
    dplyr::select(sub_field, season, DOY, cumulative_CI) %>%
    dplyr::filter(!is.na(cumulative_CI), DOY > 0)
  safe_log(sprintf("Loaded CI data: %d observations", nrow(CI_data)))
  # 6. Define forecast horizons (in DOY)
  # ----------------------------------
  forecast_horizons <- c(300, 330, 360, 390)
  safe_log(paste("Forecast horizons (DOY):", paste(forecast_horizons, collapse = ", ")))
  # 7. Prepare training data for each horizon
  # ---------------------------------------
  safe_log("\nPreparing temporal features for each forecast horizon...")
  forecast_data_list <- list()
  for (horizon_doy in forecast_horizons) {
    safe_log(sprintf("\n=== Processing DOY %d ===", horizon_doy))
    # For each field-season, calculate features up to this DOY
    horizon_features <- CI_data %>%
      dplyr::group_by(sub_field, season) %>%
      dplyr::group_modify(~ {
        features <- calculate_temporal_features(.x, horizon_doy)
        if (!is.null(features)) {
          return(features)
        } else {
          return(data.frame()) # empty frame: group contributes no rows
        }
      }) %>%
      dplyr::ungroup()
    # Join with yield data
    horizon_data <- horizon_features %>%
      dplyr::inner_join(yield_data, by = c("sub_field", "season")) %>%
      dplyr::filter(!is.na(tonnage_ha))
    safe_log(sprintf(" Features calculated for %d field-seasons", nrow(horizon_data)))
    forecast_data_list[[as.character(horizon_doy)]] <- horizon_data
  }
  # 8. Train models for each forecast horizon
  # ---------------------------------------
  safe_log("\n=== TRAINING FORECAST MODELS ===")
  set.seed(206)
  ctrl <- caret::trainControl(
    method = "cv",
    number = 5,
    savePredictions = "final"
  )
  models_all_vars <- list()
  models_ffs <- list()
  predictions_all_vars <- list()
  predictions_ffs <- list()
  metrics_all_vars <- list()
  metrics_ffs <- list()
  importance_all_vars <- list()
  importance_ffs <- list()
  selected_features <- list()
  for (horizon_doy in forecast_horizons) {
    safe_log(sprintf("\n=== TRAINING MODELS FOR DOY %d ===", horizon_doy))
    train_data <- forecast_data_list[[as.character(horizon_doy)]]
    # Select predictors (remove NAs and select numeric features)
    predictor_cols <- c("current_CI", "current_DOY", "avg_CI_per_day",
                        "recent_CI_30d", "CI_growth_rate", "early_season_CI",
                        "CI_variability", "Ratoon")
    # Filter complete cases
    train_data_clean <- train_data %>%
      dplyr::select(all_of(c(predictor_cols, "tonnage_ha"))) %>%
      tidyr::drop_na()
    safe_log(sprintf(" Training records: %d", nrow(train_data_clean)))
    if (nrow(train_data_clean) < 20) {
      safe_log(" Insufficient data, skipping", "WARNING")
      next
    }
    # ===== MODEL 1: ALL VARIABLES =====
    safe_log(" Training Model 1: All Variables...")
    model_all <- caret::train(
      tonnage_ha ~ .,
      data = train_data_clean,
      method = "rf",
      trControl = ctrl,
      importance = TRUE,
      tuneLength = 3
    )
    # Get predictions
    # NOTE(review): these are resubstitution (training-set) predictions, so
    # the reported metrics overstate out-of-sample accuracy; consider using
    # the CV hold-out predictions in model_all$pred instead — TODO confirm.
    preds_all <- predict(model_all, newdata = train_data_clean)
    metrics_all <- calculate_metrics(preds_all, train_data_clean$tonnage_ha)
    # Extract variable importance
    var_imp_all <- caret::varImp(model_all)$importance
    var_imp_all_df <- data.frame(
      Variable = rownames(var_imp_all),
      Importance = var_imp_all$Overall,
      DOY = horizon_doy,
      Model = "All Variables"
    ) %>%
      dplyr::arrange(desc(Importance)) %>%
      dplyr::mutate(Rank = row_number())
    safe_log(sprintf(" RMSE: %.2f | MAE: %.2f | R²: %.3f",
                     metrics_all$RMSE, metrics_all$MAE, metrics_all$R2))
    safe_log(" Top 3 variables:")
    for (i in seq_len(min(3, nrow(var_imp_all_df)))) {
      safe_log(sprintf(" %d. %s (Importance: %.1f)",
                       i, var_imp_all_df$Variable[i], var_imp_all_df$Importance[i]))
    }
    # ===== MODEL 2: FORWARD FEATURE SELECTION =====
    safe_log(" Training Model 2: Forward Feature Selection (ffs)...")
    ffs_success <- FALSE
    tryCatch({
      # Use faster feature selection with smaller tuneLength
      model_ffs <- CAST::ffs(
        predictors = train_data_clean[, predictor_cols],
        response = train_data_clean$tonnage_ha,
        method = "rf",
        trControl = trainControl(method = "cv", number = 3), # Faster: 3-fold instead of 5
        tuneLength = 1, # Faster: only 1 tuning parameter
        verbose = FALSE
      )
      # Get selected features
      selected_vars <- model_ffs$selectedvars
      safe_log(sprintf(" Selected %d features: %s",
                       length(selected_vars), paste(selected_vars, collapse = ", ")))
      # Get predictions
      preds_ffs <- predict(model_ffs, newdata = train_data_clean)
      # Calculate metrics
      metrics_ffs_result <- calculate_metrics(preds_ffs, train_data_clean$tonnage_ha)
      # Extract variable importance (only for selected variables)
      var_imp_ffs <- caret::varImp(model_ffs)$importance
      var_imp_ffs_df <- data.frame(
        Variable = rownames(var_imp_ffs),
        Importance = var_imp_ffs$Overall,
        DOY = horizon_doy,
        Model = "FFS"
      ) %>%
        dplyr::arrange(desc(Importance)) %>%
        dplyr::mutate(Rank = row_number())
      safe_log(sprintf(" RMSE: %.2f | MAE: %.2f | R²: %.3f",
                       metrics_ffs_result$RMSE, metrics_ffs_result$MAE, metrics_ffs_result$R2))
      # Calculate improvement
      improvement <- ((metrics_all$RMSE - metrics_ffs_result$RMSE) / metrics_all$RMSE) * 100
      if (improvement > 0) {
        safe_log(sprintf(" ✓ FFS improved RMSE by %.1f%%", improvement))
      } else {
        safe_log(sprintf(" ✗ FFS increased RMSE by %.1f%%", abs(improvement)))
      }
      # Store results - explicitly assign to list position. tryCatch evaluates
      # this block in the function's environment, so plain <- persists.
      models_ffs[[as.character(horizon_doy)]] <- model_ffs
      predictions_ffs[[as.character(horizon_doy)]] <- preds_ffs
      metrics_ffs[[as.character(horizon_doy)]] <- metrics_ffs_result
      importance_ffs[[as.character(horizon_doy)]] <- var_imp_ffs_df
      selected_features[[as.character(horizon_doy)]] <- selected_vars
      ffs_success <- TRUE
      safe_log(" ✓ FFS model stored successfully")
    }, error = function(e) {
      safe_log(sprintf(" ERROR in ffs: %s", e$message), "ERROR")
      # Don't set to NULL - just skip assignment so they remain undefined
    })
    if (!ffs_success) {
      safe_log(" FFS failed - using All Variables model only for this horizon", "WARNING")
    }
    # Store Model 1 results
    models_all_vars[[as.character(horizon_doy)]] <- model_all
    predictions_all_vars[[as.character(horizon_doy)]] <- preds_all
    metrics_all_vars[[as.character(horizon_doy)]] <- metrics_all
    importance_all_vars[[as.character(horizon_doy)]] <- var_imp_all_df
  }
  # 9. Create visualizations
  # ----------------------
  safe_log("\n=== CREATING VISUALIZATIONS ===")
  output_dir <- file.path(reports_dir, "yield_prediction")
  dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
  # Create comparison plots for each horizon (All Variables vs FFS)
  plots_comparison <- list()
  plot_idx <- 1
  for (horizon_doy in forecast_horizons) {
    if (!is.null(models_all_vars[[as.character(horizon_doy)]])) {
      # Rebuild the cleaned frame so actual yields align row-for-row with the
      # stored prediction vectors (same column selection + drop_na as training)
      train_data_clean <- forecast_data_list[[as.character(horizon_doy)]] %>%
        dplyr::select(current_CI, current_DOY, avg_CI_per_day, recent_CI_30d,
                      CI_growth_rate, early_season_CI, CI_variability,
                      Ratoon, tonnage_ha) %>%
        tidyr::drop_na()
      # Plot 1: All Variables
      plot_all <- create_forecast_plot(
        predictions_all_vars[[as.character(horizon_doy)]],
        train_data_clean$tonnage_ha,
        horizon_doy,
        metrics_all_vars[[as.character(horizon_doy)]],
        " - All Variables"
      )
      plots_comparison[[plot_idx]] <- plot_all
      plot_idx <- plot_idx + 1
      # Plot 2: FFS (if available)
      if (!is.null(models_ffs[[as.character(horizon_doy)]])) {
        selected_vars <- selected_features[[as.character(horizon_doy)]]
        plot_ffs <- create_forecast_plot(
          predictions_ffs[[as.character(horizon_doy)]],
          train_data_clean$tonnage_ha,
          horizon_doy,
          metrics_ffs[[as.character(horizon_doy)]],
          sprintf(" - FFS (%d vars)", length(selected_vars))
        )
        plots_comparison[[plot_idx]] <- plot_ffs
        plot_idx <- plot_idx + 1
      }
    }
  }
  # Create RMSE comparison data in long format. FIX: the previous version
  # passed vectors of potentially different lengths to data.frame() (when FFS
  # failed for some horizons metrics_ffs has fewer entries than
  # metrics_all_vars), which aborts the script with "arguments imply differing
  # number of rows". Building rows per horizon tolerates missing models.
  rmse_comparison_data <- dplyr::bind_rows(lapply(forecast_horizons, function(x) {
    key <- as.character(x)
    rows <- list()
    if (!is.null(metrics_all_vars[[key]])) {
      rows[[length(rows) + 1]] <- data.frame(DOY = x, Model = "All Variables",
                                             RMSE = metrics_all_vars[[key]]$RMSE)
    }
    if (!is.null(metrics_ffs[[key]])) {
      rows[[length(rows) + 1]] <- data.frame(DOY = x, Model = "FFS",
                                             RMSE = metrics_ffs[[key]]$RMSE)
    }
    dplyr::bind_rows(rows)
  }))
  rmse_comparison_plot <- ggplot(rmse_comparison_data, aes(x = DOY, y = RMSE, color = Model, group = Model)) +
    geom_line(linewidth = 1.2) +
    geom_point(size = 3) +
    geom_text(aes(label = sprintf("%.1f", RMSE)), vjust = -0.8, size = 3) +
    scale_color_manual(values = c("All Variables" = "#E63946", "FFS" = "#06A77D")) +
    labs(
      title = "Model Comparison: All Variables vs Feature Selection",
      subtitle = "RMSE across forecast horizons",
      x = "Days from Planting (DOY)",
      y = "RMSE (t/ha)"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(face = "bold", size = 10),
      plot.subtitle = element_text(size = 9, color = "gray40"),
      axis.title = element_text(size = 10),
      legend.position = "bottom",
      panel.grid.minor = element_blank()
    )
  # Create feature selection summary table (one row per horizon where FFS ran)
  ffs_summary <- data.frame(
    DOY = forecast_horizons[sapply(forecast_horizons, function(x)
      !is.null(selected_features[[as.character(x)]]))],
    Num_Features = sapply(forecast_horizons[sapply(forecast_horizons, function(x)
      !is.null(selected_features[[as.character(x)]]))], function(x)
      length(selected_features[[as.character(x)]])),
    Selected_Features = sapply(forecast_horizons[sapply(forecast_horizons, function(x)
      !is.null(selected_features[[as.character(x)]]))], function(x)
      paste(selected_features[[as.character(x)]], collapse = ", "))
  )
  # Create table grob
  ffs_table_grob <- gridExtra::tableGrob(
    ffs_summary,
    rows = NULL,
    theme = gridExtra::ttheme_minimal(
      base_size = 8,
      core = list(fg_params = list(hjust = 0, x = 0.05)),
      colhead = list(fg_params = list(fontface = "bold"))
    )
  )
  ffs_table_plot <- gridExtra::grid.arrange(
    ffs_table_grob,
    top = grid::textGrob("Features Selected by FFS at Each Horizon",
                         gp = grid::gpar(fontface = "bold", fontsize = 10))
  )
  # Create variable importance comparison plot
  # Bind all importance data frames from both models
  all_importance_list <- c(importance_all_vars, importance_ffs)
  all_importance_list <- all_importance_list[!sapply(all_importance_list, is.null)]
  all_importance <- dplyr::bind_rows(all_importance_list)
  # Get top 5 variables overall
  top_vars <- all_importance %>%
    dplyr::group_by(Variable) %>%
    dplyr::summarise(AvgImportance = mean(Importance)) %>%
    dplyr::arrange(desc(AvgImportance)) %>%
    dplyr::slice(1:5) %>%
    dplyr::pull(Variable)
  importance_plot <- all_importance %>%
    dplyr::filter(Variable %in% top_vars) %>%
    ggplot(aes(x = factor(DOY), y = Importance, fill = Model)) +
    geom_col(position = "dodge", width = 0.8) +
    scale_fill_manual(values = c("All Variables" = "#457B9D", "FFS" = "#06A77D")) +
    facet_wrap(~ Variable, ncol = 5, scales = "free_y") +
    labs(
      title = "Variable Importance: All Variables vs FFS",
      subtitle = "Top 5 most important predictors",
      x = "Days from Planting (DOY)",
      y = "Importance",
      fill = "Model Type"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(face = "bold", size = 10),
      plot.subtitle = element_text(size = 9, color = "gray40"),
      axis.title = element_text(size = 9),
      axis.text = element_text(size = 7),
      axis.text.x = element_text(angle = 45, hjust = 1),
      legend.position = "bottom",
      legend.text = element_text(size = 8),
      strip.text = element_text(size = 8, face = "bold"),
      panel.grid.minor = element_blank()
    )
  # Combine all plots (4 horizons × 2 models = 8 plots + 2 summary panels).
  # FIX: combined_plot was previously only created when exactly 8 plots
  # existed, yet saved unconditionally below — an object-not-found error
  # whenever a horizon was skipped or FFS failed. Initialize to NULL, fall
  # back to a simple grid when incomplete, and guard the save.
  combined_plot <- NULL
  if (length(plots_comparison) == 8) {
    combined_plot <- gridExtra::grid.arrange(
      grobs = c(plots_comparison, list(rmse_comparison_plot, ffs_table_plot)),
      ncol = 2,
      nrow = 5,
      heights = c(1.1, 1.1, 1.1, 1.1, 0.9),
      layout_matrix = rbind(
        c(1, 2), # DOY 300: All vs FFS
        c(3, 4), # DOY 330: All vs FFS
        c(5, 6), # DOY 360: All vs FFS
        c(7, 8), # DOY 390: All vs FFS
        c(9, 10) # RMSE comparison + FFS table
      )
    )
  } else if (length(plots_comparison) > 0) {
    combined_plot <- gridExtra::grid.arrange(
      grobs = c(plots_comparison, list(rmse_comparison_plot, ffs_table_plot)),
      ncol = 2
    )
  }
  # Save main comparison plot (skip with a warning if nothing was plotted)
  if (!is.null(combined_plot)) {
    plot_file <- file.path(output_dir, paste0(project_dir, "_temporal_yield_forecasting_comparison.png"))
    ggsave(plot_file, combined_plot, width = 12, height = 18, dpi = 300)
    safe_log(paste("Comparison plot saved to:", plot_file))
  } else {
    safe_log("No forecast plots available - skipping combined plot", "WARNING")
  }
  # Save importance comparison plot separately
  importance_file <- file.path(output_dir, paste0(project_dir, "_variable_importance_comparison.png"))
  ggsave(importance_file, importance_plot, width = 14, height = 6, dpi = 300)
  safe_log(paste("Importance plot saved to:", importance_file))
  # 10. Save results
  # --------------
  results_file <- file.path(output_dir, paste0(project_dir, "_temporal_forecast_models.rds"))
  saveRDS(list(
    models_all_vars = models_all_vars,
    models_ffs = models_ffs,
    metrics_all_vars = metrics_all_vars,
    metrics_ffs = metrics_ffs,
    importance_all_vars = importance_all_vars,
    importance_ffs = importance_ffs,
    selected_features = selected_features,
    forecast_horizons = forecast_horizons
  ), results_file)
  safe_log(paste("Models saved to:", results_file))
  # Save variable importance to CSV
  importance_csv <- file.path(output_dir, paste0(project_dir, "_variable_importance.csv"))
  write.csv(all_importance, importance_csv, row.names = FALSE)
  safe_log(paste("Variable importance saved to:", importance_csv))
  # Save selected features summary
  ffs_csv <- file.path(output_dir, paste0(project_dir, "_ffs_selected_features.csv"))
  write.csv(ffs_summary, ffs_csv, row.names = FALSE)
  safe_log(paste("FFS summary saved to:", ffs_csv))
  # Save model comparison (NA where a model is missing for a horizon)
  comparison_csv <- file.path(output_dir, paste0(project_dir, "_model_comparison.csv"))
  comparison_data <- data.frame(
    DOY = forecast_horizons,
    RMSE_All_Vars = sapply(forecast_horizons, function(x)
      ifelse(!is.null(metrics_all_vars[[as.character(x)]]),
             metrics_all_vars[[as.character(x)]]$RMSE, NA)),
    RMSE_FFS = sapply(forecast_horizons, function(x)
      ifelse(!is.null(metrics_ffs[[as.character(x)]]),
             metrics_ffs[[as.character(x)]]$RMSE, NA)),
    MAE_All_Vars = sapply(forecast_horizons, function(x)
      ifelse(!is.null(metrics_all_vars[[as.character(x)]]),
             metrics_all_vars[[as.character(x)]]$MAE, NA)),
    MAE_FFS = sapply(forecast_horizons, function(x)
      ifelse(!is.null(metrics_ffs[[as.character(x)]]),
             metrics_ffs[[as.character(x)]]$MAE, NA)),
    R2_All_Vars = sapply(forecast_horizons, function(x)
      ifelse(!is.null(metrics_all_vars[[as.character(x)]]),
             metrics_all_vars[[as.character(x)]]$R2, NA)),
    R2_FFS = sapply(forecast_horizons, function(x)
      ifelse(!is.null(metrics_ffs[[as.character(x)]]),
             metrics_ffs[[as.character(x)]]$R2, NA)),
    Num_Features_All = 8,
    Num_Features_FFS = sapply(forecast_horizons, function(x)
      ifelse(!is.null(selected_features[[as.character(x)]]),
             length(selected_features[[as.character(x)]]), NA))
  )
  write.csv(comparison_data, comparison_csv, row.names = FALSE)
  safe_log(paste("Model comparison saved to:", comparison_csv))
  # Print summary
  cat("\n=== MODEL COMPARISON SUMMARY ===\n")
  print(comparison_data)
  cat("\n=== SELECTED FEATURES BY FFS ===\n")
  print(ffs_summary)
  cat("\n=== AVERAGE VARIABLE IMPORTANCE ===\n")
  avg_importance <- all_importance %>%
    dplyr::group_by(Variable, Model) %>%
    dplyr::summarise(AvgImportance = mean(Importance), .groups = "drop") %>%
    dplyr::arrange(Model, desc(AvgImportance))
  print(avg_importance)
  cat("\n=== PERFORMANCE IMPROVEMENT ===\n")
  for (doy in forecast_horizons) {
    if (!is.null(metrics_all_vars[[as.character(doy)]]) &&
        !is.null(metrics_ffs[[as.character(doy)]])) {
      improvement <- ((metrics_all_vars[[as.character(doy)]]$RMSE -
                         metrics_ffs[[as.character(doy)]]$RMSE) /
                        metrics_all_vars[[as.character(doy)]]$RMSE) * 100
      if (improvement > 0) {
        cat(sprintf("DOY %d: FFS improved RMSE by %.1f%% (%.2f → %.2f t/ha)\n",
                    doy, improvement,
                    metrics_all_vars[[as.character(doy)]]$RMSE,
                    metrics_ffs[[as.character(doy)]]$RMSE))
      } else {
        cat(sprintf("DOY %d: All Variables better by %.1f%% (%.2f vs %.2f t/ha)\n",
                    doy, abs(improvement),
                    metrics_all_vars[[as.character(doy)]]$RMSE,
                    metrics_ffs[[as.character(doy)]]$RMSE))
      }
    }
  }
  safe_log("\n=== TEMPORAL YIELD FORECASTING COMPLETED ===")
}
# 4. Script execution
# -----------------
# Run main() only when this file is executed as a standalone script (e.g. via
# Rscript): sys.nframe() == 0 means we are at the top level, whereas when the
# file is source()d from another session the frame count is > 0 and nothing runs.
if (sys.nframe() == 0) {
  main()
}