SmartCane/r_app/check_cv_results.R
2026-01-06 14:17:37 +01:00

79 lines
3.8 KiB
R

#!/usr/bin/env Rscript
# Script to examine cross-validation fold results
library(dplyr)
library(caret)
# Load the saved models.
# The path is relative, so it only resolves when the script is started from
# the directory that contains `laravel_app/`. Check up front and report both
# the path and the current working directory, which is more actionable than
# the connection error readRDS() raises for a missing file.
models_path <- "laravel_app/storage/app/esa/reports/yield_prediction/esa_yield_models.rds"
if (!file.exists(models_path)) {
  stop(
    "Model file not found: ", models_path,
    " (working directory: ", getwd(), ")",
    call. = FALSE
  )
}
models <- readRDS(models_path)
# ---------------------------------------------------------------------------
# Per-model cross-validation report
#
# Each entry of `models` is read through its `$bestTune` and `$resample`
# components (presumably caret `train` objects -- confirm against the script
# that wrote the .rds file). `$resample` is used as a data frame with one row
# per fold and the columns RMSE, MAE and Rsquared.
# ---------------------------------------------------------------------------

# Print mean and standard deviation of one per-fold metric.
#
# values  : numeric vector with one value per fold, or NULL if the metric
#           column is absent from the resample table.
# label   : text printed in front of the statistics, e.g. "RMSE".
# digits  : number of decimals passed to round().
# show_cv : if TRUE, also print the coefficient of variation (SD / mean, in %).
print_metric_summary <- function(values, label, digits, show_cv = FALSE) {
  if (is.null(values)) {
    # Say so explicitly instead of printing NA together with a warning.
    cat(label, "- not available in resample results\n")
    return(invisible(NULL))
  }
  avg <- mean(values)
  spread <- sd(values)
  if (show_cv) {
    cat(label, "- Mean:", round(avg, digits),
        "± SD:", round(spread, digits),
        "(CV:", round((spread / avg) * 100, 1), "%)\n")
  } else {
    cat(label, "- Mean:", round(avg, digits),
        "± SD:", round(spread, digits), "\n")
  }
  invisible(NULL)
}

# Print the [min - max] range of one per-fold metric.
# Silently skips a metric whose column is absent (min(NULL) would yield Inf
# plus a warning).
print_metric_range <- function(values, label, digits) {
  if (is.null(values)) {
    return(invisible(NULL))
  }
  cat(paste0(label, ":"), "[", round(min(values), digits), "-",
      round(max(values), digits), "]\n")
  invisible(NULL)
}

# Print the full cross-validation report for one model.
#
# model : one element of `models`; only `$bestTune$mtry` and `$resample`
#         are read.
# title : heading text placed between the "===" markers.
#
# Returns NULL invisibly; called for its printed output.
report_cv_results <- function(model, title) {
  cat("\n=== ", title, " ===\n", sep = "")

  cv <- model$resample
  if (is.null(cv)) {
    # Keep going with the remaining models rather than aborting the script.
    cat("No cross-validation results available for this model\n")
    return(invisible(NULL))
  }

  cat("Best mtry:", model$bestTune$mtry, "\n\n")

  # The fold count is taken from the resample table itself so the label
  # always matches the rows printed underneath it.
  cat("Cross-validation results (", nrow(cv), " folds):\n", sep = "")
  print(cv)

  cat("\nFold Performance Summary:\n")
  print_metric_summary(cv$RMSE, "RMSE", 2, show_cv = TRUE)
  print_metric_summary(cv$MAE, "MAE", 2)
  print_metric_summary(cv$Rsquared, "R²", 3)

  cat("\nRange across folds:\n")
  print_metric_range(cv$RMSE, "RMSE", 2)
  print_metric_range(cv$Rsquared, "R²", 3)

  invisible(NULL)
}

# Model 1: CI Only
report_cv_results(models$model1, "MODEL 1: CI ONLY")

# Extra blank line separates consecutive model sections.
cat("\n")

# Model 2: CI + Ratoon
report_cv_results(models$model2, "MODEL 2: CI + RATOON")

cat("\n")

# Model 3: Full
report_cv_results(models$model3, "MODEL 3: FULL MODEL")
# Seed information
# Static explanatory text only: nothing below is read from `models`.
# NOTE(review): the seed value (123) and the "typical" variation figures are
# hard-coded here; confirm they still match the training script.
cat(
  "\n\n=== SEED INFORMATION ===\n",
  "Note: The script uses set.seed(123) for reproducibility\n",
  "This ensures the same fold splits and randomForest initialization\n",
  "Different seeds WILL produce different results because:\n",
  " 1. Different fold assignments in cross-validation\n",
  " 2. Different bootstrap samples in randomForest\n",
  " 3. Different random splits at each tree node\n",
  "\nExpected seed sensitivity:\n",
  " - RMSE variation: ±1-3 t/ha (typical)\n",
  " - R² variation: ±0.02-0.05 (typical)\n",
  " - Fold-to-fold variation within single seed: see CV above\n",
  sep = ""
)