# 79 lines
# 3.8 KiB
# R
# 79 lines
# 3.8 KiB
# R
#!/usr/bin/env Rscript
# Script to examine cross-validation fold results.
#
# Reads the saved list of fitted models and, for each of model1/model2/model3,
# prints the selected mtry, the per-resample metrics table, and the mean, SD
# and range of RMSE, MAE and R² across resamples. Ends with a static note on
# seed sensitivity.

library(dplyr)
library(caret)

# Location of the saved models, relative to the working directory.
models_path <- file.path(
  "laravel_app", "storage", "app", "esa", "reports",
  "yield_prediction", "esa_yield_models.rds"
)

# Fail early with a readable message instead of readRDS()'s connection error.
if (!file.exists(models_path)) {
  stop("Model file not found: ", models_path, call. = FALSE)
}

# Load the saved models
models <- readRDS(models_path)

#' Print the cross-validation summary for one fitted model.
#'
#' @param model Object exposing `bestTune$mtry` and a `resample` data frame
#'   with `RMSE`, `MAE` and `Rsquared` columns (one row per resample). May be
#'   `NULL` when the model is absent from the saved list.
#' @param title Heading text printed between the `===` markers.
#' @return `model`, invisibly; called for its printed output.
print_cv_summary <- function(model, title) {
  cat("\n=== ", title, " ===\n", sep = "")
  cat("Best mtry:", model$bestTune$mtry, "\n\n")

  fold_metrics <- model$resample
  # NROW() is used because it is defined for NULL and non-data-frame values,
  # so the guard itself can never fail.
  if (is.null(fold_metrics) || NROW(fold_metrics) == 0L) {
    cat("No resampling results stored for this model; skipping summary.\n")
    return(invisible(model))
  }

  # The count is taken from the table itself so the label cannot drift from
  # the data. NOTE(review): this equals the fold count only when `resample`
  # holds one row per fold for the final tune - confirm against the training
  # script's trainControl settings.
  cat("Cross-validation results (", nrow(fold_metrics), " folds):\n", sep = "")
  print(fold_metrics)

  rmse <- fold_metrics$RMSE
  mae <- fold_metrics$MAE
  rsq <- fold_metrics$Rsquared

  # Coefficient of variation of the fold RMSEs, as a percentage.
  rmse_cv_pct <- (sd(rmse) / mean(rmse)) * 100

  cat("\nFold Performance Summary:\n")
  cat("RMSE - Mean:", round(mean(rmse), 2),
      "± SD:", round(sd(rmse), 2),
      "(CV:", round(rmse_cv_pct, 1), "%)\n")
  cat("MAE - Mean:", round(mean(mae), 2),
      "± SD:", round(sd(mae), 2), "\n")
  cat("R² - Mean:", round(mean(rsq), 3),
      "± SD:", round(sd(rsq), 3), "\n")

  rmse_range <- round(range(rmse), 2)
  rsq_range <- round(range(rsq), 3)

  cat("\nRange across folds:\n")
  cat("RMSE: [", rmse_range[1], "-", rmse_range[2], "]\n")
  cat("R²: [", rsq_range[1], "-", rsq_range[2], "]\n")

  invisible(model)
}

# Names of the list entries to report on, mapped to their printed headings.
model_titles <- c(
  model1 = "MODEL 1: CI ONLY",
  model2 = "MODEL 2: CI + RATOON",
  model3 = "MODEL 3: FULL MODEL"
)

for (i in seq_along(model_titles)) {
  # Every section after the first is separated by one extra blank line.
  if (i > 1L) {
    cat("\n")
  }
  model_key <- names(model_titles)[i]
  # `[[` matches the name exactly (no partial matching) and yields NULL for a
  # missing entry, which print_cv_summary() reports instead of failing.
  print_cv_summary(models[[model_key]], model_titles[[model_key]])
}

# Check seed info
# Static explanatory text only; nothing here is read from the loaded models.
cat(
  "\n\n=== SEED INFORMATION ===\n",
  "Note: The script uses set.seed(123) for reproducibility\n",
  "This ensures the same fold splits and randomForest initialization\n",
  "Different seeds WILL produce different results because:\n",
  " 1. Different fold assignments in cross-validation\n",
  " 2. Different bootstrap samples in randomForest\n",
  " 3. Different random splits at each tree node\n",
  "\nExpected seed sensitivity:\n",
  " - RMSE variation: ±1-3 t/ha (typical)\n",
  " - R² variation: ±0.02-0.05 (typical)\n",
  " - Fold-to-fold variation within single seed: see CV above\n",
  sep = ""
)