#!/usr/bin/env Rscript

# Script to examine cross-validation fold results.
#
# Loads the saved yield-prediction models from an RDS file and, for each of
# the three models, prints the tuned `mtry`, the per-fold resampling metrics,
# and summary statistics (mean, SD, coefficient of variation, range) of those
# metrics. Finishes with a static note about seed sensitivity.

library(dplyr)
library(caret)

#' Print the cross-validation fold report for one model.
#'
#' @param model A fitted model object exposing `bestTune$mtry` and a
#'   `resample` data frame with `RMSE`, `MAE` and `Rsquared` columns
#'   (presumably a caret `train` fit -- confirm against the training script).
#' @param header Banner text printed verbatim before the report, including
#'   any leading/trailing newlines.
#' @return `model`, invisibly; called for its printed output.
report_model_cv <- function(model, header) {
  folds <- model$resample
  rmse <- folds$RMSE
  mae <- folds$MAE
  rsq <- folds$Rsquared

  cat(header)
  cat("Best mtry:", model$bestTune$mtry, "\n\n")

  cat("Cross-validation results (5 folds):\n")
  print(folds)

  cat("\nFold Performance Summary:\n")
  rmse_mean <- mean(rmse)
  rmse_sd <- sd(rmse)
  # CV here is the coefficient of variation of RMSE across folds, in percent.
  cat(
    "RMSE - Mean:", round(rmse_mean, 2),
    "± SD:", round(rmse_sd, 2),
    "(CV:", round((rmse_sd / rmse_mean) * 100, 1), "%)\n"
  )
  cat("MAE - Mean:", round(mean(mae), 2), "± SD:", round(sd(mae), 2), "\n")
  cat("R² - Mean:", round(mean(rsq), 3), "± SD:", round(sd(rsq), 3), "\n")

  cat("\nRange across folds:\n")
  cat("RMSE: [", round(min(rmse), 2), "-", round(max(rmse), 2), "]\n")
  cat("R²: [", round(min(rsq), 3), "-", round(max(rsq), 3), "]\n")

  invisible(model)
}

# Load the saved models
models_path <- "laravel_app/storage/app/esa/reports/yield_prediction/esa_yield_models.rds"
if (!file.exists(models_path)) {
  # Fail with an explicit message instead of readRDS()'s connection error.
  stop("Saved models file not found: ", models_path, call. = FALSE)
}
models <- readRDS(models_path)

# Banner for each model, keyed by its element name in `models`.
# Model 1: CI Only; Model 2: CI + Ratoon; Model 3: Full
model_headers <- c(
  model1 = "\n=== MODEL 1: CI ONLY ===\n",
  model2 = "\n\n=== MODEL 2: CI + RATOON ===\n",
  model3 = "\n\n=== MODEL 3: FULL MODEL ===\n"
)

for (model_name in names(model_headers)) {
  report_model_cv(models[[model_name]], model_headers[[model_name]])
}

# Check seed info
cat("\n\n=== SEED INFORMATION ===\n")
cat("Note: The script uses set.seed(123) for reproducibility\n")
cat("This ensures the same fold splits and randomForest initialization\n")
cat("Different seeds WILL produce different results because:\n")
cat(" 1. Different fold assignments in cross-validation\n")
cat(" 2. Different bootstrap samples in randomForest\n")
cat(" 3. Different random splits at each tree node\n")
cat("\nExpected seed sensitivity:\n")
cat(" - RMSE variation: ±1-3 t/ha (typical)\n")
cat(" - R² variation: ±0.02-0.05 (typical)\n")
cat(" - Fold-to-fold variation within single seed: see CV above\n")