#!/usr/bin/env Rscript
# SmartCane/r_app/check_cv_results.R
# Script to examine cross-validation fold results

library(dplyr)
library(caret)

# Read the previously fitted models back from disk.
# The path is relative, so it is resolved against the current working
# directory of the R session running this script.
models_path <- "laravel_app/storage/app/esa/reports/yield_prediction/esa_yield_models.rds"
models <- readRDS(file = models_path)

# Per-model cross-validation reports ----

# Print the cross-validation report for one fitted model.
#
# Args:
#   model: one element of `models`. The report reads `model$bestTune$mtry`
#          and the `model$resample` data frame (columns RMSE, MAE, Rsquared).
#          NOTE(review): this looks like the object caret::train() returns;
#          confirm against the script that wrote the .rds file.
#   title: text shown between the "===" markers of the section header.
#
# Returns:
#   The resample data frame, invisibly; NULL (invisibly) when the model or
#   its resample table is missing and only a notice was printed.
print_cv_report <- function(model, title) {
  cat("=== ", title, " ===\n", sep = "")

  # A missing list element would otherwise surface as NA/Inf values plus
  # warnings from mean()/min() on NULL; say so explicitly instead.
  if (is.null(model)) {
    cat("Model not found in the loaded object - skipping\n")
    return(invisible(NULL))
  }

  cat("Best mtry:", model$bestTune$mtry, "\n\n")

  folds <- model$resample
  if (is.null(folds) || nrow(folds) == 0L) {
    cat("No per-fold resampling results stored for this model\n")
    return(invisible(NULL))
  }

  rmse <- folds$RMSE
  mae <- folds$MAE
  r2 <- folds$Rsquared

  # The fold count is taken from the table itself so the label cannot
  # disagree with the rows printed below it.
  cat("Cross-validation results (", nrow(folds), " folds):\n", sep = "")
  print(folds)

  cat("\nFold Performance Summary:\n")
  # "CV" on the RMSE line is the coefficient of variation: SD / mean, in %.
  cat("RMSE - Mean:", round(mean(rmse), 2),
      "± SD:", round(sd(rmse), 2),
      "(CV:", round(sd(rmse) / mean(rmse) * 100, 1), "%)\n")
  cat("MAE  - Mean:", round(mean(mae), 2),
      "± SD:", round(sd(mae), 2), "\n")
  cat("R²   - Mean:", round(mean(r2), 3),
      "± SD:", round(sd(r2), 3), "\n")

  cat("\nRange across folds:\n")
  cat("RMSE: [", round(min(rmse), 2), "-", round(max(rmse), 2), "]\n")
  cat("R²:   [", round(min(r2), 3), "-", round(max(r2), 3), "]\n")

  invisible(folds)
}

# Element name inside `models` -> section title, in reporting order.
model_titles <- c(
  model1 = "MODEL 1: CI ONLY",
  model2 = "MODEL 2: CI + RATOON",
  model3 = "MODEL 3: FULL MODEL"
)

for (model_name in names(model_titles)) {
  # The first section is preceded by one blank line, later ones by two.
  cat(if (model_name == names(model_titles)[1]) "\n" else "\n\n")
  # `[[` matches the element name exactly (no partial matching as with `$`).
  print_cv_report(models[[model_name]], model_titles[[model_name]])
}

# Closing notes on random-seed sensitivity.
# This section only prints fixed explanatory text; it does not inspect the
# loaded models. Empty strings in the vector produce the blank lines.
seed_notes <- c(
  "",
  "",
  "=== SEED INFORMATION ===",
  "Note: The script uses set.seed(123) for reproducibility",
  "This ensures the same fold splits and randomForest initialization",
  "Different seeds WILL produce different results because:",
  "  1. Different fold assignments in cross-validation",
  "  2. Different bootstrap samples in randomForest",
  "  3. Different random splits at each tree node",
  "",
  "Expected seed sensitivity:",
  "  - RMSE variation: ±1-3 t/ha (typical)",
  "  - R² variation: ±0.02-0.05 (typical)",
  "  - Fold-to-fold variation within single seed: see CV above"
)

# Terminate every line with a newline and emit them in a single call.
cat(paste0(seed_notes, "\n"), sep = "")