{
  "name": "307_dropout02_with_doy",
  "description": "Production Model 307: LSTM-based harvest detection (Phase 3, minimal regularization)",
  "model_info": {
    "type": "LSTM",
    "architecture": "Unidirectional LSTM with dual output heads (imminent + detected)",
    "total_parameters": 105120,
    "input_features": 14,
    "hidden_units": 256,
    "output_heads": 2,
    "training_data": "Historical multi-season CI data from multiple estates",
    "validation_method": "5-fold cross-validation",
    "device": "GPU (CUDA) or CPU fallback"
  },
  "production_scripts": {
    "baseline": {
      "script": "01_harvest_baseline_prediction.py",
      "frequency": "Run ONCE during setup",
      "purpose": "Predict all harvest dates (ground truth baseline)",
      "input": "ci_data_for_python.csv (complete historical data)",
      "output": "harvest_production_export.xlsx",
      "time_estimate": "5-30 minutes depending on data volume"
    },
    "monitoring": {
      "script": "02_harvest_imminent_weekly.py",
      "frequency": "Run WEEKLY (or daily if required)",
      "purpose": "Real-time harvest status and imminent alerts",
      "input": "ci_data_for_python.csv (recent data)",
      "output": "harvest_imminent_weekly.csv",
      "time_estimate": "1-5 minutes"
    }
  },
  "features": [
    "CI_raw",
    "7d_MA",
    "14d_MA",
    "21d_MA",
    "7d_velocity",
    "14d_velocity",
    "21d_velocity",
    "7d_min",
    "14d_min",
    "21d_min",
    "7d_std",
    "14d_std",
    "21d_std",
    "DAH_normalized"
  ],
  "model": {
    "type": "LSTM",
    "hidden_size": 256,
    "num_layers": 1,
    "dropout": 0.2
  },
  "training": {
    "imminent_days_before": 28,
    "imminent_days_before_end": 1,
    "detected_days_after_start": 1,
    "detected_days_after_end": 21,
    "k_folds": 5,
    "num_epochs": 150,
    "patience": 20,
    "learning_rate": 0.001,
    "batch_size": 4
  },
  "data": {
    "csv_path": "../lstm_complete_data.csv",
    "ci_column": "FitData",
    "test_fraction": 0.15,
    "seed": 42
  },
  "workflow_instructions": {
    "overview": "Model 307 uses a two-script approach: baseline setup + weekly monitoring",
    "step_1_baseline": {
      "description": "Establish historical harvest date reference for all fields",
      "script": "01_harvest_baseline_prediction.py",
      "when": "Run once after setting up CI extraction pipeline",
      "command": "conda activate python_gpu && python 01_harvest_baseline_prediction.py",
      "input_data": "ci_data_for_python.csv (all available historical CI data)",
      "output_file": "harvest_production_export.xlsx (ground truth baseline)",
      "columns": [
        "field - Field ID",
        "sub_field - Sub-field designation",
        "season - Season number (1, 2, 3...)",
        "year - Year of harvest",
        "season_start_date - Start of growing season",
        "season_end_date - End of season (harvest date)",
        "phase1_harvest_date - Refined harvest prediction"
      ],
      "notes": "This becomes your reference - compare all weekly monitoring against this"
    },
    "step_2_monitoring": {
      "description": "Weekly real-time harvest status and imminent alerts",
      "script": "02_harvest_imminent_weekly.py",
      "when": "Run every week (e.g., Mondays) or daily if near harvest",
      "command": "conda activate python_gpu && python 02_harvest_imminent_weekly.py",
      "input_data": "ci_data_for_python.csv (latest CI data from 02b conversion)",
      "output_file": "harvest_imminent_weekly.csv",
      "columns": [
        "field - Field ID",
        "sub_field - Sub-field designation",
        "imminent_prob - Likelihood of harvest readiness in next 28 days (0.0-1.0)",
        "detected_prob - Current harvest probability (0.0-1.0)",
        "week - ISO week number",
        "year - Year",
        "as_of_date - Latest date in dataset",
        "num_days - Days of history used"
      ],
      "alert_thresholds": {
        "imminent_high": "imminent_prob > 0.7 (prepare harvest)",
        "imminent_medium": "imminent_prob 0.5-0.7 (monitor closely)",
        "detected_high": "detected_prob > 0.6 (active harvesting)"
      }
    },
    "integration_with_r_pipeline": {
      "before_model_307": [
        "Planet 8-band download: download_8band_pu_optimized.ipynb",
        "CI extraction: 02_ci_extraction.R",
        "Convert to CSV: 02b_convert_rds_to_csv.R (outputs ci_data_for_python.csv)"
      ],
      "model_307_here": [
        "BASELINE: 01_harvest_baseline_prediction.py (run once)",
        "MONITORING: 02_harvest_imminent_weekly.py (run weekly)"
      ],
      "after_model_307": [
        "Field analysis: 09b_field_analysis_weekly.R (reads harvest predictions)",
        "Reports: 10_CI_report_with_kpis.Rmd (includes harvest status)"
      ]
    },
    "environment_requirements": {
      "python_env": "python_gpu",
      "activation": "conda activate python_gpu",
      "required_packages": [
        "torch (GPU-enabled)",
        "pandas",
        "numpy",
        "scikit-learn",
        "pyyaml",
        "openpyxl"
      ],
      "gpu": "NVIDIA GPU with CUDA (optional - falls back to CPU if unavailable)"
    }
  }
}