SmartCane/python_app/model_config.json

144 lines
5.1 KiB
JSON

{
"name": "307_dropout02_with_doy",
"description": "Production Model 307: LSTM-based harvest detection (Phase 3, minimal regularization)",
"model_info": {
"type": "LSTM",
"architecture": "Unidirectional LSTM with dual output heads (imminent + detected)",
"total_parameters": 105120,
"input_features": 14,
"hidden_units": 256,
"output_heads": 2,
"training_data": "Historical multi-season CI data from multiple estates",
"validation_method": "5-fold cross-validation",
"device": "GPU (CUDA) or CPU fallback"
},
"production_scripts": {
"baseline": {
"script": "01_harvest_baseline_prediction.py",
"frequency": "Run ONCE during setup",
"purpose": "Predict all harvest dates (ground truth baseline)",
"input": "ci_data_for_python.csv (complete historical data)",
"output": "harvest_production_export.xlsx",
"time_estimate": "5-30 minutes depending on data volume"
},
"monitoring": {
"script": "02_harvest_imminent_weekly.py",
"frequency": "Run WEEKLY (or daily if required)",
"purpose": "Real-time harvest status and imminent alerts",
"input": "ci_data_for_python.csv (recent data)",
"output": "harvest_imminent_weekly.csv",
"time_estimate": "1-5 minutes"
}
},
"features": [
"CI_raw",
"7d_MA",
"14d_MA",
"21d_MA",
"7d_velocity",
"14d_velocity",
"21d_velocity",
"7d_min",
"14d_min",
"21d_min",
"7d_std",
"14d_std",
"21d_std",
"DOY_normalized"
],
"model": {
"type": "LSTM",
"hidden_size": 256,
"num_layers": 1,
"dropout": 0.2
},
"training": {
"imminent_days_before": 28,
"imminent_days_before_end": 1,
"detected_days_after_start": 1,
"detected_days_after_end": 21,
"k_folds": 5,
"num_epochs": 150,
"patience": 20,
"learning_rate": 0.001,
"batch_size": 4
},
"data": {
"csv_path": "../lstm_complete_data.csv",
"ci_column": "FitData",
"test_fraction": 0.15,
"seed": 42
},
"workflow_instructions": {
"overview": "Model 307 uses a two-script approach: baseline setup + weekly monitoring",
"step_1_baseline": {
"description": "Establish historical harvest date reference for all fields",
"script": "01_harvest_baseline_prediction.py",
"when": "Run once after setting up CI extraction pipeline",
"command": "conda activate python_gpu && python 01_harvest_baseline_prediction.py",
"input_data": "ci_data_for_python.csv (all available historical CI data)",
"output_file": "harvest_production_export.xlsx (ground truth baseline)",
"columns": [
"field - Field ID",
"sub_field - Sub-field designation",
"season - Season number (1, 2, 3...)",
"year - Year of harvest",
"season_start_date - Start of growing season",
"season_end_date - End of season (harvest date)",
"phase1_harvest_date - Refined harvest prediction"
],
"notes": "This becomes your reference - compare all weekly monitoring against this"
},
"step_2_monitoring": {
"description": "Weekly real-time harvest status and imminent alerts",
"script": "02_harvest_imminent_weekly.py",
"when": "Run every week (e.g., Mondays) or daily if near harvest",
"command": "conda activate python_gpu && python 02_harvest_imminent_weekly.py",
"input_data": "ci_data_for_python.csv (latest CI data from 02b conversion)",
"output_file": "harvest_imminent_weekly.csv",
"columns": [
"field - Field ID",
"sub_field - Sub-field designation",
"imminent_prob - Likelihood of harvest readiness in next 28 days (0.0-1.0)",
"detected_prob - Current harvest probability (0.0-1.0)",
"week - ISO week number",
"year - Year",
"as_of_date - Latest date in dataset",
"num_days - Days of history used"
],
"alert_thresholds": {
"imminent_high": "imminent_prob > 0.7 (prepare harvest)",
"imminent_medium": "imminent_prob 0.5-0.7 (monitor closely)",
"detected_high": "detected_prob > 0.6 (active harvesting)"
}
},
"integration_with_r_pipeline": {
"before_model_307": [
"Planet 8-band download: download_8band_pu_optimized.ipynb",
"CI extraction: 02_ci_extraction.R",
"Convert to CSV: 02b_convert_rds_to_csv.R (outputs ci_data_for_python.csv)"
],
"model_307_here": [
"BASELINE: 01_harvest_baseline_prediction.py (run once)",
"MONITORING: 02_harvest_imminent_weekly.py (run weekly)"
],
"after_model_307": [
"Field analysis: 09b_field_analysis_weekly.R (reads harvest predictions)",
"Reports: 10_CI_report_with_kpis.Rmd (includes harvest status)"
]
},
"environment_requirements": {
"python_env": "python_gpu",
"activation": "conda activate python_gpu",
"required_packages": [
"torch (GPU-enabled)",
"pandas",
"numpy",
"scikit-learn",
"pyyaml",
"openpyxl"
],
"gpu": "NVIDIA GPU with CUDA (optional - falls back to CPU if unavailable)"
}
}
}