{ "name": "307_dropout02_with_doy", "description": "Production Model 307: LSTM-based harvest detection (Phase 3, minimal regularization)", "model_info": { "type": "LSTM", "architecture": "Unidirectional LSTM with dual output heads (imminent + detected)", "total_parameters": 105120, "input_features": 14, "hidden_units": 256, "output_heads": 2, "training_data": "Historical multi-season CI data from multiple estates", "validation_method": "5-fold cross-validation", "device": "GPU (CUDA) or CPU fallback" }, "production_scripts": { "baseline": { "script": "01_harvest_baseline_prediction.py", "frequency": "Run ONCE during setup", "purpose": "Predict all harvest dates (ground truth baseline)", "input": "ci_data_for_python.csv (complete historical data)", "output": "harvest_production_export.xlsx", "time_estimate": "5-30 minutes depending on data volume" }, "monitoring": { "script": "02_harvest_imminent_weekly.py", "frequency": "Run WEEKLY (or daily if required)", "purpose": "Real-time harvest status and imminent alerts", "input": "ci_data_for_python.csv (recent data)", "output": "harvest_imminent_weekly.csv", "time_estimate": "1-5 minutes" } }, "features": [ "CI_raw", "7d_MA", "14d_MA", "21d_MA", "7d_velocity", "14d_velocity", "21d_velocity", "7d_min", "14d_min", "21d_min", "7d_std", "14d_std", "21d_std", "DOY_normalized" ], "model": { "type": "LSTM", "hidden_size": 256, "num_layers": 1, "dropout": 0.2 }, "training": { "imminent_days_before": 28, "imminent_days_before_end": 1, "detected_days_after_start": 1, "detected_days_after_end": 21, "k_folds": 5, "num_epochs": 150, "patience": 20, "learning_rate": 0.001, "batch_size": 4 }, "data": { "csv_path": "../lstm_complete_data.csv", "ci_column": "FitData", "test_fraction": 0.15, "seed": 42 }, "workflow_instructions": { "overview": "Model 307 uses a two-script approach: baseline setup + weekly monitoring", "step_1_baseline": { "description": "Establish historical harvest date reference for all fields", "script": 
"01_harvest_baseline_prediction.py", "when": "Run once after setting up CI extraction pipeline", "command": "conda activate python_gpu && python 01_harvest_baseline_prediction.py", "input_data": "ci_data_for_python.csv (all available historical CI data)", "output_file": "harvest_production_export.xlsx (ground truth baseline)", "columns": [ "field - Field ID", "sub_field - Sub-field designation", "season - Season number (1, 2, 3...)", "year - Year of harvest", "season_start_date - Start of growing season", "season_end_date - End of season (harvest date)", "phase1_harvest_date - Refined harvest prediction" ], "notes": "This becomes your reference - compare all weekly monitoring against this" }, "step_2_monitoring": { "description": "Weekly real-time harvest status and imminent alerts", "script": "02_harvest_imminent_weekly.py", "when": "Run every week (e.g., Mondays) or daily if near harvest", "command": "conda activate python_gpu && python 02_harvest_imminent_weekly.py", "input_data": "ci_data_for_python.csv (latest CI data from 02b conversion)", "output_file": "harvest_imminent_weekly.csv", "columns": [ "field - Field ID", "sub_field - Sub-field designation", "imminent_prob - Likelihood of harvest readiness in next 28 days (0.0-1.0)", "detected_prob - Current harvest probability (0.0-1.0)", "week - ISO week number", "year - Year", "as_of_date - Latest date in dataset", "num_days - Days of history used" ], "alert_thresholds": { "imminent_high": "imminent_prob > 0.7 (prepare harvest)", "imminent_medium": "imminent_prob 0.5-0.7 (monitor closely)", "detected_high": "detected_prob > 0.6 (active harvesting)" } }, "integration_with_r_pipeline": { "before_model_307": [ "Planet 8-band download: download_8band_pu_optimized.ipynb", "CI extraction: 02_ci_extraction.R", "Convert to CSV: 02b_convert_rds_to_csv.R (outputs ci_data_for_python.csv)" ], "model_307_here": [ "BASELINE: 01_harvest_baseline_prediction.py (run once)",
"MONITORING: 02_harvest_imminent_weekly.py (run weekly)" ], "after_model_307": [ "Field analysis: 09b_field_analysis_weekly.R (reads harvest predictions)", "Reports: 10_CI_report_with_kpis.Rmd (includes harvest status)" ] }, "environment_requirements": { "python_env": "python_gpu", "activation": "conda activate python_gpu", "required_packages": [ "torch (GPU-enabled)", "pandas", "numpy", "scikit-learn", "pyyaml", "openpyxl" ], "gpu": "NVIDIA GPU with CUDA (optional - falls back to CPU if unavailable)" } } }