From 7b347ddba625086b6833f54f37df9ad1833bd1a2 Mon Sep 17 00:00:00 2001
From: Timon <timon@resiliencebv.com>
Date: Thu, 15 Jan 2026 09:08:07 +0100
Subject: [PATCH] Fix: Revert run_phase1_growing_window to original GitHub
 version (off-by-one fix) and move test scripts to
 harvest_detection_experiments/tests

---
 python_app/22_harvest_baseline_prediction.py  |   4 +-
 python_app/harvest_date_pred_utils.py         |   1 -
 .../tests/test_batch_model_distributions.py   | 175 +++++++++++
 .../test_growing_window_vs_single_run.py      | 272 +++++++++++++++++
 .../tests/test_model_output_distribution.py   | 276 ++++++++++++++++++
 5 files changed, 725 insertions(+), 3 deletions(-)
 create mode 100644 python_app/harvest_detection_experiments/tests/test_batch_model_distributions.py
 create mode 100644 python_app/harvest_detection_experiments/tests/test_growing_window_vs_single_run.py
 create mode 100644 python_app/harvest_detection_experiments/tests/test_model_output_distribution.py

diff --git a/python_app/22_harvest_baseline_prediction.py b/python_app/22_harvest_baseline_prediction.py
index 7a3a1e2..ac7c435 100644
--- a/python_app/22_harvest_baseline_prediction.py
+++ b/python_app/22_harvest_baseline_prediction.py
@@ -107,9 +107,9 @@ def main():
     
     # [3/4] Run model predictions with two-step detection
     print("\n[3/4] Running two-step harvest detection...")
-    print("      (Using threshold=0.45, consecutive_days=2 - tuned for Model 307 output)")
+    print("      (Using threshold=0.3, consecutive_days=2 - tuned for Model 307 output)")
     refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device,
-                                             phase1_threshold=0.45, phase1_consecutive=2)
+                                             phase1_threshold=0.3, phase1_consecutive=2)
 
     # Build and export
     print("\nBuilding production harvest table...")
diff --git a/python_app/harvest_date_pred_utils.py b/python_app/harvest_date_pred_utils.py
index f66e58e..ebf77c8 100644
--- a/python_app/harvest_date_pred_utils.py
+++ b/python_app/harvest_date_pred_utils.py
@@ -307,7 +307,6 @@ def run_phase1_growing_window(field_data, model, config, scalers, ci_column, dev
                 with torch.no_grad():
                     x_tensor = torch.tensor(features, dtype=torch.float32).unsqueeze(0).to(device)
                     imminent_probs, detected_probs = model(x_tensor)
-                    detected_probs = detected_probs.squeeze(0).cpu().numpy()
                 
                 # Check LAST timestep
                 last_prob = detected_probs[-1]
diff --git a/python_app/harvest_detection_experiments/tests/test_batch_model_distributions.py b/python_app/harvest_detection_experiments/tests/test_batch_model_distributions.py
new file mode 100644
index 0000000..d9540cb
--- /dev/null
+++ b/python_app/harvest_detection_experiments/tests/test_batch_model_distributions.py
@@ -0,0 +1,175 @@
+"""
+BATCH TEST: Sample multiple fields to understand model output distribution
+Purpose: Determine optimal threshold and consecutive_days parameters
+
+This runs the model on 10 random fields and summarizes the results,
+helping us decide what parameters to use in the production script.
+"""
+
+import pandas as pd
+import numpy as np
+import torch
+import sys
+from pathlib import Path
+from harvest_date_pred_utils import (
+    load_model_and_config,
+    extract_features,
+)
+
+
+def test_field(ci_data, field_id, model, config, scalers, device):
+    """Test a single field and return summary statistics"""
+    
+    field_data = ci_data[ci_data['field'] == field_id].sort_values('Date').reset_index(drop=True)
+    if len(field_data) == 0:
+        return None
+    
+    try:
+        ci_column = config['data']['ci_column']
+        features = extract_features(field_data, config['features'], ci_column=ci_column)
+        
+        # Apply scalers
+        for fi, scaler in enumerate(scalers):
+            try:
+                features[:, fi] = scaler.transform(features[:, fi].reshape(-1, 1)).flatten()
+            except:
+                pass
+        
+        # Run model
+        features_tensor = torch.FloatTensor(features).unsqueeze(0).to(device)
+        with torch.no_grad():
+            output = model(features_tensor)
+        
+        if isinstance(output, tuple):
+            detected_probs = output[1].cpu().numpy().flatten()
+        else:
+            detected_probs = output.cpu().numpy().flatten()
+        
+        # Analyze
+        max_prob = detected_probs.max()
+        mean_prob = detected_probs.mean()
+        median_prob = np.median(detected_probs)
+        
+        # Count consecutive days above thresholds
+        consecutive_above = {}
+        for thresh in [0.2, 0.3, 0.4, 0.5]:
+            above = (detected_probs > thresh).astype(int)
+            changes = np.diff(np.concatenate(([0], above, [0])))
+            starts = np.where(changes == 1)[0]
+            ends = np.where(changes == -1)[0]
+            runs = ends - starts if len(starts) > 0 else []
+            consecutive_above[thresh] = np.max(runs) if len(runs) > 0 else 0
+        
+        return {
+            'field': field_id,
+            'max_prob': max_prob,
+            'mean_prob': mean_prob,
+            'median_prob': median_prob,
+            'consecutive_0.2': consecutive_above[0.2],
+            'consecutive_0.3': consecutive_above[0.3],
+            'consecutive_0.4': consecutive_above[0.4],
+            'consecutive_0.5': consecutive_above[0.5],
+            'num_days': len(field_data),
+        }
+    except Exception as e:
+        return None
+
+
+def main():
+    project_name = sys.argv[1] if len(sys.argv) > 1 else "angata"
+    num_samples = int(sys.argv[2]) if len(sys.argv) > 2 else 10
+    
+    # Load data
+    base_storage = Path("../laravel_app/storage/app") / project_name / "Data"
+    ci_data_dir = base_storage / "extracted_ci" / "ci_data_for_python"
+    CI_DATA_FILE = ci_data_dir / "ci_data_for_python.csv"
+    
+    if not CI_DATA_FILE.exists():
+        print(f"ERROR: {CI_DATA_FILE} not found")
+        return
+    
+    print(f"Loading CI data from {CI_DATA_FILE}...")
+    ci_data = pd.read_csv(CI_DATA_FILE, dtype={'field': str})
+    ci_data['Date'] = pd.to_datetime(ci_data['Date'])
+    
+    # Get random sample of fields
+    all_fields = sorted(ci_data['field'].unique())
+    np.random.seed(42)
+    sample_fields = np.random.choice(all_fields, size=min(num_samples, len(all_fields)), replace=False)
+    print(f"Testing {len(sample_fields)} random fields...")
+    
+    # Load model
+    print(f"Loading model...")
+    from harvest_date_pred_utils import load_model_and_config
+    model, config, scalers = load_model_and_config(Path("."))
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    
+    # Test each field
+    results = []
+    for idx, field_id in enumerate(sample_fields, 1):
+        result = test_field(ci_data, field_id, model, config, scalers, device)
+        if result:
+            results.append(result)
+            print(f"  [{idx:2d}/{len(sample_fields)}] Field {field_id:>6s}: "
+                  f"max={result['max_prob']:.3f} mean={result['mean_prob']:.4f} "
+                  f"consec@0.3={result['consecutive_0.3']:2d} consec@0.2={result['consecutive_0.2']:2d}")
+    
+    # Summary statistics
+    results_df = pd.DataFrame(results)
+    
+    print(f"\n{'='*80}")
+    print(f"SUMMARY STATISTICS ({len(results)} fields tested):")
+    print(f"{'='*80}")
+    print(f"\nMax probability (per field):")
+    print(f"  Mean:   {results_df['max_prob'].mean():.4f}")
+    print(f"  Median: {results_df['max_prob'].median():.4f}")
+    print(f"  Min:    {results_df['max_prob'].min():.4f}")
+    print(f"  Max:    {results_df['max_prob'].max():.4f}")
+    
+    print(f"\nMean probability (per field):")
+    print(f"  Mean:   {results_df['mean_prob'].mean():.4f}")
+    print(f"  Median: {results_df['mean_prob'].median():.4f}")
+    
+    print(f"\nConsecutive days above threshold=0.3:")
+    print(f"  Mean:   {results_df['consecutive_0.3'].mean():.1f}")
+    print(f"  Median: {results_df['consecutive_0.3'].median():.1f}")
+    print(f"  Min:    {results_df['consecutive_0.3'].min():.0f}")
+    print(f"  Max:    {results_df['consecutive_0.3'].max():.0f}")
+    
+    print(f"\nConsecutive days above threshold=0.2:")
+    print(f"  Mean:   {results_df['consecutive_0.2'].mean():.1f}")
+    print(f"  Median: {results_df['consecutive_0.2'].median():.1f}")
+    print(f"  Min:    {results_df['consecutive_0.2'].min():.0f}")
+    print(f"  Max:    {results_df['consecutive_0.2'].max():.0f}")
+    
+    print(f"\n{'='*80}")
+    print(f"RECOMMENDATION:")
+    print(f"{'='*80}")
+    
+    # Calculate recommendations based on data
+    median_consec_0_3 = results_df['consecutive_0.3'].median()
+    median_consec_0_2 = results_df['consecutive_0.2'].median()
+    
+    if median_consec_0_3 >= 3:
+        print(f"✓ Threshold=0.3 with consecutive_days=3 should work")
+        print(f"  (median consecutive days: {median_consec_0_3:.0f})")
+    elif median_consec_0_3 >= 2:
+        print(f"✓ Threshold=0.3 with consecutive_days=2 recommended")
+        print(f"  (median consecutive days: {median_consec_0_3:.0f})")
+    elif median_consec_0_2 >= 3:
+        print(f"✓ Threshold=0.2 with consecutive_days=3 recommended")
+        print(f"  (median consecutive days: {median_consec_0_2:.0f})")
+    else:
+        print(f"✓ Threshold=0.2 with consecutive_days=2 recommended")
+        print(f"  (median consecutive days @ 0.2: {median_consec_0_2:.0f})")
+    
+    print(f"\nCurrent production settings: threshold=0.45, consecutive_days=2")
+    print(f"  → These are likely TOO STRICT (only 289 fields detected in batch run)")
+    print(f"\nSuggested adjustment:")
+    print(f"  → Lower threshold to 0.2-0.3")
+    print(f"  → Reduce consecutive_days to 1-2")
+    print(f"  → Re-run batch to get ~1000+ fields detected")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/python_app/harvest_detection_experiments/tests/test_growing_window_vs_single_run.py b/python_app/harvest_detection_experiments/tests/test_growing_window_vs_single_run.py
new file mode 100644
index 0000000..bcda46d
--- /dev/null
+++ b/python_app/harvest_detection_experiments/tests/test_growing_window_vs_single_run.py
@@ -0,0 +1,272 @@
+"""
+COMPARISON TEST: Growing Window vs Single Run approach
+Purpose: Compare harvest dates detected by two different methods
+
+Method 1 (Growing Window - Current):
+  - Day 1: Run model on [day1]
+  - Day 2: Run model on [day1:2]
+  - Day 3: Run model on [day1:3]
+  - ... up to day 477
+  - This matches real-time production where data arrives daily
+  - Takes ~477 model runs per field (SLOW)
+
+Method 2 (Single Run - Proposed):
+  - Run model ONCE on full [day1:477] sequence
+  - Use these probabilities to scan for harvests
+  - This is 477x faster but assumes different LSTM context
+  - May produce different harvest dates
+
+Question: Do these methods detect similar harvest dates or different ones?
+"""
+
+import pandas as pd
+import numpy as np
+import torch
+import sys
+from pathlib import Path
+from harvest_date_pred_utils import (
+    load_model_and_config,
+    extract_features,
+)
+import time
+
+
+def method_growing_window(field_data, model, config, scalers, ci_column, device, threshold=0.3, consecutive_days=2):
+    """Original method: expanding window, run model multiple times"""
+    harvest_dates = []
+    current_pos = 0
+    
+    while current_pos < len(field_data):
+        consecutive_above_threshold = 0
+        
+        for window_end in range(current_pos + 1, len(field_data) + 1):
+            window_data = field_data.iloc[current_pos:window_end].copy().reset_index(drop=True)
+            
+            try:
+                features = extract_features(window_data, config['features'], ci_column=ci_column)
+                
+                # Apply scalers
+                for fi, scaler in enumerate(scalers):
+                    try:
+                        features[:, fi] = scaler.transform(features[:, fi].reshape(-1, 1)).flatten()
+                    except Exception:
+                        pass
+                
+                # Run model
+                with torch.no_grad():
+                    x_tensor = torch.tensor(features, dtype=torch.float32).unsqueeze(0).to(device)
+                    output = model(x_tensor)
+                
+                if isinstance(output, tuple):
+                    _, detected_probs = output
+                    detected_probs = detected_probs.squeeze(0).cpu().numpy()
+                else:
+                    detected_probs = output.squeeze(0).cpu().numpy()
+                
+                # Check LAST timestep
+                last_prob = detected_probs[-1]
+                
+                if last_prob > threshold:
+                    consecutive_above_threshold += 1
+                else:
+                    consecutive_above_threshold = 0
+                
+                # Harvest detected
+                if consecutive_above_threshold >= consecutive_days:
+                    harvest_idx = current_pos + window_end - consecutive_days - 1
+                    harvest_date = field_data.iloc[harvest_idx]['Date']
+                    harvest_dates.append((harvest_date, harvest_idx, last_prob))
+                    
+                    # Reset to next day after harvest
+                    current_pos = current_pos + window_end - consecutive_days
+                    break
+                    
+            except Exception:
+                continue
+        else:
+            break
+    
+    return harvest_dates
+
+
+def method_single_run(field_data, model, config, scalers, ci_column, device, threshold=0.3, consecutive_days=2):
+    """Proposed method: run model once on full sequence"""
+    harvest_dates = []
+    current_pos = 0
+    
+    try:
+        # Extract features ONCE for full dataset
+        features = extract_features(field_data, config['features'], ci_column=ci_column)
+        
+        # Apply scalers
+        for fi, scaler in enumerate(scalers):
+            try:
+                features[:, fi] = scaler.transform(features[:, fi].reshape(-1, 1)).flatten()
+            except Exception:
+                pass
+        
+        # Run model ONCE to get all probabilities
+        with torch.no_grad():
+            x_tensor = torch.tensor(features, dtype=torch.float32).unsqueeze(0).to(device)
+            output = model(x_tensor)
+        
+        if isinstance(output, tuple):
+            _, detected_probs = output
+            detected_probs = detected_probs.squeeze(0).cpu().numpy()
+        else:
+            detected_probs = output.squeeze(0).cpu().numpy()
+        
+        # Scan forward looking for harvests
+        while current_pos < len(field_data):
+            consecutive_above_threshold = 0
+            
+            for pos in range(current_pos, len(field_data)):
+                prob = detected_probs[pos]
+                
+                if prob > threshold:
+                    consecutive_above_threshold += 1
+                else:
+                    consecutive_above_threshold = 0
+                
+                # Harvest detected
+                if consecutive_above_threshold >= consecutive_days:
+                    harvest_idx = pos - consecutive_days + 1
+                    harvest_date = field_data.iloc[harvest_idx]['Date']
+                    harvest_dates.append((harvest_date, harvest_idx, prob))
+                    
+                    # Move anchor point past this harvest
+                    current_pos = harvest_idx + 1
+                    break
+            else:
+                # No harvest found in remaining data
+                break
+    
+    except Exception as e:
+        pass
+    
+    return harvest_dates
+
+
+def compare_field(field_id, ci_data, model, config, scalers, device, threshold=0.3):
+    """Compare both methods for a single field"""
+    field_data = ci_data[ci_data['field'] == field_id].sort_values('Date').reset_index(drop=True)
+    
+    if len(field_data) < 10:
+        return None
+    
+    ci_column = config['data']['ci_column']
+    
+    # Method 1: Growing window (SLOW)
+    print(f"\n  Growing Window method...", end=" ", flush=True)
+    start = time.time()
+    growing_harvests = method_growing_window(field_data, model, config, scalers, ci_column, device, threshold)
+    time_growing = time.time() - start
+    print(f"({time_growing:.2f}s, {len(field_data)} model runs)")
+    
+    # Method 2: Single run (FAST)
+    print(f"  Single Run method...", end=" ", flush=True)
+    start = time.time()
+    single_harvests = method_single_run(field_data, model, config, scalers, ci_column, device, threshold)
+    time_single = time.time() - start
+    print(f"({time_single:.2f}s, 1 model run)")
+    
+    return {
+        'field': field_id,
+        'num_days': len(field_data),
+        'growing_harvests': growing_harvests,
+        'single_harvests': single_harvests,
+        'time_growing': time_growing,
+        'time_single': time_single,
+        'speedup': time_growing / time_single if time_single > 0 else 0,
+    }
+
+
+def main():
+    project_name = sys.argv[1] if len(sys.argv) > 1 else "angata"
+    num_samples = int(sys.argv[2]) if len(sys.argv) > 2 else 3
+    threshold = float(sys.argv[3]) if len(sys.argv) > 3 else 0.3
+    
+    # Load data
+    base_storage = Path("../laravel_app/storage/app") / project_name / "Data"
+    ci_data_dir = base_storage / "extracted_ci" / "ci_data_for_python"
+    CI_DATA_FILE = ci_data_dir / "ci_data_for_python.csv"
+    
+    if not CI_DATA_FILE.exists():
+        print(f"ERROR: {CI_DATA_FILE} not found")
+        return
+    
+    print(f"Loading CI data...")
+    ci_data = pd.read_csv(CI_DATA_FILE, dtype={'field': str})
+    ci_data['Date'] = pd.to_datetime(ci_data['Date'])
+    
+    # Get sample of fields
+    all_fields = sorted(ci_data['field'].unique())
+    np.random.seed(42)
+    sample_fields = np.random.choice(all_fields, size=min(num_samples, len(all_fields)), replace=False)
+    
+    # Load model
+    print(f"Loading model...")
+    model, config, scalers = load_model_and_config(Path("."))
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    
+    print(f"\n{'='*80}")
+    print(f"COMPARING METHODS (threshold={threshold}, consecutive_days=2)")
+    print(f"{'='*80}")
+    
+    all_results = []
+    for idx, field_id in enumerate(sample_fields, 1):
+        print(f"\n[{idx}/{len(sample_fields)}] Field {field_id}")
+        result = compare_field(field_id, ci_data, model, config, scalers, device, threshold)
+        
+        if result:
+            all_results.append(result)
+            
+            print(f"\n    Growing Window found {len(result['growing_harvests'])} harvests:")
+            for date, idx, prob in result['growing_harvests']:
+                print(f"      - {date.date()}: prob={prob:.4f}")
+            
+            print(f"\n    Single Run found {len(result['single_harvests'])} harvests:")
+            for date, idx, prob in result['single_harvests']:
+                print(f"      - {date.date()}: prob={prob:.4f}")
+            
+            print(f"\n    ⏱ Speed comparison:")
+            print(f"      Growing Window: {result['time_growing']:.2f}s ({result['num_days']} days processed)")
+            print(f"      Single Run:     {result['time_single']:.2f}s (1 run)")
+            print(f"      Speedup:        {result['speedup']:.0f}x faster")
+            
+            # Compare harvests
+            growing_dates = [d for d, _, _ in result['growing_harvests']]
+            single_dates = [d for d, _, _ in result['single_harvests']]
+            
+            if growing_dates == single_dates:
+                print(f"\n    ✓ IDENTICAL: Both methods found the same harvest dates")
+            else:
+                print(f"\n    ✗ DIFFERENT: Methods found different harvest dates")
+                print(f"      Growing only: {[d for d in growing_dates if d not in single_dates]}")
+                print(f"      Single only:  {[d for d in single_dates if d not in growing_dates]}")
+    
+    # Summary
+    print(f"\n{'='*80}")
+    print(f"SUMMARY ({len(all_results)} fields tested)")
+    print(f"{'='*80}")
+    
+    identical = sum(1 for r in all_results if [d for d, _, _ in r['growing_harvests']] == [d for d, _, _ in r['single_harvests']])
+    different = len(all_results) - identical
+    
+    print(f"\nIdentical results: {identical}/{len(all_results)}")
+    print(f"Different results: {different}/{len(all_results)}")
+    
+    if all_results:
+        avg_speedup = np.mean([r['speedup'] for r in all_results])
+        print(f"\nAverage speedup: {avg_speedup:.0f}x")
+        print(f"\nConclusion:")
+        if identical == len(all_results):
+            print(f"  ✓ Methods are EQUIVALENT - Single run approach is safe to use")
+            print(f"    Recommend switching to single run for {avg_speedup:.0f}x faster execution")
+        else:
+            print(f"  ✗ Methods produce DIFFERENT results - Need to understand why")
+            print(f"    Growing window is slower but may be more accurate for live deployment")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/python_app/harvest_detection_experiments/tests/test_model_output_distribution.py b/python_app/harvest_detection_experiments/tests/test_model_output_distribution.py
new file mode 100644
index 0000000..571a32c
--- /dev/null
+++ b/python_app/harvest_detection_experiments/tests/test_model_output_distribution.py
@@ -0,0 +1,276 @@
+"""
+TEST SCRIPT: Inspect raw model output distributions for a single field
+Purpose: Diagnose why thresholding is failing and harvest dates are wrong
+
+This shows:
+1. Distribution of harvest probability scores (0-1 range)
+2. How often consecutive_days=3 is actually achievable
+3. Actual season boundaries detected
+4. Recommendations for threshold adjustment
+
+Usage:
+  python test_model_output_distribution.py angata [field_id]
+  
+Examples:
+  python test_model_output_distribution.py angata 1
+  python test_model_output_distribution.py angata 10042
+"""
+
+import pandas as pd
+import numpy as np
+import torch
+import sys
+from pathlib import Path
+from harvest_date_pred_utils import (
+    load_model_and_config,
+    extract_features,
+)
+
+
+def analyze_single_field(ci_data, field_id, model, config, scalers, device):
+    """Analyze raw model outputs for a single field"""
+    
+    # Filter to field
+    field_data = ci_data[ci_data['field'] == field_id].sort_values('Date').reset_index(drop=True)
+    
+    if len(field_data) == 0:
+        print(f"ERROR: No data for field {field_id}")
+        return
+    
+    print(f"\n{'='*80}")
+    print(f"FIELD: {field_id}")
+    print(f"{'='*80}")
+    print(f"Date range: {field_data['Date'].min()} to {field_data['Date'].max()}")
+    print(f"Total days: {len(field_data)}")
+    
+    # Extract features (same as main pipeline)
+    try:
+        ci_column = config['data']['ci_column']
+        features = extract_features(field_data, config['features'], ci_column=ci_column)
+        if features is None or len(features) == 0:
+            print(f"ERROR: extract_features returned empty for field {field_id}")
+            return
+    except Exception as e:
+        print(f"ERROR extracting features: {e}")
+        import traceback
+        traceback.print_exc()
+        return
+    
+    # Apply scalers (CRITICAL - same as production code)
+    try:
+        for fi, scaler in enumerate(scalers):
+            try:
+                features[:, fi] = scaler.transform(features[:, fi].reshape(-1, 1)).flatten()
+            except Exception as e:
+                print(f"  Warning: Scaler {fi} failed: {e}")
+                pass
+    except Exception as e:
+        print(f"ERROR applying scalers: {e}")
+        import traceback
+        traceback.print_exc()
+        return
+    
+    # Run model
+    features_tensor = torch.FloatTensor(features).unsqueeze(0).to(device)
+    with torch.no_grad():
+        output = model(features_tensor)
+    
+    # Convert to numpy (handle different output formats)
+    # Model has TWO heads: imminent_probs, detected_probs
+    if isinstance(output, tuple):
+        imminent_probs = output[0].cpu().numpy().flatten()
+        detected_probs = output[1].cpu().numpy().flatten()
+        probs = detected_probs  # Use DETECTED head (current harvest), not imminent (future)
+        print(f"Model outputs TWO heads: imminent + detected")
+        print(f"  Using DETECTED head for harvest detection (sparse spikes at harvest)")
+    else:
+        # Fallback for single output
+        probs = output.cpu().numpy().flatten()
+        print(f"Model output: single head")
+    
+    print(f"\nModel output shape: {probs.shape}")
+    print(f"Output range: {probs.min():.4f} to {probs.max():.4f}")
+    
+    # Statistics
+    print(f"\nHarvest probability statistics:")
+    print(f"  Mean: {probs.mean():.4f}")
+    print(f"  Median: {np.median(probs):.4f}")
+    print(f"  Std Dev: {probs.std():.4f}")
+    print(f"  Min: {probs.min():.4f}")
+    print(f"  Max: {probs.max():.4f}")
+    
+    # Distribution by threshold
+    print(f"\nDistribution by threshold:")
+    thresholds = [0.2, 0.3, 0.4, 0.45, 0.5, 0.6, 0.7]
+    for thresh in thresholds:
+        count = np.sum(probs > thresh)
+        pct = 100 * count / len(probs)
+        print(f"  > {thresh}: {count:4d} days ({pct:5.1f}%)")
+    
+    # Consecutive days analysis (key metric)
+    print(f"\nConsecutive days above threshold (Phase 1 requirement):")
+    for thresh in [0.3, 0.4, 0.45, 0.5]:
+        above = (probs > thresh).astype(int)
+        # Find consecutive runs
+        changes = np.diff(np.concatenate(([0], above, [0])))
+        starts = np.where(changes == 1)[0]
+        ends = np.where(changes == -1)[0]
+        runs = ends - starts
+        
+        if len(runs) > 0:
+            max_run = np.max(runs)
+            print(f"  Threshold {thresh}: max consecutive = {max_run} days (need 3 for Phase 1)")
+            if len(runs) > 5:
+                print(f"               {len(runs)} separate runs detected (seasons?)")
+        else:
+            print(f"  Threshold {thresh}: no runs detected")
+    
+    # Show top predictions
+    print(f"\nTop 10 harvest probability peaks:")
+    top_indices = np.argsort(probs)[-10:][::-1]
+    for rank, idx in enumerate(top_indices, 1):
+        date = field_data.iloc[idx]['Date']
+        prob = probs[idx]
+        print(f"  {rank:2d}. Day {idx:4d} ({date}): {prob:.4f}")
+    
+    # Show timeline
+    print(f"\nTimeline of probabilities (sampling every 10 days):")
+    for idx in range(0, len(probs), max(1, len(probs) // 20)):
+        date_str = field_data.iloc[idx]['Date'].strftime("%Y-%m-%d")
+        ci_value = field_data.iloc[idx]['FitData']
+        prob = probs[idx]
+        bar = '-' * int(prob * 35)
+        try:
+            print(f"  {date_str} CI={ci_value:.4f} Prob={prob:.4f} {bar}")
+        except UnicodeEncodeError:
+            print(f"  {date_str} CI={ci_value:.4f} Prob={prob:.4f}")
+    
+    # FULL DAILY PROBABILITIES WITH CI VALUES
+    print(f"\n{'='*80}")
+    print(f"FULL DAILY PROBABILITIES WITH CI VALUES ({len(probs)} days):")
+    print(f"{'='*80}")
+    print(f"{'Day':>4} {'Date':<12} {'CI':>8} {'Probability':>12} {'Visual':<40}")
+    print(f"{'-'*100}")
+    
+    for idx in range(len(probs)):
+        date_str = field_data.iloc[idx]['Date'].strftime("%Y-%m-%d")
+        ci_value = field_data.iloc[idx]['FitData']
+        prob = probs[idx]
+        bar = '-' * int(prob * 35)  # Use dashes instead of█ for Unicode safety
+        try:
+            print(f"{idx:4d} {date_str} {ci_value:8.4f} {prob:12.4f} {bar}")
+        except UnicodeEncodeError:
+            # Fallback for Windows encoding issues
+            print(f"{idx:4d} {date_str} {ci_value:8.4f} {prob:12.4f}")
+    
+    print(f"{'-'*100}")
+    
+    # Find valleys (days with low probabilities that could indicate season boundaries)
+    print(f"\nDays with LOWEST probabilities (potential season boundaries):")
+    valleys_threshold = 0.5  # Days below this might be season breaks
+    valley_indices = np.where(probs < valleys_threshold)[0]
+    
+    if len(valley_indices) > 0:
+        print(f"  Found {len(valley_indices)} days below {valleys_threshold}")
+        # Get valleys sorted by probability
+        valley_data = [(idx, probs[idx], field_data.iloc[idx]['Date']) for idx in valley_indices]
+        valley_data.sort(key=lambda x: x[1])  # Sort by probability (lowest first)
+        
+        print(f"\n  Bottom 20 lowest-probability days:")
+        for rank, (idx, prob, date) in enumerate(valley_data[:20], 1):
+            print(f"    {rank:2d}. Day {idx:3d} ({date}): {prob:.4f}")
+    else:
+        print(f"  None - all days above {valleys_threshold}")
+    
+    # Identify likely harvest dates by finding local minima (valleys between growing periods)
+    print(f"\nLikely season boundaries (local minima approach):")
+    # Find indices where probability suddenly drops (derivative)
+    if len(probs) > 7:
+        smoothed = pd.Series(probs).rolling(window=7, center=True).mean()
+        derivatives = smoothed.diff().fillna(0)
+        
+        # Find big drops (where derivative is very negative)
+        drops = np.where(derivatives < -0.2)[0]  # Significant downward moves
+        
+        if len(drops) > 0:
+            print(f"  Found {len(drops)} significant drops (prob falling by 0.2+):")
+            for idx in drops[:10]:  # Show first 10
+                date = field_data.iloc[idx]['Date']
+                before = probs[max(0, idx-1)]
+                after = probs[idx]
+                print(f"    Day {idx:3d} ({date}): {before:.4f} → {after:.4f}")
+        else:
+            print(f"  No significant drops detected (probabilities don't dip much)")
+    
+    # Show which harvest dates would be detected at different thresholds
+    print(f"\nHarvest detection (first day where prob > threshold for N consecutive days):")
+    for thresh in [0.2, 0.3, 0.4, 0.5, 0.6]:
+        for consec in [1, 2, 3]:
+            above = (probs > thresh).astype(int)
+            changes = np.diff(np.concatenate(([0], above, [0])))
+            starts = np.where(changes == 1)[0]
+            
+            if len(starts) > 0:
+                # For each harvest start, find where it would trigger with consecutive_days
+                detected_harvests = []
+                for start_idx in starts:
+                    # Check if we have enough consecutive days
+                    if start_idx + consec - 1 < len(probs):
+                        if all(probs[start_idx:start_idx + consec] > thresh):
+                            harvest_date = field_data.iloc[start_idx]['Date']
+                            detected_harvests.append((start_idx, harvest_date))
+                
+                if detected_harvests:
+                    first_idx, first_date = detected_harvests[0]
+                    print(f"  Threshold={thresh}, consecutive={consec}: {first_date} (day {first_idx})")
+            else:
+                print(f"  Threshold={thresh}, consecutive={consec}: None detected")
+
+
+def main():
+    project_name = sys.argv[1] if len(sys.argv) > 1 else "angata"
+    field_id = sys.argv[2] if len(sys.argv) > 2 else None
+    
+    # Paths
+    base_storage = Path("../laravel_app/storage/app") / project_name / "Data"
+    ci_data_dir = base_storage / "extracted_ci" / "ci_data_for_python"
+    CI_DATA_FILE = ci_data_dir / "ci_data_for_python.csv"
+    
+    if not CI_DATA_FILE.exists():
+        print(f"ERROR: {CI_DATA_FILE} not found")
+        return
+    
+    print(f"Loading CI data from {CI_DATA_FILE}...")
+    ci_data = pd.read_csv(CI_DATA_FILE, dtype={'field': str})
+    ci_data['Date'] = pd.to_datetime(ci_data['Date'])
+    
+    if field_id is None:
+        # Show first 10 fields if none specified
+        fields = sorted(ci_data['field'].unique())[:10]
+        field_id = fields[0]
+        print(f"No field specified. Testing first field: {field_id}")
+        print(f"Available fields: {', '.join(fields)}")
+    
+    # Load model
+    print(f"\nLoading model...")
+    from harvest_date_pred_utils import load_model_and_config
+    model, config, scalers = load_model_and_config(Path("."))
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Device: {device}")
+    
+    # Analyze
+    analyze_single_field(ci_data, str(field_id), model, config, scalers, device)
+    
+    print(f"\n{'='*80}")
+    print("INTERPRETATION:")
+    print("  If max consecutive < 3 for threshold=0.5:")
+    print("    → Lower threshold to 0.3-0.4, or reduce consecutive_days to 1-2")
+    print("  If multiple runs detected but harvest_date == first date:")
+    print("    → Season detection is failing (check extract_features)")
+    print("  If peaks are scattered randomly:")
+    print("    → Model may need retraining or data validation")
+    print("="*80)
+
+
+if __name__ == "__main__":
+    main()