Enhance harvest detection logic and testing framework

- Updated `detect_mosaic_mode` function to check for grid-size subdirectories in addition to tile-named files. - Added comprehensive tests for DOY reset logic in `test_doy_logic.py`. - Implemented feature extraction tests in `test_feature_extraction.py`. - Created tests for growing window method in `test_growing_window_only.py`. - Developed a complete model inference test in `test_model_inference.py`. - Added a debug script for testing two-step refinement logic in `test_script22_debug.py`.
2026-01-15 14:30:54 +01:00 · 2026-01-15 14:30:54 +01:00 · fabbf3214d
parent d6448b8703
commit fabbf3214d
11 changed files with 1416 additions and 187 deletions
--- a/python_app/22_harvest_baseline_prediction.py
+++ b/python_app/22_harvest_baseline_prediction.py
@ -26,7 +26,9 @@ This is your GROUND TRUTH - compare all future predictions against this baseline

 Usage:
  python 01_harvest_baseline_prediction.py [project_name]
-  conda activate pytorch_gpu; cd "C:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane_v2\smartcane\python_app"; python 22_harvest_baseline_prediction.py angata
+  conda activate pytorch_gpu
+  cd python_app
+  python 22_harvest_baseline_prediction.py angata
  
  Examples:
    python 01_harvest_baseline_prediction.py angata
@ -108,7 +110,7 @@ def main():
    
    # [3/4] Run model predictions with two-step detection
    print("\n[3/4] Running two-step harvest detection...")
-    print("      (Using threshold=0.3, consecutive_days=2 - tuned for Model 307 output)")
+    print("      (Using threshold=0.3, consecutive_days=2 - tuned baseline with DOY reset)")
    refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device,
                                             phase1_threshold=0.3, phase1_consecutive=2)

--- a/python_app/harvest_date_pred_utils.py
+++ b/python_app/harvest_date_pred_utils.py
@ -184,18 +184,47 @@ def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> p
    return features.fillna(0)


-def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column: str = 'FitData') -> np.ndarray:
-    """Extract and return specified features as numpy array."""
+def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column: str = 'FitData', 
+                     season_anchor_day: int = None, lookback_start: int = 0) -> np.ndarray:
+    """
+    Extract and return specified features as numpy array.
+    
+    Args:
+        data_df: DataFrame with Date and CI data (may be a window after a harvest)
+        feature_names: List of feature names to extract
+        ci_column: Name of CI column
+        season_anchor_day: Day in FULL sequence where this season started (for DOY reset)
+                          DOY will be recalculated as: 1, 2, 3, ... from this point
+        lookback_start: Starting index in original full data (for season reset calculation)
+    
+    Returns:
+        NumPy array of shape (len(data_df), len(feature_names))
+    """
    # Compute all CI features
    ci_series = data_df[ci_column].astype(float)
-    doy_series = pd.Series(range(len(data_df)), index=data_df.index) % 365 if 'DOY_normalized' in feature_names else None
+    
+    # Compute DOY (age/days since season start) - NOT day-of-year!
+    # DOY is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365)
+    # It only resets to 1 after a harvest is detected (new season)
+    doy_series = None
+    if 'DOY_normalized' in feature_names:
+        if season_anchor_day is not None and lookback_start >= season_anchor_day:
+            # Season was reset after harvest. Recalculate DOY as simple counter from 1
+            # This is a window starting at or after harvest, so DOY should be: 1, 2, 3, ...
+            doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
+        elif 'DOY' in data_df.columns:
+            # Use DOY directly from CSV - already calculated as continuous age counter
+            doy_series = pd.Series(data_df['DOY'].astype(float).values, index=data_df.index)
+        else:
+            # Fallback: create continuous age counter (1, 2, 3, ...)
+            doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
    
    all_features = compute_ci_features(ci_series, doy_series)
    
    # Select requested features
    requested = [f for f in feature_names if f in all_features.columns]
    if not requested:
-        raise ValueError(f"No valid features found. Requested: {feature_names}")
+        raise ValueError(f"No valid features found. Requested: {feature_names}, Available: {all_features.columns.tolist()}")
    
    return all_features[requested].values

@ -274,42 +303,63 @@ def load_harvest_data(data_file: Path) -> pd.DataFrame:
 def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device, 
                              threshold=0.45, consecutive_days=2):
    """
-    Phase 1: Growing window detection with threshold crossing.
-    Expand window day-by-day, check last timestep's detected_prob.
-    When N consecutive days have prob > threshold, harvest detected.
+    Phase 1: Growing window detection with DOY season reset.
+    
+    For each detected harvest, reset DOY counter for the next season.
+    This allows the model to detect multiple consecutive harvests in multi-year data.
+    
+    Algorithm:
+    1. Start with season_anchor_day = 0 (DOY 1 at day 0)
+    2. Expand window: [0:1], [0:2], [0:3], ... until threshold crossed
+    3. When harvest detected: record date, set new season_anchor = day after harvest
+    4. Continue from next season start
    
    Args:
-        threshold (float): Probability threshold (default 0.45, tuned for Model 307)
-        consecutive_days (int): Required consecutive days above threshold (default 2, reduced from 3 for robustness)
+        threshold (float): Probability threshold (default 0.45)
+        consecutive_days (int): Required consecutive days above threshold (default 2)
    
    Returns list of (harvest_date, harvest_idx) tuples.
    """
    harvest_dates = []
+    season_anchor_day = 0  # DOY 1 starts at day 0
    current_pos = 0
    
    while current_pos < len(field_data):
        consecutive_above_threshold = 0
+        min_window_size = 120  # Need at least 120 days (~4 months) for patterns to establish
        
        for window_end in range(current_pos + 1, len(field_data) + 1):
            window_data = field_data.iloc[current_pos:window_end].copy().reset_index(drop=True)
            
+            # Skip if window is too small (model needs long sequences for pattern learning)
+            if len(window_data) < min_window_size:
+                continue
+            
            try:
-                features = extract_features(window_data, config['features'], ci_column=ci_column)
+                # CRITICAL: Pass season_anchor_day so DOY resets after harvest
+                features = extract_features(
+                    window_data, 
+                    config['features'], 
+                    ci_column=ci_column,
+                    season_anchor_day=season_anchor_day,
+                    lookback_start=current_pos
+                )
                
                # Apply scalers
+                features_scaled = features.copy().astype(float)
                for fi, scaler in enumerate(scalers):
                    try:
-                        features[:, fi] = scaler.transform(features[:, fi].reshape(-1, 1)).flatten()
+                        features_scaled[:, fi] = scaler.transform(features[:, fi].reshape(-1, 1)).flatten()
                    except Exception:
                        pass
                
-                # Run model
+                # Run model on expanding window
                with torch.no_grad():
-                    x_tensor = torch.tensor(features, dtype=torch.float32).unsqueeze(0).to(device)
+                    x_tensor = torch.tensor(features_scaled, dtype=torch.float32).unsqueeze(0).to(device)
                    imminent_probs, detected_probs = model(x_tensor)
                
-                # Check LAST timestep
-                last_prob = detected_probs[-1]
+                # Check LAST timestep only
+                last_prob = detected_probs[0, -1].item()
                
                if last_prob > threshold:
                    consecutive_above_threshold += 1
@ -318,16 +368,21 @@ def run_phase1_growing_window(field_data, model, config, scalers, ci_column, dev
                
                # Harvest detected: N consecutive days above threshold
                if consecutive_above_threshold >= consecutive_days:
-                    harvest_date = field_data.iloc[current_pos + window_end - consecutive_days]['Date']
-                    harvest_dates.append((harvest_date, current_pos + window_end - consecutive_days))
+                    harvest_idx = current_pos + window_end - consecutive_days
+                    harvest_date = field_data.iloc[harvest_idx]['Date']
+                    harvest_dates.append((harvest_date, harvest_idx))
                    
-                    # Reset to next day after harvest
-                    current_pos = current_pos + window_end - consecutive_days + 1
+                    # CRITICAL: Reset season anchor for next season
+                    # DOY 1 starts at day after harvest
+                    season_anchor_day = harvest_idx + 1
+                    current_pos = harvest_idx + 1
                    break
                    
-            except Exception:
+            except Exception as e:
+                # Skip window on error
                continue
        else:
+            # No more harvests found
            break
    
    return harvest_dates
@ -413,7 +468,9 @@ def run_two_step_refinement(df: pd.DataFrame, model, config, scalers, device=Non
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    results = []
-    ci_column = config['data']['ci_column']
+    # CI column is 'FitData' (interpolated CI) - NOT 'value' (raw with NAs)
+    # 'FitData' is already gap-filled by R stage 03, ready for ML
+    ci_column = 'FitData'
    
    # Group by field and count total fields for progress
    field_groups = list(df.groupby('field'))
--- a/python_app/harvest_detection_experiments/tests/test_doy_logic.py
+++ b/python_app/harvest_detection_experiments/tests/test_doy_logic.py
@ -0,0 +1,65 @@
+"""
+Test DOY reset logic for harvest detection.
+Verify that DOY resets to 1, 2, 3, ... after harvest is detected.
+"""
+
+import sys
+from pathlib import Path
+sys.path.insert(0, str(Path.cwd()))
+
+import pandas as pd
+import numpy as np
+from harvest_date_pred_utils import extract_features, load_model_and_config
+import torch
+
+# Load sample data
+ci_data = pd.read_csv('../laravel_app/storage/app/angata/Data/extracted_ci/ci_data_for_python/ci_data_for_python.csv')
+ci_data['Date'] = pd.to_datetime(ci_data['Date'])
+
+# Get field 779 data
+field_779 = ci_data[ci_data['field'] == '779'].reset_index(drop=True)
+print(f"Field 779: {len(field_779)} days of data")
+print(f"Date range: {field_779['Date'].min().date()} to {field_779['Date'].max().date()}\n")
+
+# Load model config
+model, config, scalers = load_model_and_config(Path.cwd())
+
+# Test 1: First season (season_anchor_day = 0)
+print("=" * 80)
+print("TEST 1: First season (season_anchor_day=0, lookback_start=0)")
+print("=" * 80)
+window = field_779.iloc[0:20].copy().reset_index(drop=True)
+features = extract_features(window, config['features'], ci_column='FitData', 
+                           season_anchor_day=0, lookback_start=0)
+
+# Extract DOY_normalized column (index 13 or find it)
+feature_names = config['features']
+doy_idx = feature_names.index('DOY_normalized') if 'DOY_normalized' in feature_names else -1
+if doy_idx >= 0:
+    doy_values = (features[:, doy_idx] * 450).astype(int)  # Denormalize
+    print(f"Window size: {len(window)} days")
+    print(f"DOY values: {doy_values[:10]}")
+    print(f"Expected: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]")
+    assert list(doy_values[:10]) == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "DOY not incrementing correctly!"
+    print("✓ DOY incrementing correctly for first season\n")
+
+# Test 2: After harvest detected at day 100, next season starts
+print("=" * 80)
+print("TEST 2: After harvest at day 100, new season starts (season_anchor_day=101, lookback_start=101)")
+print("=" * 80)
+harvest_day = 100
+window = field_779.iloc[harvest_day:harvest_day+20].copy().reset_index(drop=True)
+features = extract_features(window, config['features'], ci_column='FitData',
+                           season_anchor_day=harvest_day+1, lookback_start=harvest_day+1)
+
+if doy_idx >= 0:
+    doy_values = (features[:, doy_idx] * 450).astype(int)  # Denormalize
+    print(f"Window size: {len(window)} days (starting at day {harvest_day})")
+    print(f"DOY values: {doy_values[:10]}")
+    print(f"Expected: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] (fresh season)")
+    assert list(doy_values[:10]) == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "DOY not reset after harvest!"
+    print("✓ DOY reset correctly for new season\n")
+
+print("=" * 80)
+print("ALL TESTS PASSED! DOY logic is correct.")
+print("=" * 80)
--- a/python_app/harvest_detection_experiments/tests/test_feature_extraction.py
+++ b/python_app/harvest_detection_experiments/tests/test_feature_extraction.py
@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+"""
+Quick test: Verify feature extraction works
+"""
+
+import sys
+import pandas as pd
+import numpy as np
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent))
+
+from harvest_date_pred_utils import extract_features, load_model_and_config
+
+project_name = "angata"
+base_storage = Path("../laravel_app/storage/app") / project_name / "Data"
+CI_DATA_FILE = base_storage / "extracted_ci" / "ci_data_for_python" / "ci_data_for_python.csv"
+
+print("="*80)
+print("DEBUG: Feature Extraction Test")
+print("="*80)
+
+# Load model config
+print("\n[1] Loading model config...")
+model, config, scalers = load_model_and_config(Path("."))
+print(f"  Config features: {config['features']}")
+print(f"  Number of features: {len(config['features'])}")
+
+# Load CI data
+print("\n[2] Loading CI data...")
+ci_data = pd.read_csv(CI_DATA_FILE, dtype={'field': str})
+ci_data['Date'] = pd.to_datetime(ci_data['Date'])
+print(f"  Columns: {ci_data.columns.tolist()}")
+print(f"  Total rows: {len(ci_data)}")
+
+# Test on a single field
+test_field = "1"
+field_data = ci_data[ci_data['field'] == test_field].sort_values('Date').reset_index(drop=True)
+print(f"\n[3] Testing on field {test_field}...")
+print(f"  Data points: {len(field_data)}")
+print(f"  Date range: {field_data['Date'].min().date()} to {field_data['Date'].max().date()}")
+print(f"  Columns in field data: {field_data.columns.tolist()}")
+print(f"  Sample values:")
+print(field_data[['Date', 'value']].head())
+
+# Test feature extraction on first 50 days
+print(f"\n[4] Extracting features for first 50 days...")
+try:
+    subset = field_data.iloc[:50].copy()
+    features = extract_features(subset, config['features'], ci_column='value')
+    print(f"  ✓ Success!")
+    print(f"  Feature shape: {features.shape}")
+    print(f"  Expected shape: (50, {len(config['features'])})")
+    print(f"  Feature values sample (first 5 days):")
+    for i in range(min(5, features.shape[0])):
+        print(f"    Day {i}: {features[i]}")
+except Exception as e:
+    print(f"  ✗ Error: {e}")
+    import traceback
+    traceback.print_exc()
+
+print("\n[5] Testing on growing windows...")
+try:
+    for window_size in [10, 20, 30, 50]:
+        window_data = field_data.iloc[:window_size].copy()
+        features = extract_features(window_data, config['features'], ci_column='value')
+        print(f"  Window size {window_size}: shape={features.shape}, min={features.min():.4f}, max={features.max():.4f}")
+except Exception as e:
+    print(f"  ✗ Error: {e}")
+    import traceback
+    traceback.print_exc()
+
+print("\n✓ Feature extraction test complete")
--- a/python_app/harvest_detection_experiments/tests/test_growing_window_only.py
+++ b/python_app/harvest_detection_experiments/tests/test_growing_window_only.py
@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+"""
+Test ONLY the growing window method (what production actually uses)
+Never run model on full sequence - only on expanding windows
+
+This matches real deployment where data arrives daily
+"""
+
+import sys
+import pandas as pd
+import numpy as np
+import torch
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from harvest_date_pred_utils import (
+    load_model_and_config,
+    extract_features,
+)
+
+project_name = "angata"
+
+# Find root by walking up until we find laravel_app
+script_dir = Path(__file__).parent
+root = script_dir
+while root != root.parent:  # Stop at filesystem root
+    if (root / "laravel_app").exists():
+        break
+    root = root.parent
+
+base_storage = root / "laravel_app" / "storage" / "app" / project_name / "Data"
+CI_DATA_FILE = base_storage / "extracted_ci" / "ci_data_for_python" / "ci_data_for_python.csv"
+MODEL_DIR = root / "python_app"
+
+print("="*80)
+print("GROWING WINDOW METHOD ONLY (Real Production Simulation)")
+print("="*80)
+
+# Load model
+print("\n[1] Loading model...")
+model, config, scalers = load_model_and_config(MODEL_DIR)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Load CI data
+print("\n[2] Loading CI data...")
+ci_data = pd.read_csv(CI_DATA_FILE, dtype={'field': str})
+ci_data['Date'] = pd.to_datetime(ci_data['Date'])
+
+# Test on field 779
+test_field = "779"
+field_data = ci_data[ci_data['field'] == test_field].sort_values('Date').reset_index(drop=True)
+
+print(f"\n[3] Field {test_field}: {len(field_data)} data points")
+print(f"    Date range: {field_data['Date'].min().date()} to {field_data['Date'].max().date()}")
+
+# Simulate growing window (real production)
+print(f"\n[4] Simulating growing window (expanding daily)...")
+
+harvest_dates = []
+current_pos = 0
+consecutive_above = 0
+threshold = 0.3
+consecutive_days = 2
+model_runs = 0
+
+while current_pos < len(field_data):
+    consecutive_above = 0
+    found_harvest = False
+    
+    for window_end in range(current_pos + 1, len(field_data) + 1):
+        # Expand window: current_pos to window_end
+        window_data = field_data.iloc[current_pos:window_end].copy().reset_index(drop=True)
+        
+        try:
+            # Extract features for THIS window only
+            features = extract_features(window_data, config['features'], ci_column='value')
+            
+            # Normalize
+            features_scaled = features.copy().astype(float)
+            for fi, scaler in enumerate(scalers):
+                features_scaled[:, fi] = scaler.transform(features[:, fi].reshape(-1, 1)).flatten()
+            
+            # Run model on expanding window
+            with torch.no_grad():
+                x_tensor = torch.tensor(features_scaled, dtype=torch.float32).unsqueeze(0).to(device)
+                imminent_probs, detected_probs = model(x_tensor)
+            
+            model_runs += 1
+            
+            # Check LAST timestep only
+            last_prob = detected_probs[0, -1].item()
+            last_date = window_data.iloc[-1]['Date'].date()
+            
+            # Print every 50th window to track progress
+            if window_end % 50 == 0 or window_end < 10:
+                print(f"    Window [{current_pos:3d}:{window_end:3d}] ({last_date}): prob={last_prob:.4f}", end="")
+                if last_prob > threshold:
+                    print(" ✓ ABOVE", end="")
+                print()
+            
+            # Check threshold
+            if last_prob > threshold:
+                consecutive_above += 1
+            else:
+                consecutive_above = 0
+            
+            # Harvest detected
+            if consecutive_above >= consecutive_days:
+                harvest_idx = current_pos + window_end - consecutive_days
+                harvest_date = field_data.iloc[harvest_idx]['Date']
+                harvest_dates.append((harvest_date, harvest_idx, last_prob))
+                print(f"\n    ✓ HARVEST DETECTED at {harvest_date.date()} (index {harvest_idx})")
+                print(f"      {consecutive_days} consecutive days above {threshold}")
+                
+                # Jump past this harvest
+                current_pos = harvest_idx + 1
+                found_harvest = True
+                break
+        
+        except Exception as e:
+            print(f"    ERROR at window [{current_pos}:{window_end}]: {e}")
+            continue
+    
+    if not found_harvest:
+        break
+
+print(f"\n[5] Results:")
+print(f"    Total model runs: {model_runs}")
+print(f"    Harvests found: {len(harvest_dates)}")
+
+if harvest_dates:
+    print(f"\n    Harvest dates:")
+    for date, idx, prob in harvest_dates:
+        print(f"      {date.date()}: index {idx}, last_prob={prob:.4f}")
+else:
+    print(f"\n    No harvests detected")
+
+print(f"\n[6] Analysis:")
+print(f"    Model runs per day: {model_runs / len(field_data):.2f}x")
+print(f"    Expected: ~{len(field_data):.0f} runs (one per day)")
--- a/python_app/harvest_detection_experiments/tests/test_model_inference.py
+++ b/python_app/harvest_detection_experiments/tests/test_model_inference.py
@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+"""
+Complete test: Feature extraction + Model inference + Phase 1 detection
+"""
+
+import sys
+import pandas as pd
+import numpy as np
+import torch
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent))
+
+from harvest_date_pred_utils import (
+    load_model_and_config, 
+    extract_features, 
+    run_phase1_growing_window
+)
+
+project_name = "angata"
+base_storage = Path("../laravel_app/storage/app") / project_name / "Data"
+CI_DATA_FILE = base_storage / "extracted_ci" / "ci_data_for_python" / "ci_data_for_python.csv"
+
+print("="*80)
+print("DEBUG: Model Inference + Phase 1 Detection")
+print("="*80)
+
+# Load model
+print("\n[1] Loading model...")
+model, config, scalers = load_model_and_config(Path("."))
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"  Device: {device}")
+print(f"  Scalers type: {type(scalers)}")
+print(f"  Number of scalers: {len(scalers) if isinstance(scalers, list) else 'N/A (dict/object)'}")
+
+# Load CI data
+print("\n[2] Loading CI data...")
+ci_data = pd.read_csv(CI_DATA_FILE, dtype={'field': str})
+ci_data['Date'] = pd.to_datetime(ci_data['Date'])
+
+# Test on a known field (field 1)
+test_field = "1"
+field_data = ci_data[ci_data['field'] == test_field].sort_values('Date').reset_index(drop=True)
+print(f"\n[3] Testing on field {test_field}...")
+print(f"  Data points: {len(field_data)}")
+
+# Test with first 100 days
+subset_100 = field_data.iloc[:100].copy().reset_index(drop=True)
+print(f"\n[4] Testing model inference on first 100 days...")
+
+try:
+    features = extract_features(subset_100, config['features'], ci_column='value')
+    print(f"  Features shape: {features.shape}")
+    print(f"  Features dtype: {features.dtype}")
+    
+    # Apply scalers
+    features_scaled = features.copy().astype(float)
+    print(f"  Applying {len(scalers)} scalers...")
+    
+    for fi, scaler in enumerate(scalers):
+        try:
+            col_data = features[:, fi].reshape(-1, 1)
+            scaled_col = scaler.transform(col_data)
+            features_scaled[:, fi] = scaled_col.flatten()
+            if fi < 3:  # Show first 3 scalers
+                print(f"    Scaler {fi}: transformed {features[0, fi]:.4f} → {features_scaled[0, fi]:.4f}")
+        except Exception as e:
+            print(f"    ERROR in scaler {fi}: {e}")
+            raise
+    
+    # Run model
+    print(f"\n  Running model inference...")
+    x_tensor = torch.tensor(features_scaled, dtype=torch.float32).unsqueeze(0).to(device)
+    print(f"  Tensor shape: {x_tensor.shape}")
+    
+    with torch.no_grad():
+        imminent_probs, detected_probs = model(x_tensor)
+    
+    print(f"  Imminent probs shape: {imminent_probs.shape}")
+    print(f"  Detected probs shape: {detected_probs.shape}")
+    print(f"  Detected probs dtype: {detected_probs.dtype}")
+    
+    # Analyze detected probs
+    detected_np = detected_probs[0].cpu().numpy()  # Get first (only) batch
+    print(f"\n  Detected head analysis:")
+    print(f"    Min: {detected_np.min():.4f}")
+    print(f"    Max: {detected_np.max():.4f}")
+    print(f"    Mean: {detected_np.mean():.4f}")
+    print(f"    Median: {np.median(detected_np):.4f}")
+    print(f"    > 0.1: {(detected_np > 0.1).sum()} days")
+    print(f"    > 0.3: {(detected_np > 0.3).sum()} days")
+    print(f"    > 0.5: {(detected_np > 0.5).sum()} days")
+    
+    # Show top 5 peaks
+    top_indices = np.argsort(detected_np)[-5:][::-1]
+    print(f"\n  Top 5 detected peaks:")
+    for idx in top_indices:
+        date = subset_100.iloc[idx]['Date'].date()
+        prob = detected_np[idx]
+        print(f"    Day {idx} ({date}): {prob:.4f}")
+    
+except Exception as e:
+    print(f"  ERROR: {e}")
+    import traceback
+    traceback.print_exc()
+    sys.exit(1)
+
+# Test Phase 1 growing window
+print(f"\n[5] Testing Phase 1 growing window (threshold=0.3, consecutive=2)...")
+try:
+    phase1_results = run_phase1_growing_window(
+        subset_100, model, config, scalers, 'value', device,
+        threshold=0.3, consecutive_days=2
+    )
+    print(f"  ✓ Phase 1 found {len(phase1_results)} harvest(s):")
+    for harvest_date, harvest_idx in phase1_results:
+        print(f"    {harvest_date.date()}: index {harvest_idx}")
+except Exception as e:
+    print(f"  ERROR: {e}")
+    import traceback
+    traceback.print_exc()
+
+print("\n✓ Model inference test complete")
--- a/python_app/harvest_detection_experiments/tests/test_script22_debug.py
+++ b/python_app/harvest_detection_experiments/tests/test_script22_debug.py
@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+"""
+Debug script: Test if script 22 logic is working
+Tests the two-step refinement on a single known field
+"""
+
+import sys
+import time
+import pandas as pd
+import numpy as np
+import torch
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from harvest_date_pred_utils import (
+    load_model_and_config,
+    extract_features,
+    run_phase1_growing_window,
+)
+
+project_name = "angata"
+
+# Find the workspace root by looking for laravel_app folder
+script_dir = Path(__file__).parent
+root = script_dir
+while root != root.parent:
+    if (root / "laravel_app").exists():
+        break
+    root = root.parent
+
+base_storage = root / "laravel_app" / "storage" / "app" / project_name / "Data"
+CI_DATA_FILE = base_storage / "extracted_ci" / "ci_data_for_python" / "ci_data_for_python.csv"
+MODEL_DIR = root / "python_app"
+
+print("="*80)
+print("DEBUG: Script 22 Two-Step Refinement Logic")
+print("="*80)
+
+# Load model
+print("\n[1] Loading model...")
+model, config, scalers = load_model_and_config(MODEL_DIR)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"  Device: {device}")
+print(f"  Model features: {config['features']}")
+
+# Load CI data
+print("\n[2] Loading CI data...")
+ci_data = pd.read_csv(CI_DATA_FILE, dtype={'field': str})
+ci_data['Date'] = pd.to_datetime(ci_data['Date'])
+print(f"  Total rows: {len(ci_data)}")
+print(f"  Fields: {ci_data['field'].nunique()}")
+print(f"  Date range: {ci_data['Date'].min().date()} to {ci_data['Date'].max().date()}")
+
+# Test on a known field (field 779 from our previous tests)
+test_field = "779"
+field_data = ci_data[ci_data['field'] == test_field].sort_values('Date').reset_index(drop=True)
+
+print(f"\n[3] Testing on field {test_field}...")
+print(f"  Data points: {len(field_data)}")
+print(f"  Date range: {field_data['Date'].min().date()} to {field_data['Date'].max().date()}")
+
+if len(field_data) == 0:
+    print(f"  ERROR: No data for field {test_field}")
+    sys.exit(1)
+
+# Extract features
+print(f"\n[4] Extracting features for field {test_field}...")
+try:
+    features = extract_features(field_data.reset_index(drop=True), config['features'], ci_column='value')
+    print(f"  Features shape: {features.shape}")
+    print(f"  Features dtype: {features.dtype}")
+except Exception as e:
+    print(f"  ERROR: Could not extract features: {e}")
+    sys.exit(1)
+
+# Normalize and run model
+print(f"\n[5] Running Phase 1 GROWING WINDOW method (threshold=0.5, consecutive=3)...")
+print(f"    This simulates real production: expanding windows, checking each day")
+print(f"    Expected: ~477 model runs for 477 days (SLOW)")
+
+import time
+start_time = time.time()
+
+# Add instrumentation to see how many model runs are happening
+original_run = run_phase1_growing_window
+
+def instrumented_run(*args, **kwargs):
+    import sys
+    from harvest_date_pred_utils import extract_features
+    
+    field_data = args[0]
+    model = args[1]
+    config = args[2]
+    scalers = args[3]
+    ci_column = args[4]
+    device = args[5]
+    threshold = kwargs.get('threshold', 0.3)
+    consecutive_days = kwargs.get('consecutive_days', 2)
+    
+    harvest_dates = []
+    current_pos = 0
+    model_runs = 0
+    
+    print(f"    Starting growing window loop...")
+    
+    while current_pos < len(field_data):
+        consecutive_above_threshold = 0
+        loop_start = current_pos
+        
+        for window_end in range(current_pos + 1, len(field_data) + 1):
+            window_data = field_data.iloc[current_pos:window_end].copy().reset_index(drop=True)
+            
+            try:
+                features = extract_features(window_data, config['features'], ci_column=ci_column)
+                
+                features_scaled = features.copy().astype(float)
+                for fi, scaler in enumerate(scalers):
+                    try:
+                        features_scaled[:, fi] = scaler.transform(features[:, fi].reshape(-1, 1)).flatten()
+                    except Exception as e:
+                        raise ValueError(f"Scaler {fi} failed: {e}")
+                
+                import torch
+                with torch.no_grad():
+                    x_tensor = torch.tensor(features_scaled, dtype=torch.float32).unsqueeze(0).to(device)
+                    imminent_probs, detected_probs = model(x_tensor)
+                
+                model_runs += 1
+                last_prob = detected_probs[0, -1].item()
+                
+                if last_prob > threshold:
+                    consecutive_above_threshold += 1
+                else:
+                    consecutive_above_threshold = 0
+                
+                if consecutive_above_threshold >= consecutive_days:
+                    harvest_date = field_data.iloc[current_pos + window_end - consecutive_days]['Date']
+                    harvest_dates.append((harvest_date, current_pos + window_end - consecutive_days))
+                    current_pos = current_pos + window_end - consecutive_days + 1
+                    break
+                    
+            except Exception as e:
+                pass
+        else:
+            break
+    
+    print(f"    Model runs performed: {model_runs}")
+    return harvest_dates
+
+phase1_results = instrumented_run(
+    field_data.reset_index(drop=True),
+    model, config, scalers, 'value', device,
+    threshold=0.5,
+    consecutive_days=3
+)
+
+elapsed = time.time() - start_time
+print(f"\n    Time elapsed: {elapsed:.2f}s")
+
+if phase1_results:
+    print(f"  ✓ Phase 1 detected {len(phase1_results)} harvest(s):")
+    
+    # Get probabilities for display by running model once on full field
+    with torch.no_grad():
+        X = features.reshape(1, -1, len(config['features']))
+        X_normalized = np.zeros_like(X)
+        for fi, scaler in enumerate(scalers):
+            X_normalized[0, :, fi] = scaler.transform(X[0, :, fi].reshape(-1, 1)).flatten()
+        X_tensor = torch.from_numpy(X_normalized).float().to(device)
+        _, detected_probs = model(X_tensor)
+    detected_np = detected_probs[0].cpu().numpy()
+    
+    for harvest_date, harvest_idx in phase1_results:
+        prob = detected_np[harvest_idx] if harvest_idx < len(detected_np) else 0.0
+        print(f"    {harvest_date.date()}: index {harvest_idx}, probability={prob:.4f}")
+else:
+    print(f"  ✗ Phase 1: No harvest detected")
--- a/r_app/40_mosaic_creation.R
+++ b/r_app/40_mosaic_creation.R
@ -14,8 +14,8 @@
 #   - tile_size: Tile size in km (default: 5, only used if use_tiles=TRUE)
 #
 # Examples:
-#   Rscript 04_mosaic_creation.R 2025-12-21 7 angata
-#   Rscript 04_mosaic_creation.R 2025-12-21 7 angata week_51.tif TRUE 5  [tile-based]
+
+#   & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/40_mosaic_creation.R 2026-01-12 7 angata
 #

 # 1. Load required packages
@ -142,15 +142,35 @@ main <- function() {
  if (use_tile_mosaic) {
    # TILE-BASED APPROACH: Create per-tile weekly MAX mosaics
    # This is used for projects like Angata with large ROIs requiring spatial partitioning
+    # Input data comes from merged_final_tif/{grid_size}/{DATE}/{DATE}_XX.tif (5-band tiles from script 20)
    tryCatch({
      safe_log("Starting per-tile mosaic creation (tile-based approach)...")
      
-      # Set output directory for per-tile mosaics
-      tile_output_base <- file.path(laravel_storage, "weekly_tile_max")
+      # Detect grid size from merged_final_tif folder structure
+      # Expected: merged_final_tif/5x5/ or merged_final_tif/10x10/ etc.
+      merged_final_base <- file.path(laravel_storage, "merged_final_tif")
+      grid_subfolders <- list.dirs(merged_final_base, full.names = FALSE, recursive = FALSE)
+      # Look for grid size patterns like "5x5", "10x10", "20x20"
+      grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE)
+      
+      if (length(grid_patterns) == 0) {
+        stop("No grid size subfolder found in merged_final_tif/ (expected: 5x5, 10x10, etc.)")
+      }
+      
+      grid_size <- grid_patterns[1]  # Use first grid size found
+      safe_log(paste("Detected grid size:", grid_size))
+      
+      # Point to the grid-specific merged_final_tif directory
+      merged_final_with_grid <- file.path(merged_final_base, grid_size)
+      
+      # Set output directory for per-tile mosaics, organized by grid size
+      # Output: weekly_tile_max/{grid_size}/week_WW_YYYY_TT.tif
+      tile_output_base <- file.path(laravel_storage, "weekly_tile_max", grid_size)
+      dir.create(tile_output_base, recursive = TRUE, showWarnings = FALSE)
      
      created_tile_files <- create_weekly_mosaic_from_tiles(
        dates = dates,
-        merged_final_dir = merged_final,
+        merged_final_dir = merged_final_with_grid,
        tile_output_dir = tile_output_base,
        field_boundaries = field_boundaries
      )
--- a/r_app/40_mosaic_creation_utils.R
+++ b/r_app/40_mosaic_creation_utils.R
@ -354,18 +354,17 @@ create_mosaic <- function(tif_files, cloud_coverage_stats, field_boundaries = NU
    }
    
    # Get filenames of best-coverage images
-    # Match by extracting tile ID from both cloud stats and TIF filenames
+    # Match by full filename from cloud stats to TIF files
    rasters_to_use <- character()
    for (idx in best_coverage) {
-      # Extract tile ID from cloud_coverage_stats filename (e.g., "tile_18" → 18)
+      # Get the full filename from cloud coverage stats
      cc_filename <- cloud_coverage_stats$filename[idx]
-      cc_tile_id <- gsub(".*_([0-9]+).*", "\\1", cc_filename)
      
-      # Find matching TIF file by matching tile ID
+      # Find matching TIF file by full filename
      matching_tif <- NULL
      for (tif_file in tif_files) {
-        tif_tile_id <- gsub(".*_([0-9]+)\\.tif", "\\1", basename(tif_file))
-        if (tif_tile_id == cc_tile_id) {
+        tif_basename <- basename(tif_file)
+        if (tif_basename == cc_filename) {
          matching_tif <- tif_file
          break
        }
@ -373,6 +372,8 @@ create_mosaic <- function(tif_files, cloud_coverage_stats, field_boundaries = NU
      
      if (!is.null(matching_tif)) {
        rasters_to_use <- c(rasters_to_use, matching_tif)
+      } else {
+        safe_log(paste("Warning: Could not find TIF file matching cloud stats entry:", cc_filename), "WARNING")
      }
    }
    
@ -420,42 +421,60 @@ create_mosaic <- function(tif_files, cloud_coverage_stats, field_boundaries = NU
      mosaic <- tryCatch({
        safe_log(paste("Creating max composite from", length(all_rasters), "images to fill clouds"))
        
-        # Get extent from field boundaries if available, otherwise use raster intersection
-        if (!is.null(field_boundaries)) {
-          crop_extent <- terra::ext(field_boundaries)
-          safe_log("Using field boundaries extent for consistent area across all dates")
+        # Check if all rasters have identical grids (extent and resolution)
+        # This is likely for per-tile mosaics from the same tiling scheme
+        reference_raster <- all_rasters[[1]]
+        ref_ext <- terra::ext(reference_raster)
+        ref_res <- terra::res(reference_raster)
+        
+        grids_match <- all(sapply(all_rasters[-1], function(r) {
+          isTRUE(all.equal(terra::ext(r), ref_ext, tolerance = 1e-6)) && 
+          isTRUE(all.equal(terra::res(r), ref_res, tolerance = 1e-6))
+        }))
+        
+        if (grids_match) {
+          # All rasters have matching grids - no cropping/resampling needed!
+          safe_log("All rasters have identical grids - stacking directly for max composite")
+          raster_collection <- terra::sprc(all_rasters)
+          max_mosaic <- terra::mosaic(raster_collection, fun = "max")
        } else {
-          # Fallback: use intersection of all raster extents
-          crop_extent <- terra::ext(all_rasters[[1]])
-          for (i in 2:length(all_rasters)) {
-            crop_extent <- terra::intersect(crop_extent, terra::ext(all_rasters[[i]]))
+          # Grids don't match - need to crop and resample
+          safe_log("Rasters have different grids - cropping and resampling to common extent")
+          
+          # Get extent from field boundaries if available, otherwise use raster union
+          if (!is.null(field_boundaries)) {
+            crop_extent <- terra::ext(field_boundaries)
+            safe_log("Using field boundaries extent for consistent area across all dates")
+          } else {
+            # Use union of all extents (covers all data)
+            crop_extent <- terra::ext(all_rasters[[1]])
+            for (i in 2:length(all_rasters)) {
+              crop_extent <- terra::union(crop_extent, terra::ext(all_rasters[[i]]))
+            }
+            safe_log("Using raster union extent")
          }
-          safe_log("Using raster intersection extent")
+          
+          # Crop all rasters to common extent
+          cropped_rasters <- lapply(all_rasters, function(r) {
+            terra::crop(r, crop_extent)
+          })
+          
+          # Resample all cropped rasters to match the first one's grid
+          reference_grid <- cropped_rasters[[1]]
+          
+          aligned_rasters <- lapply(cropped_rasters, function(r) {
+            if (isTRUE(all.equal(terra::ext(r), terra::ext(reference_grid), tolerance = 1e-6)) && 
+                isTRUE(all.equal(terra::res(r), terra::res(reference_grid), tolerance = 1e-6))) {
+              return(r)  # Already aligned
+            }
+            terra::resample(r, reference_grid, method = "near")
+          })
+          
+          # Create max composite using mosaic on aligned rasters
+          raster_collection <- terra::sprc(aligned_rasters)
+          max_mosaic <- terra::mosaic(raster_collection, fun = "max")
        }
        
-        # Crop all rasters to common extent
-        cropped_rasters <- lapply(all_rasters, function(r) {
-          terra::crop(r, crop_extent)
-        })
-        
-        # Resample all cropped rasters to match the first one's grid
-        # This handles pixel grid misalignment from Python's dynamic extent adjustment
-        reference_grid <- cropped_rasters[[1]]
-        safe_log("Resampling rasters to common grid for stacking")
-        
-        aligned_rasters <- lapply(cropped_rasters, function(r) {
-          if (identical(terra::ext(r), terra::ext(reference_grid)) && 
-              identical(terra::res(r), terra::res(reference_grid))) {
-            return(r)  # Already aligned
-          }
-          terra::resample(r, reference_grid, method = "near")
-        })
-        
-        # Create max composite using mosaic on aligned rasters
-        # Resample ensures all rasters have matching grids (no resolution mismatch)
-        raster_collection <- terra::sprc(aligned_rasters)
-        max_mosaic <- terra::mosaic(raster_collection, fun = "max")
-        
        max_mosaic
      }, error = function(e) {
        safe_log(paste("Max composite creation failed:", e$message), "WARNING")
@ -686,6 +705,16 @@ create_weekly_mosaic_from_tiles <- function(dates, merged_final_dir, tile_output
      next
    }
    
+    # DEBUG: Check mosaic content before saving
+    safe_log(paste("  DEBUG: Mosaic tile", tile_id, "dimensions:", nrow(tile_mosaic), "x", ncol(tile_mosaic)))
+    safe_log(paste("  DEBUG: Mosaic tile", tile_id, "bands:", terra::nlyr(tile_mosaic)))
+    
+    # Check first band values
+    band1 <- tile_mosaic[[1]]
+    band1_min <- terra::global(band1, fun = "min", na.rm = TRUE)$min
+    band1_max <- terra::global(band1, fun = "max", na.rm = TRUE)$max
+    safe_log(paste("  DEBUG: Band 1 MIN=", round(band1_min, 2), "MAX=", round(band1_max, 2)))
+    
    # Step 2c: Save this tile's weekly MAX mosaic
    # Filename format: week_WW_YYYY_TT.tif (e.g., week_02_2026_01.tif for week 2, 2026, tile 1)
    tile_filename <- paste0("week_", sprintf("%02d", dates$week), "_", dates$year, "_", 
@ -763,7 +792,7 @@ count_cloud_coverage_for_tile <- function(tile_files, field_boundaries = NULL) {
      missing_pct <- round(100 - ((total_notna / total_pixels) * 100))
      
      aggregated_results[[idx]] <- data.frame(
-        filename = paste0("tile_", sprintf("%02d", as.integer(gsub(".*_([0-9]+)\\.tif", "\\1", basename(tile_file))))),
+        filename = basename(tile_file),  # Keep full filename: 2026-01-07_03.tif
        notNA = total_notna,
        total_pixels = total_pixels,
        missing_pixels_percentage = missing_pct,
--- a/r_app/80_calculate_kpis.R
+++ b/r_app/80_calculate_kpis.R
@ -20,6 +20,8 @@
 #   Option 2: Rscript 80_calculate_kpis.R 2026-01-14 angata 7
 #     Arguments: [end_date] [project_dir] [offset_days]
 #
+#  & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-01-12 angata 7
+#
 # Usage in run_full_pipeline.R:
 #   source("r_app/80_calculate_kpis.R")
 #   main()
@ -122,7 +124,7 @@ STATUS_TRIGGERS <- data.frame(
 # TILE-AWARE HELPER FUNCTIONS
 # ============================================================================

-get_tile_ids_for_field <- function(field_geom, tile_grid) {
+get_tile_ids_for_field <- function(field_geom, tile_grid, field_id = NULL) {
  if (inherits(field_geom, "sf")) {
    field_bbox <- sf::st_bbox(field_geom)
    field_xmin <- field_bbox["xmin"]
@ -139,6 +141,17 @@ get_tile_ids_for_field <- function(field_geom, tile_grid) {
    stop("field_geom must be sf or terra::vect object")
  }
  
+  # DEBUG: Print bbox info for first field
+  if (!is.null(field_id) && field_id == "1391") {
+    message(paste("[DEBUG get_tile_ids] Field bbox - xmin:", field_xmin, "xmax:", field_xmax, 
+                  "ymin:", field_ymin, "ymax:", field_ymax))
+    message(paste("[DEBUG get_tile_ids] tile_grid sample: id=", tile_grid$id[1], 
+                  "xmin=", tile_grid$xmin[1], "xmax=", tile_grid$xmax[1],
+                  "ymin=", tile_grid$ymin[1], "ymax=", tile_grid$ymax[1]))
+    message(paste("[DEBUG get_tile_ids] tile_grid CRS:", sf::st_crs(tile_grid)))
+    message(paste("[DEBUG get_tile_ids] field CRS:", sf::st_crs(field_geom)))
+  }
+  
  intersecting_tiles <- tile_grid$id[
    !(tile_grid$xmax < field_xmin | 
      tile_grid$xmin > field_xmax | 
@ -189,6 +202,21 @@ load_tiles_for_field <- function(field_geom, tile_ids, week_num, year, mosaic_di
 }

 build_tile_grid <- function(mosaic_dir, week_num, year) {
+  # Handle grid-size subdirectories (e.g., weekly_tile_max/5x5/)
+  # First check if mosaic_dir contains grid-size subdirectories
+  detected_grid_size <- NA
+  if (dir.exists(mosaic_dir)) {
+    subfolders <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE)
+    grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
+    
+    if (length(grid_patterns) > 0) {
+      # Use the first grid-size subdirectory found
+      detected_grid_size <- grid_patterns[1]
+      mosaic_dir <- file.path(mosaic_dir, detected_grid_size)
+      message(paste("  Using grid-size subdirectory:", detected_grid_size))
+    }
+  }
+  
  tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
  tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
  
@ -230,7 +258,12 @@ build_tile_grid <- function(mosaic_dir, week_num, year) {
    stop("Could not extract extents from any tile files")
  }
  
-  return(tile_grid)
+  # RETURN BOTH the grid AND the corrected mosaic directory path
+  return(list(
+    tile_grid = tile_grid,
+    mosaic_dir = mosaic_dir,
+    grid_size = detected_grid_size
+  ))
 }

 # ============================================================================
@ -399,14 +432,20 @@ load_historical_field_data <- function(project_dir, current_week, reports_dir, n
 USE_UNIFORM_AGE <- TRUE
 UNIFORM_PLANTING_DATE <- as.Date("2025-01-01")

-extract_planting_dates <- function(harvesting_data) {
+extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) {
  if (USE_UNIFORM_AGE) {
    message(paste("Using uniform planting date for all fields:", UNIFORM_PLANTING_DATE))
-    return(data.frame(
-      field_id = character(),
-      planting_date = as.Date(character()),
-      stringsAsFactors = FALSE
-    ))
+    # Return a data frame with all field IDs mapped to uniform planting date
+    if (!is.null(field_boundaries_sf)) {
+      return(data.frame(
+        field_id = field_boundaries_sf$field,
+        date = rep(UNIFORM_PLANTING_DATE, nrow(field_boundaries_sf)),
+        stringsAsFactors = FALSE
+      ))
+    } else {
+      # Fallback if field_boundaries_sf not provided
+      return(NULL)
+    }
  }
  
  if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
@ -449,6 +488,11 @@ analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week
    }
    field_name <- field_id
    
+    # DEBUG: Print for first few fields
+    if (field_idx <= 3) {
+      message(paste("[DEBUG] Field", field_idx, ":", field_id))
+    }
+    
    field_sf <- field_boundaries_sf[field_idx, ]
    if (sf::st_is_empty(field_sf) || any(is.na(sf::st_geometry(field_sf)))) {
      return(data.frame(
@ -460,7 +504,14 @@ analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week
    field_area_ha <- as.numeric(sf::st_area(field_sf)) / 10000
    field_area_acres <- field_area_ha / 0.404686
    
-    tile_ids <- get_tile_ids_for_field(field_sf, tile_grid)
+    tile_ids <- get_tile_ids_for_field(field_sf, tile_grid, field_id = field_id)
+    
+    # DEBUG: Print tile IDs for first field
+    if (field_idx == 1) {
+      message(paste("[DEBUG] First field tile_ids:", paste(tile_ids, collapse=",")))
+      message(paste("[DEBUG] tile_grid nrows:", nrow(tile_grid), "ncols:", ncol(tile_grid)))
+      message(paste("[DEBUG] mosaic_dir:", mosaic_dir))
+    }
    
    current_ci <- load_tiles_for_field(field_sf, tile_ids, week_num, year, mosaic_dir)
    
@ -471,10 +522,26 @@ analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week
      ))
    }
    
-    field_vect <- terra::vect(sf::as_Spatial(field_sf))
-    terra::crs(field_vect) <- terra::crs(current_ci)
+    # Extract CI values: EXACTLY LIKE SCRIPT 20
+    # Crop to field bounding box first, then extract with sf directly (not terra::vect conversion)
+    field_bbox <- sf::st_bbox(field_sf)
+    ci_cropped <- terra::crop(current_ci, terra::ext(field_bbox), snap = "out")
+    extracted_vals <- terra::extract(ci_cropped, field_sf, fun = "mean", na.rm = TRUE)
    
-    all_extracted <- terra::extract(current_ci, field_vect)[, 2]
+    # extracted_vals is a data.frame with ID column (field index) + mean value
+    mean_ci_current <- as.numeric(extracted_vals[1, 2])
+    
+    if (is.na(mean_ci_current)) {
+      return(data.frame(
+        Field_id = field_id,
+        error = "No CI values extracted from tiles"
+      ))
+    }
+    
+    # For per-tile extraction, we only have mean from the aggregation function
+    # To get variance/CV, we need to extract all pixels without the fun parameter
+    # But for farm-level purposes, the mean CI is sufficient
+    all_extracted <- terra::extract(ci_cropped, field_sf)[, 2]
    current_ci_vals <- all_extracted[!is.na(all_extracted)]
    
    num_total <- length(all_extracted)
@ -509,7 +576,9 @@ analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week
    tryCatch({
      previous_ci <- load_tiles_for_field(field_sf, tile_ids, week_num - 1, year, mosaic_dir)
      if (!is.null(previous_ci)) {
-        prev_extracted <- terra::extract(previous_ci, field_vect)[, 2]
+        prev_bbox <- sf::st_bbox(field_sf)
+        prev_ci_cropped <- terra::crop(previous_ci, terra::ext(prev_bbox), snap = "out")
+        prev_extracted <- terra::extract(prev_ci_cropped, field_sf)[, 2]
        previous_ci_vals <- prev_extracted[!is.na(prev_extracted)]
        if (length(previous_ci_vals) > 0) {
          mean_ci_previous <- mean(previous_ci_vals, na.rm = TRUE)
@ -763,6 +832,13 @@ generate_field_analysis_summary <- function(field_df) {
 export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, reports_dir) {
  message("Exporting per-field analysis to Excel, CSV, and RDS...")
  
+  # Round all numeric columns to 2 decimals
+  field_df_rounded <- field_df %>%
+    mutate(across(where(is.numeric), ~ round(., 2)))
+  
+  summary_df_rounded <- summary_df %>%
+    mutate(across(where(is.numeric), ~ round(., 2)))
+  
  output_subdir <- file.path(reports_dir, "kpis", "field_analysis")
  if (!dir.exists(output_subdir)) {
    dir.create(output_subdir, recursive = TRUE)
@ -773,16 +849,16 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
  excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
  
  sheets <- list(
-    "Field Data" = field_df,
-    "Summary" = summary_df
+    "Field Data" = field_df_rounded,
+    "Summary" = summary_df_rounded
  )
  
  write_xlsx(sheets, excel_path)
  message(paste("✓ Field analysis Excel exported to:", excel_path))
  
  kpi_data <- list(
-    field_analysis = field_df,
-    field_analysis_summary = summary_df,
+    field_analysis = field_df_rounded,
+    field_analysis_summary = summary_df_rounded,
    metadata = list(
      current_week = current_week,
      project = project_dir,
@ -798,84 +874,254 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
  
  csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".csv")
  csv_path <- file.path(output_subdir, csv_filename)
-  write_csv(field_df, csv_path)
+  write_csv(field_df_rounded, csv_path)
  message(paste("✓ Field analysis CSV exported to:", csv_path))
  
  return(list(excel = excel_path, rds = rds_path, csv = csv_path))
 }

 # ============================================================================
-# FARM-LEVEL KPI CALCULATION (FROM OLD 09_CALCULATE_KPIS.R)
+# TILE-BASED KPI EXTRACTION FUNCTION
 # ============================================================================

+calculate_field_kpis_from_tiles <- function(tile_dir, week_num, year, field_boundaries_sf, tile_grid) {
+  # Loop through tiles, extract KPI statistics per field per tile
+  # Follows the same pattern as extract_ci_from_tiles in CI extraction
+  
+  message("Calculating field-level KPI statistics from tiles...")
+  
+  # Get all tile files for this week
+  tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
+  tile_files <- list.files(tile_dir, pattern = tile_pattern, full.names = TRUE)
+  
+  if (length(tile_files) == 0) {
+    message("No tiles found for week", week_num, year)
+    return(NULL)
+  }
+  
+  # Process tiles in parallel using furrr (same as CI extraction)
+  message(paste("Processing", length(tile_files), "tiles in parallel..."))
+  
+  field_kpi_list <- furrr::future_map(
+    tile_files,
+    ~ process_single_kpi_tile(
+      tile_file = .,
+      field_boundaries_sf = field_boundaries_sf,
+      tile_grid = tile_grid
+    ),
+    .progress = TRUE,
+    .options = furrr::furrr_options(seed = TRUE)
+  )
+  
+  # Combine results from all tiles
+  field_kpi_stats <- dplyr::bind_rows(field_kpi_list)
+  
+  if (nrow(field_kpi_stats) == 0) {
+    message("  No KPI data extracted from tiles")
+    return(NULL)
+  }
+  
+  message(paste("  Extracted KPI stats for", length(unique(field_kpi_stats$field)), "unique fields"))
+  return(field_kpi_stats)
+}
+
+# Helper function to process a single tile (like process_single_tile in CI extraction)
+process_single_kpi_tile <- function(tile_file, field_boundaries_sf, tile_grid) {
+  tryCatch({
+    tile_basename <- basename(tile_file)
+    # Load tile raster
+    tile_raster <- terra::rast(tile_file)
+    
+    # Get first band (CI band for weekly mosaics)
+    ci_band <- tile_raster[[1]]
+    
+    # EXACTLY LIKE SCRIPT 20: Crop to field bounding box first, then extract with sf directly
+    field_bbox <- sf::st_bbox(field_boundaries_sf)
+    ci_cropped <- terra::crop(ci_band, terra::ext(field_bbox), snap = "out")
+    
+    # Extract CI values for ALL fields at once using sf object directly (NOT terra::vect)
+    # terra::extract() works with sf objects and handles geometries properly
+    extracted_vals <- terra::extract(ci_cropped, field_boundaries_sf, fun = "mean", na.rm = TRUE)
+    
+    # Initialize results for this tile
+    tile_results <- data.frame()
+    
+    # Get tile ID from filename
+    tile_id_match <- as.numeric(sub(".*_(\\d{2})\\.tif$", "\\1", tile_basename))
+    
+    # Process each field: extracted_vals is a data.frame with ID column (field indices) + extracted values
+    for (field_idx in seq_len(nrow(field_boundaries_sf))) {
+      field_id <- field_boundaries_sf$field[field_idx]
+      
+      # extracted_vals columns: 1=ID, 2=mean_CI (since we used fun="mean")
+      mean_ci <- extracted_vals[field_idx, 2]
+      
+      # Skip if no data for this field in this tile
+      if (is.na(mean_ci)) {
+        next
+      }
+      
+      # For tile-level stats, we only have mean from extraction (no variance without all pixels)
+      # Add to results
+      tile_results <- rbind(tile_results, data.frame(
+        field = field_id,
+        tile_id = tile_id_match,
+        tile_file = tile_basename,
+        mean_ci = round(mean_ci, 4),
+        stringsAsFactors = FALSE
+      ))
+    }
+    
+    return(tile_results)
+    
+  }, error = function(e) {
+    message(paste("  Warning: Error processing tile", basename(tile_file), ":", e$message))
+    return(data.frame())
+  })
+}
+
 calculate_and_export_farm_kpis <- function(report_date, project_dir, field_boundaries_sf, 
                                           harvesting_data, cumulative_CI_vals_dir, 
-                                           weekly_CI_mosaic, reports_dir) {
+                                           weekly_CI_mosaic, reports_dir, current_week, year,
+                                           tile_grid, use_tile_mosaic = FALSE, tile_grid_size = "5x5") {
  message("\n=== CALCULATING FARM-LEVEL KPIs ===")
-  message("(6 high-level KPI metrics)")
-  
-  tryCatch({
-    source(here("r_app", "kpi_utils.R"))
-  }, error = function(e) {
-    message(paste("Warning: Could not load kpi_utils.R:", e$message))
-    message("Farm-level KPIs will be skipped")
-    return(NULL)
-  })
-  
-  if (!exists("calculate_all_kpis")) {
-    message("Warning: calculate_all_kpis() function not found in kpi_utils.R")
-    return(NULL)
-  }
+  message("(6 high-level KPI metrics with tile-based extraction)")
  
  output_dir <- file.path(reports_dir, "kpis")
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }
  
-  tryCatch({
-    kpi_results <- calculate_all_kpis(
-      report_date = report_date,
-      output_dir = output_dir,
-      field_boundaries_sf = field_boundaries_sf,
-      harvesting_data = harvesting_data,
-      cumulative_CI_vals_dir = cumulative_CI_vals_dir,
-      weekly_CI_mosaic = weekly_CI_mosaic,
-      reports_dir = reports_dir,
-      project_dir = project_dir
+  # Get mosaic directory with grid size if using tiles
+  mosaic_dir <- if (use_tile_mosaic && !is.null(tile_grid_size)) {
+    file.path(weekly_CI_mosaic, tile_grid_size)
+  } else {
+    weekly_CI_mosaic
+  }
+  
+  # Extract field-level KPI statistics from tiles
+  field_kpi_stats <- calculate_field_kpis_from_tiles(
+    tile_dir = mosaic_dir,
+    week_num = current_week,
+    year = year,
+    field_boundaries_sf = field_boundaries_sf,
+    tile_grid = tile_grid
+  )
+  
+  if (is.null(field_kpi_stats) || nrow(field_kpi_stats) == 0) {
+    message("Warning: No field KPI statistics extracted from tiles")
+    return(NULL)
+  }
+  
+  # Aggregate tile-based statistics by field (average across tiles for each field)
+  field_summary_stats <- field_kpi_stats %>%
+    dplyr::group_by(field) %>%
+    dplyr::summarise(
+      mean_ci = mean(mean_ci, na.rm = TRUE),
+      cv_ci = mean(cv_ci, na.rm = TRUE),
+      min_ci = min(min_ci, na.rm = TRUE),
+      max_ci = max(max_ci, na.rm = TRUE),
+      total_pixels = sum(n_pixels, na.rm = TRUE),
+      num_tiles = n_distinct(tile_id),
+      .groups = 'drop'
    )
  
-    # Print KPI summary
-    cat("\n=== FARM-LEVEL KPI SUMMARY ===\n")
-    cat("Report Date:", as.character(kpi_results$metadata$report_date), "\n")
-    cat("Current Week:", kpi_results$metadata$current_week, "\n")
-    cat("Previous Week:", kpi_results$metadata$previous_week, "\n")
-    cat("Total Fields Analyzed:", kpi_results$metadata$total_fields, "\n")
-    cat("Calculation Time:", as.character(kpi_results$metadata$calculation_time), "\n")
+  # Create results list
+  kpi_results <- list(
+    field_kpi_stats = field_kpi_stats,
+    field_summary_stats = field_summary_stats,
+    metadata = list(
+      report_date = report_date,
+      current_week = current_week,
+      year = year,
+      calculation_method = "tile_based_extraction",
+      num_fields_processed = length(unique(field_kpi_stats$field)),
+      num_tiles_processed = length(unique(field_kpi_stats$tile_id))
+    )
+  )
  
-    cat("\n--- KPI Metrics ---\n")
-    cat("Field Uniformity Summary:\n")
-    print(kpi_results$field_uniformity_summary)
+  # Save results
+  rds_filename <- paste0(project_dir, "_farm_kpi_stats_week", sprintf("%02d", current_week), ".rds")
+  rds_path <- file.path(output_dir, rds_filename)
+  saveRDS(kpi_results, rds_path)
+  message(paste("✓ Farm-level KPI stats exported to:", rds_path))
  
-    cat("\nArea Change Summary:\n")
-    print(kpi_results$area_change)
+  # Print summary
+  cat("\n=== FARM-LEVEL KPI SUMMARY ===\n")
+  cat("Report Date:", as.character(report_date), "\n")
+  cat("Week:", current_week, "Year:", year, "\n")
+  cat("Fields Processed:", length(unique(field_kpi_stats$field)), "\n")
+  cat("Tiles Processed:", length(unique(field_kpi_stats$tile_id)), "\n")
+  cat("\n--- Field Summary Statistics (Mean across tiles) ---\n")
+  print(head(field_summary_stats, 20))
  
-    cat("\nTCH Forecasted:\n")
-    print(kpi_results$tch_forecasted)
+  return(kpi_results)
+}

-    cat("\nGrowth Decline Index:\n")
-    print(kpi_results$growth_decline)
+# ============================================================================
+# HELPER: Extract field-level statistics from CI raster (all pixels, single call)
+# ============================================================================

-    cat("\nWeed Presence Score:\n")
-    print(kpi_results$weed_presence)
+extract_field_statistics_from_ci <- function(ci_band, field_boundaries_sf) {
+  #' Extract CI statistics for all fields from a single CI raster band
+  #' 
+  #' This function extracts all pixel values for each field in one terra::extract call,
+  #' then calculates mean, CV, and percentiles from those pixels.
+  #' 
+  #' @param ci_band Single CI band from terra raster
+  #' @param field_boundaries_sf SF object with field geometries
+  #' @return Data frame with columns: field_idx, mean_ci, cv, p10, p90, pixel_count
  
-    cat("\nGap Filling Score:\n")
-    print(kpi_results$gap_filling)
+  # Extract all pixels for all fields at once (more efficient than individual calls)
+  all_pixels <- terra::extract(ci_band, field_boundaries_sf)
  
-    return(kpi_results)
-  }, error = function(e) {
-    message(paste("Error calculating farm-level KPIs:", e$message))
-    return(NULL)
-  })
+  # Calculate statistics for each field
+  stats_list <- list()
+  
+  for (field_idx in seq_len(nrow(field_boundaries_sf))) {
+    # Extract pixel values for this field (skip ID column 1)
+    pixels <- all_pixels[field_idx, -1, drop = TRUE]
+    pixels <- as.numeric(pixels)
+    pixels <- pixels[!is.na(pixels)]
+    
+    # Only calculate stats if we have pixels
+    if (length(pixels) > 0) {
+      mean_val <- mean(pixels, na.rm = TRUE)
+      
+      # Only calculate CV if mean > 0 (avoid division by zero)
+      if (mean_val > 0) {
+        cv_val <- sd(pixels, na.rm = TRUE) / mean_val
+      } else {
+        cv_val <- NA
+      }
+      
+      p10_val <- quantile(pixels, probs = CI_PERCENTILE_LOW, na.rm = TRUE)[[1]]
+      p90_val <- quantile(pixels, probs = CI_PERCENTILE_HIGH, na.rm = TRUE)[[1]]
+      
+      stats_list[[field_idx]] <- data.frame(
+        field_idx = field_idx,
+        mean_ci = mean_val,
+        cv = cv_val,
+        p10 = p10_val,
+        p90 = p90_val,
+        pixel_count = length(pixels),
+        stringsAsFactors = FALSE
+      )
+    } else {
+      # No pixels for this field (doesn't intersect tile)
+      stats_list[[field_idx]] <- data.frame(
+        field_idx = field_idx,
+        mean_ci = NA_real_,
+        cv = NA_real_,
+        p10 = NA_real_,
+        p90 = NA_real_,
+        pixel_count = 0,
+        stringsAsFactors = FALSE
+      )
+    }
+  }
+  
+  return(dplyr::bind_rows(stats_list))
 }

 # ============================================================================
@ -923,7 +1169,7 @@ main <- function() {
  message("")
  
  # Load configuration and utilities
-  source(here("r_app", "crop_messaging_utils.R"))
+  # source(here("r_app", "crop_messaging_utils.R"))
  
  tryCatch({
    source(here("r_app", "parameters_project.R"))
@ -950,10 +1196,29 @@ main <- function() {
  
  message(paste("Week:", current_week, "/ Year:", year))
  
-  message("Building tile grid from available weekly tiles...")
-  tile_grid <- build_tile_grid(weekly_tile_max, current_week, year)
-  message(paste("  Found", nrow(tile_grid), "tiles"))
+  # Find tile files - approach from Script 20
+  message("Finding tile files...")
+  tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", current_week, year)
  
+  # Detect grid size subdirectory
+  detected_grid_size <- NA
+  if (dir.exists(weekly_tile_max)) {
+    subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE)
+    grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
+    if (length(grid_patterns) > 0) {
+      detected_grid_size <- grid_patterns[1]
+      mosaic_dir <- file.path(weekly_tile_max, detected_grid_size)
+      message(paste("  Using grid-size subdirectory:", detected_grid_size))
+    }
+  }
+  
+  tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
+  if (length(tile_files) == 0) {
+    stop(paste("No tile files found for week", current_week, year, "in", mosaic_dir))
+  }
+  message(paste("  Found", length(tile_files), "tiles"))
+  
+  # Load field boundaries
  tryCatch({
    boundaries_result <- load_field_boundaries(data_dir)
    
@ -979,38 +1244,217 @@ main <- function() {
  }
  historical_data <- load_historical_field_data(project_dir, current_week, reports_dir, num_weeks = num_weeks_to_load)
  
-  planting_dates <- extract_planting_dates(harvesting_data)
+  planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
  
-  message("Setting up parallel processing...")
-  current_plan <- class(future::plan())[1]
-  if (current_plan == "sequential") {
-    num_workers <- parallel::detectCores() - 1
-    message(paste("  Using", num_workers, "workers"))
-    future::plan(future::multisession, workers = num_workers)
+  # Validate planting_dates
+  if (is.null(planting_dates) || nrow(planting_dates) == 0) {
+    message("WARNING: No planting dates available. Using NA for all fields.")
+    planting_dates <- data.frame(
+      field_id = field_boundaries_sf$field,
+      date = rep(as.Date(NA), nrow(field_boundaries_sf)),
+      stringsAsFactors = FALSE
+    )
  }
  
-  message("Analyzing", nrow(field_boundaries_sf), "fields in parallel...")
+  # SCRIPT 20 APPROACH: Loop through tiles, extract all fields from each tile
+  message("\nProcessing tiles and extracting field statistics...")
+  all_tile_results <- list()
  
-  field_analysis_list <- furrr::future_map(
-    seq_len(nrow(field_boundaries_sf)),
-    ~ analyze_single_field(
-      field_idx = .,
-      field_boundaries_sf = field_boundaries_sf,
-      tile_grid = tile_grid,
-      week_num = current_week,
-      year = year,
-      mosaic_dir = weekly_tile_max,
-      historical_data = historical_data,
-      planting_dates = planting_dates,
-      report_date = end_date,
-      harvest_imminence_data = NULL,
-      harvesting_data = harvesting_data
-    ),
-    .progress = TRUE,
-    .options = furrr::furrr_options(seed = TRUE)
-  )
+  for (i in seq_along(tile_files)) {
+    tile_file <- tile_files[i]
+    message(paste("  Processing tile", i, "of", length(tile_files), ":", basename(tile_file)))
    
-  field_analysis_df <- dplyr::bind_rows(field_analysis_list)
+    tryCatch({
+      # Load current tile and previous week tile
+      current_rast <- terra::rast(tile_file)
+      
+      # DEBUG: Check tile structure on first tile
+      if (i == 1) {
+        message(paste("    [DEBUG] Tile CRS:", terra::crs(current_rast)))
+        message(paste("    [DEBUG] Tile extent:", paste(terra::ext(current_rast))))
+        message(paste("    [DEBUG] Field boundaries CRS:", sf::st_crs(field_boundaries_sf)))
+        field_bbox <- sf::st_bbox(field_boundaries_sf)
+        message(paste("    [DEBUG] Field bbox:", paste(round(field_bbox, 2))))
+        message(paste("    [DEBUG] Band names:", paste(names(current_rast), collapse=", ")))
+      }
+      
+      # Extract CI band by name
+      ci_band <- current_rast[["CI"]]
+      
+      # Check if CI band exists - use proper logical checks
+      if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) {
+        message(paste("    ERROR: CI band not found. Available bands:", paste(names(current_rast), collapse=", ")))
+        next
+      }
+      
+      # Check if CI band has any valid data
+      if (tryCatch(all(is.na(values(ci_band))), error = function(e) TRUE)) {
+        message(paste("    ERROR: CI band has no valid data"))
+        next
+      }
+      
+      # Load previous week tile if available
+      previous_tile_file <- sub(sprintf("week_%02d", current_week), 
+                               sprintf("week_%02d", previous_week), 
+                               tile_file)
+      previous_ci <- NULL
+      if (file.exists(previous_tile_file)) {
+        previous_rast <- terra::rast(previous_tile_file)
+        previous_ci <- previous_rast[["CI"]]
+      }
+      
+      # OPTION 1 + 2: Extract all CI statistics from one pixel extraction (single call)
+      current_stats <- extract_field_statistics_from_ci(ci_band, field_boundaries_sf)
+      
+      # DEBUG: Check extraction result on first tile
+      if (i == 1) {
+        num_with_data <- sum(!is.na(current_stats$mean_ci))
+        message(paste("    [DEBUG] Extracted", nrow(current_stats), "fields, ", num_with_data, "with non-NA data"))
+        if (num_with_data > 0) {
+          message(paste("    [DEBUG] Sample mean CIs:", paste(head(current_stats$mean_ci[!is.na(current_stats$mean_ci)], 3), collapse=", ")))
+        }
+      }
+      
+      # Extract previous week CI statistics if available
+      previous_stats <- NULL
+      if (!is.null(previous_ci)) {
+        previous_stats <- extract_field_statistics_from_ci(previous_ci, field_boundaries_sf)
+      }
+      
+      # Process each field that was extracted
+      field_results_this_tile <- list()
+      fields_added <- 0
+      
+      for (field_idx in seq_len(nrow(field_boundaries_sf))) {
+        tryCatch({
+          field_id <- field_boundaries_sf$field[field_idx]
+          field_sf <- field_boundaries_sf[field_idx, ]
+          
+          # Get statistics from helper function results
+          # current_stats should have same number of rows as field_boundaries_sf
+          if (field_idx > nrow(current_stats)) {
+            message(paste("    [ERROR] field_idx", field_idx, "> nrow(current_stats)", nrow(current_stats)))
+            next
+          }
+          
+          mean_ci_current <- current_stats$mean_ci[field_idx]
+          pixel_count <- current_stats$pixel_count[field_idx]
+          
+          # SKIP fields with no data in this tile (they don't intersect this tile)
+          if (is.na(pixel_count) || pixel_count == 0) {
+            next
+          }
+          ci_cv_current <- current_stats$cv[field_idx]
+          ci_percentile_low <- current_stats$p10[field_idx]
+          ci_percentile_high <- current_stats$p90[field_idx]
+          
+          # If field doesn't intersect this tile, mean_ci_current will be NA
+          if (is.na(mean_ci_current)) {
+            next  # Skip this field - doesn't intersect this tile
+          }
+          
+          field_area_ha <- as.numeric(sf::st_area(field_sf)) / 10000
+          field_area_acres <- field_area_ha / 0.404686
+          
+          # Extract previous week CI if available
+          mean_ci_previous <- NA
+          ci_change <- NA
+          if (!is.null(previous_stats)) {
+            mean_ci_previous <- previous_stats$mean_ci[field_idx]
+            if (!is.na(mean_ci_previous)) {
+              ci_change <- mean_ci_current - mean_ci_previous
+            }
+          }
+          
+          # Reconstruct pixel values for status trigger (we need the actual pixel array)
+          # Use the percentiles and mean to create a synthetic distribution for status_trigger
+          # For now, use mean CI repeated by pixel count for testing
+          # TODO: Consider extracting pixels directly if needed for more complex triggers
+          pixel_count <- current_stats$pixel_count[field_idx]
+          ci_vals_current <- if (pixel_count > 0) {
+            rep(mean_ci_current, pixel_count)  # Simplified: use mean value repeated
+          } else {
+            numeric(0)
+          }
+          
+          # Calculate age
+          age_weeks <- if (!is.null(planting_dates) && nrow(planting_dates) > 0 && field_idx <= nrow(planting_dates)) {
+            planting_date <- planting_dates$date[field_idx]
+            if (!is.na(planting_date)) {
+              as.numeric(difftime(end_date, planting_date, units = "weeks"))
+            } else {
+              0
+            }
+          } else {
+            0
+          }
+          
+          # Get phase and status
+          phase <- get_phase_by_age(age_weeks)
+          status_trigger <- get_status_trigger(ci_vals_current, ci_change, age_weeks)
+          
+          # Cloud coverage categorization based on CI value
+          # No data = No image available
+          # CI 0.01 to 95 = Partial coverage
+          # CI >= 95 = Clear view
+          if (is.na(mean_ci_current) || mean_ci_current == 0) {
+            cloud_category <- "No image available"
+            # Set all CI metrics to NA since no valid data
+            ci_change <- NA
+            ci_cv_current <- NA
+            ci_percentile_low <- NA
+            ci_percentile_high <- NA
+          } else if (mean_ci_current >= 95) {
+            cloud_category <- "Clear view"
+          } else {
+            cloud_category <- "Partial coverage"
+          }
+          
+          # Build result row
+          result_row <- data.frame(
+            Field_id = field_id,
+            Acreage = field_area_acres,
+            Mean_CI = mean_ci_current,
+            Mean_CI_prev = mean_ci_previous,
+            CI_change = ci_change,
+            CI_CV = ci_cv_current,
+            CI_percentile_low = ci_percentile_low,
+            CI_percentile_high = ci_percentile_high,
+            Age_weeks = age_weeks,
+            Phase = phase,
+            Status_trigger = status_trigger,
+            Cloud_category = cloud_category,
+            stringsAsFactors = FALSE
+          )
+          
+          field_results_this_tile[[as.character(field_id)]] <- result_row
+          fields_added <- fields_added + 1
+          
+        }, error = function(e) {
+          # Show error for debugging
+          message(paste("      [FIELD ERROR] Field", field_idx, ":", e$message))
+        })
+      }
+      
+      if (length(field_results_this_tile) > 0) {
+        all_tile_results[[basename(tile_file)]] <- dplyr::bind_rows(field_results_this_tile)
+        message(paste("    Extracted", length(field_results_this_tile), "fields from tile (processed", fields_added, "fields total)"))
+      } else {
+        message(paste("    WARNING: No fields extracted from this tile (processed", fields_added, "fields, all either NA or errored)"))
+      }
+      
+    }, error = function(e) {
+      message(paste("  Error processing tile", basename(tile_file), ":", e$message))
+    })
+  }
+  
+  # Combine all tile results, keeping unique fields (may appear in multiple tiles)
+  if (length(all_tile_results) == 0) {
+    stop("No fields extracted from any tiles!")
+  }
+  
+  field_analysis_df <- dplyr::bind_rows(all_tile_results) %>%
+    distinct(Field_id, .keep_all = TRUE)
  
  if (nrow(field_analysis_df) == 0) {
    stop("No fields analyzed successfully!")
@ -1038,17 +1482,90 @@ main <- function() {
  cat("\n--- Summary Statistics ---\n")
  print(summary_statistics_df)
  
-  # ========== FARM-LEVEL KPI CALCULATION ==========
+  # ========== FARM-LEVEL KPI AGGREGATION ==========
+  # Aggregate the per-field analysis into farm-level summary statistics
  
-  farm_kpi_results <- calculate_and_export_farm_kpis(
-    report_date = end_date,
-    project_dir = project_dir,
-    field_boundaries_sf = field_boundaries_sf,
-    harvesting_data = harvesting_data,
-    cumulative_CI_vals_dir = cumulative_CI_vals_dir,
-    weekly_CI_mosaic = weekly_CI_mosaic,
-    reports_dir = reports_dir
-  )
+  cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n")
+  
+  # Filter to only fields that have actual data (non-NA CI and valid acreage)
+  field_data <- field_analysis_df %>%
+    filter(!is.na(Mean_CI) & !is.na(Acreage)) %>%
+    filter(Acreage > 0)
+  
+  if (nrow(field_data) > 0) {
+    
+    if (nrow(field_data) > 0) {
+      # Create summary statistics
+      farm_summary <- list()
+      
+      # 1. PHASE DISTRIBUTION
+      phase_dist <- field_data %>%
+        group_by(Phase) %>%
+        summarise(
+          num_fields = n(),
+          acreage = sum(Acreage, na.rm = TRUE),
+          .groups = 'drop'
+        ) %>%
+        rename(Category = Phase)
+      
+      farm_summary$phase_distribution <- phase_dist
+      
+      # 2. STATUS TRIGGER DISTRIBUTION
+      status_dist <- field_data %>%
+        group_by(Status_trigger) %>%
+        summarise(
+          num_fields = n(),
+          acreage = sum(Acreage, na.rm = TRUE),
+          .groups = 'drop'
+        ) %>%
+        rename(Category = Status_trigger)
+      
+      farm_summary$status_distribution <- status_dist
+      
+      # 3. CLOUD COVERAGE DISTRIBUTION
+      cloud_dist <- field_data %>%
+        group_by(Cloud_category) %>%
+        summarise(
+          num_fields = n(),
+          acreage = sum(Acreage, na.rm = TRUE),
+          .groups = 'drop'
+        ) %>%
+        rename(Category = Cloud_category)
+      
+      farm_summary$cloud_distribution <- cloud_dist
+      
+      # 4. OVERALL STATISTICS
+      farm_summary$overall_stats <- data.frame(
+        total_fields = nrow(field_data),
+        total_acreage = sum(field_data$Acreage, na.rm = TRUE),
+        mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2),
+        median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2),
+        mean_cv = round(mean(field_data$CI_CV, na.rm = TRUE), 4),
+        week = current_week,
+        year = year,
+        date = as.character(end_date)
+      )
+      
+      # Print summaries
+      cat("\n--- PHASE DISTRIBUTION ---\n")
+      print(phase_dist)
+      
+      cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n")
+      print(status_dist)
+      
+      cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n")
+      print(cloud_dist)
+      
+      cat("\n--- OVERALL FARM STATISTICS ---\n")
+      print(farm_summary$overall_stats)
+      
+      farm_kpi_results <- farm_summary
+    } else {
+      farm_kpi_results <- NULL
+    }
+  } else {
+    farm_kpi_results <- NULL
+  }
  
  # ========== FINAL SUMMARY ==========
  
@ -1063,7 +1580,7 @@ main <- function() {
  if (!is.null(farm_kpi_results)) {
    cat("\nFarm-level KPIs: CALCULATED\n")
  } else {
-    cat("\nFarm-level KPIs: SKIPPED (kpi_utils.R not available)\n")
+    cat("\nFarm-level KPIs: SKIPPED (no valid tile data extracted)\n")
  }
  
  cat("\n✓ Consolidated KPI calculation complete!\n")
--- a/r_app/parameters_project.R
+++ b/r_app/parameters_project.R
@ -50,7 +50,7 @@ detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NUL
  }
  
  # PRIORITY 2: File-based detection (fallback if metadata not found)
-  # Check if merged_final_tif/ contains tile-named files
+  # Check if merged_final_tif/ contains tile-named files OR grid-size subdirectories
  
  if (!dir.exists(merged_final_tif_dir)) {
    return(list(
@ -61,6 +61,30 @@ detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NUL
    ))
  }
  
+  # First check if there are grid-size subdirectories (5x5, 10x10, etc.)
+  # This indicates the tiles are organized: merged_final_tif/{grid_size}/{DATE}/{DATE}_XX.tif
+  grid_subfolders <- list.dirs(merged_final_tif_dir, full.names = FALSE, recursive = FALSE)
+  grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE)
+  
+  if (length(grid_patterns) > 0) {
+    # Found grid-size subdirectories - tiles exist!
+    grid_size <- grid_patterns[1]
+    grid_dir <- file.path(merged_final_tif_dir, grid_size)
+    
+    # List sample tile files from the grid directory
+    sample_tiles <- list.files(grid_dir, pattern = "\\.tif$", recursive = TRUE)[1:3]
+    
+    return(list(
+      has_tiles = TRUE,
+      detected_tiles = sample_tiles,
+      total_files = length(sample_tiles),
+      source = "grid_subdirectory_detection",
+      grid_size = grid_size,
+      grid_path = grid_dir
+    ))
+  }
+  
+  # Fall back to checking for tile-named files directly in merged_final_tif
  # List all .tif files in merged_final_tif
  tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE)