updated code to fit in workflow better

This commit is contained in:
Timon 2026-01-12 16:33:23 +01:00
parent d1303dd218
commit cb63cf00b8
11 changed files with 2602 additions and 69 deletions

View file

@ -31,7 +31,7 @@ Examples:
python download_8band_pu_optimized.py chemba # Uses today's date
python download_8band_pu_optimized.py xinavane --clear-singles --cleanup
python download_8band_pu_optimized.py angata --clear-all --resolution 5
Cost Model:
- 4-band uint16 with cloud masking: ~50% lower cost than 9-band FLOAT32
- Reduced bbox sizes: ~10-20% lower cost due to smaller average tile size
@ -39,6 +39,18 @@ Cost Model:
- Requests: Slightly higher (~50-60 tiles) but within 700k budget
Expected result: ~75% PU savings with dynamic geometry-fitted grid
Example running it in powershell:
$startDate = [DateTime]::ParseExact("2025-11-01", "yyyy-MM-dd", $null)
$endDate = [DateTime]::ParseExact("2025-12-24", "yyyy-MM-dd", $null)
$current = $startDate
while ($current -le $endDate) {
$dateStr = $current.ToString("yyyy-MM-dd")
Write-Host "Downloading $dateStr..."
python download_8band_pu_optimized.py angata --date $dateStr
$current = $current.AddDays(1)
}
"""
import os

View file

@ -0,0 +1,111 @@
"""
Script: 01_harvest_baseline_prediction.py
Purpose: BASELINE PREDICTION - Run ONCE to establish harvest date baseline for all fields and seasons
This script processes COMPLETE historical CI data (all available dates) and uses Model 307
to predict ALL harvest dates across the entire dataset. This becomes your reference baseline
for monitoring and comparisons going forward.
RUN FREQUENCY: Once during initial setup
INPUT: ci_data_for_python.csv (complete historical CI data from 02b_convert_rds_to_csv.R)
Location: laravel_app/storage/app/{project}/Data/extracted_ci/ci_data_for_python/ci_data_for_python.csv
OUTPUT: harvest_production_export.xlsx (baseline harvest predictions for all fields/seasons)
Workflow:
1. Load ci_data_for_python.csv (daily interpolated, all historical dates)
2. Group data by field and season (Model 307 detects season boundaries internally)
3. Run two-step harvest detection (Phase 1: fast detection, Phase 2: ±40 day refinement)
4. Export harvest_production_export.xlsx with columns:
- field, sub_field, season, year, season_start_date, season_end_date, phase1_harvest_date
Two-Step Detection Algorithm:
Phase 1 (Growing Window): Expands daily, checks when detected_prob > 0.5 for 3 consecutive days
Phase 2 (Refinement): Extracts ±40 day window, finds peak harvest signal with argmax
This is your GROUND TRUTH - compare all future predictions against this baseline.
Usage:
python 01_harvest_baseline_prediction.py [project_name]
Examples:
python 01_harvest_baseline_prediction.py angata
python 01_harvest_baseline_prediction.py esa
python 01_harvest_baseline_prediction.py chemba
If no project specified, defaults to 'angata'
"""
import pandas as pd
import numpy as np
import torch
import sys
from pathlib import Path
from harvest_date_pred_utils import (
load_model_and_config,
extract_features,
run_two_step_refinement,
build_production_harvest_table
)
def main():
    """Establish the harvest-date baseline for one project (run once at setup).

    Reads the complete interpolated CI history for the project, runs the
    two-step Model 307 harvest detection over every field/season, and writes
    the baseline table to harvest_production_export.xlsx under the project's
    HarvestData folder.
    """
    # Project selection: first CLI argument, defaulting to 'angata'.
    project_name = sys.argv[1] if len(sys.argv) > 1 else "angata"

    # Resolve the Laravel storage layout for this project.
    storage_root = Path("../laravel_app/storage/app") / project_name / "Data"
    input_csv = storage_root / "extracted_ci" / "ci_data_for_python" / "ci_data_for_python.csv"
    output_dir = storage_root / "HarvestData"
    output_dir.mkdir(parents=True, exist_ok=True)  # create HarvestData if missing
    output_xlsx = output_dir / "harvest_production_export.xlsx"
    model_dir = Path(".")  # Model files in python_app/

    # Guard: the R conversion step must have produced the CI CSV first.
    if not input_csv.exists():
        print(f"ERROR: {input_csv} not found")
        print(f" Expected at: {input_csv.resolve()}")
        print(f"\n Run 02b_convert_rds_to_csv.R first to generate this file:")
        print(f" Rscript r_app/02b_convert_ci_rds_to_csv.R {project_name}")
        return

    print("="*80)
    print(f"HARVEST DATE PREDICTION - LSTM MODEL 307 ({project_name})")
    print("="*80)

    # [1/4] Load model artifacts (network weights, feature config, scalers).
    print("\n[1/4] Loading Model 307...")
    model, config, scalers = load_model_and_config(model_dir)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f" Device: {device}")

    # [2/4] Load the daily-interpolated CI data and parse dates.
    print("\n[2/4] Loading CI data...")
    print(f" From: {input_csv}")
    ci_data = pd.read_csv(input_csv)
    ci_data['Date'] = pd.to_datetime(ci_data['Date'])
    print(f" Loaded {len(ci_data)} daily rows across {ci_data['field'].nunique()} fields")
    print(f" Date range: {ci_data['Date'].min().date()} to {ci_data['Date'].max().date()}")

    # [3/4] Two-step detection: growing window (Phase 1) + refinement (Phase 2).
    print("\n[3/4] Running two-step harvest detection...")
    refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device)

    # [4/4] Flatten results into the export table and write it out.
    print("\nBuilding production harvest table...")
    prod_table = build_production_harvest_table(refined_results)
    prod_table.to_excel(output_xlsx, index=False)

    print(f"\n✓ Exported {len(prod_table)} predictions to {output_xlsx}")
    print(f"\nOutput location: {output_xlsx.resolve()}")
    print(f"\nStorage structure:")
    print(f" Input: laravel_app/storage/app/{project_name}/Data/extracted_ci/ci_data_for_python/")
    print(f" Output: laravel_app/storage/app/{project_name}/Data/HarvestData/")
    print(f"\nColumn structure:")
    print(f" field, sub_field, season, year, season_start_date, season_end_date, phase1_harvest_date")
    print(f"\nNext steps:")
    print(f" 1. Review baseline predictions in harvest_production_export.xlsx")
    print(f" 2. Run weekly monitoring: python 02_harvest_imminent_weekly.py {project_name}")


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,348 @@
"""
Script: 02_harvest_imminent_weekly.py
Purpose: WEEKLY MONITORING - Run WEEKLY/DAILY to get real-time harvest status for all fields
This script runs on RECENT CI data (typically last 300 days) to predict whether each field
is approaching harvest. Use this for operational decision-making and real-time alerts.
RUN FREQUENCY: Weekly (or daily if required)
INPUT:
- ci_data_for_python.csv (recent CI data from 02b_convert_rds_to_csv.R)
Location: laravel_app/storage/app/{project}/Data/extracted_ci/ci_data_for_python/ci_data_for_python.csv
- harvest_production_export.xlsx (baseline from script 01 - optional, for reference)
OUTPUT:
- harvest_imminent_weekly.csv (weekly probabilities: field, imminent_prob, detected_prob, week, year)
Workflow:
1. Load harvest_production_export.xlsx (baseline dates - optional, for context)
2. Load ci_data_for_python.csv (recent CI data)
3. For each field, extract last 300 days of history
4. Run Model 307 inference on full sequence (last timestep probabilities)
5. Export harvest_imminent_weekly.csv with probabilities
Output Columns:
- field: Field ID
- sub_field: Sub-field identifier
- imminent_prob: Probability field will be harvestable in next 28 days (0.0-1.0)
- detected_prob: Probability field is currently being harvested (0.0-1.0)
- week: ISO week number
- year: Year
- as_of_date: Latest date in dataset
- num_days: Number of days of history used
Use Cases:
- Alert when imminent_prob > 0.7 (prepare harvest operations)
- Alert when detected_prob > 0.6 (field is being harvested)
- Track trends over weeks to validate baseline predictions
- Feed into 09b script for weekly dashboard reports
Usage:
python 02_harvest_imminent_weekly.py [project_name]
Examples:
python 02_harvest_imminent_weekly.py angata
python 02_harvest_imminent_weekly.py esa
python 02_harvest_imminent_weekly.py chemba
If no project specified, defaults to 'angata'
"""
import pandas as pd
import numpy as np
import torch
import subprocess
import sys
from pathlib import Path
from datetime import datetime, timedelta
from harvest_date_pred_utils import (
load_model_and_config,
extract_features,
)
def load_harvest_dates(harvest_file):
    """Load latest harvest end dates from Excel file (from harvest_production_export.xlsx).

    Returns a dict mapping stripped field id (str) -> latest season_end_date
    Timestamp, or None when the file is missing or unreadable (callers then
    fall back to a fixed lookback window).
    """
    print("[1/5] Loading harvest dates...")
    if not Path(harvest_file).exists():
        print(f" ERROR: {harvest_file} not found")
        print(" Using 180-day lookback as default")
        return None
    try:
        baseline = pd.read_excel(harvest_file)
        print(f" Loaded {len(baseline)} field-season records")
        # Use season_end_date column (output from harvest prediction script)
        baseline['season_end_date'] = pd.to_datetime(baseline['season_end_date'])
        # Keep only the most recent season end per field.
        harvest_dates = {
            str(field_id).strip(): seasons['season_end_date'].max()
            for field_id, seasons in baseline.groupby('field')
        }
        print(f" Successfully mapped {len(harvest_dates)} fields")
        print(f" Harvest end dates range: {min(harvest_dates.values()).date()} to {max(harvest_dates.values()).date()}")
        return harvest_dates
    except Exception as e:
        print(f" ERROR loading harvest file: {e}")
        print(f" Using 180-day lookback instead")
        return None
def run_rds_to_csv_conversion():
    """Convert the RDS CI archive to CSV by invoking the R conversion script.

    Looks for 02b_convert_rds_to_csv.R in the current directory and runs it
    with Rscript. The known Windows install location is preferred for backward
    compatibility, with a fallback to any `Rscript` found on PATH so the step
    also works on Linux/macOS or with a different R version.

    Returns:
        bool: True if the R script ran and exited with status 0, else False.
    """
    import shutil  # local import: only needed here to locate Rscript on PATH

    print("\n[2/5] Converting RDS to CSV (daily interpolation)...")
    r_script = Path("02b_convert_rds_to_csv.R")
    if not r_script.exists():
        print(f" ERROR: {r_script} not found")
        return False
    # Previously hard-coded to the Windows R 4.4.3 path, which broke on any
    # other machine. Keep it as first choice, then fall back to PATH lookup.
    rscript_exe = r"C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe"
    if not Path(rscript_exe).exists():
        located = shutil.which("Rscript")
        if located is None:
            print(" ERROR: Rscript executable not found (install R or add it to PATH)")
            return False
        rscript_exe = located
    try:
        # List form (shell=False) avoids quoting issues in the script path.
        result = subprocess.run(
            [rscript_exe, str(r_script)],
            capture_output=True,
            text=True,
            timeout=300  # abort if the conversion hangs (5 minutes)
        )
        if result.returncode != 0:
            print(f" ERROR running R script:\n{result.stderr}")
            return False
        # Echo the tail of the R output so progress is visible in our log.
        lines = result.stdout.strip().split('\n')
        for line in lines[-5:]:
            if line.strip():
                print(f" {line}")
        return True
    except Exception as e:
        print(f" ERROR: {e}")
        return False
def load_ci_data(csv_file):
    """Read the daily-interpolated CI CSV and parse its Date column.

    Returns the DataFrame, or None when the file does not exist.
    """
    print("\n[3/5] Loading CI data...")
    if not Path(csv_file).exists():
        print(f" ERROR: {csv_file} not found")
        return None
    frame = pd.read_csv(csv_file)
    frame['Date'] = pd.to_datetime(frame['Date'])
    print(f" Loaded {len(frame)} daily rows for {frame['field'].nunique()} fields")
    print(f" Date range: {frame['Date'].min().date()} to {frame['Date'].max().date()}")
    return frame
def extract_seasonal_data(field_id, harvest_date, ci_data):
    """Return one field's CI rows from `harvest_date` onward, sorted by date.

    Returns None when the field is absent from `ci_data` or when fewer than
    30 rows remain after the date filter (too little history for inference).
    Note: `field_id` and the 'field' column are both ints, so equality works.
    """
    subset = ci_data.loc[ci_data['field'] == field_id].copy()
    if subset.empty:
        return None
    # Restrict to the current season: everything on/after the harvest date.
    subset = subset.loc[subset['Date'] >= harvest_date].sort_values('Date')
    return subset if len(subset) >= 30 else None
def run_inference_on_season(field_data, model, config, scalers, device, ci_column='FitData'):
    """
    Run Model 307 inference on recent field CI history.

    Predicts probability that field will be ready to harvest in next 28 days.
    Uses the last timestep of the provided sequence as the current-day score.

    Args:
        field_data: single-field daily CI rows (sorted by date by the caller).
        model: trained dual-head network; forward returns (imminent, detected).
        config: model config dict; config['features'] lists feature names.
        scalers: list of per-feature StandardScaler objects (or falsy to skip).
        device: torch device for inference.
        ci_column: name of the CI value column to featurize.

    Returns:
        (imminent_prob, detected_prob) rounded to 4 decimals, or (None, None)
        when there is too little data or any step fails (best-effort design:
        the caller simply skips the field).
    """
    try:
        # Use last 300 days of data for inference (enough history for
        # meaningful patterns, avoids training data seasonality mismatch).
        if len(field_data) > 300:
            field_data = field_data.iloc[-300:]
        # Build the rolling-window feature matrix (timesteps x features).
        features_array = extract_features(field_data, config['features'], ci_column)
        if features_array.shape[0] < 10:
            # Too few timesteps for a meaningful sequence score.
            return None, None
        # Scale features using per-feature scalers (CRITICAL: same as Phase 1
        # in harvest_date_pred_utils.py). `scalers` is a list of StandardScaler
        # objects, one per feature, in training feature order.
        if scalers and isinstance(scalers, list):
            for fi, scaler in enumerate(scalers):
                try:
                    features_array[:, fi] = scaler.transform(features_array[:, fi].reshape(-1, 1)).flatten()
                except Exception:
                    pass  # best-effort: leave this feature unscaled on mismatch
        # Run inference on the whole sequence in one forward pass.
        with torch.no_grad():
            x_tensor = torch.tensor(features_array, dtype=torch.float32).unsqueeze(0).to(device)
            out_imm, out_det = model(x_tensor)
            # Get last timestep probabilities (the "as of today" estimate).
            imminent_prob = out_imm.squeeze(0)[-1].cpu().item()
            detected_prob = out_det.squeeze(0)[-1].cpu().item()
        return round(imminent_prob, 4), round(detected_prob, 4)
    except Exception as e:
        # Deliberate catch-all: a single bad field must not abort the batch.
        return None, None
def main():
    """Weekly monitoring entry point: score every field's harvest probabilities.

    Loads recent CI data for the selected project, runs Model 307 on the last
    300 days of each field's history, and writes per-field imminent/detected
    probabilities to harvest_imminent_weekly.csv.
    """
    # Get project name from command line or use default
    project_name = sys.argv[1] if len(sys.argv) > 1 else "angata"
    # Construct paths under the Laravel storage tree for this project
    base_storage = Path("../laravel_app/storage/app") / project_name / "Data"
    ci_data_dir = base_storage / "extracted_ci" / "ci_data_for_python"
    CI_DATA_FILE = ci_data_dir / "ci_data_for_python.csv"
    harvest_data_dir = base_storage / "HarvestData"
    BASELINE_FILE = harvest_data_dir / "harvest_production_export.xlsx"
    OUTPUT_CSV = harvest_data_dir / "harvest_imminent_weekly.csv"
    harvest_data_dir.mkdir(parents=True, exist_ok=True)  # Create if doesn't exist
    print("="*80)
    print(f"HARVEST IMMINENT PROBABILITY - WEEKLY MONITORING ({project_name})")
    print("="*80)
    # [1] Load harvest dates (optional - for projects with predictions).
    # NOTE(review): `harvest_dates` is loaded but not used further below in
    # this function — kept for context/logging only; confirm before removing.
    harvest_dates = None
    if BASELINE_FILE.exists():
        harvest_dates = load_harvest_dates(BASELINE_FILE)
    else:
        print("[1/5] Loading harvest dates...")
        print(f" INFO: {BASELINE_FILE} not found (optional for weekly monitoring)")
    # [2] Load CI data
    print(f"\n[2/5] Loading CI data...")
    print(f" From: {CI_DATA_FILE}")
    if not CI_DATA_FILE.exists():
        print(f" ERROR: {CI_DATA_FILE} not found")
        print(f" Expected at: {CI_DATA_FILE.resolve()}")
        print(f"\n Run 02b_convert_rds_to_csv.R first to generate this file:")
        print(f" Rscript r_app/02b_convert_ci_rds_to_csv.R {project_name}")
        return
    ci_data = load_ci_data(CI_DATA_FILE)
    if ci_data is None:
        print("ERROR: Could not load CI data")
        return
    # [3] Load model (from python_app directory)
    print("\n[3/5] Loading Model 307...")
    model_dir = Path(".")  # Current directory is python_app/, contains model.pt, config.json, scalers.pkl
    model, config, scalers = load_model_and_config(model_dir)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f" Device: {device}")
    # [4] Run inference per field
    print("\n[4/5] Running seasonal inference...")
    results_list = []
    ci_column = config['data']['ci_column']
    # Get field metadata: sub_field label and most recent observation date
    field_meta = ci_data.groupby('field').agg({
        'sub_field': 'first',
        'Date': 'max'
    }).reset_index()
    field_meta.columns = ['field', 'sub_field', 'latest_date']
    count = 0
    for field_id in ci_data['field'].unique():
        # Get metadata
        meta = field_meta[field_meta['field'] == field_id]
        if len(meta) == 0:
            continue
        sub_field = meta['sub_field'].iloc[0]
        latest_date = meta['latest_date'].iloc[0]
        # Use recent CI history (last 300 days from latest available data)
        field_data = ci_data[ci_data['field'] == field_id].copy()
        field_data = field_data.sort_values('Date')
        # Keep last 300 days of history for inference
        if len(field_data) > 300:
            field_data = field_data.iloc[-300:]
        if len(field_data) < 30:
            continue  # too little history for meaningful inference
        # Run inference on recent history to predict next 28 days
        imminent_prob, detected_prob = run_inference_on_season(
            field_data, model, config, scalers, device, ci_column
        )
        if imminent_prob is None:
            continue  # inference failed or insufficient features; skip field
        # %V = ISO week number of the latest observation date
        week = int(latest_date.strftime('%V'))
        year = int(latest_date.strftime('%Y'))
        results_list.append({
            'field': field_id,
            'sub_field': sub_field,
            'imminent_prob': imminent_prob,
            'detected_prob': detected_prob,
            'week': week,
            'year': year,
            'as_of_date': latest_date,
            'num_days': len(field_data),
        })
        count += 1
    print(f" Completed inference for {count} fields")
    # Build output DataFrame and write it (empty frame still produces a CSV)
    df = pd.DataFrame(results_list)
    df.to_csv(OUTPUT_CSV, index=False)
    print(f"\n[5/5] Exporting results...")
    print(f"✓ Exported {len(df)} fields to {OUTPUT_CSV}")
    print(f" Output location: {OUTPUT_CSV.resolve()}")
    if len(df) > 0:
        print(f"\nSample rows:")
        print(df[['field', 'sub_field', 'imminent_prob', 'detected_prob', 'num_days', 'week', 'year']].head(15).to_string(index=False))
        # Show alert summary using the documented operational thresholds
        high_imminent = len(df[df['imminent_prob'] > 0.7])
        high_detected = len(df[df['detected_prob'] > 0.6])
        print(f"\n⚠ ALERTS:")
        print(f" Fields with imminent_prob > 0.70: {high_imminent}")
        print(f" Fields with detected_prob > 0.60: {high_detected}")
    else:
        print(f" WARNING: No results exported - check CI data availability")
    print(f"\nStorage structure:")
    print(f" Input CI: laravel_app/storage/app/{project_name}/Data/extracted_ci/ci_data_for_python/")
    print(f" Input baseline: laravel_app/storage/app/{project_name}/Data/HarvestData/harvest_production_export.xlsx")
    print(f" Output: laravel_app/storage/app/{project_name}/Data/HarvestData/")
    print(f"\nReady to load into 09b field analysis report")


if __name__ == "__main__":
    main()

View file

@ -18,6 +18,7 @@ import sys
import json
import datetime
import argparse
import subprocess
from pathlib import Path
from osgeo import gdal
import time
@ -441,6 +442,7 @@ def get_evalscript():
def main():
print("="*80)
print("PLANET SATELLITE DATA DOWNLOADER - MISSING DATES ONLY")
print("Wrapper for 00_download_8band_pu_optimized.py")
print("="*80)
config_dict = get_config()
@ -495,47 +497,45 @@ def main():
print(f" - {date}")
if config_dict['dry_run']:
print("\n[DRY-RUN] Would download and merge above dates")
print("\n[DRY-RUN] Would download above dates using 00_download_8band_pu_optimized.py")
return 0
# Setup BBox list
print(f"\nLoading field geometries...")
bbox_list = setup_bbox_list(paths['geojson'], resolution=config_dict['resolution'])
if bbox_list is None:
return 1
print(f" Created {len(bbox_list)} BBox tiles")
# Download and merge each missing date
print(f"\nDownloading missing dates...")
# Download each missing date using the optimized downloader
print(f"\n{'='*80}")
print(f"Downloading missing dates using optimized script...")
print(f"{'='*80}")
success_count = 0
for i, slot in enumerate(missing_dates, 1):
print(f"\n[{i}/{len(missing_dates)}] Processing {slot}...")
for i, date_str in enumerate(missing_dates, 1):
print(f"\n[{i}/{len(missing_dates)}] Downloading {date_str}...")
# Check availability
if not is_image_available(slot, bbox_list, collection_id):
print(f" Skipping {slot} - no imagery available")
continue
# Call 00_download_8band_pu_optimized.py for this date
cmd = [
sys.executable,
"00_download_8band_pu_optimized.py",
config_dict['project'],
"--date", date_str,
"--resolution", str(config_dict['resolution']),
"--cleanup"
]
# Download for all bboxes
print(f" Downloading {len(bbox_list)} tiles...")
for bbox in bbox_list:
size = bbox_to_dimensions(bbox, resolution=config_dict['resolution'])
download_function(slot, bbox, size, paths['single_images'])
# Merge
print(f" Merging tiles...")
if merge_files(slot, paths['single_images'], paths['merged_tifs'], paths['virtual_raster']):
try:
result = subprocess.run(cmd, check=True, capture_output=False)
success_count += 1
print(f" ✓ Successfully downloaded {date_str}")
except subprocess.CalledProcessError as e:
print(f" ✗ Failed to download {date_str}: {e}")
# Continue with next date instead of stopping
continue
# Summary
print(f"\n{'='*80}")
print(f"SUMMARY:")
print(f" Successfully processed: {success_count}/{len(missing_dates)} dates")
print(f" Output folder: {paths['merged_tifs']}")
print(f"{'='*80}")
return 0
return 0 if success_count == len(missing_dates) else 1
if __name__ == "__main__":
sys.exit(main())

View file

@ -0,0 +1,482 @@
"""
Self-contained utility module for two-step harvest date prediction and Excel export.
Includes model architecture, feature engineering, and core prediction logic.
"""
import sys
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import pickle
import yaml
from pathlib import Path
from typing import Tuple, Dict, Any, List
from sklearn.preprocessing import StandardScaler
# ============================================================================
# TORCH MODELS (from src/models.py, inlined for self-containment)
# ============================================================================
class HarvestDetectionLSTM(nn.Module):
    """Unidirectional LSTM for harvest detection with dual outputs.

    Emits two per-timestep probabilities via small sigmoid heads:
    'imminent' (harvest expected soon) and 'detected' (harvest underway).
    """

    def __init__(self, input_size: int, hidden_size: int = 128,
                 num_layers: int = 1, dropout: float = 0.5):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Inter-layer dropout only applies when the LSTM is stacked.
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout if num_layers > 1 else 0,
            bidirectional=False,
            batch_first=True,
        )

        def make_head() -> nn.Sequential:
            # Small MLP mapping each hidden state to a single probability.
            return nn.Sequential(
                nn.Linear(hidden_size, 16),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(16, 1),
                nn.Sigmoid(),
            )

        self.imminent_head = make_head()
        self.detected_head = make_head()

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return (imminent, detected) probabilities, each shaped (batch, seq_len)."""
        hidden_states, _ = self.lstm(x)
        batch, steps, width = hidden_states.shape
        # Flatten timesteps so both heads run as one big linear pass.
        flat = hidden_states.reshape(-1, width)
        imminent = self.imminent_head(flat).reshape(batch, steps)
        detected = self.detected_head(flat).reshape(batch, steps)
        return imminent, detected
class HarvestDetectionGRU(nn.Module):
    """Unidirectional GRU for harvest detection with dual outputs.

    Same interface as HarvestDetectionLSTM, with a GRU recurrent core:
    per-timestep 'imminent' and 'detected' probabilities from sigmoid heads.
    """

    def __init__(self, input_size: int, hidden_size: int = 128,
                 num_layers: int = 1, dropout: float = 0.5):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Inter-layer dropout only applies when the GRU is stacked.
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout if num_layers > 1 else 0,
            bidirectional=False,
            batch_first=True,
        )

        def make_head() -> nn.Sequential:
            # Small MLP mapping each hidden state to a single probability.
            return nn.Sequential(
                nn.Linear(hidden_size, 16),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(16, 1),
                nn.Sigmoid(),
            )

        self.imminent_head = make_head()
        self.detected_head = make_head()

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return (imminent, detected) probabilities, each shaped (batch, seq_len)."""
        hidden_states, _ = self.gru(x)
        batch, steps, width = hidden_states.shape
        # Flatten timesteps so both heads run as one big linear pass.
        flat = hidden_states.reshape(-1, width)
        imminent = self.imminent_head(flat).reshape(batch, steps)
        detected = self.detected_head(flat).reshape(batch, steps)
        return imminent, detected
def create_model(model_type: str, input_size: int, hidden_size: int = 128,
                 num_layers: int = 1, dropout: float = 0.5, device = None) -> nn.Module:
    """Build a harvest-detection network by type name and print a summary.

    Args:
        model_type: 'LSTM' or 'GRU' (registry lookup).
        input_size: number of input features per timestep.
        hidden_size / num_layers / dropout: recurrent-core hyperparameters.
        device: optional torch device to move the model onto.

    Raises:
        ValueError: when `model_type` is not in the registry.
    """
    registry = {'LSTM': HarvestDetectionLSTM, 'GRU': HarvestDetectionGRU}
    if model_type not in registry:
        raise ValueError(f"Unknown model type: {model_type}")
    network = registry[model_type](
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        dropout=dropout,
    )
    if device:
        network = network.to(device)
    # Parameter counts for the console summary below.
    total_params = sum(p.numel() for p in network.parameters())
    trainable_params = sum(p.numel() for p in network.parameters() if p.requires_grad)
    print(f"Model: {model_type}")
    print(f" Input size: {input_size}")
    print(f" Hidden size: {hidden_size}")
    print(f" Num layers: {num_layers}")
    print(f" Dropout: {dropout}")
    print(f" Total parameters: {total_params:,}")
    print(f" Trainable parameters: {trainable_params:,}")
    print(f" Device: {device}")
    return network
# ============================================================================
# FEATURE ENGINEERING (from src/feature_engineering.py, simplified for inline)
# ============================================================================
def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> pd.DataFrame:
    """Build the rolling-window CI feature table used by Model 307.

    For each window in {7, 14, 21} days this derives: moving average,
    velocity (MA diff), acceleration (velocity diff), min/max/range,
    std and coefficient of variation. Remaining NaNs (from diff/std warm-up)
    are filled with 0. Column order matches the training pipeline.
    """
    windows = (7, 14, 21)
    # Rolling means feed several feature families; compute each once.
    ma = {w: ci_series.rolling(window=w, min_periods=1).mean() for w in windows}

    cols = {'CI_raw': ci_series}
    # State (moving averages)
    for w in windows:
        cols[f'{w}d_MA'] = ma[w]
    # Velocity (gradient of MA)
    for w in windows:
        cols[f'{w}d_velocity'] = ma[w].diff() / 1.0  # Simplified gradient
    # Acceleration (gradient of velocity)
    for w in windows:
        cols[f'{w}d_acceleration'] = ma[w].diff().diff()
    # Min, Max, Range
    for w in windows:
        roll = ci_series.rolling(window=w, min_periods=1)
        cols[f'{w}d_min'] = roll.min()
        cols[f'{w}d_max'] = roll.max()
        cols[f'{w}d_range'] = cols[f'{w}d_max'] - cols[f'{w}d_min']
    # Std and CV (epsilon keeps the division finite when the mean is ~0)
    for w in windows:
        std = ci_series.rolling(window=w, min_periods=1).std()
        cols[f'{w}d_std'] = std
        cols[f'{w}d_CV'] = std / (ma[w] + 1e-6)
    # DOY normalized (day-of-year proxy scaled by the training constant 450)
    if doy_series is not None:
        cols['DOY_normalized'] = doy_series / 450.0
    return pd.DataFrame(cols, index=ci_series.index).fillna(0)
def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column: str = 'FitData') -> np.ndarray:
    """Return the requested feature columns as a (timesteps, features) array.

    Feature names not produced by compute_ci_features are silently dropped;
    a ValueError is raised only when none of the requested names match.
    """
    ci = data_df[ci_column].astype(float)
    doy = None
    if 'DOY_normalized' in feature_names:
        # Synthetic day counter: position within the sequence, wrapped at 365.
        doy = pd.Series(range(len(data_df)), index=data_df.index) % 365
    table = compute_ci_features(ci, doy)
    selected = [name for name in feature_names if name in table.columns]
    if not selected:
        raise ValueError(f"No valid features found. Requested: {feature_names}")
    return table[selected].values
# ============================================================================
# MAIN UTILITY FUNCTIONS
# ============================================================================
def load_model_and_config(model_dir: Path):
    """Load model, config, and scalers from a given directory.

    Searches `model_dir` and the current working directory for both the
    standard file names (config.json / model.pt / scalers.pkl) and the
    Model-307-specific names (model_config.json / model_307.pt /
    model_scalers.pkl); the first location where all three exist wins.

    Returns:
        (model, config, scalers): network in eval mode on the detected
        device, parsed config dict, and unpickled per-feature scalers.

    Raises:
        FileNotFoundError: when no candidate location has all three files.
    """
    cwd = Path.cwd()
    # Try different naming conventions
    candidates = [
        # Standard names
        (model_dir / "config.json", model_dir / "model.pt", model_dir / "scalers.pkl"),
        # Model 307 specific names
        (model_dir / "model_config.json", model_dir / "model_307.pt", model_dir / "model_scalers.pkl"),
        # CWD standard names
        (cwd / "config.json", cwd / "model.pt", cwd / "scalers.pkl"),
        # CWD Model 307 names
        (cwd / "model_config.json", cwd / "model_307.pt", cwd / "model_scalers.pkl"),
    ]
    config_file = model_file = scalers_file = None
    for cfg, mdl, scl in candidates:
        if cfg.exists() and mdl.exists() and scl.exists():
            config_file, model_file, scalers_file = cfg, mdl, scl
            print(f"Found model files in: {cfg.parent}")
            break
    if not (config_file and model_file and scalers_file):
        # Collect every missing path across all candidates for the error.
        missing = []
        for cfg, mdl, scl in candidates:
            if not cfg.exists():
                missing.append(str(cfg))
            if not mdl.exists():
                missing.append(str(mdl))
            if not scl.exists():
                missing.append(str(scl))
        raise FileNotFoundError(
            f"Missing model files. Checked multiple locations. Missing: {missing}"
        )
    # NOTE: the config file is .json but parsed with yaml.safe_load — valid,
    # since JSON is a subset of YAML.
    with open(config_file) as f:
        config = yaml.safe_load(f)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = create_model(
        model_type=config['model']['type'],
        input_size=len(config['features']),
        hidden_size=config['model']['hidden_size'],
        num_layers=config['model']['num_layers'],
        dropout=config['model']['dropout'],
        device=device
    )
    print(f"Loading weights from: {model_file}")
    # weights_only=False allows arbitrary pickled objects in the checkpoint —
    # acceptable only because the weights file is a trusted local artifact.
    model.load_state_dict(torch.load(model_file, map_location=device, weights_only=False))
    model.eval()
    # Scalers are unpickled as-is (same trust assumption as the weights).
    with open(scalers_file, 'rb') as f:
        scalers = pickle.load(f)
    return model, config, scalers
def load_harvest_data(data_file: Path) -> pd.DataFrame:
    """Load harvest data CSV.

    Thin wrapper around pd.read_csv that logs the source path and row count.
    """
    print(f"Loading data from: {data_file}")
    frame = pd.read_csv(data_file)
    print(f"Loaded {len(frame)} rows")
    return frame
def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device):
    """
    Phase 1: Growing window detection with threshold crossing.

    Starting at `current_pos`, the window iloc[current_pos:window_end] grows
    one day at a time; each step runs the model on the window and checks the
    LAST timestep's detected probability. When it exceeds 0.5 on 3 consecutive
    days, the first of those 3 days is recorded as a harvest and scanning
    restarts the day after it.

    Args:
        field_data: single-field daily rows, positionally indexed (0..n-1).
        model / config / scalers: Model 307 artifacts (load_model_and_config).
        ci_column: name of the CI value column to featurize.
        device: torch device for inference.

    Returns:
        List of (harvest_date, harvest_idx) tuples; harvest_idx is the
        positional index into field_data.
    """
    harvest_dates = []
    current_pos = 0
    while current_pos < len(field_data):
        consecutive_above_threshold = 0
        for window_end in range(current_pos + 1, len(field_data) + 1):
            # window_end is an ABSOLUTE position (slice end), not an offset.
            window_data = field_data.iloc[current_pos:window_end].copy().reset_index(drop=True)
            try:
                features = extract_features(window_data, config['features'], ci_column=ci_column)
                # Apply per-feature scalers (training feature order).
                for fi, scaler in enumerate(scalers):
                    try:
                        features[:, fi] = scaler.transform(features[:, fi].reshape(-1, 1)).flatten()
                    except Exception:
                        pass  # best-effort: leave feature unscaled on mismatch
                # Run model
                with torch.no_grad():
                    x_tensor = torch.tensor(features, dtype=torch.float32).unsqueeze(0).to(device)
                    imminent_probs, detected_probs = model(x_tensor)
                    detected_probs = detected_probs.squeeze(0).cpu().numpy()
                # Only the newest day matters: check the LAST timestep.
                last_prob = detected_probs[-1]
                if last_prob > 0.5:
                    consecutive_above_threshold += 1
                else:
                    consecutive_above_threshold = 0
                # Harvest detected: 3 consecutive days above threshold.
                if consecutive_above_threshold >= 3:
                    # BUGFIX: window_end is already absolute, so the harvest
                    # index is window_end - 3 (first of the 3 confirming
                    # days). The previous code added current_pos a second
                    # time, which pointed past the window for every harvest
                    # after the first one.
                    harvest_idx = window_end - 3
                    harvest_dates.append((field_data.iloc[harvest_idx]['Date'], harvest_idx))
                    # Reset to next day after harvest
                    current_pos = harvest_idx + 1
                    break
            except Exception:
                continue  # skip windows where featurization/inference fails
        else:
            # Window reached the end of the series without a detection: done.
            break
    return harvest_dates
def run_phase2_refinement(field_data, phase1_harvests, model, config, scalers, ci_column, device):
    """
    Phase 2: Refinement with ±40 day window.

    For each Phase 1 harvest, run the model once over a padded window and take
    the argmax of the detected probability as the refined harvest day.
    NOTE(review): the window actually spans (season start - 40 days) to
    (Phase 1 harvest + 40 days), i.e. the whole season padded by 40 days —
    broader than a strict ±40 around the harvest itself; confirm intent.

    Returns:
        List of (harvest_date, harvest_idx) tuples; on any failure the
        Phase 1 result for that harvest is kept unchanged.
    """
    refined_harvests = []
    # Positional indexing below relies on this reset (iloc position == label).
    field_data = field_data.sort_values('Date').reset_index(drop=True)
    for i, (phase1_harvest_date, phase1_idx) in enumerate(phase1_harvests):
        try:
            # Season start: dataset start for the first harvest, otherwise
            # the day after the previous Phase 1 harvest.
            if i == 0:
                season_start_date = field_data.iloc[0]['Date']
            else:
                prev_harvest_idx = phase1_harvests[i-1][1]
                season_start_idx = prev_harvest_idx + 1
                if season_start_idx >= len(field_data):
                    break
                season_start_date = field_data.iloc[season_start_idx]['Date']
            # Extract the padded window bounds.
            window_start_date = season_start_date - pd.Timedelta(days=40)
            window_end_date = phase1_harvest_date + pd.Timedelta(days=40)
            # Dates are sorted ascending, so `>= start` is a suffix of Trues:
            # idxmax() correctly yields the first in-window row.
            start_mask = field_data['Date'] >= window_start_date
            window_start_idx = int(start_mask.idxmax()) if start_mask.any() else 0
            # BUGFIX: `<= end` is a prefix of Trues, so idxmax() returned the
            # FIRST True (row 0), collapsing the window to a single row. The
            # exclusive end index is the count of dates <= window_end_date.
            end_mask = field_data['Date'] <= window_end_date
            window_end_idx = min(len(field_data), int(end_mask.sum()))
            if window_end_idx <= window_start_idx:
                # Degenerate window: keep the Phase 1 result unchanged.
                refined_harvests.append((phase1_harvest_date, phase1_idx))
                continue
            window_data = field_data.iloc[window_start_idx:window_end_idx].copy().reset_index(drop=True)
            # Extract features for full window
            features = extract_features(window_data, config['features'], ci_column=ci_column)
            # Apply per-feature scalers (training feature order).
            for fi, scaler in enumerate(scalers):
                try:
                    features[:, fi] = scaler.transform(features[:, fi].reshape(-1, 1)).flatten()
                except Exception:
                    pass  # best-effort: leave feature unscaled on mismatch
            # Run model once on full window
            with torch.no_grad():
                x_tensor = torch.tensor(features, dtype=torch.float32).unsqueeze(0).to(device)
                imminent_probs, detected_probs = model(x_tensor)
                detected_probs = detected_probs.squeeze(0).cpu().numpy()
            # Refined harvest = day of peak detected probability in the window.
            refined_idx_in_window = int(np.argmax(detected_probs))
            refined_idx_global = window_start_idx + refined_idx_in_window
            refined_harvest_date = field_data.iloc[refined_idx_global]['Date']
            refined_harvests.append((refined_harvest_date, refined_idx_global))
        except Exception:
            # Any failure: fall back to the Phase 1 detection for this harvest.
            refined_harvests.append((phase1_harvest_date, phase1_idx))
    return refined_harvests
def run_two_step_refinement(df: pd.DataFrame, model, config, scalers, device=None):
    """
    Two-step harvest detection for each field:
      1. Phase 1: Growing window with 3-day threshold confirmation
      2. Phase 2: ±40 day refinement with argmax

    Args:
        df: Daily interpolated CI data containing at least 'field', 'Date'
            and the CI column named by config['data']['ci_column'].
        model: Trained torch model with dual output heads (imminent, detected).
        config: Parsed model configuration dict.
        scalers: Per-feature fitted scalers applied before inference.
        device: Optional torch device; defaults to CUDA when available,
            otherwise CPU.

    Returns:
        List of dicts, one per detected harvest event, with keys:
        field, season, season_start_date, season_end_date, phase2_harvest_date.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    results = []
    ci_column = config['data']['ci_column']
    # Group by field and count total fields for progress reporting
    field_groups = list(df.groupby('field'))
    total_fields = len(field_groups)
    harvests_found = 0
    print(f" Processing {total_fields} fields...")
    for idx, (field, field_data) in enumerate(field_groups, 1):
        # Simple progress indicator.
        # Fix: the filled/empty bar glyphs were empty string literals, so the
        # bar always printed as "[]" padding-free; use visible block characters.
        pct = int((idx / total_fields) * 100)
        bar_length = 40
        filled = int((idx / total_fields) * bar_length)
        bar = "█" * filled + "░" * (bar_length - filled)
        print(f" [{bar}] {pct:3d}% ({idx}/{total_fields} fields)", end='\r')
        field_data = field_data.sort_values('Date').reset_index(drop=True)
        # Phase 1: Growing window detection
        phase1_harvests = run_phase1_growing_window(field_data, model, config, scalers, ci_column, device)
        if not phase1_harvests:
            continue
        # Phase 2: Refinement
        phase2_harvests = run_phase2_refinement(field_data, phase1_harvests, model, config, scalers, ci_column, device)
        # Store results: season i starts the day after the previous harvest
        for i, (harvest_date, harvest_idx) in enumerate(phase2_harvests):
            if i == 0:
                season_start_date = field_data.iloc[0]['Date']
            else:
                prev_harvest_idx = phase2_harvests[i-1][1]
                season_start_idx = prev_harvest_idx + 1
                if season_start_idx >= len(field_data):
                    break
                season_start_date = field_data.iloc[season_start_idx]['Date']
            season_end_date = harvest_date
            result = {
                'field': field,
                'season': i + 1,
                'season_start_date': season_start_date,
                'season_end_date': season_end_date,
                'phase2_harvest_date': harvest_date,
            }
            results.append(result)
            harvests_found += 1
    print()  # New line after progress bar
    print(f" ✓ Complete: Found {harvests_found} harvest events across {total_fields} fields")
    return results
def build_production_harvest_table(refined_results: List[Dict]) -> pd.DataFrame:
    """
    Build a DataFrame from refined results for the production pipeline.

    One row per field/season, with all date columns rendered as YYYY-MM-DD
    strings.

    Args:
        refined_results: List of dicts as produced by run_two_step_refinement
            (keys: field, season, season_start_date, season_end_date,
            phase2_harvest_date).

    Returns:
        DataFrame with formatted date columns; an empty DataFrame with the
        core columns when no results are given.
    """
    if not refined_results:
        print("WARNING: No refined results to build table")
        return pd.DataFrame(columns=['field', 'season', 'season_start_date', 'season_end_date'])
    # Build DataFrame
    df = pd.DataFrame(refined_results)
    # Format every date column actually present. The two-step refinement emits
    # 'phase2_harvest_date' (not 'phase1_harvest_date'), so unconditionally
    # formatting a fixed column name raised KeyError on real results.
    date_columns = ['season_start_date', 'season_end_date',
                    'phase1_harvest_date', 'phase2_harvest_date']
    for col in date_columns:
        if col in df.columns:
            df[col] = pd.to_datetime(df[col]).dt.strftime('%Y-%m-%d')
    print(f"Built production table with {len(df)} field/season combinations")
    return df

BIN
python_app/model_307.pt Normal file

Binary file not shown.

View file

@ -0,0 +1,144 @@
{
"name": "307_dropout02_with_doy",
"description": "Production Model 307: LSTM-based harvest detection (Phase 3, minimal regularization)",
"model_info": {
"type": "LSTM",
"architecture": "Unidirectional LSTM with dual output heads (imminent + detected)",
"total_parameters": 105120,
"input_features": 14,
"hidden_units": 256,
"output_heads": 2,
"training_data": "Historical multi-season CI data from multiple estates",
"validation_method": "5-fold cross-validation",
"device": "GPU (CUDA) or CPU fallback"
},
"production_scripts": {
"baseline": {
"script": "01_harvest_baseline_prediction.py",
"frequency": "Run ONCE during setup",
"purpose": "Predict all harvest dates (ground truth baseline)",
"input": "ci_data_for_python.csv (complete historical data)",
"output": "harvest_production_export.xlsx",
"time_estimate": "5-30 minutes depending on data volume"
},
"monitoring": {
"script": "02_harvest_imminent_weekly.py",
"frequency": "Run WEEKLY (or daily if required)",
"purpose": "Real-time harvest status and imminent alerts",
"input": "ci_data_for_python.csv (recent data)",
"output": "harvest_imminent_weekly.csv",
"time_estimate": "1-5 minutes"
}
},
"features": [
"CI_raw",
"7d_MA",
"14d_MA",
"21d_MA",
"7d_velocity",
"14d_velocity",
"21d_velocity",
"7d_min",
"14d_min",
"21d_min",
"7d_std",
"14d_std",
"21d_std",
"DOY_normalized"
],
"model": {
"type": "LSTM",
"hidden_size": 256,
"num_layers": 1,
"dropout": 0.2
},
"training": {
"imminent_days_before": 28,
"imminent_days_before_end": 1,
"detected_days_after_start": 1,
"detected_days_after_end": 21,
"k_folds": 5,
"num_epochs": 150,
"patience": 20,
"learning_rate": 0.001,
"batch_size": 4
},
"data": {
"csv_path": "../lstm_complete_data.csv",
"ci_column": "FitData",
"test_fraction": 0.15,
"seed": 42
},
"workflow_instructions": {
"overview": "Model 307 uses a two-script approach: baseline setup + weekly monitoring",
"step_1_baseline": {
"description": "Establish historical harvest date reference for all fields",
"script": "01_harvest_baseline_prediction.py",
"when": "Run once after setting up CI extraction pipeline",
"command": "conda activate python_gpu && python 01_harvest_baseline_prediction.py",
"input_data": "ci_data_for_python.csv (all available historical CI data)",
"output_file": "harvest_production_export.xlsx (ground truth baseline)",
"columns": [
"field - Field ID",
"sub_field - Sub-field designation",
"season - Season number (1, 2, 3...)",
"year - Year of harvest",
"season_start_date - Start of growing season",
"season_end_date - End of season (harvest date)",
"phase2_harvest_date - Refined harvest prediction (Phase 2 argmax)"
],
"notes": "This becomes your reference - compare all weekly monitoring against this"
},
"step_2_monitoring": {
"description": "Weekly real-time harvest status and imminent alerts",
"script": "02_harvest_imminent_weekly.py",
"when": "Run every week (e.g., Mondays) or daily if near harvest",
"command": "conda activate python_gpu && python 02_harvest_imminent_weekly.py",
"input_data": "ci_data_for_python.csv (latest CI data from 02b conversion)",
"output_file": "harvest_imminent_weekly.csv",
"columns": [
"field - Field ID",
"sub_field - Sub-field designation",
"imminent_prob - Likelihood of harvest readiness in next 28 days (0.0-1.0)",
"detected_prob - Current harvest probability (0.0-1.0)",
"week - ISO week number",
"year - Year",
"as_of_date - Latest date in dataset",
"num_days - Days of history used"
],
"alert_thresholds": {
"imminent_high": "imminent_prob > 0.7 (prepare harvest)",
"imminent_medium": "imminent_prob 0.5-0.7 (monitor closely)",
"detected_high": "detected_prob > 0.6 (active harvesting)"
}
},
"integration_with_r_pipeline": {
"before_model_307": [
"Planet 8-band download: download_8band_pu_optimized.py",
"CI extraction: 02_ci_extraction.R",
"Convert to CSV: 02b_convert_rds_to_csv.R (outputs ci_data_for_python.csv)"
],
"model_307_here": [
"BASELINE: 01_harvest_baseline_prediction.py (run once)",
"MONITORING: 02_harvest_imminent_weekly.py (run weekly)"
],
"after_model_307": [
"Field analysis: 09b_field_analysis_weekly.R (reads harvest predictions)",
"Reports: 10_CI_report_with_kpis.Rmd (includes harvest status)"
]
},
"environment_requirements": {
"python_env": "python_gpu",
"activation": "conda activate python_gpu",
"required_packages": [
"torch (GPU-enabled)",
"pandas",
"numpy",
"scikit-learn",
"pyyaml",
"openpyxl"
],
"gpu": "NVIDIA GPU with CUDA (optional - falls back to CPU if unavailable)"
}
}
}

Binary file not shown.

View file

@ -15,9 +15,113 @@
suppressPackageStartupMessages({
library(tidyverse)
library(lubridate)
library(zoo)
library(here)
})
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
#' Reshape wide-format CI data into long format
#'
#' @param ci_data_wide Tibble with columns field, sub_field, and one column per date
#' @return Long-format tibble: field, sub_field, Date, FitData
wide_to_long_ci_data <- function(ci_data_wide) {
  long_data <- ci_data_wide %>%
    pivot_longer(
      cols = -c(field, sub_field),
      names_to = "Date",
      values_to = "FitData",
      values_drop_na = TRUE
    )
  long_data %>%
    mutate(
      Date = as.Date(Date),
      FitData = as.numeric(FitData)
    ) %>%
    filter(!is.na(FitData))
}
#' Build complete daily CI sequences per field with linear interpolation
#'
#' For each field/sub_field combination, expands the measurements onto a full
#' daily calendar from the first to the last observed date, places the raw
#' values on that calendar, and linearly interpolates the gaps.
#'
#' @param ci_data_long Long-format tibble: field, sub_field, Date, FitData
#' @return Tibble with: field, sub_field, Date, FitData, DOY, value
create_interpolated_daily_sequences <- function(ci_data_long) {
  # Expand one field's measurements to a daily grid and interpolate.
  interpolate_one_field <- function(measurements) {
    measurements <- measurements %>% arrange(Date)
    # Complete daily calendar spanning the observation window
    all_days <- seq(min(measurements$Date), max(measurements$Date), by = "day")
    daily <- tibble(
      Date = all_days,
      value = NA_real_,
      FitData = NA_real_,
      DOY = seq_along(all_days)  # Continuous day counter: 1, 2, 3, ...
    )
    # Drop the raw measurements onto their matching calendar days
    for (row in seq_len(nrow(measurements))) {
      hit <- which(daily$Date == measurements$Date[row])
      if (length(hit) > 0) {
        daily$value[hit] <- measurements$FitData[row]
      }
    }
    # Linear interpolation across gaps; leading/trailing NAs are preserved
    daily$FitData <- zoo::na.approx(daily$value, na.rm = FALSE)
    daily
  }

  ci_data_long %>%
    group_by(field, sub_field) %>%
    nest() %>%
    mutate(data = map(data, interpolate_one_field)) %>%
    unnest(data) %>%
    select(field, sub_field, Date, FitData, DOY, value) %>%
    arrange(field, Date)
}
#' Print a validation summary for the converted CI data
#'
#' @param ci_data_python Tibble holding the converted CI data
#' @return The input tibble, invisibly (so the call can be piped)
validate_conversion_output <- function(ci_data_python) {
  n_raw <- sum(!is.na(ci_data_python$value))
  n_interpolated <- sum(is.na(ci_data_python$value) & !is.na(ci_data_python$FitData))
  cat(sprintf("\nValidation:\n"))
  cat(sprintf(" Unique fields: %d\n", n_distinct(ci_data_python$field)))
  cat(sprintf(" Total daily rows: %d\n", nrow(ci_data_python)))
  cat(sprintf(" Date range: %s to %s\n",
              min(ci_data_python$Date, na.rm = TRUE),
              max(ci_data_python$Date, na.rm = TRUE)))
  cat(sprintf(" FitData range: %.2f to %.2f\n",
              min(ci_data_python$FitData, na.rm = TRUE),
              max(ci_data_python$FitData, na.rm = TRUE)))
  cat(sprintf(" Raw measurements: %d\n", n_raw))
  cat(sprintf(" Interpolated values: %d\n", n_interpolated))
  invisible(ci_data_python)
}
#' Show the follow-up steps for the Python harvest-detection stage
print_next_steps <- function() {
  steps <- c(
    " 1. Read this CSV file in Python\n",
    " 2. Group by field to identify seasons\n",
    " 3. Run LSTM model to detect harvest dates\n",
    " 4. Save predicted harvest dates to Excel\n",
    " 5. Use output in script 03 for interpolation\n"
  )
  cat("\nNext steps for Python harvest detection:\n")
  for (step in steps) {
    cat(step)
  }
}
# ============================================================================
# MAIN FUNCTION
# ============================================================================
main <- function() {
# Process command line arguments
args <- commandArgs(trailingOnly = TRUE)
@ -28,7 +132,7 @@ main <- function() {
} else if (exists("project_dir", envir = .GlobalEnv)) {
project_dir <- get("project_dir", envir = .GlobalEnv)
} else {
project_dir <- "esa"
project_dir <- "angata"
}
# Make available globally
@ -49,9 +153,17 @@ main <- function() {
})
# Define paths
ci_data_dir <- here::here("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "cumulative_vals")
input_file <- file.path(ci_data_dir, "combined_CI_data.rds")
output_file <- file.path(ci_data_dir, "ci_data_for_python.csv")
ci_data_source_dir <- here::here("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "cumulative_vals")
ci_data_output_dir <- here::here("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "ci_data_for_python")
# Create output directory if it doesn't exist (for new projects)
if (!dir.exists(ci_data_output_dir)) {
dir.create(ci_data_output_dir, recursive = TRUE, showWarnings = FALSE)
cat(sprintf("✓ Created output directory: %s\n", ci_data_output_dir))
}
input_file <- file.path(ci_data_source_dir, "combined_CI_data.rds")
output_file <- file.path(ci_data_output_dir, "ci_data_for_python.csv")
# Check if input file exists
if (!file.exists(input_file)) {
@ -61,52 +173,32 @@ main <- function() {
cat(sprintf("Loading: %s\n", input_file))
# Load RDS file
ci_data <- readRDS(input_file) %>%
ci_data_wide <- readRDS(input_file) %>%
as_tibble()
cat(sprintf(" Loaded %d rows\n", nrow(ci_data)))
cat(sprintf(" Columns: %s\n", paste(names(ci_data), collapse = ", ")))
cat(sprintf(" Loaded %d rows\n", nrow(ci_data_wide)))
cat(sprintf(" Format: WIDE (field, sub_field, then dates as columns)\n"))
cat(sprintf(" Sample columns: %s\n", paste(names(ci_data_wide)[1:6], collapse = ", ")))
# Prepare data for Python
ci_data_python <- ci_data %>%
# Ensure standard column names
rename(
field = field,
sub_field = sub_field,
Date = Date,
FitData = FitData,
DOY = DOY
) %>%
# Add 'value' as an alias for FitData (sometimes needed)
mutate(value = FitData) %>%
# Keep only necessary columns
select(field, sub_field, Date, FitData, DOY, value) %>%
# Sort by field and date
arrange(field, Date)
# Step 1: Convert from WIDE to LONG format
cat("\nStep 1: Converting from wide to long format...\n")
ci_data_long <- wide_to_long_ci_data(ci_data_wide)
# Validate data
cat(sprintf("\nValidation:\n"))
cat(sprintf(" Unique fields: %d\n", n_distinct(ci_data_python$field)))
cat(sprintf(" Date range: %s to %s\n",
min(ci_data_python$Date, na.rm = TRUE),
max(ci_data_python$Date, na.rm = TRUE)))
cat(sprintf(" FitData range: %.2f to %.2f\n",
min(ci_data_python$FitData, na.rm = TRUE),
max(ci_data_python$FitData, na.rm = TRUE)))
cat(sprintf(" Missing FitData: %d rows\n", sum(is.na(ci_data_python$FitData))))
# Step 2: Create complete daily sequences with interpolation
cat("\nStep 2: Creating complete daily sequences with interpolation...\n")
ci_data_python <- create_interpolated_daily_sequences(ci_data_long)
# Save to CSV
cat(sprintf("\nSaving to: %s\n", output_file))
# Step 3: Validate output
cat("\nStep 3: Validating output...")
validate_conversion_output(ci_data_python)
# Step 4: Save to CSV
cat(sprintf("\nStep 4: Saving to CSV...\n"))
cat(sprintf(" Output: %s\n", output_file))
write_csv(ci_data_python, output_file)
cat(sprintf("✓ Successfully created CSV with %d rows\n", nrow(ci_data_python)))
cat("\nNext steps for Python harvest detection:\n")
cat(" 1. Read this CSV file in Python\n")
cat(" 2. Group by field to identify seasons\n")
cat(" 3. Run LSTM model to detect harvest dates\n")
cat(" 4. Save predicted harvest dates to Excel\n")
cat(" 5. Use output in script 03 for interpolation\n")
cat(sprintf("\n✓ Successfully created CSV with %d rows\n", nrow(ci_data_python)))
print_next_steps()
}
if (sys.nframe() == 0) {

1328
webapps/geojson_viewer.html Normal file

File diff suppressed because it is too large Load diff

View file

@ -212,6 +212,22 @@
<a href="./data_validation_tool/" class="app-btn">Open Tool</a>
</div>
</div>
<!-- GeoJSON Viewer -->
<div class="app-card">
<div class="app-icon">📍</div>
<div class="app-content">
<h2>GeoJSON Viewer</h2>
<p>Upload and visualize GeoJSON files on an interactive map with feature properties.</p>
<ul class="app-features">
<li>Upload GeoJSON files</li>
<li>Interactive map view</li>
<li>View feature properties</li>
<li>Download exports</li>
</ul>
<a href="./geojson_viewer.html" class="app-btn">Open Viewer</a>
</div>
</div>
</div>
<footer>