Working on visualisation; not working well yet.

This commit is contained in:
Timon 2026-01-18 11:25:31 +01:00
parent 9f312131d7
commit c11b10a73f
9 changed files with 1182 additions and 171 deletions

View file

@ -7,18 +7,19 @@ is approaching harvest. Use this for operational decision-making and real-time a
RUN FREQUENCY: Weekly (or daily if required)
INPUT:
- ci_data_for_python.csv (recent CI data from 02b_convert_rds_to_csv.R)
- harvest.xlsx (baseline from scripts 22+23 - contains last harvest date per field)
Location: laravel_app/storage/app/{project}/Data/harvest.xlsx
- ci_data_for_python.csv (complete CI data from R script)
Location: laravel_app/storage/app/{project}/Data/extracted_ci/ci_data_for_python/ci_data_for_python.csv
- harvest_production_export.xlsx (baseline from script 01 - optional, for reference)
OUTPUT:
- harvest_imminent_weekly.csv (weekly probabilities: field, imminent_prob, detected_prob, week, year)
- reports/kpis/field_stats/{project}_harvest_imminent_week_{WW}_{YYYY}.csv (weekly probabilities: field, imminent_prob, detected_prob, week, year)
Workflow:
1. Load harvest_production_export.xlsx (baseline dates - optional, for context)
2. Load ci_data_for_python.csv (recent CI data)
3. For each field, extract last 300 days of history
4. Run Model 307 inference on full sequence (last timestep probabilities)
5. Export harvest_imminent_weekly.csv with probabilities
1. Load harvest.xlsx to find last harvest date (season_end) per field
2. Load ci_data_for_python.csv (complete CI data)
3. For each field, extract all CI data AFTER last harvest (complete current season)
4. Run Model 307 inference on full season sequence (last timestep probabilities)
5. Export week_WW_YYYY.csv with probabilities
Output Columns:
- field: Field ID
@ -61,33 +62,34 @@ from harvest_date_pred_utils import (
def load_harvest_dates(harvest_file):
"""Load latest harvest end dates from Excel file (from harvest_production_export.xlsx)."""
print("[1/5] Loading harvest dates...")
"""Load last harvest end dates from harvest.xlsx (output from scripts 22+23)."""
print("[1/5] Loading harvest data for season boundaries...")
if not Path(harvest_file).exists():
print(f" ERROR: {harvest_file} not found")
print(" Using 180-day lookback as default")
print(f" harvest.xlsx is required to determine current season boundaries")
return None
try:
harvest_df = pd.read_excel(harvest_file)
print(f" Loaded {len(harvest_df)} field-season records")
print(f" Loaded {len(harvest_df)} season records")
# Use season_end_date column (output from harvest prediction script)
harvest_df['season_end_date'] = pd.to_datetime(harvest_df['season_end_date'])
# season_end contains the last harvest date for each season
harvest_df['season_end'] = pd.to_datetime(harvest_df['season_end'])
harvest_df['field'] = harvest_df['field'].astype(str).str.strip()
# Group by field and get the latest season_end_date
# Group by field and get the LATEST season_end_date (most recent harvest)
# This marks the start of the current season
harvest_dates = {}
for field_id, group in harvest_df.groupby('field'):
latest_end = group['season_end_date'].max()
harvest_dates[str(field_id).strip()] = latest_end
latest_harvest = group['season_end'].max()
harvest_dates[field_id] = latest_harvest
print(f" Successfully mapped {len(harvest_dates)} fields")
print(f" Harvest end dates range: {min(harvest_dates.values()).date()} to {max(harvest_dates.values()).date()}")
print(f" Last harvest dates range: {min(harvest_dates.values()).date()} to {max(harvest_dates.values()).date()}")
return harvest_dates
except Exception as e:
print(f" ERROR loading harvest file: {e}")
print(f" Using 180-day lookback instead")
print(f" ERROR loading harvest.xlsx: {e}")
return None
@ -212,26 +214,37 @@ def main():
# Get project name from command line or use default
project_name = sys.argv[1] if len(sys.argv) > 1 else "angata"
# Construct paths
base_storage = Path("../laravel_app/storage/app") / project_name / "Data"
# Construct paths - work from either python_app/ or root smartcane/ directory
# Try root first (laravel_app/...), then fall back to ../ (running from python_app/)
if Path("laravel_app/storage/app").exists():
base_storage = Path("laravel_app/storage/app") / project_name / "Data"
else:
base_storage = Path("../laravel_app/storage/app") / project_name / "Data"
ci_data_dir = base_storage / "extracted_ci" / "ci_data_for_python"
CI_DATA_FILE = ci_data_dir / "ci_data_for_python.csv"
harvest_data_dir = base_storage / "HarvestData"
BASELINE_FILE = harvest_data_dir / "harvest_production_export.xlsx"
OUTPUT_CSV = harvest_data_dir / "harvest_imminent_weekly.csv"
harvest_data_dir.mkdir(parents=True, exist_ok=True) # Create if doesn't exist
HARVEST_FILE = base_storage / "harvest.xlsx" # Output from scripts 22+23
# Determine week and year from current date for timestamped export
today = datetime.now()
week_num = int(today.strftime('%V'))
year_num = int(today.strftime('%Y'))
# Output directory: reports/kpis/field_stats/
reports_dir = base_storage.parent / "reports" / "kpis" / "field_stats"
reports_dir.mkdir(parents=True, exist_ok=True)
OUTPUT_CSV = reports_dir / f"{project_name}_harvest_imminent_week_{week_num:02d}_{year_num}.csv"
print("="*80)
print(f"HARVEST IMMINENT PROBABILITY - WEEKLY MONITORING ({project_name})")
print("="*80)
# [1] Load harvest dates (optional - for projects with predictions)
harvest_dates = None
if BASELINE_FILE.exists():
harvest_dates = load_harvest_dates(BASELINE_FILE)
else:
print("[1/5] Loading harvest dates...")
print(f" INFO: {BASELINE_FILE} not found (optional for weekly monitoring)")
# [1] Load harvest dates (required to determine season boundaries)
harvest_dates = load_harvest_dates(HARVEST_FILE)
if harvest_dates is None or len(harvest_dates) == 0:
print(f"ERROR: Cannot run without harvest.xlsx - required to determine current season boundaries")
print(f" Please run scripts 22 (baseline prediction) and 23 (format conversion) first")
return
# [2] Load CI data
print(f"\n[2/5] Loading CI data...")
@ -271,6 +284,9 @@ def main():
count = 0
for field_id in ci_data['field'].unique():
# Convert field_id to string for consistency
field_id_str = str(field_id).strip()
# Get metadata
meta = field_meta[field_meta['field'] == field_id]
if len(meta) == 0:
@ -279,18 +295,21 @@ def main():
sub_field = meta['sub_field'].iloc[0]
latest_date = meta['latest_date'].iloc[0]
# Use recent CI history (last 300 days from latest available data)
# Get last harvest date for this field (start of current season)
last_harvest = harvest_dates.get(field_id_str)
if last_harvest is None:
continue
# Extract all CI data AFTER last harvest (complete current season)
field_data = ci_data[ci_data['field'] == field_id].copy()
field_data = field_data.sort_values('Date')
field_data = field_data[field_data['Date'] > last_harvest] # After last harvest
# Keep last 300 days of history for inference
if len(field_data) > 300:
field_data = field_data.iloc[-300:]
# Need at least 30 days of data since planting
if len(field_data) < 30:
continue
# Run inference on recent history to predict next 28 days
# Run inference on full current season to predict next 28 days
imminent_prob, detected_prob = run_inference_on_season(
field_data, model, config, scalers, device, ci_column
)
@ -338,10 +357,11 @@ def main():
print(f" WARNING: No results exported - check CI data availability")
print(f"\nStorage structure:")
print(f" Input CI: laravel_app/storage/app/{project_name}/Data/extracted_ci/ci_data_for_python/")
print(f" Input baseline: laravel_app/storage/app/{project_name}/Data/HarvestData/harvest_production_export.xlsx")
print(f" Output: laravel_app/storage/app/{project_name}/Data/HarvestData/")
print(f"\nReady to load into 09b field analysis report")
print(f" Input harvest: laravel_app/storage/app/{project_name}/Data/harvest.xlsx")
print(f" Input CI: laravel_app/storage/app/{project_name}/Data/extracted_ci/ci_data_for_python/")
print(f" Output: laravel_app/storage/app/{project_name}/reports/kpis/field_stats/")
print(f" Filename: {project_name}_harvest_imminent_week_{week_num:02d}_{year_num}.csv")
print(f"\nReady to load into 80_calculate_kpis.R")
if __name__ == "__main__":

View file

@ -0,0 +1,288 @@
#!/usr/bin/env python
"""
Batch RGB Validation for Top 50 Largest Fields
Generates 5x3 RGB temporal grids for the latest complete harvest season of the 50 largest fields.
Uses actual season_end dates from harvest.xlsx for visual validation of field conditions at harvest.
Configuration:
- GeoJSON: pivot.geojson (defines field boundaries and sizes)
- Harvest data: harvest.xlsx (season_end dates for completed harvests)
- CI data: ci_data_for_python.csv
- Output: RGB directory with field_name_YYYYMMDD_harvest_rgb.png
Usage:
python batch_rgb_validation_top_fields.py
Output:
- Saves 5x3 RGB grids to: laravel_app/storage/app/angata/RGB/
- Filenames: field_<NAME>_<YYYYMMDD>_harvest_rgb.png
- Each grid shows 15 images at 7-day intervals around the season_end date
"""
import json
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime, timedelta
import sys
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))
from rgb_visualization import generate_rgb_grids
def load_geojson_and_calculate_areas(geojson_path):
    """
    Load a GeoJSON file and calculate an approximate area for each field.

    Args:
        geojson_path: Path to a GeoJSON file whose features carry 'field'
            and 'name' properties and Polygon/MultiPolygon geometry.

    Returns:
        pd.DataFrame: Columns [field, field_name, area_m2, area_hectares]
        sorted by area descending, or None if the file is missing or no
        feature yields a positive area.
    """
    geojson_path = Path(geojson_path)
    if not geojson_path.exists():
        print(f"✗ GeoJSON not found: {geojson_path}")
        return None
    print(f"Loading GeoJSON: {geojson_path}")
    with open(geojson_path) as f:
        geojson_data = json.load(f)
    fields = []
    for feature in geojson_data.get('features', []):
        props = feature.get('properties', {})
        field_id = str(props.get('field', ''))
        field_name = props.get('name', f"field_{field_id}")
        geometry = feature.get('geometry', {})
        geom_type = geometry.get('type', '')
        coordinates = geometry.get('coordinates', [])
        # Approximate area via the Shoelace formula on the exterior ring(s);
        # interior rings (holes) are ignored.
        area_m2 = 0
        if geom_type == 'Polygon' and coordinates:
            coords = coordinates[0]  # Exterior ring
            area_m2 = calculate_polygon_area(coords)
        elif geom_type == 'MultiPolygon' and coordinates:
            for poly_coords in coordinates:
                area_m2 += calculate_polygon_area(poly_coords[0])
        if area_m2 > 0:
            fields.append({
                'field': field_id,
                'field_name': field_name,
                'area_m2': area_m2,
                'area_hectares': area_m2 / 10000
            })
    # Guard: an empty frame has no 'area_m2' column, so sort_values would
    # raise KeyError. Treat "no usable fields" the same as a missing file.
    if not fields:
        print(f"✗ No fields with a positive area found in {geojson_path}")
        return None
    df = pd.DataFrame(fields)
    df = df.sort_values('area_m2', ascending=False)
    print(f" ✓ Loaded {len(df)} fields")
    print(f" Top 10 largest fields (hectares):")
    for _, row in df.head(10).iterrows():
        print(f" {row['field_name']:30s} ({row['field']:>6s}): {row['area_hectares']:>8.2f} ha")
    return df
def calculate_polygon_area(coords):
    """
    Calculate the area of a polygon ring using the Shoelace formula (in m²).

    Coordinates are GeoJSON positions [lon, lat] (an optional third
    altitude element is tolerated). Degrees are converted to metres with an
    equirectangular approximation: 1 degree latitude ≈ 111 km, longitude
    scaled by cos(average latitude).

    Args:
        coords: Sequence of coordinate positions forming the ring.

    Returns:
        float: Approximate area in square metres (0 for degenerate rings).
    """
    if len(coords) < 3:
        return 0
    # Rough conversion: at the equator, 1 degree ≈ 111 km.
    # Longitude spacing shrinks with latitude, so scale by cos(avg latitude).
    lats = [c[1] for c in coords]
    avg_lat = np.mean(lats)
    lat_m_per_deg = 111000
    lon_m_per_deg = 111000 * np.cos(np.radians(avg_lat))
    # Convert to metres relative to the first vertex (keeps values small,
    # which limits floating-point error in the cross products below).
    coords_m = []
    for pos in coords:
        lon, lat = pos[0], pos[1]  # tolerate [lon, lat, alt] positions
        x = (lon - coords[0][0]) * lon_m_per_deg
        y = (lat - coords[0][1]) * lat_m_per_deg
        coords_m.append((x, y))
    # Shoelace formula over the (implicitly closed) ring.
    area = 0
    for i in range(len(coords_m)):
        j = (i + 1) % len(coords_m)
        area += coords_m[i][0] * coords_m[j][1]
        area -= coords_m[j][0] * coords_m[i][1]
    return abs(area) / 2
def load_harvest_dates_from_xlsx(harvest_xlsx_path, top_50_fields_df):
    """
    Load harvest records from Excel and pick the latest completed season
    per field.

    A season counts as completed when its 'season_end' value is present.
    Only fields listed in top_50_fields_df are considered.

    Args:
        harvest_xlsx_path (Path): Path to harvest.xlsx.
        top_50_fields_df (pd.DataFrame): Must have 'field' (and ideally
            'field_name') columns used for filtering and naming.

    Returns:
        dict: {field_id: {'field_name': str, 'harvest_date': pd.Timestamp}},
        empty on any failure.
    """
    harvest_xlsx_path = Path(harvest_xlsx_path)
    if not harvest_xlsx_path.exists():
        print(f"✗ Harvest Excel file not found: {harvest_xlsx_path}")
        return {}
    print(f"Loading harvest data: {harvest_xlsx_path}")
    try:
        records = pd.read_excel(harvest_xlsx_path)
        # Coerce season_end to datetime when the column exists; unparseable
        # values become NaT and are dropped below.
        if 'season_end' in records.columns:
            records['season_end'] = pd.to_datetime(records['season_end'], errors='coerce')
        # Restrict to the requested fields and to completed seasons only.
        wanted_ids = set(top_50_fields_df['field'].astype(str).str.strip())
        records['field'] = records['field'].astype(str).str.strip()
        records = records[records['field'].isin(wanted_ids)]
        records = records[records['season_end'].notna()]
        latest_harvests = {}
        for fid, season_rows in records.groupby('field'):
            # Most recent completed season for this field.
            newest = season_rows.loc[season_rows['season_end'].idxmax()]
            # Prefer the display name from the caller's frame.
            name_rows = top_50_fields_df[top_50_fields_df['field'] == fid]
            display_name = name_rows.iloc[0]['field_name'] if len(name_rows) > 0 else f"field_{fid}"
            latest_harvests[fid] = {
                'field_name': display_name,
                'harvest_date': newest['season_end']
            }
        print(f" ✓ Loaded latest complete seasons for {len(latest_harvests)} fields")
        return latest_harvests
    except Exception as e:
        print(f"✗ Error loading harvest data: {e}")
        return {}
def main():
    """Generate RGB validation grids for the 50 largest fields.

    Workflow:
      1. Rank fields by approximate polygon area from the GeoJSON.
      2. Look up each field's latest completed season_end in harvest.xlsx.
      3. Render one RGB temporal grid per field via generate_rgb_grids().

    Paths are hard-coded for the 'angata' project, relative to the repo root.
    """
    print("="*90)
    print("BATCH RGB VALIDATION - TOP 50 LARGEST FIELDS")
    print("Visual inspection of latest harvest dates from harvest.xlsx using RGB imagery")
    print("="*90)
    # Configuration — fixed project paths
    geojson_path = Path("laravel_app/storage/app/angata/Data/pivot.geojson")
    harvest_xlsx = Path("laravel_app/storage/app/angata/Data/harvest.xlsx")
    output_dir = Path("laravel_app/storage/app/angata/RGB")
    tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5")
    # Verify inputs exist before doing any work
    if not geojson_path.exists():
        print(f"✗ GeoJSON not found: {geojson_path}")
        return
    if not harvest_xlsx.exists():
        print(f"✗ Harvest Excel not found: {harvest_xlsx}")
        return
    if not tiff_dir.exists():
        print(f"✗ TIFF directory not found: {tiff_dir}")
        return
    output_dir.mkdir(parents=True, exist_ok=True)
    # Step 1: Load GeoJSON and get top 50 largest fields
    print("\n[1/4] Loading GeoJSON and identifying top 50 largest fields...")
    fields_df = load_geojson_and_calculate_areas(geojson_path)
    if fields_df is None:
        return
    top_50_fields = fields_df.head(50)
    print(f" ✓ Selected {len(top_50_fields)} largest fields for processing")
    # Step 2: Load harvest dates from Excel
    print("\n[2/4] Loading harvest dates from Excel (latest complete seasons)...")
    harvest_dates = load_harvest_dates_from_xlsx(harvest_xlsx, top_50_fields)
    if len(harvest_dates) == 0:
        print("✗ No harvest dates found in Excel file")
        return
    print(f" ✓ Found {len(harvest_dates)} fields with completed seasons")
    # Preview the first few fields being processed
    for field_id, info in list(harvest_dates.items())[:5]:
        print(f" - {info['field_name']:30s}: {info['harvest_date'].strftime('%Y-%m-%d')}")
    if len(harvest_dates) > 5:
        print(f" ... and {len(harvest_dates) - 5} more")
    # Step 3: Generate RGB grids for each field
    print("\n[3/4] Generating RGB validation grids...")
    rgb_count = 0
    for idx, (field_id, harvest_info) in enumerate(harvest_dates.items(), 1):
        field_name = harvest_info['field_name']
        harvest_date = harvest_info['harvest_date']
        try:
            # Run RGB visualization (harvest dates only, no registered/predicted distinction).
            # NOTE(review): the harvest.xlsx date is fed through the "predicted"
            # channel with a placeholder model name — confirm generate_rgb_grids
            # renders it as intended.
            results = generate_rgb_grids(
                field_data=None,  # Not needed - just for function compatibility
                field_id=field_id,
                registered_harvest_dates=[],  # Empty - using harvest.xlsx instead
                predicted_harvest_dates=[
                    {
                        'harvest_date': harvest_date,
                        'model_name': 'harvest_xlsx'
                    }
                ],
                output_dir=str(output_dir),  # All PNGs in same folder
                tiff_dir=str(tiff_dir),
                geojson_path=str(geojson_path)
            )
            if results['predicted']:
                rgb_count += 1
                print(f" [{idx:2d}/{len(harvest_dates)}] {field_name}: ✓ {harvest_date.strftime('%Y-%m-%d')}")
            else:
                print(f" [{idx:2d}/{len(harvest_dates)}] {field_name}: ⚠ No RGB grid (no imagery available)")
        except Exception as e:
            # Keep going on per-field failures; report them in the log
            print(f" [{idx:2d}/{len(harvest_dates)}] {field_name}: ✗ Error - {e}")
    # Summary
    print("\n" + "="*90)
    print(f"SUMMARY:")
    print(f" Fields with harvest dates: {len(harvest_dates)}")
    print(f" RGB grids generated: {rgb_count}/{len(harvest_dates)}")
    print(f" Output directory: {output_dir}")
    print("="*90)
    print("\nVisual inspection checklist:")
    print(" ✓ Brown/bare soil at T~0d (harvest date) = Field properly harvested")
    print(" ⚠ Green vegetation at T~0d = Possible data error or replanting")
    print(" ✓ Green → Brown progression = Normal harvest sequence")
    print("="*90)
if __name__ == "__main__":
    main()

View file

@ -0,0 +1,107 @@
#!/usr/bin/env python
"""
Debug script to find all tiles for a date and check which overlap with field boundary.

Steps:
  1. Load the boundary polygon of one field (id "79") from pivot.geojson.
  2. Find all tile directories for a fixed target date.
  3. For each tile, report whether it intersects the boundary and attempt
     a rasterio mask, printing per-band non-zero pixel counts.
"""
import json
import rasterio
from rasterio.mask import mask
from pathlib import Path
import numpy as np
import shapely.geometry as shgeom
import pandas as pd

# Load field 79 boundary
geojson_path = Path("laravel_app/storage/app/angata/Data/pivot.geojson")
field_id = "79"
print(f"Loading field {field_id} from GeoJSON...")
with open(geojson_path) as f:
    geojson_data = json.load(f)
field_boundary = None
for feature in geojson_data.get('features', []):
    props = feature.get('properties', {})
    if str(props.get('field', '')) == str(field_id):
        geometry = feature.get('geometry')
        if geometry:
            geom_type = geometry.get('type', '')
            coordinates = geometry.get('coordinates', [])
            if geom_type == 'MultiPolygon':
                # Only the first polygon's exterior ring is used
                if coordinates and len(coordinates) > 0:
                    coords = coordinates[0][0]
                    field_boundary = shgeom.Polygon(coords)
            elif geom_type == 'Polygon':
                if coordinates and len(coordinates) > 0:
                    coords = coordinates[0]  # Exterior ring
                    field_boundary = shgeom.Polygon(coords)
        break
if field_boundary is None:
    print(f"Field {field_id} not found")
    exit(1)
print(f"Field boundary bounds: {field_boundary.bounds}")
# NOTE: area here is in squared degrees (lat/lon), not metres
print(f"Field boundary area: {field_boundary.area}")
# Find a specific date directory
tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5")
target_date = pd.Timestamp("2026-01-15")  # Use a recent date that exists
# Find tiles for that date (directory names start with YYYY-MM-DD)
date_dirs = []
for date_dir in tiff_dir.iterdir():
    if date_dir.is_dir():
        try:
            dir_name = date_dir.name
            date_str = dir_name.split('_')[0]
            tile_date = pd.Timestamp(date_str)
            if tile_date == target_date:
                date_dirs.append(date_dir)
        except (ValueError, TypeError):
            # Fix: was a bare `except:` which also swallowed KeyboardInterrupt
            # and SystemExit. pd.Timestamp raises ValueError/TypeError on
            # directory names that are not dates — skip those.
            pass
if not date_dirs:
    print(f"No tiles found for {target_date}")
    exit(1)
print(f"\nFound {len(date_dirs)} date directory(ies) for {target_date}")
for date_dir in date_dirs:
    print(f"\n=== Checking date directory: {date_dir.name} ===")
    tiles = list(date_dir.glob("*.tif"))
    print(f"Found {len(tiles)} tiles in this directory")
    for tile_path in sorted(tiles):
        try:
            with rasterio.open(tile_path) as src:
                tile_bounds = src.bounds
                tile_geom = shgeom.box(*tile_bounds)
                intersects = field_boundary.intersects(tile_geom)
                intersection = field_boundary.intersection(tile_geom) if intersects else None
                intersection_area = intersection.area if intersection else 0
                print(f"\n{tile_path.name}")
                print(f" Tile bounds: {tile_bounds}")
                print(f" Intersects field: {intersects}")
                if intersects:
                    print(f" Intersection area: {intersection_area:.8f}")
                    # Try to mask this tile: crop=True trims to the boundary
                    # bbox, indexes=[1, 2, 3] reads only the first three bands
                    geom = shgeom.mapping(field_boundary)
                    try:
                        masked_data, _ = mask(src, [geom], crop=True, indexes=[1, 2, 3])
                        print(f" ✓ Successfully masked! Shape: {masked_data.shape}")
                        # Check the data in each band
                        for i, band_idx in enumerate([1, 2, 3]):
                            band_data = masked_data[i]
                            non_zero = (band_data != 0).sum()
                            print(f" Band {band_idx}: {non_zero} non-zero pixels out of {band_data.size}")
                    except ValueError as e:
                        # rasterio raises ValueError when shapes don't overlap the raster
                        print(f" ✗ Masking failed: {e}")
        except Exception as e:
            print(f" Error reading tile: {e}")

View file

@ -0,0 +1,102 @@
#!/usr/bin/env python
"""
Debug script to diagnose why field boundary masking produces no data
"""
import json
import rasterio
from rasterio.mask import mask
from pathlib import Path
import numpy as np
import shapely.geometry as shgeom
# Load a sample field boundary
geojson_path = Path("laravel_app/storage/app/angata/Data/pivot.geojson")
field_id = "79" # A field that had issues
print(f"Loading field {field_id} from GeoJSON...")
with open(geojson_path) as f:
geojson_data = json.load(f)
field_boundary = None
for feature in geojson_data.get('features', []):
props = feature.get('properties', {})
if str(props.get('field', '')) == str(field_id):
geometry = feature.get('geometry')
if geometry:
geom_type = geometry.get('type', '')
coordinates = geometry.get('coordinates', [])
if geom_type == 'MultiPolygon':
if coordinates and len(coordinates) > 0:
coords = coordinates[0][0]
field_boundary = shgeom.Polygon(coords)
elif geom_type == 'Polygon':
if coordinates and len(coordinates) > 0:
coords = coordinates[0]
field_boundary = shgeom.Polygon(coords)
break
if field_boundary is None:
print(f"Field {field_id} not found")
exit(1)
print(f"Field boundary bounds: {field_boundary.bounds}")
print(f"Field boundary area: {field_boundary.area}")
# Load a sample TIFF tile
tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5")
tile_file = None
for date_dir in sorted(tiff_dir.iterdir()):
if date_dir.is_dir():
for tif in date_dir.glob("*.tif"):
if tif.stat().st_size > 12e6:
tile_file = tif
break
if tile_file:
break
if not tile_file:
print("No suitable TIFF found")
exit(1)
print(f"\nTesting with TIFF: {tile_file.name}")
with rasterio.open(tile_file) as src:
print(f"TIFF Bounds: {src.bounds}")
print(f"TIFF CRS: {src.crs}")
# Check if field boundary is within tile bounds
tile_box = shgeom.box(*src.bounds)
intersects = field_boundary.intersects(tile_box)
print(f"Field boundary intersects tile: {intersects}")
if intersects:
intersection = field_boundary.intersection(tile_box)
print(f"Intersection area: {intersection.area}")
print(f"Intersection bounds: {intersection.bounds}")
# Try to mask and see what we get
print("\nAttempting to mask...")
geom = shgeom.mapping(field_boundary)
try:
masked_data, _ = mask(src, [geom], crop=True, indexes=[1, 2, 3])
print(f"Masked data shape: {masked_data.shape}")
print(f"Masked data dtype: {masked_data.dtype}")
# Check the data
for i, band_idx in enumerate([1, 2, 3]):
band_data = masked_data[i]
print(f"\nBand {band_idx}:")
print(f" min: {np.nanmin(band_data):.6f}")
print(f" max: {np.nanmax(band_data):.6f}")
print(f" mean: {np.nanmean(band_data):.6f}")
print(f" % valid (non-zero): {(band_data != 0).sum() / band_data.size * 100:.2f}%")
print(f" % NaN: {np.isnan(band_data).sum() / band_data.size * 100:.2f}%")
# Show sample values
valid_pixels = band_data[band_data != 0]
if len(valid_pixels) > 0:
print(f" Sample valid values: {valid_pixels[:10]}")
except ValueError as e:
print(f"Error during masking: {e}")

View file

@ -0,0 +1,47 @@
#!/usr/bin/env python
"""
Debug script to inspect TIFF file structure and data
"""
import rasterio
from pathlib import Path
import numpy as np
# Pick a tile file to inspect
tiff_dir = Path("laravel_app/storage/app/angata/merged_final_tif/5x5")
# Find first available tile
tile_file = None
for date_dir in sorted(tiff_dir.iterdir()):
if date_dir.is_dir():
for tif in date_dir.glob("*.tif"):
if tif.stat().st_size > 12e6: # Skip empty files
tile_file = tif
break
if tile_file:
break
if not tile_file:
print("No suitable TIFF files found")
exit(1)
print(f"Inspecting: {tile_file.name}")
print("=" * 80)
with rasterio.open(tile_file) as src:
print(f"Band count: {src.count}")
print(f"Data type: {src.dtypes[0]}")
print(f"Shape: {src.height} x {src.width}")
print(f"CRS: {src.crs}")
print(f"Bounds: {src.bounds}")
print()
# Read each band
for band_idx in range(1, min(6, src.count + 1)):
data = src.read(band_idx)
print(f"Band {band_idx}:")
print(f" dtype: {data.dtype}")
print(f" range: {data.min():.6f} - {data.max():.6f}")
print(f" mean: {data.mean():.6f}")
print(f" % valid (non-zero): {(data != 0).sum() / data.size * 100:.1f}%")
print()

View file

@ -0,0 +1,299 @@
"""
Batch Field Visualization Tool - RGB Imagery Around Harvest Date
Purpose: Generate visual validation using RGB satellite imagery samples around
predicted harvest date to verify predictions (bare soil = harvested, green = not harvested)
Shows 12-15 RGB images in a grid, centered around the predicted harvest date
Usage:
python batch_plot_fields_rgb.py field1,field2,field3
python batch_plot_fields_rgb.py 10125,88,97
Or read from CSV:
python batch_plot_fields_rgb.py --file fields_to_check.csv
"""
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from pathlib import Path
from datetime import datetime, timedelta
import sys
import rasterio
from rasterio.mask import mask
import geopandas as gpd
from harvest_date_pred_utils import load_model_and_config, extract_features
def get_field_centroid(field_id, geojson_path="pivot.geojson"):
    """Return the (x, y) centroid of a field's geometry from the GeoJSON.

    Args:
        field_id: Field identifier; compared as a string against the
            GeoJSON 'field' column.
        geojson_path: Path to the GeoJSON file of field boundaries.

    Returns:
        tuple | None: (x, y) centroid coordinates, or None when the field
        is absent or the file cannot be read (a warning is printed).
    """
    try:
        frame = gpd.read_file(geojson_path)
        matches = frame[frame['field'] == str(field_id)]
        if matches.empty:
            return None
        point = matches.geometry.iloc[0].centroid
        return (point.x, point.y)
    except Exception as e:
        print(f" Warning: Could not get field centroid - {e}")
        return None
def load_rgb_image(tif_path, field_id=None, geojson_path="pivot.geojson"):
    """
    Load an RGB composite from an 8-band GeoTIFF.

    Band layout (0-indexed): 0=coastal, 1=blue, 2=green, 3=green_i,
    4=yellow, 5=red, 6=rededge, 7=nir.
    RGB = 0-indexed bands 5, 2, 1 (red, green, blue), i.e. rasterio
    (1-indexed) bands 6, 3, 2.

    Args:
        tif_path: Path to the GeoTIFF file.
        field_id: Currently unused; accepted for call-site compatibility.
        geojson_path: Currently unused; accepted for call-site compatibility.

    Returns:
        np.ndarray of shape (H, W, 3) scaled to [0, 1], or None on error.
    """
    try:
        with rasterio.open(tif_path) as src:
            # Read RGB bands (bands are 1-indexed in rasterio)
            red = src.read(6)  # Band 6 = red (0-indexed band 5)
            green = src.read(3)  # Band 3 = green (0-indexed band 2)
            blue = src.read(2)  # Band 2 = blue (0-indexed band 1)
            # Stack into an (H, W, 3) RGB image
            rgb = np.stack([red, green, blue], axis=2)
            # Normalize to 0-1 range; dividing by 5000 brightens relative to
            # the typical 0-10000 reflectance range (values above 5000 clip)
            rgb = np.clip(rgb / 5000.0, 0, 1)
            return rgb
    except Exception as e:
        print(f" Error loading RGB from {tif_path}: {e}")
        return None
def plot_field_rgb_validation(field_id, ci_data, model, config, scalers, device,
                              tif_folder="../../../laravel_app/storage/app/angata/merged_tif_8b",
                              output_dir="validation_plots_rgb"):
    """
    Create a validation figure for a single field.

    Layout:
      - Top row: Model 307 imminent-harvest probability over time, with the
        peak (argmax) marked as the predicted harvest date.
      - Below: a grid of RGB images, one per available TIFF within ±6 days
        of the peak date.

    Args:
        field_id: Field identifier (string, matched against ci_data['field']).
        ci_data: DataFrame with at least 'field' and 'Date' columns.
        model, config, scalers, device: Loaded Model 307 artifacts.
        tif_folder: Directory of date-named GeoTIFFs (YYYY-MM-DD.tif).
        output_dir: Directory the PNG is written to (created if missing).

    Returns:
        bool: True if a plot was saved, False on any failure.
    """
    # Create output directory
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    # Filter field data
    field_data = ci_data[ci_data['field'] == field_id].copy()
    if len(field_data) == 0:
        print(f" ✗ Field {field_id}: No CI data found")
        return False
    field_data = field_data.sort_values('Date')
    print(f" ✓ Field {field_id}: {len(field_data)} days of data")
    try:
        # Extract features and run inference
        ci_column = config['data']['ci_column']
        feature_names = config['features']
        feat_array = extract_features(field_data, feature_names, ci_column=ci_column)
        if feat_array is None:
            print(f" ✗ Field {field_id}: Feature extraction failed")
            return False
        # Apply scalers (same normalization as used at training time)
        if isinstance(scalers, dict) and 'features' in scalers:
            feat_array = scalers['features'].transform(feat_array)
        # Run inference over the full sequence
        with torch.no_grad():
            x_tensor = torch.tensor(feat_array, dtype=torch.float32).unsqueeze(0).to(device)
            # NOTE(review): assumes the model returns (imminent, detected)
            # head outputs in that order — confirm against the model definition
            out_imm, out_det = model(x_tensor)
            imm_probs = out_imm.squeeze(0).cpu().numpy()
        # Find peak probability (predicted harvest date)
        peak_idx = np.argmax(imm_probs)
        peak_date = field_data['Date'].iloc[peak_idx]
        peak_prob = imm_probs[peak_idx]
        print(f" Peak probability: {peak_prob:.3f} on {peak_date.strftime('%Y-%m-%d')}")
        # Get date range: ±6 days around peak (12-13 images total)
        date_range = field_data['Date'].dt.date  # NOTE(review): unused
        peak_date_only = peak_date.date() if hasattr(peak_date, 'date') else peak_date
        days_before = 6
        days_after = 6
        start_date = peak_date_only - timedelta(days=days_before)
        end_date = peak_date_only + timedelta(days=days_after)
        # Find available TIF files in date range (filenames are YYYY-MM-DD.tif)
        tif_folder_path = Path(tif_folder)
        available_dates = []
        for tif_file in sorted(tif_folder_path.glob("*.tif")):
            date_str = tif_file.stem  # YYYY-MM-DD
            try:
                tif_date = datetime.strptime(date_str, "%Y-%m-%d").date()
                if start_date <= tif_date <= end_date:
                    available_dates.append((tif_date, tif_file))
            except ValueError:
                pass  # skip files whose names are not dates
        if len(available_dates) == 0:
            print(f" Warning: No TIF files found in {start_date} to {end_date}")
            return False
        print(f" Found {len(available_dates)} RGB images in date range")
        # Load RGB images (full tiles; not cropped to the field boundary)
        rgb_images = []
        rgb_dates = []
        for tif_date, tif_file in available_dates:
            rgb = load_rgb_image(str(tif_file), field_id)
            if rgb is not None:
                rgb_images.append(rgb)
                rgb_dates.append(tif_date)
        if len(rgb_images) == 0:
            print(f" ✗ No RGB images loaded")
            return False
        print(f" Loaded {len(rgb_images)} RGB images")
        # Create figure with probability plot + RGB grid
        n_images = len(rgb_images)
        n_cols = min(5, n_images)  # Max 5 columns
        n_rows = (n_images + n_cols - 1) // n_cols  # Calculate rows needed
        fig = plt.figure(figsize=(18, 12))  # NOTE(review): fig is unused
        # Probability plot (top, spanning all columns of the first grid row)
        ax_prob = plt.subplot(n_rows + 1, n_cols, (1, n_cols))
        dates_arr = field_data['Date'].values
        ax_prob.plot(dates_arr, imm_probs, '-', color='orange', linewidth=2.5, label='Imminent Probability', alpha=0.8)
        ax_prob.axhline(y=0.5, color='red', linestyle='--', linewidth=1.5, alpha=0.5, label='Threshold (0.5)')
        ax_prob.axvline(x=peak_date, color='darkred', linestyle=':', linewidth=2, alpha=0.7, label='Peak')
        ax_prob.fill_between(dates_arr, 0.5, 1.0, alpha=0.08, color='red')
        ax_prob.set_ylim(-0.05, 1.05)
        ax_prob.set_ylabel('Probability', fontsize=11, fontweight='bold')
        ax_prob.set_xlabel('Date', fontsize=11, fontweight='bold')
        ax_prob.set_title(f'Field {field_id} - Model 307 Harvest Probability', fontsize=12, fontweight='bold')
        ax_prob.grid(True, alpha=0.3)
        ax_prob.legend(loc='upper right', fontsize=9)
        ax_prob.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
        ax_prob.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
        plt.setp(ax_prob.xaxis.get_majorticklabels(), rotation=45, ha='right')
        # Annotate the peak value and its date on the curve
        ax_prob.annotate(f'{peak_prob:.2f}\n{peak_date_only}',
                         xy=(peak_date, peak_prob),
                         xytext=(20, 20), textcoords='offset points',
                         bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.8),
                         arrowprops=dict(arrowstyle='->', lw=1.5, color='darkred'))
        # RGB images in grid (below probability plot)
        for i, (rgb, date) in enumerate(zip(rgb_images, rgb_dates)):
            ax = plt.subplot(n_rows + 1, n_cols, n_cols + i + 1)
            ax.imshow(rgb, extent=[0, 100, 0, 100])
            # Highlight the peak date's tile in bold dark red
            date_label = date.strftime('%m-%d')
            is_peak = date == peak_date_only
            color = 'darkred' if is_peak else 'black'
            weight = 'bold' if is_peak else 'normal'
            size = 11 if is_peak else 9
            ax.set_title(date_label, fontsize=size, fontweight=weight, color=color)
            ax.set_xticks([])
            ax.set_yticks([])
        plt.suptitle(f'Field {field_id} RGB Imagery: {len(rgb_images)} Days Around Peak Harvest Probability\nPeak: {peak_prob:.2f} on {peak_date_only} | Green = Growing | Brown/Bare = Harvested',
                     fontsize=13, fontweight='bold', y=0.995)
        plt.tight_layout()
        # Save and close the figure to release memory in batch runs
        output_file = Path(output_dir) / f"field_{field_id}_rgb_validation.png"
        plt.savefig(output_file, dpi=100, bbox_inches='tight')
        print(f" ✓ Saved: {output_file}")
        plt.close()
        return True
    except Exception as e:
        print(f" ✗ Field {field_id}: Error - {e}")
        import traceback
        traceback.print_exc()
        return False
def main():
    """CLI entry point: render RGB validation plots for the requested fields.

    Accepts either a comma-separated field list or ``--file fields.csv``
    (CSV must have a ``field`` column), loads the CI data and Model 307,
    then writes one validation PNG per field via plot_field_rgb_validation().
    """
    banner = "=" * 80
    print(banner)
    print("BATCH RGB VISUALIZATION TOOL")
    print("Visual check: RGB imagery around predicted harvest date")
    print(banner)

    # ---- Parse command-line arguments ------------------------------------
    args = sys.argv[1:]
    if not args:
        print("\nUsage:")
        print(" python batch_plot_fields_rgb.py field1,field2,field3")
        print(" python batch_plot_fields_rgb.py --file fields.csv")
        print("\nExample:")
        print(" python batch_plot_fields_rgb.py 10125,88,97,440")
        return

    field_ids = []
    if args[0] == "--file":
        if len(args) < 2:
            print("ERROR: --file requires a CSV filename")
            return
        csv_path = args[1]
        print(f"\n[1/4] Loading fields from CSV: {csv_path}")
        try:
            field_df = pd.read_csv(csv_path)
            field_ids = field_df['field'].astype(str).str.strip().tolist()
            print(f" ✓ Loaded {len(field_ids)} fields")
        except Exception as exc:
            print(f" ✗ Error reading CSV: {exc}")
            return
    else:
        # Comma-separated list given directly on the command line.
        field_ids = [token.strip() for token in args[0].split(',')]
        print(f"\n[1/4] Processing {len(field_ids)} field(s): {', '.join(field_ids)}")

    # ---- Load CI observations (field ids normalized to stripped strings) --
    print("\n[2/4] Loading CI data...")
    try:
        observations = pd.read_csv("ci_data_for_python.csv")
        observations['Date'] = pd.to_datetime(observations['Date'])
        observations['field'] = observations['field'].astype(str).str.strip()
        print(f" ✓ Loaded {len(observations)} observations for {observations['field'].nunique()} fields")
    except Exception as exc:
        print(f" ✗ Error loading CI data: {exc}")
        return

    # ---- Load Model 307 (eval mode; device is passed to the plot helper) --
    print("\n[3/4] Loading model...")
    try:
        model, config, scalers = load_model_and_config(Path("."))
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.eval()
        print(f" ✓ Model loaded on {device}")
    except Exception as exc:
        print(f" ✗ Error loading model: {exc}")
        return

    # ---- Generate one validation plot per field ---------------------------
    print("\n[4/4] Generating RGB validation plots...")
    num_ok = sum(
        1
        for fid in field_ids
        if plot_field_rgb_validation(fid, observations, model, config, scalers, device)
    )

    # ---- Summary ----------------------------------------------------------
    print("\n" + banner)
    print(f"SUMMARY: {num_ok}/{len(field_ids)} fields processed successfully")
    print(f"Output directory: validation_plots_rgb/")
    print(banner)
    print("\nInspect the PNG files to verify predictions:")
    print(" ✓ Green imagery BEFORE peak date (field growing)")
    print(" ✓ Brown/Bare imagery AT/AFTER peak date (harvested)")
    print(" ✓ Peak date marked with red title")


if __name__ == "__main__":
    main()

View file

@ -22,6 +22,8 @@ import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime, timedelta
import matplotlib
matplotlib.use('Agg') # Use non-interactive backend to avoid display hangs
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.colors import Normalize
@ -86,55 +88,95 @@ def load_field_boundaries(geojson_path, field_id):
return None, None
def find_closest_tiff(target_date, tiff_dir, days_window=60, field_boundary=None):
def find_overlapping_tiles(target_date, tiff_dir, field_boundary, days_window=60):
"""
Find available TIFF file closest to target date.
Skips obviously empty files (< 12 MB) without reading data.
Find all tile files for target_date (or closest date) that overlap with field_boundary.
TIFF files are named: YYYY-MM-DD.tif
Tile files are organized in subdirectories by date: 5x5/YYYY-MM-DD_HH/*.tif
Args:
target_date (pd.Timestamp): Target date to find TIFF near
tiff_dir (Path): Directory containing TIFF files
target_date (pd.Timestamp): Target date to find tiles near
tiff_dir (Path): Directory containing 5x5 date subdirectories
field_boundary (shapely.Polygon): Field boundary for overlap detection
days_window (int): Max days to search before/after target
field_boundary (shapely.Polygon): Unused, kept for compatibility
Returns:
Path: Path to closest TIFF file or None if not found
int: Days difference from target (negative=before, positive=after)
tuple: (list of tile paths, actual_date, days_diff)
list: tile paths that overlap field
pd.Timestamp: actual date of tiles found
int: days difference from target to actual date found
"""
target_date = pd.Timestamp(target_date)
tiff_dir = Path(tiff_dir)
available_tiffs = list(tiff_dir.glob('*.tif'))
if not available_tiffs:
return None, None
if not tiff_dir.exists():
return [], None, None
# Parse dates from filenames, skip obviously empty files by size
tiff_dates = []
min_size_mb = 12.0 # Empty files are ~11.56 MB, valid files are ~12.6-13.0 MB
# Find all date subdirectories
available_dates = {} # {date: ([tile file paths], actual_dir_name)}
min_size_mb = 12.0 # Empty files are ~11.56 MB
for tiff_path in available_tiffs:
for date_dir in tiff_dir.iterdir():
if not date_dir.is_dir():
continue
try:
# Quick size check to skip obviously empty files
file_size_mb = tiff_path.stat().st_size / (1024 * 1024)
if file_size_mb < min_size_mb:
# Parse date from directory name (YYYY-MM-DD or YYYY-MM-DD_HH)
dir_name = date_dir.name
# Extract just the date part before underscore if it exists
date_str = dir_name.split('_')[0]
tile_date = pd.Timestamp(date_str)
days_diff = (tile_date - target_date).days
if abs(days_diff) > days_window:
continue
date_str = tiff_path.stem # Remove .tif extension
tiff_date = pd.Timestamp(date_str)
days_diff = (tiff_date - target_date).days
if abs(days_diff) <= days_window:
tiff_dates.append((tiff_path, tiff_date, days_diff))
# Find all .tif files in this date directory
tile_files = []
for tile_file in date_dir.glob('*.tif'):
# Skip obviously empty files
file_size_mb = tile_file.stat().st_size / (1024 * 1024)
if file_size_mb >= min_size_mb:
tile_files.append(tile_file)
if tile_files:
available_dates[tile_date] = (tile_files, dir_name)
except:
pass
if not tiff_dates:
return None, None
if not available_dates:
return [], None, None
# Return closest by distance
closest = min(tiff_dates, key=lambda x: abs(x[2]))
return closest[0], closest[2]
# Find closest date
closest_date = min(available_dates.keys(), key=lambda d: abs((d - target_date).days))
days_diff = (closest_date - target_date).days
tiles, _ = available_dates[closest_date]
# Filter tiles to only those that overlap field boundary
if rasterio is None or field_boundary is None:
# If rasterio not available, use all tiles (conservative approach)
return tiles, closest_date, days_diff
overlapping_tiles = []
for tile_path in tiles:
try:
with rasterio.open(tile_path) as src:
# Get tile bounds
tile_bounds = src.bounds # (left, bottom, right, top)
tile_geom = shgeom.box(*tile_bounds)
# Check if tile overlaps field
if tile_geom.intersects(field_boundary):
overlapping_tiles.append(tile_path)
except:
pass
if not overlapping_tiles:
# No overlapping tiles found, return all tiles for the closest date
return tiles, closest_date, days_diff
return overlapping_tiles, closest_date, days_diff
def load_and_clip_tiff_rgb(tiff_path, field_boundary, rgb_bands=(1, 2, 3)):
@ -148,12 +190,6 @@ def load_and_clip_tiff_rgb(tiff_path, field_boundary, rgb_bands=(1, 2, 3)):
- Band 4: NIR
- Band 5: CI
For older merged_tif_8b files (raw Planet Scope 8-band):
- Bands 1=Coastal Blue, 2=Blue, 3=Green, 4=Red, 5=Red Edge, 6=NIR, 7=SWIR1, 8=SWIR2
- RGB would be bands 4,3,2 = Red, Green, Blue
This function auto-detects the format based on band count and descriptions.
Args:
tiff_path (Path): Path to TIFF file
field_boundary (shapely.Polygon): Field boundary for clipping
@ -168,58 +204,106 @@ def load_and_clip_tiff_rgb(tiff_path, field_boundary, rgb_bands=(1, 2, 3)):
try:
with rasterio.open(tiff_path) as src:
# Check CRS compatibility
# Check band count
if src.count < 3:
print(f" ⚠ TIFF has only {src.count} bands (need at least 3 for RGB)")
return None
# Auto-detect format based on band count and descriptions
if src.count == 5 and hasattr(src, 'descriptions') and src.descriptions:
# merged_final_tif format: Red, Green, Blue, NIR, CI
bands_to_read = (1, 2, 3)
elif src.count == 9:
# merged_tif_8b format: use bands 4, 3, 2 for Red, Green, Blue
bands_to_read = (4, 3, 2)
else:
# Default: try to use first 3 bands or specified bands
bands_to_read = rgb_bands
print(f" Unknown TIFF format ({src.count} bands), using bands {bands_to_read}")
# For merged_final_tif: bands 1,2,3 are R,G,B
bands_to_read = (1, 2, 3)
# Mask and read bands (crop=True reads only the clipped window, not full resolution)
# Mask and read bands
geom = shgeom.mapping(field_boundary)
try:
# Read RGB bands at once, then mask (more efficient than masking 3 times)
masked_data, _ = mask(src, [geom], crop=True, indexes=list(bands_to_read))
# masked_data shape is (3, height, width) - one layer per band
rgb_data = []
for i, band_idx in enumerate(bands_to_read):
band_data = masked_data[i] # Extract the i-th band from masked stack
# Better debug output that handles NaN values
valid_data = band_data[~np.isnan(band_data)]
if len(valid_data) > 0:
print(f" DEBUG: Band {band_idx} valid data range: {valid_data.min():.4f} - {valid_data.max():.4f} ({len(valid_data)} valid pixels)")
else:
print(f" DEBUG: Band {band_idx} - all NaN!")
rgb_data.append(band_data)
# Stack RGB
rgb = np.stack(rgb_data, axis=-1)
rgb = np.stack([masked_data[i] for i in range(3)], axis=-1)
# Data is already normalized 0-1 from the merged_final_tif files
# Just ensure it's float32 and clipped
# Convert to float32 if not already
rgb = rgb.astype(np.float32)
# Normalize to 0-1 range
# Data appears to be 8-bit (0-255 range) stored as float32
# Check actual max value to determine normalization
max_val = np.nanmax(rgb)
if max_val > 0:
# If max is around 255 or less, assume 8-bit
if max_val <= 255:
rgb = rgb / 255.0
# If max is around 65535, assume 16-bit
elif max_val <= 65535:
rgb = rgb / 65535.0
# Otherwise divide by max to normalize
else:
rgb = rgb / max_val
rgb = np.clip(rgb, 0, 1)
print(f" DEBUG: Final RGB range: {rgb.min():.4f} - {rgb.max():.4f}")
# Check if result is all NaN
if np.all(np.isnan(rgb)):
return None
# Replace any remaining NaN with 0 (cloud/invalid pixels)
rgb = np.nan_to_num(rgb, nan=0.0)
return rgb
except ValueError as e:
print(f" ⚠ Error clipping to field boundary: {e}")
except ValueError:
return None
except Exception as e:
print(f" ✗ Error loading TIFF {tiff_path.name}: {e}")
return None
def load_and_composite_tiles_rgb(tile_paths, field_boundary):
    """
    Load RGB from multiple overlapping tiles and composite them into one image.

    Each tile is clipped to the field boundary via load_and_clip_tiff_rgb();
    overlapping tiles are merged with a per-pixel max composite, then a
    2nd-98th percentile contrast stretch is applied so dim scenes stay visible.

    Args:
        tile_paths (list[Path]): Tile file paths for a single acquisition date.
        field_boundary (shapely.Polygon): Field boundary used for clipping.

    Returns:
        np.ndarray: Composited RGB data (height, width, 3), float32 in [0, 1],
        or None when rasterio is unavailable, inputs are missing, or no tile
        yields valid data.
    """
    if rasterio is None or field_boundary is None or not tile_paths:
        return None
    try:
        # Clip each tile to the field; tiles with no valid pixels return None.
        rgb_arrays = [
            rgb
            for rgb in (load_and_clip_tiff_rgb(p, field_boundary) for p in tile_paths)
            if rgb is not None
        ]
        if not rgb_arrays:
            return None

        if len(rgb_arrays) == 1:
            composited = rgb_arrays[0].astype(np.float32)
        else:
            # Max composite: bright (valid) pixels win over the zero-filled
            # nodata regions each clipped tile carries outside its footprint.
            # NOTE(review): assumes all clipped tiles share the same shape;
            # np.stack raises otherwise and we fall through to the except.
            composited = np.max(np.stack(rgb_arrays, axis=0), axis=0).astype(np.float32)

        # Contrast stretch over non-zero pixels only, using the 2nd/98th
        # percentiles so a few outliers do not compress the dynamic range.
        valid = composited[composited > 0]
        if valid.size > 0:
            lo = np.percentile(valid, 2)
            hi = np.percentile(valid, 98)
            if hi > lo:
                composited = np.clip((composited - lo) / (hi - lo), 0, 1)
        return composited.astype(np.float32)
    except Exception:
        # Best-effort: a failed composite is treated the same as "no imagery".
        return None
@ -276,27 +360,24 @@ def create_temporal_rgb_grid(harvest_date, field_data, field_id, tiff_dir, field
actual_dates = [] # Store actual dates of TIFFs found
for target in target_dates:
tiff_path, days_diff = find_closest_tiff(target, tiff_dir, days_window=60, field_boundary=field_boundary)
tile_paths, actual_date, days_diff = find_overlapping_tiles(target, tiff_dir, field_boundary, days_window=60)
if tiff_path is None:
if not tile_paths or actual_date is None:
rgb_images.append(None)
days_offsets.append(None)
actual_dates.append(None)
print(f" ⚠ No TIFF found within 60 days of {target.strftime('%Y-%m-%d')} with sufficient data")
print(f" ⚠ No tiles found within 60 days of {target.strftime('%Y-%m-%d')} with sufficient data")
continue
# Extract date from filename: YYYY-MM-DD.tif
tiff_date = pd.to_datetime(tiff_path.stem)
rgb = load_and_clip_tiff_rgb(tiff_path, field_boundary)
rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary)
rgb_images.append(rgb)
days_offsets.append(days_diff)
actual_dates.append(tiff_date)
actual_dates.append(actual_date)
if rgb is not None:
print(f" ✓ Loaded {tiff_path.name} ({days_diff:+d}d from target)")
print(f" ✓ Loaded {len(tile_paths)} tile(s) for {actual_date.strftime('%Y-%m-%d')} ({days_diff:+d}d from target)")
else:
print(f" ⚠ Loaded {tiff_path.name} but RGB data is None")
print(f" ⚠ Loaded {len(tile_paths)} tile(s) but RGB data is None")
# Create 5x3 grid plot (15 images)
fig, axes = plt.subplots(3, 5, figsize=(25, 15))
@ -360,11 +441,16 @@ def create_temporal_rgb_grid(harvest_date, field_data, field_id, tiff_dir, field
filename = f'field_{field_id}_{harvest_date_str}_{model_name}_harvest_rgb.png'
output_path = Path(output_dir) / filename
plt.savefig(output_path, dpi=100, bbox_inches='tight')
plt.close()
print(f" ✓ Saved: {output_path}")
return output_path
try:
plt.savefig(output_path, dpi=100, format='png')
plt.close()
print(f" ✓ Saved: {output_path}")
return output_path
except Exception as e:
plt.close()
print(f" ✗ Error saving PNG: {e}")
return None
def generate_rgb_grids(field_data, field_id, registered_harvest_dates, predicted_harvest_dates,

View file

@ -11,11 +11,28 @@ library(terra)
library(sf)
# ============================================================================
# CONFIGURATION
# CONFIGURATION & COMMAND-LINE ARGUMENTS
# ============================================================================
# Parse command-line arguments for date filtering
args <- commandArgs(trailingOnly = TRUE)
# Example: Rscript 10_create_master_grid_and_split_tiffs.R 2026-01-13 2026-01-17
start_date <- NULL
end_date <- NULL
if (length(args) >= 1) {
start_date <- as.Date(args[1])
cat("Filtering: start date =", as.character(start_date), "\n")
}
if (length(args) >= 2) {
end_date <- as.Date(args[2])
cat("Filtering: end date =", as.character(end_date), "\n")
}
PROJECT <- "angata"
TIFF_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "merged_tif_8b")
TIFF_FOLDER <- file.path("..", "laravel_app", "storage", "app", PROJECT, "merged_tif_8b")
# GRID SIZE CONFIGURATION - Change this to use different grid sizes
# Options: 5x5 (25 tiles), 10x10 (100 tiles), etc.
@ -25,10 +42,10 @@ GRID_NCOLS <- 5
# Construct grid-specific subfolder path
GRID_SIZE_LABEL <- paste0(GRID_NCOLS, "x", GRID_NROWS)
OUTPUT_FOLDER <- file.path("laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL)
OUTPUT_FOLDER <- file.path("..", "laravel_app", "storage", "app", PROJECT, "daily_tiles_split", GRID_SIZE_LABEL)
# Load field boundaries for overlap checking
GEOJSON_PATH <- file.path("laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson")
GEOJSON_PATH <- file.path("..", "laravel_app", "storage", "app", PROJECT, "Data", "pivot.geojson")
cat("Combined: Create Master Grid (", GRID_SIZE_LABEL, ") and Split TIFFs into Tiles\n", sep = "")
cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
@ -40,31 +57,50 @@ cat("Grid subfolder: daily_tiles_split/", GRID_SIZE_LABEL, "/\n", sep = "")
cat("\n[PART 1] Creating Master Grid\n")
# Load field boundaries for overlap checking
cat("\n[1] Loading field boundaries from GeoJSON...\n")
cat("\n[1] Checking for existing master grid...\n")
if (!file.exists(GEOJSON_PATH)) {
stop("GeoJSON file not found at: ", GEOJSON_PATH, "\n",
"Please ensure ", PROJECT, " has a pivot.geojson file.")
}
# Check if master grid already exists
MASTER_GRID_PATH <- file.path(OUTPUT_FOLDER, paste0("master_grid_", GRID_SIZE_LABEL, ".geojson"))
field_boundaries_sf <- st_read(GEOJSON_PATH, quiet = TRUE)
field_boundaries_vect <- terra::vect(GEOJSON_PATH)
cat(" ✓ Loaded ", nrow(field_boundaries_sf), " field(s)\n", sep = "")
# Try to find a name column (could be 'name', 'field', 'field_name', etc.)
field_names <- NA
if ("name" %in% names(field_boundaries_sf)) {
field_names <- field_boundaries_sf$name
} else if ("field" %in% names(field_boundaries_sf)) {
field_names <- field_boundaries_sf$field
} else if ("field_name" %in% names(field_boundaries_sf)) {
field_names <- field_boundaries_sf$field_name
if (file.exists(MASTER_GRID_PATH)) {
cat(" ✓ Found existing master grid at:\n ", MASTER_GRID_PATH, "\n", sep = "")
master_grid_sf <- st_read(MASTER_GRID_PATH, quiet = TRUE)
field_boundaries_sf <- NULL # No need to load pivot.geojson
field_boundaries_vect <- NULL
cat(" ✓ Loaded grid with ", nrow(master_grid_sf), " tiles\n", sep = "")
} else {
field_names <- 1:nrow(field_boundaries_sf) # Fall back to indices
# No existing grid - need to create one from pivot.geojson
cat(" No existing grid found. Creating new one from pivot.geojson...\n")
if (!file.exists(GEOJSON_PATH)) {
stop("GeoJSON file not found at: ", GEOJSON_PATH, "\n",
"Please ensure ", PROJECT, " has a pivot.geojson file, or run this script ",
"from the same directory as a previous successful run (grid already exists).")
}
field_boundaries_sf <- st_read(GEOJSON_PATH, quiet = TRUE)
field_boundaries_vect <- terra::vect(GEOJSON_PATH)
cat(" ✓ Loaded ", nrow(field_boundaries_sf), " field(s) from GeoJSON\n", sep = "")
}
cat(" Fields: ", paste(field_names, collapse = ", "), "\n", sep = "")
# Try to find a name column (only if field_boundaries_sf exists)
if (!is.null(field_boundaries_sf)) {
field_names <- NA
if ("name" %in% names(field_boundaries_sf)) {
field_names <- field_boundaries_sf$name
} else if ("field" %in% names(field_boundaries_sf)) {
field_names <- field_boundaries_sf$field
} else if ("field_name" %in% names(field_boundaries_sf)) {
field_names <- field_boundaries_sf$field_name
} else {
field_names <- 1:nrow(field_boundaries_sf) # Fall back to indices
}
cat(" Fields: ", paste(field_names, collapse = ", "), "\n", sep = "")
}
# Helper function: Check if a tile overlaps with any field (simple bbox overlap)
tile_overlaps_fields <- function(tile_extent, field_geoms) {
@ -105,6 +141,27 @@ cat("\n[2] Checking TIFF extents...\n")
tiff_files <- list.files(TIFF_FOLDER, pattern = "\\.tif$", full.names = FALSE)
tiff_files <- sort(tiff_files)
# Filter by date range if specified
if (!is.null(start_date) || !is.null(end_date)) {
cat("\nApplying date filter...\n")
file_dates <- as.Date(sub("\\.tif$", "", tiff_files))
if (!is.null(start_date) && !is.null(end_date)) {
keep_idx <- file_dates >= start_date & file_dates <= end_date
cat(" Date range: ", as.character(start_date), " to ", as.character(end_date), "\n", sep = "")
} else if (!is.null(start_date)) {
keep_idx <- file_dates >= start_date
cat(" From: ", as.character(start_date), "\n", sep = "")
} else {
keep_idx <- file_dates <= end_date
cat(" Until: ", as.character(end_date), "\n", sep = "")
}
tiff_files <- tiff_files[keep_idx]
cat(" ✓ Filtered to ", length(tiff_files), " file(s)\n", sep = "")
}
if (length(tiff_files) == 0) {
stop("No TIFF files found in ", TIFF_FOLDER)
}
@ -242,26 +299,33 @@ if (file.exists(master_grid_file)) {
cat("\n[PART 2] Creating Filtered Grid (only overlapping tiles)\n")
cat("\n[7] Filtering master grid to only overlapping tiles...\n")
# Check which tiles overlap with any field
overlapping_tile_indices <- c()
for (tile_idx in 1:nrow(master_grid_sf)) {
tile_geom <- master_grid_sf[tile_idx, ]
# If grid was loaded from file, it's already filtered. Skip filtering.
if (!file.exists(MASTER_GRID_PATH)) {
cat("\n[7] Filtering master grid to only overlapping tiles...\n")
# Check overlap with any field
if (tile_overlaps_fields(st_bbox(tile_geom$geometry), field_boundaries_sf$geometry)) {
overlapping_tile_indices <- c(overlapping_tile_indices, tile_idx)
# Check which tiles overlap with any field
overlapping_tile_indices <- c()
for (tile_idx in 1:nrow(master_grid_sf)) {
tile_geom <- master_grid_sf[tile_idx, ]
# Check overlap with any field
if (tile_overlaps_fields(st_bbox(tile_geom$geometry), field_boundaries_sf$geometry)) {
overlapping_tile_indices <- c(overlapping_tile_indices, tile_idx)
}
}
cat(" Found ", length(overlapping_tile_indices), " overlapping tiles out of ", N_TILES, "\n", sep = "")
cat(" Reduction: ", N_TILES - length(overlapping_tile_indices), " empty tiles will NOT be created\n", sep = "")
# Create filtered grid with only overlapping tiles
filtered_grid_sf <- master_grid_sf[overlapping_tile_indices, ]
filtered_grid_sf$tile_id <- sprintf("%02d", overlapping_tile_indices)
} else {
cat("\n[7] Using pre-filtered grid (already loaded from file)...\n")
# Grid was already loaded - it's already filtered
filtered_grid_sf <- master_grid_sf
}
cat(" Found ", length(overlapping_tile_indices), " overlapping tiles out of ", N_TILES, "\n", sep = "")
cat(" Reduction: ", N_TILES - length(overlapping_tile_indices), " empty tiles will NOT be created\n", sep = "")
# Create filtered grid with only overlapping tiles
filtered_grid_sf <- master_grid_sf[overlapping_tile_indices, ]
filtered_grid_sf$tile_id <- sprintf("%02d", overlapping_tile_indices)
# Convert to SpatVector for makeTiles
filtered_grid_vect <- terra::vect(filtered_grid_sf)
@ -314,7 +378,7 @@ for (file_idx in seq_along(tiff_files)) {
dir.create(date_output_folder, recursive = TRUE, showWarnings = FALSE)
}
cat(" Creating ", length(overlapping_tile_indices), " tiles...\n", sep = "")
cat(" Creating ", nrow(filtered_grid_sf), " tiles...\n", sep = "")
# Use makeTiles with FILTERED grid (only overlapping tiles)
tiles_list <- terra::makeTiles(

View file

@ -54,14 +54,12 @@ main <- function() {
if (is.na(end_date)) {
message("Invalid end_date provided. Using current date.")
end_date <- Sys.Date()
end_date <- "2026-01-01" # Default date for testing
}
} else if (exists("end_date_str", envir = .GlobalEnv)) {
end_date <- as.Date(get("end_date_str", envir = .GlobalEnv))
} else {
# Default to current date if no argument is provided
end_date <- Sys.Date()
end_date <- "2026-01-01" # Default date for testing
message("No end_date provided. Using current date: ", format(end_date))
}