# SmartCane/python_app/rgb_visualization.py

#!/usr/bin/env python
"""
RGB Visualization Tool for Harvest Date Validation
Creates 3x3 temporal grids showing satellite imagery around registered and predicted harvest dates.
Extracts RGB from 8-band Planet scope data and clips to field boundaries from GeoJSON.
Functions:
- load_field_boundaries(): Load field geometries from GeoJSON
- find_closest_tiff(): Find available TIFF file closest to target date
- load_and_clip_tiff_rgb(): Load TIFF, extract RGB, clip to field boundary
- create_temporal_grid(): Create 3x3 grid (4 pre-harvest, 1 near, 2-3 post-harvest)
- generate_rgb_grids(): Main orchestration function
Usage:
from rgb_visualization import generate_rgb_grids
generate_rgb_grids(field_data, field_id, registered_harvest_dates, predicted_harvest_dates, output_dir, tiff_dir, geojson_path)
"""
import json
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime, timedelta
import matplotlib
matplotlib.use('Agg') # Use non-interactive backend to avoid display hangs
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.colors import Normalize
import warnings
warnings.filterwarnings('ignore')
try:
    import rasterio
    from rasterio.mask import mask
    import shapely.geometry as shgeom
except ImportError:
    # Any of the three imports may fail (e.g. shapely missing while rasterio
    # is present). Null out ALL optional names so later `rasterio is None`
    # guards work and no name is left unbound.
    print("Warning: rasterio not available. RGB visualization will be skipped.")
    rasterio = None
    mask = None
    shgeom = None
def load_field_boundaries(geojson_path, field_id):
    """
    Load the boundary polygon for one field from a GeoJSON file.

    Scans every feature and matches field_id against the 'field' or
    'sub_field' property. For MultiPolygon geometries only the exterior
    ring of the first polygon is used.

    Args:
        geojson_path (Path): Path to pivot.geojson
        field_id (str): Field identifier (e.g., "13973")
    Returns:
        tuple: (GeoJSON feature dict, shapely.geometry.Polygon), or
            (None, None) when the field is missing or the file is unreadable.
    """
    wanted = str(field_id)
    try:
        with open(geojson_path) as fh:
            collection = json.load(fh)
        for feature in collection.get('features', []):
            props = feature.get('properties', {})
            # A feature matches on either its 'field' or 'sub_field' property.
            if wanted not in (str(props.get('field', '')), str(props.get('sub_field', ''))):
                continue
            geometry = feature.get('geometry')
            if not geometry:
                continue
            geom_type = geometry.get('type', '')
            coordinates = geometry.get('coordinates', [])
            if not coordinates:
                continue
            if geom_type == 'MultiPolygon':
                # coordinates[i] = [[exterior ring], [inner ring], ...];
                # take the exterior ring of the first polygon.
                ring = coordinates[0][0]
            elif geom_type == 'Polygon':
                # coordinates = [[exterior ring], [inner ring], ...]
                ring = coordinates[0]
            else:
                continue
            return feature, shgeom.Polygon(ring)
        print(f" ⚠ Field {field_id} not found in GeoJSON")
        return None, None
    except Exception as e:
        print(f" ✗ Error loading GeoJSON: {e}")
        return None, None
def find_overlapping_tiles(target_date, tiff_dir, field_boundary, days_window=60, exclude_dates=None, debug=False):
    """
    Find tile files that overlap the field for the date nearest target_date.

    Scans date-named subdirectories (YYYY-MM-DD or YYYY-MM-DD_suffix) of
    tiff_dir, then checks candidate dates in order of increasing distance
    from target_date until one yields tiles whose bounds intersect the field
    boundary. Dates in exclude_dates are skipped to ensure temporal
    diversity across repeated calls.

    Args:
        target_date (pd.Timestamp): Target date to find tiles near
        tiff_dir (Path): Directory containing per-date subdirectories of .tif tiles
        field_boundary (shapely.Polygon): Field boundary for overlap detection
        days_window (int): Max days to search before/after target.
            (Bug fix: this was previously accepted but silently ignored,
            making the callers' ±14d/±60d expansion logic a no-op; it is
            now enforced.)
        exclude_dates (iterable): Dates to skip (avoid repetition)
        debug (bool): Enable detailed debugging output
    Returns:
        tuple: (list of tile paths, actual_date, days_diff), or
            ([], None, None) when nothing suitable is found.
    """
    target_date = pd.Timestamp(target_date)
    tiff_dir = Path(tiff_dir)
    excluded = {pd.Timestamp(d) for d in (exclude_dates or [])}
    if not tiff_dir.exists():
        if debug:
            print(f" [DEBUG] TIFF dir does not exist: {tiff_dir}")
        return [], None, None
    # Build a map of every date directory that actually contains .tif files.
    available_dates = {}
    date_parse_errors = 0
    for date_dir in tiff_dir.iterdir():
        if not date_dir.is_dir():
            continue
        try:
            dir_name = date_dir.name
            tile_date = pd.Timestamp(dir_name.split('_')[0])
            # Include ALL tiles, regardless of size: small tiles may still
            # contain valid data for specific fields.
            tile_files = list(date_dir.glob('*.tif'))
            if tile_files:
                available_dates[tile_date] = (tile_files, dir_name)
        except Exception as e:
            date_parse_errors += 1
            if debug:
                print(f" [DEBUG] Failed to parse date from {date_dir.name}: {e}")
    if debug:
        print(f" [DEBUG] Found {len(available_dates)} dates with tile files, {date_parse_errors} parse errors")
        print(f" [DEBUG] Date range: {min(available_dates.keys()).strftime('%Y-%m-%d') if available_dates else 'N/A'} to {max(available_dates.keys()).strftime('%Y-%m-%d') if available_dates else 'N/A'}")
    if not available_dates:
        return [], None, None
    # Candidate dates ordered by distance from target, restricted to the
    # requested window (the fix for the ignored days_window parameter).
    sorted_dates = sorted(
        (d for d in available_dates if abs((d - target_date).days) <= days_window),
        key=lambda d: abs((d - target_date).days),
    )
    for search_date in sorted_dates:
        # Skip dates that were recently used (avoid temporal repetition).
        if search_date in excluded:
            continue
        tiles, dir_name = available_dates[search_date]
        days_diff = (search_date - target_date).days
        overlapping_tiles = []
        tile_check_errors = 0
        for tile_path in tiles:
            try:
                with rasterio.open(tile_path) as src:
                    tile_bounds = src.bounds
                    tile_geom = shgeom.box(*tile_bounds)
                    # Dump details for the first tile checked at this date.
                    if debug and len(overlapping_tiles) == 0 and tile_check_errors == 0:
                        print(f" [DEBUG] First tile check for {tile_path.name}:")
                        print(f" Tile bounds: {tile_bounds}")
                        print(f" Tile CRS: {src.crs}")
                        print(f" Field bounds: {field_boundary.bounds}")
                        print(f" Field geom type: {field_boundary.geom_type}")
                        print(f" Intersects: {tile_geom.intersects(field_boundary)}")
                    if tile_geom.intersects(field_boundary):
                        overlapping_tiles.append(tile_path)
            except Exception as e:
                tile_check_errors += 1
                if debug:
                    print(f" [DEBUG] Error checking tile {tile_path.name}: {e}")
        if debug:
            print(f" [DEBUG] Date {search_date.strftime('%Y-%m-%d')}: {len(tiles)} tiles, {len(overlapping_tiles)} overlap field, {tile_check_errors} errors")
        if overlapping_tiles:
            print(f" [FIND_TILES] Target: {target_date.strftime('%Y-%m-%d')}, Using: {search_date.strftime('%Y-%m-%d')} ({days_diff:+d}d), Tiles: {[Path(t).name for t in overlapping_tiles]}")
            return overlapping_tiles, search_date, days_diff
    # No date within the window yielded overlapping tiles.
    if debug:
        print(f" [DEBUG] No overlapping tiles found for {target_date.strftime('%Y-%m-%d')} within {len(sorted_dates)} searched dates")
    return [], None, None
def load_and_clip_tiff_rgb(tiff_path, field_boundary, rgb_bands=(1, 2, 3)):
    """
    Load a TIFF and return its RGB bands clipped to the field boundary.

    For merged_final_tif files (cloud-masked and filtered) the band layout is:
    Band 1: Red, Band 2: Green, Band 3: Blue, Band 4: NIR, Band 5: CI.

    Args:
        tiff_path (Path): Path to TIFF file
        field_boundary (shapely.Polygon): Field boundary for clipping
        rgb_bands (tuple): 1-indexed band numbers used as (R, G, B).
            (Bug fix: this parameter was previously accepted but ignored —
            bands (1, 2, 3) were always read. Default preserves behavior.)
    Returns:
        np.ndarray: RGB data (height, width, 3) float32 scaled 0-1,
            or None on any failure (no overlap, too few bands, read error).
    """
    if rasterio is None or field_boundary is None:
        return None
    try:
        bands_to_read = tuple(rgb_bands)
        with rasterio.open(tiff_path) as src:
            # The raster must contain every requested band.
            if src.count < max(bands_to_read):
                return None
            # Mask to the exact field polygon; crop=True trims to its bbox.
            geom = shgeom.mapping(field_boundary)
            try:
                masked_data, _ = mask(src, [geom], crop=True, indexes=list(bands_to_read))
                rgb = np.stack([masked_data[i] for i in range(len(bands_to_read))], axis=-1)
            except (ValueError, RuntimeError) as e:
                # Mask failed - field doesn't overlap this tile or geometry issue
                print(f" MASK ERROR on {Path(tiff_path).name}: {str(e)[:50]}")
                return None
        rgb = rgb.astype(np.float32)
        # Infer bit depth from the data's maximum and normalize to 0-1.
        max_val = np.nanmax(rgb)
        if max_val > 0:
            if max_val <= 255:
                # Max around 255 or less: assume 8-bit data stored as float.
                rgb = rgb / 255.0
            elif max_val <= 65535:
                # Max up to 65535: assume 16-bit data.
                rgb = rgb / 65535.0
            else:
                # Unknown scale: normalize by the observed maximum.
                rgb = rgb / max_val
        rgb = np.clip(rgb, 0, 1)
        # Entirely masked result (clouds / nodata) carries no usable pixels.
        if np.all(np.isnan(rgb)):
            return None
        # Remaining NaNs are cloud/invalid pixels; render them as black.
        return np.nan_to_num(rgb, nan=0.0)
    except Exception:
        return None
def load_and_composite_tiles_rgb(tile_paths, field_boundary):
    """
    Composite RGB imagery from several overlapping tiles into one image.

    Each tile is clipped to the field boundary; overlapping tiles are merged
    with a per-pixel maximum, then contrast-stretched so dim scenes stay
    visible.

    Args:
        tile_paths (list[Path]): List of tile file paths
        field_boundary (shapely.Polygon): Field boundary for clipping
    Returns:
        np.ndarray: Composited RGB data (height, width, 3) with values 0-1
            or None if error occurs
    """
    if rasterio is None or field_boundary is None or not tile_paths:
        return None
    try:
        clipped = [load_and_clip_tiff_rgb(p, field_boundary) for p in tile_paths]
        clipped = [arr for arr in clipped if arr is not None]
        if not clipped:
            return None
        if len(clipped) == 1:
            merged = clipped[0]
        else:
            try:
                # Same shape across tiles: per-pixel maximum composite.
                merged = np.max(np.stack(clipped, axis=0), axis=0)
            except ValueError:
                # Shapes differ (tiles masked to different field areas) —
                # fall back to the largest, most complete clip.
                merged = max(clipped, key=lambda arr: arr.size)
        merged = merged.astype(np.float32)
        # Percentile-based contrast stretch to 0-1 (2nd/98th percentiles
        # to handle outliers); makes dim images visible.
        nonzero = merged[merged > 0]
        if len(nonzero) > 0:
            low = np.percentile(nonzero, 2)
            high = np.percentile(nonzero, 98)
            if high > low:
                merged = (merged - low) / (high - low)
                merged = np.clip(merged, 0, 1)
        return merged.astype(np.float32)
    except Exception:
        return None
def has_valid_rgb_data(rgb_data, threshold=0.05):
    """
    Check if an RGB image holds actual data (not black/empty).

    An image counts as valid when its brightest value exceeds ``threshold``
    AND there is at least minimal variation (max - min > 0.01), which
    filters out all-zero and uniformly-masked clips.

    Args:
        rgb_data (np.ndarray | None): RGB array scaled 0-1, or None.
        threshold (float): Minimum brightness required to count as data.
    Returns:
        bool: True if the image contains usable data.
    """
    if rgb_data is None:
        return False
    try:
        data_max = np.nanmax(rgb_data)
        data_min = np.nanmin(rgb_data)
        return bool(data_max > threshold and (data_max - data_min) > 0.01)
    except Exception:
        # Narrowed from a bare `except:` — covers empty arrays (nanmax
        # raises ValueError) and non-numeric input, without swallowing
        # KeyboardInterrupt/SystemExit.
        return False
def _find_anchor_image(harvest_date, tiff_dir, field_boundary):
    """Find the anchor image closest to the harvest date.

    Searches ±14 days first; if nothing valid turns up, widens to ±60 days
    while excluding dates whose imagery already failed validation.

    Returns:
        tuple: (rgb, anchor_date, days_diff) or (None, None, None).
    """
    failed_dates = []
    tile_paths, found_date, days_diff = find_overlapping_tiles(
        harvest_date, tiff_dir, field_boundary, days_window=14, exclude_dates=[], debug=False
    )
    if tile_paths and found_date:
        rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary)
        if rgb is not None and has_valid_rgb_data(rgb):
            print(f" ✓ ANCHOR FOUND (±14d): {found_date.strftime('%Y-%m-%d')} ({days_diff:+d}d from predicted harvest)")
            return rgb, found_date, days_diff
        # Date exists but its imagery is empty — exclude it from the retry.
        failed_dates.append(found_date)
        print(f" ⚠ Found date {found_date.strftime('%Y-%m-%d')} within ±14d, but image has no valid data")
    else:
        print(f" ⚠ No tiles found within ±14 days, expanding search...")
    print(f" [RETRY] Expanding anchor search to ±60 days (excluding failed dates)...")
    tile_paths, found_date, days_diff = find_overlapping_tiles(
        harvest_date, tiff_dir, field_boundary, days_window=60, exclude_dates=failed_dates, debug=False
    )
    if tile_paths and found_date:
        rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary)
        if rgb is not None and has_valid_rgb_data(rgb):
            print(f" ✓ ANCHOR FOUND (±60d): {found_date.strftime('%Y-%m-%d')} ({days_diff:+d}d from predicted harvest)")
            return rgb, found_date, days_diff
        print(f" ✗ No valid anchor found even within ±60 days")
    else:
        print(f" ✗ No tiles found for any date within ±60 days")
    return None, None, None


def _collect_neighbor_images(anchor_date, tiff_dir, field_boundary, used_dates,
                             grid_positions, direction, stop_fn):
    """Collect valid images on one side of the anchor with flexible spacing.

    Walks away from the anchor one image at a time; for each grid slot it
    tries progressively larger gaps (7, 10, 15, ... 120 days) from the last
    image found until a date with valid imagery appears. Stops when a slot
    cannot be filled or the search leaves the allowed date range.

    Args:
        anchor_date (pd.Timestamp): Date of the anchor image.
        used_dates (set): Dates already placed in the grid (mutated in place).
        grid_positions (list[int]): Grid slot indices, in fill order.
        direction (int): -1 to search before the anchor, +1 after.
        stop_fn (callable): stop_fn(date) -> True when the date is out of range.
    Returns:
        list[tuple]: (position, date, rgb, days offset from anchor) entries.
    """
    collected = []
    last_date = anchor_date
    pos_idx = 0
    # Progressive gaps to try between consecutive grid images.
    search_offsets = [7, 10, 15, 20, 30, 40, 60, 90, 120]
    side = 'Before' if direction < 0 else 'After'
    while pos_idx < len(grid_positions) and not stop_fn(last_date):
        found = False
        for gap in search_offsets:
            target = last_date + timedelta(days=direction * gap)
            if stop_fn(target):
                # Larger gaps only move further out of range — give up on this slot.
                break
            tile_paths, actual_date, _diff = find_overlapping_tiles(
                target, tiff_dir, field_boundary, days_window=60,
                exclude_dates=used_dates, debug=False
            )
            if tile_paths and actual_date:
                rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary)
                if rgb is not None and has_valid_rgb_data(rgb):
                    offset = (actual_date - anchor_date).days
                    collected.append((grid_positions[pos_idx], actual_date, rgb, offset))
                    used_dates.add(actual_date)
                    last_date = actual_date  # Continue walking from here.
                    print(f" ✓ {side}[{pos_idx}]: {actual_date.strftime('%Y-%m-%d')} ({offset:+d}d from anchor) - RGB: {np.nanmin(rgb):.4f}-{np.nanmax(rgb):.4f}")
                    pos_idx += 1
                    found = True
                    break  # Found one; stop trying larger gaps.
        if not found:
            break  # No gap produced a valid image; stop collecting this side.
    return collected


def create_temporal_rgb_grid(harvest_date, field_data, field_id, tiff_dir, field_boundary,
                             title, output_dir, harvest_type='registered', model_name=None,
                             harvest_index=None, min_search_year=2024):
    """
    Create a 5x3 temporal grid around the harvest date (15 images).

    An "anchor" image is located as close to the harvest date as possible
    (grid slot 8, outlined in red); slots 0-7 are filled walking backwards
    in time and slots 9-14 walking forwards, with flexible spacing that
    adapts to data availability.

    Args:
        harvest_date (pd.Timestamp): Target harvest date
        field_data (pd.DataFrame): Field data with Date column (currently unused here)
        field_id (str): Field identifier
        tiff_dir (Path): Directory with TIFF files
        field_boundary (shapely.Polygon): Field boundary
        title (str): Plot title
        output_dir (Path): Output directory
        harvest_type (str): 'registered' or 'predicted'
        model_name (str): Model name for predicted harvests (e.g., 'Original')
        harvest_index (int): Index of harvest within same model (for multiple harvests)
        min_search_year (int): Earliest year to search backwards into
            (generalizes the previously hard-coded 2024 archive bound).
    Returns:
        Path: Path to saved PNG or None if failed
    """
    harvest_date = pd.Timestamp(harvest_date)
    tiff_dir = Path(tiff_dir)
    # Pre-allocate the 15 grid slots.
    rgb_images = [None] * 15
    days_offsets = [None] * 15
    actual_dates = [None] * 15
    used_dates = set()  # Dates already placed; set for O(1) exclusion lookups.

    # STEP 0: report which dates have imagery at all (debug aid).
    print(f" [STEP 0] Checking available TIFF dates in {tiff_dir}...")
    available_dates = []
    if tiff_dir.exists():
        for date_folder in sorted(tiff_dir.iterdir()):
            if date_folder.is_dir():
                try:
                    # Folder names may carry a suffix (YYYY-MM-DD_xxx); parse the
                    # date prefix the same way find_overlapping_tiles does.
                    available_dates.append(
                        datetime.strptime(date_folder.name.split('_')[0], '%Y-%m-%d').date()
                    )
                except ValueError:
                    pass
    print(f" Found {len(available_dates)} dates with data: {available_dates[:5]}... (showing first 5)")

    # STEP 1: find the anchor image (closest to the harvest date).
    print(f" [STEP 1] Finding anchor (closest to harvest {harvest_date.strftime('%Y-%m-%d')}, searching ±14 days)...")
    anchor_idx = 8  # Center slot (row 2, col 4) of the 5x3 grid.
    anchor_rgb, anchor_date, _anchor_diff = _find_anchor_image(harvest_date, tiff_dir, field_boundary)
    if anchor_rgb is not None:
        rgb_images[anchor_idx] = anchor_rgb
        days_offsets[anchor_idx] = 0  # The anchor is the temporal reference point.
        actual_dates[anchor_idx] = anchor_date
        used_dates.add(anchor_date)

    # STEPS 2-3 walk outwards from the anchor; without one they are skipped.
    # (Bug fix: previously a missing anchor crashed on `None.year` /
    # `None + timedelta`; now the grid is simply rendered with empty panels.)
    if anchor_date is not None:
        print(f" [STEP 2] Collecting images BEFORE anchor (going backwards, flexible spacing)...")
        before = _collect_neighbor_images(
            anchor_date, tiff_dir, field_boundary, used_dates,
            grid_positions=[7, 6, 5, 4, 3, 2, 1, 0], direction=-1,
            stop_fn=lambda d: d.year < min_search_year,
        )
        print(f" [STEP 3] Collecting images AFTER anchor (going forwards, flexible spacing)...")
        max_search_date = anchor_date + timedelta(days=200)  # Hard forward limit.
        after = _collect_neighbor_images(
            anchor_date, tiff_dir, field_boundary, used_dates,
            grid_positions=[9, 10, 11, 12, 13, 14], direction=1,
            stop_fn=lambda d: d > max_search_date,
        )
        for pos, when, rgb, offset in before + after:
            rgb_images[pos] = rgb
            actual_dates[pos] = when
            days_offsets[pos] = offset

    # Render the 5x3 figure (15 panels).
    fig, axes = plt.subplots(3, 5, figsize=(25, 15))
    anchor_actual = actual_dates[8]
    anchor_offset_from_harvest = (anchor_actual - harvest_date).days if anchor_actual is not None else None
    if anchor_offset_from_harvest is not None and anchor_offset_from_harvest != 0:
        anchor_info = f"(Anchor: {anchor_actual.strftime('%Y-%m-%d')}, {anchor_offset_from_harvest:+d}d from predicted harvest)"
    elif anchor_actual is not None:
        anchor_info = f"(Exact match with anchor: {anchor_actual.strftime('%Y-%m-%d')})"
    else:
        anchor_info = ""
    fig.suptitle(f'{title}\nField {field_id} - {harvest_type.upper()} Harvest: {harvest_date.strftime("%Y-%m-%d")} {anchor_info}',
                 fontsize=16, fontweight='bold')
    # Nominal grid labels; the real acquisition dates/offsets go in each panel title.
    positions = [
        ('T-56d', 0, 0), ('T-49d', 0, 1), ('T-42d', 0, 2), ('T-35d', 0, 3), ('T-28d', 0, 4),
        ('T-21d', 1, 0), ('T-14d', 1, 1), ('T-7d', 1, 2), ('HARVEST', 1, 3), ('T+7d', 1, 4),
        ('T+14d', 2, 0), ('T+21d', 2, 1), ('T+28d', 2, 2), ('T+35d', 2, 3), ('T+42d', 2, 4),
    ]
    for idx, (label, row, col) in enumerate(positions):
        ax = axes[row, col]
        if idx < len(rgb_images) and rgb_images[idx] is not None:
            rgb_data = rgb_images[idx]
            # Per-panel and per-band stats help diagnose normalization issues.
            data_min = np.nanmin(rgb_data)
            data_max = np.nanmax(rgb_data)
            data_mean = np.nanmean(rgb_data)
            data_std = np.nanstd(rgb_data)
            r_min, r_max, r_mean = np.nanmin(rgb_data[:, :, 0]), np.nanmax(rgb_data[:, :, 0]), np.nanmean(rgb_data[:, :, 0])
            g_min, g_max, g_mean = np.nanmin(rgb_data[:, :, 1]), np.nanmax(rgb_data[:, :, 1]), np.nanmean(rgb_data[:, :, 1])
            b_min, b_max, b_mean = np.nanmin(rgb_data[:, :, 2]), np.nanmax(rgb_data[:, :, 2]), np.nanmean(rgb_data[:, :, 2])
            print(f" DEBUG VALID {label} ({actual_dates[idx].strftime('%Y-%m-%d')}): RGB overall {data_min:.4f}-{data_max:.4f} (mean={data_mean:.4f}, std={data_std:.4f})")
            print(f" R: {r_min:.4f}-{r_max:.4f} (μ={r_mean:.4f}), G: {g_min:.4f}-{g_max:.4f} (μ={g_mean:.4f}), B: {b_min:.4f}-{b_max:.4f} (μ={b_mean:.4f})")
            # Data is already normalized 0-1, so pin the display range explicitly.
            ax.imshow(rgb_data, vmin=0, vmax=1)
            if days_offsets[idx] is not None:
                offset_from_anchor = days_offsets[idx]
                offset_from_harvest = (actual_dates[idx] - harvest_date).days
                if idx == 8:  # ANCHOR/HARVEST panel
                    if offset_from_harvest == 0:
                        offset_str = f"HARVEST\n(Image: {actual_dates[idx].strftime('%Y-%m-%d')})"
                    else:
                        offset_str = f"HARVEST\n(Image: {actual_dates[idx].strftime('%Y-%m-%d')}, {offset_from_harvest:+d}d from predicted)"
                else:
                    # Show both offsets: from anchor and from harvest.
                    offset_str = f"{offset_from_anchor:+d}d from anchor\n{offset_from_harvest:+d}d from harvest\n{actual_dates[idx].strftime('%Y-%m-%d')}"
            else:
                offset_str = "No Data"
            ax.set_title(offset_str, fontsize=9, fontweight='bold')
        else:
            ax.text(0.5, 0.5, 'No Data', ha='center', va='center', fontsize=12, color='gray')
            ax.set_title('No Data', fontsize=10)
            print(f" DEBUG EMPTY {label}: No image data collected")
        # Red frame marks the anchor panel whether or not it holds data.
        if idx == 8:
            for spine in ax.spines.values():
                spine.set_edgecolor('red')
                spine.set_linewidth(4)
        ax.set_xticks([])
        ax.set_yticks([])
    plt.tight_layout()

    # Filename: field_<id>_<date>_<registered|model>_harvest[<i>]_rgb.png
    harvest_date_str = harvest_date.strftime('%Y%m%d')
    if harvest_type == 'registered':
        filename = f'field_{field_id}_{harvest_date_str}_registered_harvest_rgb.png'
    elif harvest_index is not None and harvest_index > 0:
        filename = f'field_{field_id}_{harvest_date_str}_{model_name}_harvest{harvest_index}_rgb.png'
    else:
        filename = f'field_{field_id}_{harvest_date_str}_{model_name}_harvest_rgb.png'
    output_path = Path(output_dir) / filename
    try:
        plt.savefig(output_path, dpi=100, format='png')
        plt.close()
        # Bug fix: report the real output path (was a hard-coded "(unknown)").
        print(f" ✓ Saved: {output_path}")
        return output_path
    except Exception as e:
        plt.close()
        print(f" ✗ Error saving PNG: {e}")
        return None
def generate_rgb_grids(field_data, field_id, registered_harvest_dates, predicted_harvest_dates,
                       output_dir, tiff_dir, geojson_path):
    """
    Main orchestration function for RGB visualization.

    Creates temporal grids for:
    1. Registered harvest dates (if available)
    2. Predicted harvest dates (if available), grouped per model

    Args:
        field_data (pd.DataFrame): Field data with Date, CI columns
        field_id (str): Field identifier
        registered_harvest_dates (list): Registered harvest dates (pd.Timestamp)
        predicted_harvest_dates (list): Predicted harvest dates (dict or pd.Timestamp)
        output_dir (Path): Output directory for plots
        tiff_dir (Path): Directory containing TIFF files
        geojson_path (Path): Path to pivot.geojson
    Returns:
        dict: Summary of generated plots with keys 'registered' and 'predicted'
    """
    nothing = {'registered': [], 'predicted': []}
    if rasterio is None:
        print(" ⚠ Rasterio not available - skipping RGB visualization")
        return nothing
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    tiff_dir = Path(tiff_dir)
    geojson_path = Path(geojson_path)
    if not tiff_dir.exists():
        print(f" ✗ TIFF directory not found: {tiff_dir}")
        return nothing
    if not geojson_path.exists():
        print(f" ✗ GeoJSON not found: {geojson_path}")
        return nothing
    # Load the field boundary polygon used for clipping every image.
    print(f" Loading field boundary for {field_id}...")
    _feature, field_boundary = load_field_boundaries(geojson_path, field_id)
    if field_boundary is None:
        print(f" ✗ Could not load field boundary for {field_id}")
        return nothing
    results = {'registered': [], 'predicted': []}
    # --- Registered harvest dates ---
    if registered_harvest_dates and len(registered_harvest_dates) > 0:
        print(f" Processing {len(registered_harvest_dates)} registered harvest dates...")
        for i, when in enumerate(registered_harvest_dates):
            if pd.isna(when):
                continue
            print(f" [{i+1}/{len(registered_harvest_dates)}] {when.strftime('%Y-%m-%d')}")
            saved = create_temporal_rgb_grid(
                when, field_data, field_id, tiff_dir, field_boundary,
                title='Registered Harvest Validation',
                output_dir=output_dir,
                harvest_type='registered',
                model_name=None,
                harvest_index=i
            )
            if saved:
                results['registered'].append(saved)
    # --- Predicted harvest dates, grouped by model ---
    if predicted_harvest_dates and len(predicted_harvest_dates) > 0:
        print(f" Processing {len(predicted_harvest_dates)} predicted harvest dates...")
        # Group per model so each model's harvest index restarts at 0.
        harvest_by_model = {}
        for entry in predicted_harvest_dates:
            # Entries may be dicts ({'harvest_date', 'model_name'}) or bare timestamps.
            if isinstance(entry, dict):
                when = entry.get('harvest_date')
                model = entry.get('model_name', 'predicted')
            else:
                when, model = entry, 'predicted'
            harvest_by_model.setdefault(model, []).append(when)
        overall_index = 1
        for model, dates in harvest_by_model.items():
            for model_harvest_idx, when in enumerate(dates):
                if pd.isna(when):
                    continue
                print(f" [{overall_index}/{len(predicted_harvest_dates)}] {when.strftime('%Y-%m-%d')} ({model})")
                saved = create_temporal_rgb_grid(
                    when, field_data, field_id, tiff_dir, field_boundary,
                    title=f'Predicted Harvest Validation ({model})',
                    output_dir=output_dir,
                    harvest_type='predicted',
                    model_name=model,
                    harvest_index=model_harvest_idx
                )
                if saved:
                    results['predicted'].append(saved)
                overall_index += 1
    return results
if __name__ == '__main__':
    # This module is a library; running it directly only prints usage help.
    print(
        "RGB Visualization Tool\n"
        "This module is intended to be imported and called from compare_307_models_production.py\n"
        "\nExample:\n"
        " from rgb_visualization import generate_rgb_grids\n"
        " generate_rgb_grids(field_data, field_id, registered_dates, predicted_dates, output_dir, tiff_dir, geojson_path)"
    )