#!/usr/bin/env python
"""
RGB Visualization Tool for Harvest Date Validation

Creates 3x5 temporal grids showing satellite imagery around registered and
predicted harvest dates. Extracts RGB bands from PlanetScope-derived tiles
and clips them to field boundaries from a GeoJSON.

Functions:
- load_field_boundaries(): Load field geometries from GeoJSON
- find_overlapping_tiles(): Find tiles overlapping the field nearest a target date
- load_and_clip_tiff_rgb(): Load TIFF, extract RGB, clip to field boundary
- load_and_composite_tiles_rgb(): Composite RGB from multiple overlapping tiles
- create_temporal_rgb_grid(): Create 3x5 grid (pre-harvest, near-harvest, post-harvest)
- generate_rgb_grids(): Main orchestration function

Usage:
    from rgb_visualization import generate_rgb_grids
    generate_rgb_grids(field_data, field_id, registered_harvest_dates,
                       predicted_harvest_dates, output_dir, tiff_dir, geojson_path)
"""

import json
import warnings
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')  # Non-interactive backend to avoid display hangs
import matplotlib.pyplot as plt

warnings.filterwarnings('ignore')

try:
    import rasterio
    from rasterio.mask import mask
    import shapely.geometry as shgeom
except ImportError:
    print("Warning: rasterio not available. RGB visualization will be skipped.")
    rasterio = None
    shgeom = None


def load_field_boundaries(geojson_path, field_id):
    """
    Load a field boundary from a GeoJSON file.

    Args:
        geojson_path (Path): Path to pivot.geojson
        field_id (str): Field identifier (e.g., "13973")

    Returns:
        tuple: (GeoJSON feature, shapely.geometry.Polygon), or (None, None)
            if the field is not found or the file cannot be read
    """
    try:
        with open(geojson_path) as f:
            geojson_data = json.load(f)

        # Match the field ID against the 'field' or 'sub_field' properties
        for feature in geojson_data.get('features', []):
            props = feature.get('properties', {})
            if str(props.get('field', '')) == str(field_id) or \
               str(props.get('sub_field', '')) == str(field_id):
                geometry = feature.get('geometry')
                if geometry:
                    geom_type = geometry.get('type', '')
                    coordinates = geometry.get('coordinates', [])

                    # MultiPolygon: coordinates[i] = [exterior ring, inner rings, ...]
                    if geom_type == 'MultiPolygon':
                        # Use the first polygon's exterior ring
                        if coordinates and len(coordinates) > 0:
                            coords = coordinates[0][0]
                            polygon = shgeom.Polygon(coords)
                            return feature, polygon

                    # Polygon: coordinates = [exterior ring, inner rings, ...]
                    elif geom_type == 'Polygon':
                        if coordinates and len(coordinates) > 0:
                            coords = coordinates[0]  # Exterior ring
                            polygon = shgeom.Polygon(coords)
                            return feature, polygon

        print(f"  ⚠ Field {field_id} not found in GeoJSON")
        return None, None
    except Exception as e:
        print(f"  ✗ Error loading GeoJSON: {e}")
        return None, None
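
# A minimal usage sketch for load_field_boundaries. The path and field ID
# below are hypothetical placeholders; this helper is illustrative and is
# never called by the pipeline.
def _load_boundary_demo():
    feature, boundary = load_field_boundaries(Path('pivot.geojson'), '13973')
    if boundary is not None:
        # Bounds come back as (minx, miny, maxx, maxy) in the GeoJSON's CRS
        print(boundary.bounds)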

def find_overlapping_tiles(target_date, tiff_dir, field_boundary, days_window=60,
                           exclude_dates=None, debug=False):
    """
    Find tile files overlapping the field for target_date or the nearest
    available date.

    Searches by increasing distance from the target date until it finds tiles
    that overlap the field boundary (pixel-level data validity is checked
    downstream by the caller). Dates in exclude_dates are skipped to preserve
    temporal diversity in the grids.

    Args:
        target_date (pd.Timestamp): Target date to find tiles near
        tiff_dir (Path): Directory containing per-date tile subdirectories
        field_boundary (shapely.Polygon): Field boundary for overlap detection
        days_window (int): Max days to search before/after target
        exclude_dates (list): Dates to skip (avoid repetition)
        debug (bool): Enable detailed debugging output

    Returns:
        tuple: (list of tile paths, actual_date, days_diff)
    """
    target_date = pd.Timestamp(target_date)
    tiff_dir = Path(tiff_dir)
    exclude_dates = exclude_dates or []
    exclude_dates = [pd.Timestamp(d) for d in exclude_dates]

    if not tiff_dir.exists():
        if debug:
            print(f"  [DEBUG] TIFF dir does not exist: {tiff_dir}")
        return [], None, None

    # Build a map of all available dates -> (tile files, directory name)
    available_dates = {}
    date_parse_errors = 0
    for date_dir in tiff_dir.iterdir():
        if not date_dir.is_dir():
            continue
        try:
            dir_name = date_dir.name
            date_str = dir_name.split('_')[0]
            tile_date = pd.Timestamp(date_str)

            # Include ALL tiles, regardless of size: small tiles may still
            # contain valid data for specific fields
            tile_files = list(date_dir.glob('*.tif'))
            if tile_files:
                available_dates[tile_date] = (tile_files, dir_name)
        except Exception as e:
            date_parse_errors += 1
            if debug:
                print(f"  [DEBUG] Failed to parse date from {date_dir.name}: {e}")

    if debug:
        print(f"  [DEBUG] Found {len(available_dates)} dates with tile files, "
              f"{date_parse_errors} parse errors")
        if available_dates:
            print(f"  [DEBUG] Date range: "
                  f"{min(available_dates.keys()).strftime('%Y-%m-%d')} to "
                  f"{max(available_dates.keys()).strftime('%Y-%m-%d')}")

    if not available_dates:
        return [], None, None

    # Search dates by increasing distance from target, within days_window,
    # looking for tiles that overlap the field
    sorted_dates = sorted(available_dates.keys(),
                          key=lambda d: abs((d - target_date).days))

    for search_date in sorted_dates:
        # Enforce the search window (documented previously but not applied)
        if abs((search_date - target_date).days) > days_window:
            break  # sorted by distance, so all remaining dates are farther

        # Skip if this date was recently used (avoid temporal repetition)
        if search_date in exclude_dates:
            continue

        tiles, dir_name = available_dates[search_date]
        days_diff = (search_date - target_date).days

        # Check which tiles at this date overlap the field
        overlapping_tiles = []
        tile_check_errors = 0
        for tile_path in tiles:
            try:
                with rasterio.open(tile_path) as src:
                    tile_bounds = src.bounds
                    tile_geom = shgeom.box(*tile_bounds)

                    # Debug the first tile checked at this date
                    if debug and len(overlapping_tiles) == 0 and tile_check_errors == 0:
                        print(f"  [DEBUG] First tile check for {tile_path.name}:")
                        print(f"    Tile bounds: {tile_bounds}")
                        print(f"    Tile CRS: {src.crs}")
                        print(f"    Field bounds: {field_boundary.bounds}")
                        print(f"    Field geom type: {field_boundary.geom_type}")
                        print(f"    Intersects: {tile_geom.intersects(field_boundary)}")

                    if tile_geom.intersects(field_boundary):
                        overlapping_tiles.append(tile_path)
            except Exception as e:
                tile_check_errors += 1
                if debug:
                    print(f"  [DEBUG] Error checking tile {tile_path.name}: {e}")

        if debug:
            print(f"  [DEBUG] Date {search_date.strftime('%Y-%m-%d')}: {len(tiles)} tiles, "
                  f"{len(overlapping_tiles)} overlap field, {tile_check_errors} errors")

        if overlapping_tiles:
            print(f"  [FIND_TILES] Target: {target_date.strftime('%Y-%m-%d')}, "
                  f"Using: {search_date.strftime('%Y-%m-%d')} ({days_diff:+d}d), "
                  f"Tiles: {[Path(t).name for t in overlapping_tiles]}")
            return overlapping_tiles, search_date, days_diff

    # No overlapping tiles found at all
    if debug:
        print(f"  [DEBUG] No overlapping tiles found for "
              f"{target_date.strftime('%Y-%m-%d')} within {len(sorted_dates)} searched dates")
    return [], None, None
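
# A standalone sketch of the nearest-date ordering used above: candidate
# dates are ranked by absolute distance from the target, so the search
# expands outward from it. The dates below are illustrative only.
def _nearest_date_order_demo():
    target = pd.Timestamp('2024-06-15')
    dates = [pd.Timestamp(d) for d in ('2024-06-01', '2024-06-14', '2024-07-10')]
    # -> [2024-06-14 (1d away), 2024-06-01 (14d), 2024-07-10 (25d)]
    return sorted(dates, key=lambda d: abs((d - target).days))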

def load_and_clip_tiff_rgb(tiff_path, field_boundary, rgb_bands=(1, 2, 3)):
    """
    Load a TIFF and extract RGB bands clipped to the field boundary.

    For merged_final_tif files (cloud-masked and filtered):
    - Band 1: Red
    - Band 2: Green
    - Band 3: Blue
    - Band 4: NIR
    - Band 5: CI

    Args:
        tiff_path (Path): Path to TIFF file
        field_boundary (shapely.Polygon): Field boundary for clipping
        rgb_bands (tuple): Band indices for RGB (1-indexed; defaults to
            (1, 2, 3) for merged_final_tif)

    Returns:
        np.ndarray: RGB data (height, width, 3) with values 0-1,
            or None if an error occurs
    """
    if rasterio is None or field_boundary is None:
        return None

    try:
        with rasterio.open(tiff_path) as src:
            # Need at least three bands to build RGB
            if src.count < 3:
                return None

            # Mask and read the RGB bands, clipped to this field polygon only
            geom = shgeom.mapping(field_boundary)
            try:
                masked_data, _ = mask(src, [geom], crop=True, indexes=list(rgb_bands))
                rgb = np.stack([masked_data[i] for i in range(3)], axis=-1)
            except (ValueError, RuntimeError) as e:
                # Mask failed: field doesn't overlap this tile, or geometry issue
                print(f"    MASK ERROR on {Path(tiff_path).name}: {str(e)[:50]}")
                return None

            rgb = rgb.astype(np.float32)

            # Normalize to 0-1. The data may be 8-bit (0-255) or 16-bit
            # (0-65535) stored as float32; check the actual max to decide.
            max_val = np.nanmax(rgb)
            if max_val > 0:
                if max_val <= 255:        # values look 8-bit
                    rgb = rgb / 255.0
                elif max_val <= 65535:    # values look 16-bit
                    rgb = rgb / 65535.0
                else:                     # unknown scale: normalize by the max
                    rgb = rgb / max_val

            rgb = np.clip(rgb, 0, 1)

            # All-NaN means no valid pixels for this field in this tile
            if np.all(np.isnan(rgb)):
                return None

            # Replace remaining NaN (cloud/invalid pixels) with 0
            rgb = np.nan_to_num(rgb, nan=0.0)
            return rgb
    except Exception:
        return None
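
# The normalization heuristic from load_and_clip_tiff_rgb in a standalone,
# testable form (illustrative helper, not used by the pipeline):
def _normalize_reflectance_demo(arr):
    max_val = np.nanmax(arr)
    if max_val <= 0:
        return arr
    if max_val <= 255:       # values look 8-bit
        return arr / 255.0
    if max_val <= 65535:     # values look 16-bit
        return arr / 65535.0
    return arr / max_val     # unknown scale: normalize by the max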

def load_and_composite_tiles_rgb(tile_paths, field_boundary):
    """
    Load RGB from multiple overlapping tiles and composite them into a single image.

    Args:
        tile_paths (list[Path]): List of tile file paths
        field_boundary (shapely.Polygon): Field boundary for clipping

    Returns:
        np.ndarray: Composited RGB data (height, width, 3) with values 0-1,
            or None if an error occurs
    """
    if rasterio is None or field_boundary is None or not tile_paths:
        return None

    try:
        # Load RGB from each tile, skipping tiles that fail
        rgb_arrays = []
        for tile_path in tile_paths:
            rgb = load_and_clip_tiff_rgb(tile_path, field_boundary)
            if rgb is not None:
                rgb_arrays.append(rgb)

        if not rgb_arrays:
            return None

        if len(rgb_arrays) == 1:
            composited = rgb_arrays[0]
        else:
            # Multiple tiles may come back with different shapes
            try:
                # Same shape: stack and take the per-pixel maximum
                stacked = np.stack(rgb_arrays, axis=0)
                composited = np.max(stacked, axis=0)
            except ValueError:
                # Different shapes (tiles masked to different field areas):
                # fall back to the largest, most complete tile
                composited = max(rgb_arrays, key=lambda x: x.size)

        composited = composited.astype(np.float32)

        # Contrast stretch: rescale to 0-1 using the 2nd-98th percentiles of
        # the valid (non-zero) pixels; this makes dim images visible while
        # ignoring outliers
        valid_data = composited[composited > 0]
        if len(valid_data) > 0:
            data_min = np.percentile(valid_data, 2)
            data_max = np.percentile(valid_data, 98)
            if data_max > data_min:
                composited = (composited - data_min) / (data_max - data_min)
                composited = np.clip(composited, 0, 1)

        return composited.astype(np.float32)
    except Exception:
        return None


def has_valid_rgb_data(rgb_data, threshold=0.05):
    """
    Check if an RGB image has actual data (not black/empty).

    Returns True if the max value exceeds threshold and there is some
    variation across pixels.
    """
    if rgb_data is None:
        return False
    try:
        data_max = np.nanmax(rgb_data)
        data_min = np.nanmin(rgb_data)
        # Valid if max > threshold AND there's variation
        return data_max > threshold and (data_max - data_min) > 0.01
    except Exception:
        return False
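
# The 2nd-98th percentile contrast stretch from load_and_composite_tiles_rgb
# as an isolated helper, under the same assumptions (values >= 0, zeros mean
# no data); illustrative only:
def _contrast_stretch_demo(rgb):
    valid = rgb[rgb > 0]
    if valid.size == 0:
        return rgb
    lo, hi = np.percentile(valid, 2), np.percentile(valid, 98)
    if hi > lo:
        rgb = np.clip((rgb - lo) / (hi - lo), 0, 1)
    return rgb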

def create_temporal_rgb_grid(harvest_date, field_data, field_id, tiff_dir,
                             field_boundary, title, output_dir,
                             harvest_type='registered', model_name=None,
                             harvest_index=None):
    """
    Create a 3x5 temporal grid around the harvest date (15 images, nominally
    7-day intervals; actual spacing adapts to data availability).

    Layout:
        Row 1: T-56d, T-49d, T-42d, T-35d, T-28d  (pre-harvest)
        Row 2: T-21d, T-14d, T-7d, HARVEST, T+7d  (near harvest)
        Row 3: T+14d, T+21d, T+28d, T+35d, T+42d  (post-harvest progression)

    Args:
        harvest_date (pd.Timestamp): Target harvest date
        field_data (pd.DataFrame): Field data with Date column
        field_id (str): Field identifier
        tiff_dir (Path): Directory with TIFF files
        field_boundary (shapely.Polygon): Field boundary
        title (str): Plot title
        output_dir (Path): Output directory
        harvest_type (str): 'registered' or 'predicted'
        model_name (str): Model name for predicted harvests
            (e.g., 'Original', 'Long-Season')
        harvest_index (int): Index of harvest within the same model
            (for multiple harvests)

    Returns:
        Path: Path to the saved PNG, or None if saving failed
    """
    harvest_date = pd.Timestamp(harvest_date)

    # Pre-allocate 15 image slots
    rgb_images = [None] * 15
    days_offsets = [None] * 15
    actual_dates = [None] * 15
    used_dates = set()  # Set for efficient membership checks

    # STEP 0: Debug - list all available dates
    print(f"  [STEP 0] Checking available TIFF dates in {tiff_dir}...")
    available_dates = []
    if tiff_dir.exists():
        for date_folder in sorted(tiff_dir.iterdir()):
            if date_folder.is_dir():
                try:
                    # Folder names may carry a suffix after the date, so parse
                    # only the leading component (matches find_overlapping_tiles)
                    date_obj = datetime.strptime(date_folder.name.split('_')[0], '%Y-%m-%d').date()
                    available_dates.append(date_obj)
                except ValueError:
                    pass
    print(f"    Found {len(available_dates)} dates with data: {available_dates[:5]}... (showing first 5)")

    # STEP 1: Find the anchor image (closest to the harvest date) FIRST.
    # Search within ±14 days of the harvest date, then expand if needed.
    print(f"  [STEP 1] Finding anchor (closest to harvest {harvest_date.strftime('%Y-%m-%d')}, searching ±14 days)...")
    anchor_tile_paths, anchor_date, anchor_days_diff = find_overlapping_tiles(
        harvest_date, tiff_dir, field_boundary,
        days_window=14, exclude_dates=[], debug=False
    )

    anchor_rgb = None
    anchor_idx = 8  # Slot 8 holds the anchor (HARVEST position: middle row, fourth column)
    failed_anchor_dates = []  # Dates that failed validation

    if anchor_tile_paths and anchor_date:
        anchor_rgb = load_and_composite_tiles_rgb(anchor_tile_paths, field_boundary)
        if anchor_rgb is not None and has_valid_rgb_data(anchor_rgb):
            rgb_images[anchor_idx] = anchor_rgb
            days_offsets[anchor_idx] = 0  # Anchor is the reference point
            actual_dates[anchor_idx] = anchor_date
            used_dates.add(anchor_date)
            print(f"    ✓ ANCHOR FOUND (±14d): {anchor_date.strftime('%Y-%m-%d')} ({anchor_days_diff:+d}d from predicted harvest)")
        else:
            failed_anchor_dates.append(anchor_date)
            print(f"    ⚠ Found date {anchor_date.strftime('%Y-%m-%d')} within ±14d, but image has no valid data")
            print(f"  [RETRY] Expanding anchor search to ±60 days (excluding failed dates)...")
            anchor_tile_paths, anchor_date, anchor_days_diff = find_overlapping_tiles(
                harvest_date, tiff_dir, field_boundary,
                days_window=60, exclude_dates=set(failed_anchor_dates), debug=False
            )
            if anchor_tile_paths and anchor_date:
                anchor_rgb = load_and_composite_tiles_rgb(anchor_tile_paths, field_boundary)
                if anchor_rgb is not None and has_valid_rgb_data(anchor_rgb):
                    rgb_images[anchor_idx] = anchor_rgb
                    days_offsets[anchor_idx] = 0
                    actual_dates[anchor_idx] = anchor_date
                    used_dates.add(anchor_date)
                    print(f"    ✓ ANCHOR FOUND (±60d): {anchor_date.strftime('%Y-%m-%d')} ({anchor_days_diff:+d}d from predicted harvest)")
                else:
                    failed_anchor_dates.append(anchor_date)
                    print(f"    ✗ No valid anchor found even within ±60 days")
            else:
                print(f"    ✗ No tiles found for any date within ±60 days")
    else:
        print(f"    ⚠ No tiles found within ±14 days, expanding search...")
        anchor_tile_paths, anchor_date, anchor_days_diff = find_overlapping_tiles(
            harvest_date, tiff_dir, field_boundary,
            days_window=60, exclude_dates=[], debug=False
        )
        if anchor_tile_paths and anchor_date:
            anchor_rgb = load_and_composite_tiles_rgb(anchor_tile_paths, field_boundary)
            if anchor_rgb is not None and has_valid_rgb_data(anchor_rgb):
                rgb_images[anchor_idx] = anchor_rgb
                days_offsets[anchor_idx] = 0  # Anchor is the reference point
                actual_dates[anchor_idx] = anchor_date
                used_dates.add(anchor_date)
                print(f"    ✓ ANCHOR FOUND (±60d): {anchor_date.strftime('%Y-%m-%d')} ({anchor_days_diff:+d}d from predicted harvest)")
            else:
                print(f"    ✗ No valid anchor found even within ±60 days")
        else:
            print(f"    ✗ No tiles found for any date within ±60 days")

    # If no anchor date was found at all, fall back to the harvest date as the
    # reference point so the before/after searches below still have an origin
    # (otherwise last_found_date would be None and the loops would crash)
    if anchor_date is None:
        anchor_date = harvest_date

    # STEP 2: Dynamically collect images BEFORE the anchor date.
    # Strategy: walk backwards from the anchor with progressively larger
    # search offsets (7, 10, 15, 20, 30+ days) until the slots fill or the
    # search is exhausted.
    print(f"  [STEP 2] Collecting images BEFORE anchor (going backwards, flexible spacing)...")
    before_positions = [7, 6, 5, 4, 3, 2, 1, 0]  # Filled in reverse order (slot 7 -> 0)
    before_images = []  # (position, date, rgb, offset)
    pos_idx = 0  # Index into before_positions
    last_found_date = anchor_date

    # Progressive search offsets (days before the last found image)
    search_offsets = [7, 10, 15, 20, 30, 40, 60, 90, 120]

    # The year floor bounds the backward search to the current data archive
    while pos_idx < len(before_positions) and last_found_date.year >= 2024:
        found_this_iteration = False

        # Try each offset until a valid image turns up
        for days_offset in search_offsets:
            search_target_date = last_found_date - timedelta(days=days_offset)
            tile_paths, actual_date, days_diff = find_overlapping_tiles(
                search_target_date, tiff_dir, field_boundary,
                days_window=60, exclude_dates=used_dates, debug=False
            )
            if tile_paths and actual_date:
                rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary)
                if rgb is not None and has_valid_rgb_data(rgb):
                    # Found a valid image
                    overall_max = np.nanmax(rgb)
                    overall_min = np.nanmin(rgb)
                    offset_from_anchor = (actual_date - anchor_date).days
                    before_images.append((before_positions[pos_idx], actual_date, rgb, offset_from_anchor))
                    used_dates.add(actual_date)
                    last_found_date = actual_date  # Continue backwards from this date
                    print(f"    ✓ Before[{pos_idx}]: {actual_date.strftime('%Y-%m-%d')} ({offset_from_anchor:+d}d from anchor) - RGB: {overall_min:.4f}-{overall_max:.4f}")
                    pos_idx += 1
                    found_this_iteration = True
                    break  # Found one; stop trying larger offsets

        # If nothing was found at any offset, stop collecting before images
        if not found_this_iteration:
            break

    # Store the collected before images
    for pos, actual_date, rgb, offset in before_images:
        rgb_images[pos] = rgb
        actual_dates[pos] = actual_date
        days_offsets[pos] = offset
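    # Illustrative walkthrough (hypothetical dates): with an anchor at
    # 2024-06-15 and sparse coverage, the walk above might accept 2024-06-08
    # (7d probe), then fail the 7/10/15-day probes and accept 2024-05-19
    # (20d probe), widening spacing only as far as the archive forces it.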
    # STEP 3: Dynamically collect images AFTER the anchor date.
    # Strategy: walk forwards from the anchor with progressively larger
    # search offsets (7, 10, 15, 20, 30+ days).
    print(f"  [STEP 3] Collecting images AFTER anchor (going forwards, flexible spacing)...")
    after_positions = [9, 10, 11, 12, 13, 14]  # Filled in order (slot 9 -> 14)
    after_images = []  # (position, date, rgb, offset)
    pos_idx = 0  # Index into after_positions
    last_found_date = anchor_date
    max_search_date = anchor_date + timedelta(days=200)  # Don't search beyond 200 days forward

    # Progressive search offsets (days after the last found image)
    search_offsets = [7, 10, 15, 20, 30, 40, 60, 90, 120]

    while pos_idx < len(after_positions) and last_found_date < max_search_date:
        found_this_iteration = False

        # Try each offset until a valid image turns up
        for days_offset in search_offsets:
            search_target_date = last_found_date + timedelta(days=days_offset)

            # Don't search beyond the max date
            if search_target_date > max_search_date:
                break

            tile_paths, actual_date, days_diff = find_overlapping_tiles(
                search_target_date, tiff_dir, field_boundary,
                days_window=60, exclude_dates=used_dates, debug=False
            )
            if tile_paths and actual_date:
                rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary)
                if rgb is not None and has_valid_rgb_data(rgb):
                    # Found a valid image
                    overall_max = np.nanmax(rgb)
                    overall_min = np.nanmin(rgb)
                    offset_from_anchor = (actual_date - anchor_date).days
                    after_images.append((after_positions[pos_idx], actual_date, rgb, offset_from_anchor))
                    used_dates.add(actual_date)
                    last_found_date = actual_date  # Continue forwards from this date
                    print(f"    ✓ After[{pos_idx}]: {actual_date.strftime('%Y-%m-%d')} ({offset_from_anchor:+d}d from anchor) - RGB: {overall_min:.4f}-{overall_max:.4f}")
                    pos_idx += 1
                    found_this_iteration = True
                    break  # Found one; stop trying larger offsets

        # If nothing was found at any offset, stop collecting after images
        if not found_this_iteration:
            break

    # Store the collected after images
    for pos, actual_date, rgb, offset in after_images:
        rgb_images[pos] = rgb
        actual_dates[pos] = actual_date
        days_offsets[pos] = offset
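    # Slot -> (row, col) on the 3x5 grid: with 5 columns, slot // 5 is the
    # row and slot % 5 the column, so before-slots 0-7 fill rows 0-1 to the
    # left of the anchor, slot 8 (anchor) sits at row 1, col 3, and
    # after-slots 9-14 fill the remainder of rows 1-2.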
    # Create the 3x5 grid plot (15 images)
    fig, axes = plt.subplots(3, 5, figsize=(25, 15))

    # Build the title with anchor offset information
    anchor_offset_from_harvest = (actual_dates[8] - harvest_date).days if actual_dates[8] is not None else None
    if anchor_offset_from_harvest is not None and anchor_offset_from_harvest != 0:
        anchor_info = f"(Anchor: {actual_dates[8].strftime('%Y-%m-%d')}, {anchor_offset_from_harvest:+d}d from predicted harvest)"
    else:
        anchor_info = f"(Exact match with anchor: {actual_dates[8].strftime('%Y-%m-%d')})" if actual_dates[8] is not None else ""
    fig.suptitle(f'{title}\nField {field_id} - {harvest_type.upper()} Harvest: {harvest_date.strftime("%Y-%m-%d")} {anchor_info}',
                 fontsize=16, fontweight='bold')

    # Grid positions (3 rows x 5 columns = 15 panels); labels are nominal
    # weekly offsets, actual dates are shown in each panel title
    positions = [
        ('T-56d', 0, 0), ('T-49d', 0, 1), ('T-42d', 0, 2), ('T-35d', 0, 3), ('T-28d', 0, 4),
        ('T-21d', 1, 0), ('T-14d', 1, 1), ('T-7d', 1, 2), ('HARVEST', 1, 3), ('T+7d', 1, 4),
        ('T+14d', 2, 0), ('T+21d', 2, 1), ('T+28d', 2, 2), ('T+35d', 2, 3), ('T+42d', 2, 4),
    ]

    for idx, (label, row, col) in enumerate(positions):
        ax = axes[row, col]

        if idx < len(rgb_images) and rgb_images[idx] is not None:
            rgb_data = rgb_images[idx]

            # Debug: report overall and per-band data ranges
            data_min = np.nanmin(rgb_data)
            data_max = np.nanmax(rgb_data)
            data_mean = np.nanmean(rgb_data)
            data_std = np.nanstd(rgb_data)
            r_min, r_max, r_mean = np.nanmin(rgb_data[:, :, 0]), np.nanmax(rgb_data[:, :, 0]), np.nanmean(rgb_data[:, :, 0])
            g_min, g_max, g_mean = np.nanmin(rgb_data[:, :, 1]), np.nanmax(rgb_data[:, :, 1]), np.nanmean(rgb_data[:, :, 1])
            b_min, b_max, b_mean = np.nanmin(rgb_data[:, :, 2]), np.nanmax(rgb_data[:, :, 2]), np.nanmean(rgb_data[:, :, 2])
            print(f"    DEBUG VALID {label} ({actual_dates[idx].strftime('%Y-%m-%d')}): RGB overall {data_min:.4f}-{data_max:.4f} (mean={data_mean:.4f}, std={data_std:.4f})")
            print(f"      R: {r_min:.4f}-{r_max:.4f} (μ={r_mean:.4f}), G: {g_min:.4f}-{g_max:.4f} (μ={g_mean:.4f}), B: {b_min:.4f}-{b_max:.4f} (μ={b_mean:.4f})")

            # RGB float arrays are rendered directly; values must already be
            # in 0-1 (vmin/vmax are ignored for RGB input, so they are omitted)
            ax.imshow(rgb_data)

            # Panel title: show BOTH the anchor offset AND the harvest offset
            if days_offsets[idx] is not None:
                offset_from_anchor = days_offsets[idx]
                offset_from_harvest = (actual_dates[idx] - harvest_date).days

                if idx == 8:  # ANCHOR/HARVEST position
                    if offset_from_harvest == 0:
                        offset_str = f"HARVEST\n(Image: {actual_dates[idx].strftime('%Y-%m-%d')})"
                    else:
                        offset_str = f"HARVEST\n(Image: {actual_dates[idx].strftime('%Y-%m-%d')}, {offset_from_harvest:+d}d from predicted)"
                else:
                    offset_str = f"{offset_from_anchor:+d}d from anchor\n{offset_from_harvest:+d}d from harvest\n{actual_dates[idx].strftime('%Y-%m-%d')}"
            else:
                offset_str = "No Data"

            ax.set_title(offset_str, fontsize=9, fontweight='bold')

            # Red box around the anchor image (slot 8 is harvest/anchor)
            if idx == 8:
                for spine in ax.spines.values():
                    spine.set_edgecolor('red')
                    spine.set_linewidth(4)
        else:
            ax.text(0.5, 0.5, 'No Data', ha='center', va='center', fontsize=12, color='gray')
            ax.set_title('No Data', fontsize=10)
            print(f"    DEBUG EMPTY {label}: No image data collected")

            # Red box for the anchor position even if no data
            if idx == 8:
                for spine in ax.spines.values():
                    spine.set_edgecolor('red')
                    spine.set_linewidth(4)

        ax.set_xticks([])
        ax.set_yticks([])
    plt.tight_layout()

    # Save the figure. Naming scheme:
    # field_<ID>_<harvestdate>_<model>_<harvest style>.png
    harvest_date_str = harvest_date.strftime('%Y%m%d')
    if harvest_type == 'registered':
        filename = f'field_{field_id}_{harvest_date_str}_registered_harvest_rgb.png'
    else:
        # For predicted: include the model name, and the harvest index when
        # the model produced multiple harvests
        if harvest_index is not None and harvest_index > 0:
            filename = f'field_{field_id}_{harvest_date_str}_{model_name}_harvest{harvest_index}_rgb.png'
        else:
            filename = f'field_{field_id}_{harvest_date_str}_{model_name}_harvest_rgb.png'

    output_path = Path(output_dir) / filename
    try:
        plt.savefig(output_path, dpi=100, format='png')
        plt.close()
        print(f"    ✓ Saved: {filename}")
        return output_path
    except Exception as e:
        plt.close()
        print(f"    ✗ Error saving PNG: {e}")
        return None


def generate_rgb_grids(field_data, field_id, registered_harvest_dates,
                       predicted_harvest_dates, output_dir, tiff_dir, geojson_path):
    """
    Main orchestration function for RGB visualization.

    Creates 3x5 temporal grids for:
    1. Registered harvest dates (if available)
    2. Predicted harvest dates (if available)

    Args:
        field_data (pd.DataFrame): Field data with Date, CI columns
        field_id (str): Field identifier
        registered_harvest_dates (list): Registered harvest dates (pd.Timestamp)
        predicted_harvest_dates (list): Predicted harvest dates (dict or pd.Timestamp)
        output_dir (Path): Output directory for plots
        tiff_dir (Path): Directory containing TIFF files
        geojson_path (Path): Path to pivot.geojson

    Returns:
        dict: Summary of generated plots with keys 'registered' and 'predicted'
    """
    if rasterio is None:
        print("  ⚠ Rasterio not available - skipping RGB visualization")
        return {'registered': [], 'predicted': []}

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    tiff_dir = Path(tiff_dir)
    geojson_path = Path(geojson_path)

    if not tiff_dir.exists():
        print(f"  ✗ TIFF directory not found: {tiff_dir}")
        return {'registered': [], 'predicted': []}
    if not geojson_path.exists():
        print(f"  ✗ GeoJSON not found: {geojson_path}")
        return {'registered': [], 'predicted': []}

    # Load the field boundary
    print(f"  Loading field boundary for {field_id}...")
    feature, field_boundary = load_field_boundaries(geojson_path, field_id)
    if field_boundary is None:
        print(f"  ✗ Could not load field boundary for {field_id}")
        return {'registered': [], 'predicted': []}

    results = {'registered': [], 'predicted': []}

    # Process registered harvest dates
    if registered_harvest_dates and len(registered_harvest_dates) > 0:
        print(f"  Processing {len(registered_harvest_dates)} registered harvest dates...")
        for i, harvest_date in enumerate(registered_harvest_dates):
            if pd.isna(harvest_date):
                continue
            print(f"    [{i+1}/{len(registered_harvest_dates)}] {harvest_date.strftime('%Y-%m-%d')}")
            output_path = create_temporal_rgb_grid(
                harvest_date, field_data, field_id, tiff_dir, field_boundary,
                title='Registered Harvest Validation',
                output_dir=output_dir,
                harvest_type='registered',
                model_name=None,
                harvest_index=i
            )
            if output_path:
                results['registered'].append(output_path)

    # Process predicted harvest dates, grouped by model
    if predicted_harvest_dates and len(predicted_harvest_dates) > 0:
        print(f"  Processing {len(predicted_harvest_dates)} predicted harvest dates...")

        # Group by model to track an index per model
        harvest_by_model = {}
        for harvest_info in predicted_harvest_dates:
            # Accept both dict and Timestamp formats
            if isinstance(harvest_info, dict):
                harvest_date = harvest_info.get('harvest_date')
                model_name = harvest_info.get('model_name', 'predicted')
            else:
                harvest_date = harvest_info
                model_name = 'predicted'
            harvest_by_model.setdefault(model_name, []).append(harvest_date)

        # Process each model's harvests
        overall_index = 1
        for model_name, harvest_dates in harvest_by_model.items():
            for model_harvest_idx, harvest_date in enumerate(harvest_dates):
                if pd.isna(harvest_date):
                    continue
                print(f"    [{overall_index}/{len(predicted_harvest_dates)}] {harvest_date.strftime('%Y-%m-%d')} ({model_name})")
                output_path = create_temporal_rgb_grid(
                    harvest_date, field_data, field_id, tiff_dir, field_boundary,
                    title=f'Predicted Harvest Validation ({model_name})',
                    output_dir=output_dir,
                    harvest_type='predicted',
                    model_name=model_name,
                    harvest_index=model_harvest_idx
                )
                if output_path:
                    results['predicted'].append(output_path)
                overall_index += 1

    return results
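
# Illustrative input shapes for generate_rgb_grids (dates and model names are
# hypothetical): predicted harvests may be plain Timestamps or dicts carrying
# a model name, matching the dict handling above.
def _predicted_dates_demo():
    return [
        {'harvest_date': pd.Timestamp('2024-06-15'), 'model_name': 'Original'},
        pd.Timestamp('2024-07-01'),  # bare Timestamp -> model defaults to 'predicted'
    ]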

if __name__ == '__main__':
    # Example usage
    print("RGB Visualization Tool")
    print("This module is intended to be imported and called from compare_307_models_production.py")
    print("\nExample:")
    print("  from rgb_visualization import generate_rgb_grids")
    print("  generate_rgb_grids(field_data, field_id, registered_dates, predicted_dates, output_dir, tiff_dir, geojson_path)")