# SmartCane/python_app/rgb_visualization.py

#!/usr/bin/env python
"""
RGB Visualization Tool for Harvest Date Validation
Creates 3x3 temporal grids showing satellite imagery around registered and predicted harvest dates.
Extracts RGB from 8-band Planet scope data and clips to field boundaries from GeoJSON.
Functions:
- load_field_boundaries(): Load field geometries from GeoJSON
- find_closest_tiff(): Find available TIFF file closest to target date
- load_and_clip_tiff_rgb(): Load TIFF, extract RGB, clip to field boundary
- create_temporal_grid(): Create 3x3 grid (4 pre-harvest, 1 near, 2-3 post-harvest)
- generate_rgb_grids(): Main orchestration function
Usage:
from rgb_visualization import generate_rgb_grids
generate_rgb_grids(field_data, field_id, registered_harvest_dates, predicted_harvest_dates, output_dir, tiff_dir, geojson_path)
"""
import json
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime, timedelta
import matplotlib
matplotlib.use('Agg') # Use non-interactive backend to avoid display hangs
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.colors import Normalize
import warnings
warnings.filterwarnings('ignore')
try:
    import rasterio
    from rasterio.mask import mask
    import shapely.geometry as shgeom
except ImportError:
    # Any of the three imports may fail (e.g. shapely missing while rasterio
    # is present). Null out ALL optional names so later `rasterio is None`
    # guards work and no name is left unbound.
    print("Warning: rasterio not available. RGB visualization will be skipped.")
    rasterio = None
    mask = None
    shgeom = None
def load_field_boundaries(geojson_path, field_id):
    """
    Load the boundary polygon for one field from a GeoJSON file.

    Scans every feature and matches field_id against the 'field' or
    'sub_field' property. For MultiPolygon geometries only the exterior
    ring of the first polygon is used.

    Args:
        geojson_path (Path): Path to pivot.geojson
        field_id (str): Field identifier (e.g., "13973")
    Returns:
        tuple: (GeoJSON feature dict, shapely.geometry.Polygon), or
            (None, None) when the field is missing or the file is unreadable.
    """
    wanted = str(field_id)
    try:
        with open(geojson_path) as fh:
            collection = json.load(fh)
        for feature in collection.get('features', []):
            props = feature.get('properties', {})
            # A feature matches on either its 'field' or 'sub_field' property.
            if wanted not in (str(props.get('field', '')), str(props.get('sub_field', ''))):
                continue
            geometry = feature.get('geometry')
            if not geometry:
                continue
            geom_type = geometry.get('type', '')
            coordinates = geometry.get('coordinates', [])
            if not coordinates:
                continue
            if geom_type == 'MultiPolygon':
                # coordinates[i] = [[exterior ring], [inner ring], ...];
                # take the exterior ring of the first polygon.
                ring = coordinates[0][0]
            elif geom_type == 'Polygon':
                # coordinates = [[exterior ring], [inner ring], ...]
                ring = coordinates[0]
            else:
                continue
            return feature, shgeom.Polygon(ring)
        print(f" ⚠ Field {field_id} not found in GeoJSON")
        return None, None
    except Exception as e:
        print(f" ✗ Error loading GeoJSON: {e}")
        return None, None
def find_overlapping_tiles(target_date, tiff_dir, field_boundary, days_window=60, exclude_dates=None, debug=False):
    """
    Find tile files that overlap the field for the date nearest target_date.

    Scans date-named subdirectories (YYYY-MM-DD or YYYY-MM-DD_suffix) of
    tiff_dir, then checks candidate dates in order of increasing distance
    from target_date until one yields tiles whose bounds intersect the field
    boundary. Dates in exclude_dates are skipped to ensure temporal
    diversity across repeated calls.

    Args:
        target_date (pd.Timestamp): Target date to find tiles near
        tiff_dir (Path): Directory containing per-date subdirectories of .tif tiles
        field_boundary (shapely.Polygon): Field boundary for overlap detection
        days_window (int): Max days to search before/after target.
            (Bug fix: this was previously accepted but silently ignored,
            making the callers' ±14d/±60d expansion logic a no-op; it is
            now enforced.)
        exclude_dates (iterable): Dates to skip (avoid repetition)
        debug (bool): Enable detailed debugging output
    Returns:
        tuple: (list of tile paths, actual_date, days_diff), or
            ([], None, None) when nothing suitable is found.
    """
    target_date = pd.Timestamp(target_date)
    tiff_dir = Path(tiff_dir)
    excluded = {pd.Timestamp(d) for d in (exclude_dates or [])}
    if not tiff_dir.exists():
        if debug:
            print(f" [DEBUG] TIFF dir does not exist: {tiff_dir}")
        return [], None, None
    # Build a map of every date directory that actually contains .tif files.
    available_dates = {}
    date_parse_errors = 0
    for date_dir in tiff_dir.iterdir():
        if not date_dir.is_dir():
            continue
        try:
            dir_name = date_dir.name
            tile_date = pd.Timestamp(dir_name.split('_')[0])
            # Include ALL tiles, regardless of size: small tiles may still
            # contain valid data for specific fields.
            tile_files = list(date_dir.glob('*.tif'))
            if tile_files:
                available_dates[tile_date] = (tile_files, dir_name)
        except Exception as e:
            date_parse_errors += 1
            if debug:
                print(f" [DEBUG] Failed to parse date from {date_dir.name}: {e}")
    if debug:
        print(f" [DEBUG] Found {len(available_dates)} dates with tile files, {date_parse_errors} parse errors")
        print(f" [DEBUG] Date range: {min(available_dates.keys()).strftime('%Y-%m-%d') if available_dates else 'N/A'} to {max(available_dates.keys()).strftime('%Y-%m-%d') if available_dates else 'N/A'}")
    if not available_dates:
        return [], None, None
    # Candidate dates ordered by distance from target, restricted to the
    # requested window (the fix for the ignored days_window parameter).
    sorted_dates = sorted(
        (d for d in available_dates if abs((d - target_date).days) <= days_window),
        key=lambda d: abs((d - target_date).days),
    )
    for search_date in sorted_dates:
        # Skip dates that were recently used (avoid temporal repetition).
        if search_date in excluded:
            continue
        tiles, dir_name = available_dates[search_date]
        days_diff = (search_date - target_date).days
        overlapping_tiles = []
        tile_check_errors = 0
        for tile_path in tiles:
            try:
                with rasterio.open(tile_path) as src:
                    tile_bounds = src.bounds
                    tile_geom = shgeom.box(*tile_bounds)
                    # Dump details for the first tile checked at this date.
                    if debug and len(overlapping_tiles) == 0 and tile_check_errors == 0:
                        print(f" [DEBUG] First tile check for {tile_path.name}:")
                        print(f" Tile bounds: {tile_bounds}")
                        print(f" Tile CRS: {src.crs}")
                        print(f" Field bounds: {field_boundary.bounds}")
                        print(f" Field geom type: {field_boundary.geom_type}")
                        print(f" Intersects: {tile_geom.intersects(field_boundary)}")
                    if tile_geom.intersects(field_boundary):
                        overlapping_tiles.append(tile_path)
            except Exception as e:
                tile_check_errors += 1
                if debug:
                    print(f" [DEBUG] Error checking tile {tile_path.name}: {e}")
        if debug:
            print(f" [DEBUG] Date {search_date.strftime('%Y-%m-%d')}: {len(tiles)} tiles, {len(overlapping_tiles)} overlap field, {tile_check_errors} errors")
        if overlapping_tiles:
            print(f" [FIND_TILES] Target: {target_date.strftime('%Y-%m-%d')}, Using: {search_date.strftime('%Y-%m-%d')} ({days_diff:+d}d), Tiles: {[Path(t).name for t in overlapping_tiles]}")
            return overlapping_tiles, search_date, days_diff
    # No date within the window yielded overlapping tiles.
    if debug:
        print(f" [DEBUG] No overlapping tiles found for {target_date.strftime('%Y-%m-%d')} within {len(sorted_dates)} searched dates")
    return [], None, None
def load_and_clip_tiff_rgb(tiff_path, field_boundary, rgb_bands=(1, 2, 3)):
    """
    Load a TIFF and return its RGB bands clipped to the field boundary.

    For merged_final_tif files (cloud-masked and filtered) the band layout is:
    Band 1: Red, Band 2: Green, Band 3: Blue, Band 4: NIR, Band 5: CI.

    Args:
        tiff_path (Path): Path to TIFF file
        field_boundary (shapely.Polygon): Field boundary for clipping
        rgb_bands (tuple): 1-indexed band numbers used as (R, G, B).
            (Bug fix: this parameter was previously accepted but ignored —
            bands (1, 2, 3) were always read. Default preserves behavior.)
    Returns:
        np.ndarray: RGB data (height, width, 3) float32 scaled 0-1,
            or None on any failure (no overlap, too few bands, read error).
    """
    if rasterio is None or field_boundary is None:
        return None
    try:
        bands_to_read = tuple(rgb_bands)
        with rasterio.open(tiff_path) as src:
            # The raster must contain every requested band.
            if src.count < max(bands_to_read):
                return None
            # Mask to the exact field polygon; crop=True trims to its bbox.
            geom = shgeom.mapping(field_boundary)
            try:
                masked_data, _ = mask(src, [geom], crop=True, indexes=list(bands_to_read))
                rgb = np.stack([masked_data[i] for i in range(len(bands_to_read))], axis=-1)
            except (ValueError, RuntimeError) as e:
                # Mask failed - field doesn't overlap this tile or geometry issue
                print(f" MASK ERROR on {Path(tiff_path).name}: {str(e)[:50]}")
                return None
        rgb = rgb.astype(np.float32)
        # Infer bit depth from the data's maximum and normalize to 0-1.
        max_val = np.nanmax(rgb)
        if max_val > 0:
            if max_val <= 255:
                # Max around 255 or less: assume 8-bit data stored as float.
                rgb = rgb / 255.0
            elif max_val <= 65535:
                # Max up to 65535: assume 16-bit data.
                rgb = rgb / 65535.0
            else:
                # Unknown scale: normalize by the observed maximum.
                rgb = rgb / max_val
        rgb = np.clip(rgb, 0, 1)
        # Entirely masked result (clouds / nodata) carries no usable pixels.
        if np.all(np.isnan(rgb)):
            return None
        # Remaining NaNs are cloud/invalid pixels; render them as black.
        return np.nan_to_num(rgb, nan=0.0)
    except Exception:
        return None
def load_and_composite_tiles_rgb(tile_paths, field_boundary):
    """
    Composite RGB imagery from several overlapping tiles into one image.

    Each tile is clipped to the field boundary; overlapping tiles are merged
    with a per-pixel maximum, then contrast-stretched so dim scenes stay
    visible.

    Args:
        tile_paths (list[Path]): List of tile file paths
        field_boundary (shapely.Polygon): Field boundary for clipping
    Returns:
        np.ndarray: Composited RGB data (height, width, 3) with values 0-1
            or None if error occurs
    """
    if rasterio is None or field_boundary is None or not tile_paths:
        return None
    try:
        clipped = [load_and_clip_tiff_rgb(p, field_boundary) for p in tile_paths]
        clipped = [arr for arr in clipped if arr is not None]
        if not clipped:
            return None
        if len(clipped) == 1:
            merged = clipped[0]
        else:
            try:
                # Same shape across tiles: per-pixel maximum composite.
                merged = np.max(np.stack(clipped, axis=0), axis=0)
            except ValueError:
                # Shapes differ (tiles masked to different field areas) —
                # fall back to the largest, most complete clip.
                merged = max(clipped, key=lambda arr: arr.size)
        merged = merged.astype(np.float32)
        # Percentile-based contrast stretch to 0-1 (2nd/98th percentiles
        # to handle outliers); makes dim images visible.
        nonzero = merged[merged > 0]
        if len(nonzero) > 0:
            low = np.percentile(nonzero, 2)
            high = np.percentile(nonzero, 98)
            if high > low:
                merged = (merged - low) / (high - low)
                merged = np.clip(merged, 0, 1)
        return merged.astype(np.float32)
    except Exception:
        return None
def has_valid_rgb_data(rgb_data, threshold=0.05):
    """
    Check if an RGB image holds actual data (not black/empty).

    An image counts as valid when its brightest value exceeds ``threshold``
    AND there is at least minimal variation (max - min > 0.01), which
    filters out all-zero and uniformly-masked clips.

    Args:
        rgb_data (np.ndarray | None): RGB array scaled 0-1, or None.
        threshold (float): Minimum brightness required to count as data.
    Returns:
        bool: True if the image contains usable data.
    """
    if rgb_data is None:
        return False
    try:
        data_max = np.nanmax(rgb_data)
        data_min = np.nanmin(rgb_data)
        return bool(data_max > threshold and (data_max - data_min) > 0.01)
    except Exception:
        # Narrowed from a bare `except:` — covers empty arrays (nanmax
        # raises ValueError) and non-numeric input, without swallowing
        # KeyboardInterrupt/SystemExit.
        return False
def _find_anchor_image(harvest_date, tiff_dir, field_boundary):
    """Find the anchor image closest to the harvest date.

    Searches ±14 days first; if nothing valid turns up, widens to ±60 days
    while excluding dates whose imagery already failed validation.

    Returns:
        tuple: (rgb, anchor_date, days_diff) or (None, None, None).
    """
    failed_dates = []
    tile_paths, found_date, days_diff = find_overlapping_tiles(
        harvest_date, tiff_dir, field_boundary, days_window=14, exclude_dates=[], debug=False
    )
    if tile_paths and found_date:
        rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary)
        if rgb is not None and has_valid_rgb_data(rgb):
            print(f" ✓ ANCHOR FOUND (±14d): {found_date.strftime('%Y-%m-%d')} ({days_diff:+d}d from predicted harvest)")
            return rgb, found_date, days_diff
        # Date exists but its imagery is empty — exclude it from the retry.
        failed_dates.append(found_date)
        print(f" ⚠ Found date {found_date.strftime('%Y-%m-%d')} within ±14d, but image has no valid data")
    else:
        print(f" ⚠ No tiles found within ±14 days, expanding search...")
    print(f" [RETRY] Expanding anchor search to ±60 days (excluding failed dates)...")
    tile_paths, found_date, days_diff = find_overlapping_tiles(
        harvest_date, tiff_dir, field_boundary, days_window=60, exclude_dates=failed_dates, debug=False
    )
    if tile_paths and found_date:
        rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary)
        if rgb is not None and has_valid_rgb_data(rgb):
            print(f" ✓ ANCHOR FOUND (±60d): {found_date.strftime('%Y-%m-%d')} ({days_diff:+d}d from predicted harvest)")
            return rgb, found_date, days_diff
        print(f" ✗ No valid anchor found even within ±60 days")
    else:
        print(f" ✗ No tiles found for any date within ±60 days")
    return None, None, None


def _collect_neighbor_images(anchor_date, tiff_dir, field_boundary, used_dates,
                             grid_positions, direction, stop_fn):
    """Collect valid images on one side of the anchor with flexible spacing.

    Walks away from the anchor one image at a time; for each grid slot it
    tries progressively larger gaps (7, 10, 15, ... 120 days) from the last
    image found until a date with valid imagery appears. Stops when a slot
    cannot be filled or the search leaves the allowed date range.

    Args:
        anchor_date (pd.Timestamp): Date of the anchor image.
        used_dates (set): Dates already placed in the grid (mutated in place).
        grid_positions (list[int]): Grid slot indices, in fill order.
        direction (int): -1 to search before the anchor, +1 after.
        stop_fn (callable): stop_fn(date) -> True when the date is out of range.
    Returns:
        list[tuple]: (position, date, rgb, days offset from anchor) entries.
    """
    collected = []
    last_date = anchor_date
    pos_idx = 0
    # Progressive gaps to try between consecutive grid images.
    search_offsets = [7, 10, 15, 20, 30, 40, 60, 90, 120]
    side = 'Before' if direction < 0 else 'After'
    while pos_idx < len(grid_positions) and not stop_fn(last_date):
        found = False
        for gap in search_offsets:
            target = last_date + timedelta(days=direction * gap)
            if stop_fn(target):
                # Larger gaps only move further out of range — give up on this slot.
                break
            tile_paths, actual_date, _diff = find_overlapping_tiles(
                target, tiff_dir, field_boundary, days_window=60,
                exclude_dates=used_dates, debug=False
            )
            if tile_paths and actual_date:
                rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary)
                if rgb is not None and has_valid_rgb_data(rgb):
                    offset = (actual_date - anchor_date).days
                    collected.append((grid_positions[pos_idx], actual_date, rgb, offset))
                    used_dates.add(actual_date)
                    last_date = actual_date  # Continue walking from here.
                    print(f" ✓ {side}[{pos_idx}]: {actual_date.strftime('%Y-%m-%d')} ({offset:+d}d from anchor) - RGB: {np.nanmin(rgb):.4f}-{np.nanmax(rgb):.4f}")
                    pos_idx += 1
                    found = True
                    break  # Found one; stop trying larger gaps.
        if not found:
            break  # No gap produced a valid image; stop collecting this side.
    return collected


def create_temporal_rgb_grid(harvest_date, field_data, field_id, tiff_dir, field_boundary,
                             title, output_dir, harvest_type='registered', model_name=None,
                             harvest_index=None, min_search_year=2024):
    """
    Create a 5x3 temporal grid around the harvest date (15 images).

    An "anchor" image is located as close to the harvest date as possible
    (grid slot 8, outlined in red); slots 0-7 are filled walking backwards
    in time and slots 9-14 walking forwards, with flexible spacing that
    adapts to data availability.

    Args:
        harvest_date (pd.Timestamp): Target harvest date
        field_data (pd.DataFrame): Field data with Date column (currently unused here)
        field_id (str): Field identifier
        tiff_dir (Path): Directory with TIFF files
        field_boundary (shapely.Polygon): Field boundary
        title (str): Plot title
        output_dir (Path): Output directory
        harvest_type (str): 'registered' or 'predicted'
        model_name (str): Model name for predicted harvests (e.g., 'Original')
        harvest_index (int): Index of harvest within same model (for multiple harvests)
        min_search_year (int): Earliest year to search backwards into
            (generalizes the previously hard-coded 2024 archive bound).
    Returns:
        Path: Path to saved PNG or None if failed
    """
    harvest_date = pd.Timestamp(harvest_date)
    tiff_dir = Path(tiff_dir)
    # Pre-allocate the 15 grid slots.
    rgb_images = [None] * 15
    days_offsets = [None] * 15
    actual_dates = [None] * 15
    used_dates = set()  # Dates already placed; set for O(1) exclusion lookups.

    # STEP 0: report which dates have imagery at all (debug aid).
    print(f" [STEP 0] Checking available TIFF dates in {tiff_dir}...")
    available_dates = []
    if tiff_dir.exists():
        for date_folder in sorted(tiff_dir.iterdir()):
            if date_folder.is_dir():
                try:
                    # Folder names may carry a suffix (YYYY-MM-DD_xxx); parse the
                    # date prefix the same way find_overlapping_tiles does.
                    available_dates.append(
                        datetime.strptime(date_folder.name.split('_')[0], '%Y-%m-%d').date()
                    )
                except ValueError:
                    pass
    print(f" Found {len(available_dates)} dates with data: {available_dates[:5]}... (showing first 5)")

    # STEP 1: find the anchor image (closest to the harvest date).
    print(f" [STEP 1] Finding anchor (closest to harvest {harvest_date.strftime('%Y-%m-%d')}, searching ±14 days)...")
    anchor_idx = 8  # Center slot (row 2, col 4) of the 5x3 grid.
    anchor_rgb, anchor_date, _anchor_diff = _find_anchor_image(harvest_date, tiff_dir, field_boundary)
    if anchor_rgb is not None:
        rgb_images[anchor_idx] = anchor_rgb
        days_offsets[anchor_idx] = 0  # The anchor is the temporal reference point.
        actual_dates[anchor_idx] = anchor_date
        used_dates.add(anchor_date)

    # STEPS 2-3 walk outwards from the anchor; without one they are skipped.
    # (Bug fix: previously a missing anchor crashed on `None.year` /
    # `None + timedelta`; now the grid is simply rendered with empty panels.)
    if anchor_date is not None:
        print(f" [STEP 2] Collecting images BEFORE anchor (going backwards, flexible spacing)...")
        before = _collect_neighbor_images(
            anchor_date, tiff_dir, field_boundary, used_dates,
            grid_positions=[7, 6, 5, 4, 3, 2, 1, 0], direction=-1,
            stop_fn=lambda d: d.year < min_search_year,
        )
        print(f" [STEP 3] Collecting images AFTER anchor (going forwards, flexible spacing)...")
        max_search_date = anchor_date + timedelta(days=200)  # Hard forward limit.
        after = _collect_neighbor_images(
            anchor_date, tiff_dir, field_boundary, used_dates,
            grid_positions=[9, 10, 11, 12, 13, 14], direction=1,
            stop_fn=lambda d: d > max_search_date,
        )
        for pos, when, rgb, offset in before + after:
            rgb_images[pos] = rgb
            actual_dates[pos] = when
            days_offsets[pos] = offset

    # Render the 5x3 figure (15 panels).
    fig, axes = plt.subplots(3, 5, figsize=(25, 15))
    anchor_actual = actual_dates[8]
    anchor_offset_from_harvest = (anchor_actual - harvest_date).days if anchor_actual is not None else None
    if anchor_offset_from_harvest is not None and anchor_offset_from_harvest != 0:
        anchor_info = f"(Anchor: {anchor_actual.strftime('%Y-%m-%d')}, {anchor_offset_from_harvest:+d}d from predicted harvest)"
    elif anchor_actual is not None:
        anchor_info = f"(Exact match with anchor: {anchor_actual.strftime('%Y-%m-%d')})"
    else:
        anchor_info = ""
    fig.suptitle(f'{title}\nField {field_id} - {harvest_type.upper()} Harvest: {harvest_date.strftime("%Y-%m-%d")} {anchor_info}',
                 fontsize=16, fontweight='bold')
    # Nominal grid labels; the real acquisition dates/offsets go in each panel title.
    positions = [
        ('T-56d', 0, 0), ('T-49d', 0, 1), ('T-42d', 0, 2), ('T-35d', 0, 3), ('T-28d', 0, 4),
        ('T-21d', 1, 0), ('T-14d', 1, 1), ('T-7d', 1, 2), ('HARVEST', 1, 3), ('T+7d', 1, 4),
        ('T+14d', 2, 0), ('T+21d', 2, 1), ('T+28d', 2, 2), ('T+35d', 2, 3), ('T+42d', 2, 4),
    ]
    for idx, (label, row, col) in enumerate(positions):
        ax = axes[row, col]
        if idx < len(rgb_images) and rgb_images[idx] is not None:
            rgb_data = rgb_images[idx]
            # Per-panel and per-band stats help diagnose normalization issues.
            data_min = np.nanmin(rgb_data)
            data_max = np.nanmax(rgb_data)
            data_mean = np.nanmean(rgb_data)
            data_std = np.nanstd(rgb_data)
            r_min, r_max, r_mean = np.nanmin(rgb_data[:, :, 0]), np.nanmax(rgb_data[:, :, 0]), np.nanmean(rgb_data[:, :, 0])
            g_min, g_max, g_mean = np.nanmin(rgb_data[:, :, 1]), np.nanmax(rgb_data[:, :, 1]), np.nanmean(rgb_data[:, :, 1])
            b_min, b_max, b_mean = np.nanmin(rgb_data[:, :, 2]), np.nanmax(rgb_data[:, :, 2]), np.nanmean(rgb_data[:, :, 2])
            print(f" DEBUG VALID {label} ({actual_dates[idx].strftime('%Y-%m-%d')}): RGB overall {data_min:.4f}-{data_max:.4f} (mean={data_mean:.4f}, std={data_std:.4f})")
            print(f" R: {r_min:.4f}-{r_max:.4f} (μ={r_mean:.4f}), G: {g_min:.4f}-{g_max:.4f} (μ={g_mean:.4f}), B: {b_min:.4f}-{b_max:.4f} (μ={b_mean:.4f})")
            # Data is already normalized 0-1, so pin the display range explicitly.
            ax.imshow(rgb_data, vmin=0, vmax=1)
            if days_offsets[idx] is not None:
                offset_from_anchor = days_offsets[idx]
                offset_from_harvest = (actual_dates[idx] - harvest_date).days
                if idx == 8:  # ANCHOR/HARVEST panel
                    if offset_from_harvest == 0:
                        offset_str = f"HARVEST\n(Image: {actual_dates[idx].strftime('%Y-%m-%d')})"
                    else:
                        offset_str = f"HARVEST\n(Image: {actual_dates[idx].strftime('%Y-%m-%d')}, {offset_from_harvest:+d}d from predicted)"
                else:
                    # Show both offsets: from anchor and from harvest.
                    offset_str = f"{offset_from_anchor:+d}d from anchor\n{offset_from_harvest:+d}d from harvest\n{actual_dates[idx].strftime('%Y-%m-%d')}"
            else:
                offset_str = "No Data"
            ax.set_title(offset_str, fontsize=9, fontweight='bold')
        else:
            ax.text(0.5, 0.5, 'No Data', ha='center', va='center', fontsize=12, color='gray')
            ax.set_title('No Data', fontsize=10)
            print(f" DEBUG EMPTY {label}: No image data collected")
        # Red frame marks the anchor panel whether or not it holds data.
        if idx == 8:
            for spine in ax.spines.values():
                spine.set_edgecolor('red')
                spine.set_linewidth(4)
        ax.set_xticks([])
        ax.set_yticks([])
    plt.tight_layout()

    # Filename: field_<id>_<date>_<registered|model>_harvest[<i>]_rgb.png
    harvest_date_str = harvest_date.strftime('%Y%m%d')
    if harvest_type == 'registered':
        filename = f'field_{field_id}_{harvest_date_str}_registered_harvest_rgb.png'
    elif harvest_index is not None and harvest_index > 0:
        filename = f'field_{field_id}_{harvest_date_str}_{model_name}_harvest{harvest_index}_rgb.png'
    else:
        filename = f'field_{field_id}_{harvest_date_str}_{model_name}_harvest_rgb.png'
    output_path = Path(output_dir) / filename
    try:
        plt.savefig(output_path, dpi=100, format='png')
        plt.close()
        # Bug fix: report the real output path (was a hard-coded "(unknown)").
        print(f" ✓ Saved: {output_path}")
        return output_path
    except Exception as e:
        plt.close()
        print(f" ✗ Error saving PNG: {e}")
        return None
def generate_rgb_grids(field_data, field_id, registered_harvest_dates, predicted_harvest_dates,
                       output_dir, tiff_dir, geojson_path):
    """
    Main orchestration function for RGB visualization.

    Creates temporal grids for:
    1. Registered harvest dates (if available)
    2. Predicted harvest dates (if available), grouped per model

    Args:
        field_data (pd.DataFrame): Field data with Date, CI columns
        field_id (str): Field identifier
        registered_harvest_dates (list): Registered harvest dates (pd.Timestamp)
        predicted_harvest_dates (list): Predicted harvest dates (dict or pd.Timestamp)
        output_dir (Path): Output directory for plots
        tiff_dir (Path): Directory containing TIFF files
        geojson_path (Path): Path to pivot.geojson
    Returns:
        dict: Summary of generated plots with keys 'registered' and 'predicted'
    """
    nothing = {'registered': [], 'predicted': []}
    if rasterio is None:
        print(" ⚠ Rasterio not available - skipping RGB visualization")
        return nothing
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    tiff_dir = Path(tiff_dir)
    geojson_path = Path(geojson_path)
    if not tiff_dir.exists():
        print(f" ✗ TIFF directory not found: {tiff_dir}")
        return nothing
    if not geojson_path.exists():
        print(f" ✗ GeoJSON not found: {geojson_path}")
        return nothing
    # Load the field boundary polygon used for clipping every image.
    print(f" Loading field boundary for {field_id}...")
    _feature, field_boundary = load_field_boundaries(geojson_path, field_id)
    if field_boundary is None:
        print(f" ✗ Could not load field boundary for {field_id}")
        return nothing
    results = {'registered': [], 'predicted': []}
    # --- Registered harvest dates ---
    if registered_harvest_dates and len(registered_harvest_dates) > 0:
        print(f" Processing {len(registered_harvest_dates)} registered harvest dates...")
        for i, when in enumerate(registered_harvest_dates):
            if pd.isna(when):
                continue
            print(f" [{i+1}/{len(registered_harvest_dates)}] {when.strftime('%Y-%m-%d')}")
            saved = create_temporal_rgb_grid(
                when, field_data, field_id, tiff_dir, field_boundary,
                title='Registered Harvest Validation',
                output_dir=output_dir,
                harvest_type='registered',
                model_name=None,
                harvest_index=i
            )
            if saved:
                results['registered'].append(saved)
    # --- Predicted harvest dates, grouped by model ---
    if predicted_harvest_dates and len(predicted_harvest_dates) > 0:
        print(f" Processing {len(predicted_harvest_dates)} predicted harvest dates...")
        # Group per model so each model's harvest index restarts at 0.
        harvest_by_model = {}
        for entry in predicted_harvest_dates:
            # Entries may be dicts ({'harvest_date', 'model_name'}) or bare timestamps.
            if isinstance(entry, dict):
                when = entry.get('harvest_date')
                model = entry.get('model_name', 'predicted')
            else:
                when, model = entry, 'predicted'
            harvest_by_model.setdefault(model, []).append(when)
        overall_index = 1
        for model, dates in harvest_by_model.items():
            for model_harvest_idx, when in enumerate(dates):
                if pd.isna(when):
                    continue
                print(f" [{overall_index}/{len(predicted_harvest_dates)}] {when.strftime('%Y-%m-%d')} ({model})")
                saved = create_temporal_rgb_grid(
                    when, field_data, field_id, tiff_dir, field_boundary,
                    title=f'Predicted Harvest Validation ({model})',
                    output_dir=output_dir,
                    harvest_type='predicted',
                    model_name=model,
                    harvest_index=model_harvest_idx
                )
                if saved:
                    results['predicted'].append(saved)
                overall_index += 1
    return results
if __name__ == '__main__':
    # This module is a library; running it directly only prints usage help.
    print(
        "RGB Visualization Tool\n"
        "This module is intended to be imported and called from compare_307_models_production.py\n"
        "\nExample:\n"
        " from rgb_visualization import generate_rgb_grids\n"
        " generate_rgb_grids(field_data, field_id, registered_dates, predicted_dates, output_dir, tiff_dir, geojson_path)"
    )