792 lines
35 KiB
Python
792 lines
35 KiB
Python
#!/usr/bin/env python
|
|
"""
|
|
RGB Visualization Tool for Harvest Date Validation
|
|
|
|
Creates 3x3 temporal grids showing satellite imagery around registered and predicted harvest dates.
|
|
Extracts RGB from 8-band Planet scope data and clips to field boundaries from GeoJSON.
|
|
|
|
Functions:
|
|
- load_field_boundaries(): Load field geometries from GeoJSON
|
|
- find_closest_tiff(): Find available TIFF file closest to target date
|
|
- load_and_clip_tiff_rgb(): Load TIFF, extract RGB, clip to field boundary
|
|
- create_temporal_grid(): Create 3x3 grid (4 pre-harvest, 1 near, 2-3 post-harvest)
|
|
- generate_rgb_grids(): Main orchestration function
|
|
|
|
Usage:
|
|
from rgb_visualization import generate_rgb_grids
|
|
generate_rgb_grids(field_data, field_id, registered_harvest_dates, predicted_harvest_dates, output_dir, tiff_dir, geojson_path)
|
|
"""
|
|
|
|
import json
|
|
import numpy as np
|
|
import pandas as pd
|
|
from pathlib import Path
|
|
from datetime import datetime, timedelta
|
|
import matplotlib
|
|
matplotlib.use('Agg') # Use non-interactive backend to avoid display hangs
|
|
import matplotlib.pyplot as plt
|
|
import matplotlib.patches as patches
|
|
from matplotlib.colors import Normalize
|
|
|
|
import warnings
|
|
warnings.filterwarnings('ignore')
|
|
|
|
# Geospatial dependencies are optional: when rasterio/shapely are missing the
# module still imports, and generate_rgb_grids() degrades to a no-op.
try:
    import rasterio
    from rasterio.mask import mask
    import shapely.geometry as shgeom
except ImportError:
    print("Warning: rasterio not available. RGB visualization will be skipped.")
    # Define ALL three names so later references fail predictably (AttributeError
    # on None, usually inside a guarded try) instead of raising NameError.
    # The original only set `rasterio`, leaving `mask`/`shgeom` undefined.
    rasterio = None
    mask = None
    shgeom = None
|
|
|
|
|
|
def load_field_boundaries(geojson_path, field_id):
    """
    Load a single field boundary from a GeoJSON file.

    A feature matches when its 'field' or 'sub_field' property equals
    *field_id* (compared as strings). For MultiPolygon geometries only the
    exterior ring of the first polygon is used; for Polygon geometries only
    the exterior ring (holes are ignored).

    Args:
        geojson_path (Path): Path to pivot.geojson
        field_id (str): Field identifier (e.g., "13973")

    Returns:
        tuple: (GeoJSON feature dict, shapely.geometry.Polygon), or
        (None, None) when the field is missing or the file cannot be read.
    """
    wanted = str(field_id)
    try:
        with open(geojson_path) as fh:
            geojson_data = json.load(fh)

        for feature in geojson_data.get('features', []):
            properties = feature.get('properties', {})
            matched = (str(properties.get('field', '')) == wanted
                       or str(properties.get('sub_field', '')) == wanted)
            if not matched:
                continue

            geometry = feature.get('geometry')
            if not geometry:
                continue

            coordinates = geometry.get('coordinates', [])
            if not coordinates:
                continue

            geom_type = geometry.get('type', '')
            if geom_type == 'MultiPolygon':
                # coordinates[i] = [[exterior ring], [hole], ...]; take the
                # first polygon's exterior ring only.
                exterior_ring = coordinates[0][0]
            elif geom_type == 'Polygon':
                # coordinates = [[exterior ring], [hole], ...]
                exterior_ring = coordinates[0]
            else:
                continue  # Unsupported geometry type: keep scanning

            return feature, shgeom.Polygon(exterior_ring)

        print(f"    ⚠ Field {field_id} not found in GeoJSON")
        return None, None
    except Exception as e:
        print(f"    ✗ Error loading GeoJSON: {e}")
        return None, None
|
|
|
|
|
|
def find_overlapping_tiles(target_date, tiff_dir, field_boundary, days_window=60, exclude_dates=None, debug=False):
    """
    Find tile files with actual data (not cloud-masked) nearest to target_date.

    Candidate dates are searched by increasing distance from the target, but
    only within ±days_window days. (Previously days_window was documented and
    passed by callers yet never enforced, which made the callers' "retry with a
    wider window" logic dead code.) Dates in exclude_dates are skipped to keep
    temporal diversity in the grids.

    Args:
        target_date (pd.Timestamp-like): Target date to find tiles near
        tiff_dir (Path): Directory containing per-date subdirectories
        field_boundary (shapely.Polygon): Field boundary for overlap detection
        days_window (int): Max days to search before/after target
        exclude_dates (iterable): Dates to skip (avoid repetition)
        debug (bool): Enable detailed debugging output

    Returns:
        tuple: (list of tile paths, actual_date, days_diff);
        ([], None, None) when nothing usable is found.
    """
    target_date = pd.Timestamp(target_date)
    tiff_dir = Path(tiff_dir)
    # Normalize exclusions to a set of Timestamps for O(1) membership tests
    excluded = {pd.Timestamp(d) for d in (exclude_dates or [])}

    if not tiff_dir.exists():
        if debug:
            print(f"    [DEBUG] TIFF dir does not exist: {tiff_dir}")
        return [], None, None

    # Map each parseable date to its tile files: {Timestamp: ([paths], dir_name)}
    available_dates = {}
    date_parse_errors = 0

    for date_dir in tiff_dir.iterdir():
        if not date_dir.is_dir():
            continue
        try:
            dir_name = date_dir.name
            # Directory names look like 'YYYY-MM-DD' or 'YYYY-MM-DD_suffix'
            tile_date = pd.Timestamp(dir_name.split('_')[0])

            # Include ALL tiles, regardless of size: some tiles may be small
            # but still contain valid data for specific fields.
            tile_files = list(date_dir.glob('*.tif'))

            if tile_files:
                available_dates[tile_date] = (tile_files, dir_name)
        except Exception as e:
            date_parse_errors += 1
            if debug:
                print(f"    [DEBUG] Failed to parse date from {date_dir.name}: {e}")

    if debug:
        print(f"    [DEBUG] Found {len(available_dates)} dates with tile files, {date_parse_errors} parse errors")
        print(f"    [DEBUG] Date range: {min(available_dates.keys()).strftime('%Y-%m-%d') if available_dates else 'N/A'} to {max(available_dates.keys()).strftime('%Y-%m-%d') if available_dates else 'N/A'}")

    if not available_dates:
        return [], None, None

    # Candidate dates ordered by distance from target, restricted to the window
    candidates = sorted(
        (d for d in available_dates if abs((d - target_date).days) <= days_window),
        key=lambda d: abs((d - target_date).days)
    )

    for search_date in candidates:
        # Skip dates already used elsewhere in the grid (temporal diversity)
        if search_date in excluded:
            continue

        tiles, dir_name = available_dates[search_date]
        days_diff = (search_date - target_date).days

        overlapping_tiles = []
        tile_check_errors = 0

        for tile_path in tiles:
            try:
                with rasterio.open(tile_path) as src:
                    tile_bounds = src.bounds
                    tile_geom = shgeom.box(*tile_bounds)

                    # Verbose dump for the first tile examined at this date
                    if debug and len(overlapping_tiles) == 0 and tile_check_errors == 0:
                        print(f"    [DEBUG] First tile check for {tile_path.name}:")
                        print(f"      Tile bounds: {tile_bounds}")
                        print(f"      Tile CRS: {src.crs}")
                        print(f"      Field bounds: {field_boundary.bounds}")
                        print(f"      Field geom type: {field_boundary.geom_type}")
                        print(f"      Intersects: {tile_geom.intersects(field_boundary)}")

                    # Bounding-box intersection only; precise clipping happens
                    # later in load_and_clip_tiff_rgb()
                    if tile_geom.intersects(field_boundary):
                        overlapping_tiles.append(tile_path)
            except Exception as e:
                tile_check_errors += 1
                if debug:
                    print(f"    [DEBUG] Error checking tile {tile_path.name}: {e}")

        if debug:
            print(f"    [DEBUG] Date {search_date.strftime('%Y-%m-%d')}: {len(tiles)} tiles, {len(overlapping_tiles)} overlap field, {tile_check_errors} errors")

        if overlapping_tiles:
            # Found overlapping tiles at the closest acceptable date
            print(f"    [FIND_TILES] Target: {target_date.strftime('%Y-%m-%d')}, Using: {search_date.strftime('%Y-%m-%d')} ({days_diff:+d}d), Tiles: {[Path(t).name for t in overlapping_tiles]}")
            return overlapping_tiles, search_date, days_diff

    # No overlapping tiles found within the window
    if debug:
        print(f"    [DEBUG] No overlapping tiles found for {target_date.strftime('%Y-%m-%d')} within {len(candidates)} searched dates")

    return [], None, None
|
|
|
|
|
|
def load_and_clip_tiff_rgb(tiff_path, field_boundary, rgb_bands=(1, 2, 3)):
    """
    Load a TIFF and extract RGB bands clipped to a field boundary.

    For merged_final_tif files (cloud-masked and filtered) the band layout is:
    1=Red, 2=Green, 3=Blue, 4=NIR, 5=CI — hence the (1, 2, 3) default.

    Args:
        tiff_path (Path): Path to TIFF file
        field_boundary (shapely.Polygon): Field boundary for clipping
        rgb_bands (tuple): 1-indexed band numbers for (R, G, B). NOTE: this
            parameter was previously accepted but silently ignored (bands
            1,2,3 were hard-coded); it is now honored. The default preserves
            the old behavior.

    Returns:
        np.ndarray: RGB data (height, width, 3) with values 0-1, or None on
        error / no overlap / all-NaN result.
    """
    if rasterio is None or field_boundary is None:
        return None

    bands_to_read = tuple(rgb_bands)[:3]

    try:
        with rasterio.open(tiff_path) as src:
            # Every requested band must exist in the file
            if src.count < max(bands_to_read):
                return None

            # Mask and read bands - extract ONLY the specific field polygon
            geom = shgeom.mapping(field_boundary)
            try:
                masked_data, _ = mask(src, [geom], crop=True, indexes=list(bands_to_read))
                rgb = np.stack([masked_data[i] for i in range(3)], axis=-1)
            except (ValueError, RuntimeError) as e:
                # Mask failed - field doesn't overlap this tile or geometry issue
                print(f"      MASK ERROR on {Path(tiff_path).name}: {str(e)[:50]}")
                return None

            rgb = rgb.astype(np.float32)

            # Normalize to 0-1: infer bit depth from the observed maximum
            # (data appears to be 8-bit stored as float32, but be defensive)
            max_val = np.nanmax(rgb)
            if max_val > 0:
                if max_val <= 255:          # assume 8-bit
                    rgb = rgb / 255.0
                elif max_val <= 65535:      # assume 16-bit
                    rgb = rgb / 65535.0
                else:                       # unknown scale: normalize by max
                    rgb = rgb / max_val

            rgb = np.clip(rgb, 0, 1)

            # An all-NaN result means the clip produced no usable pixels
            if np.all(np.isnan(rgb)):
                return None

            # Remaining NaN = cloud/invalid pixels; render them as black
            return np.nan_to_num(rgb, nan=0.0)
    except Exception:
        # Any other rasterio/geometry failure: treat as "no image available"
        return None
|
|
|
|
|
|
def load_and_composite_tiles_rgb(tile_paths, field_boundary):
    """
    Load RGB from multiple overlapping tiles and composite them into one image.

    Each tile is clipped to the field with load_and_clip_tiff_rgb(); same-shape
    clips are merged with a per-pixel maximum, otherwise the largest (most
    complete) clip wins. A 2nd-98th percentile contrast stretch is applied so
    dim scenes stay visible.

    Args:
        tile_paths (list[Path]): Tile file paths
        field_boundary (shapely.Polygon): Field boundary for clipping

    Returns:
        np.ndarray: Composited RGB data (height, width, 3) with values 0-1,
        or None if nothing could be loaded.
    """
    if rasterio is None or field_boundary is None or not tile_paths:
        return None

    try:
        clips = [load_and_clip_tiff_rgb(p, field_boundary) for p in tile_paths]
        clips = [arr for arr in clips if arr is not None]
        if not clips:
            return None

        if len(clips) == 1:
            merged = clips[0]
        else:
            try:
                # Same-shape clips: per-pixel maximum composite
                merged = np.max(np.stack(clips, axis=0), axis=0)
            except ValueError:
                # Shapes differ (tiles masked to different field extents):
                # fall back to the largest, most complete clip.
                merged = max(clips, key=lambda arr: arr.size)

        merged = merged.astype(np.float32)

        # Contrast stretch on non-zero pixels; percentiles dodge outliers
        nonzero = merged[merged > 0]
        if len(nonzero) > 0:
            lo = np.percentile(nonzero, 2)
            hi = np.percentile(nonzero, 98)
            if hi > lo:
                merged = (merged - lo) / (hi - lo)
                merged = np.clip(merged, 0, 1)

        return merged.astype(np.float32)

    except Exception:
        return None
|
|
|
|
|
|
def has_valid_rgb_data(rgb_data, threshold=0.05):
    """
    Check whether an RGB image contains usable (non-black, non-flat) data.

    Args:
        rgb_data (np.ndarray | None): RGB array, values expected in [0, 1].
        threshold (float): Minimum max-value for the image to count as lit.

    Returns:
        bool: True when max > threshold AND the dynamic range exceeds 0.01
        (i.e. the image is neither black nor a flat constant).
    """
    if rgb_data is None:
        return False

    try:
        data_max = np.nanmax(rgb_data)
        data_min = np.nanmin(rgb_data)
    except (TypeError, ValueError):
        # Non-numeric input or empty array (nanmax raises ValueError):
        # treat as "no data". Narrowed from a bare `except:` that hid bugs.
        return False

    # Valid only if bright enough AND showing some variation
    return bool(data_max > threshold and (data_max - data_min) > 0.01)
|
|
|
|
|
|
def create_temporal_rgb_grid(harvest_date, field_data, field_id, tiff_dir, field_boundary,
                             title, output_dir, harvest_type='registered', model_name=None, harvest_index=None):
    """
    Create a 5x3 temporal grid (15 images) anchored on the harvest date.

    Slot 8 (center of row 2) is the "anchor": the image with valid data closest
    to the harvest date. Slots 0-7 are filled walking BACKWARDS from the anchor
    and slots 9-14 walking FORWARDS, with flexible spacing (7d first, widening
    up to 120d between consecutive images). The T-56d ... T+42d grid labels are
    therefore nominal; actual offsets are printed on each tile.

    Args:
        harvest_date (pd.Timestamp): Target harvest date
        field_data (pd.DataFrame): Field data with Date column (unused here)
        field_id (str): Field identifier
        tiff_dir (Path): Directory with per-date TIFF subdirectories
        field_boundary (shapely.Polygon): Field boundary
        title (str): Plot title
        output_dir (Path): Output directory
        harvest_type (str): 'registered' or 'predicted'
        model_name (str): Model name for predicted harvests (e.g., 'Original')
        harvest_index (int): Index of harvest within same model (for multiples)

    Returns:
        Path: Path to saved PNG, or None if saving failed.
    """
    harvest_date = pd.Timestamp(harvest_date)

    # Pre-allocate 15 grid slots (5 columns x 3 rows); slot 8 is the anchor
    rgb_images = [None] * 15
    days_offsets = [None] * 15
    actual_dates = [None] * 15
    used_dates = set()  # Dates already placed in the grid (O(1) membership)

    # --- STEP 0: list available TIFF dates (debug aid only) ---
    print(f"  [STEP 0] Checking available TIFF dates in {tiff_dir}...")
    available_dates = []
    if tiff_dir.exists():
        for date_folder in sorted(tiff_dir.iterdir()):
            if date_folder.is_dir():
                try:
                    # Strict '%Y-%m-%d' parse: suffixed folders (e.g.
                    # '2024-05-01_x') are omitted from this listing even though
                    # find_overlapping_tiles() can still use them.
                    date_obj = datetime.strptime(date_folder.name, '%Y-%m-%d').date()
                    available_dates.append(date_obj)
                except ValueError:
                    # Narrowed from a bare `except:`; strptime raises ValueError
                    pass
    print(f"    Found {len(available_dates)} dates with data: {available_dates[:5]}... (showing first 5)")

    anchor_idx = 8  # Center slot (T~0d / harvest date position)

    def _attempt_anchor(days_window, exclude, window_label):
        """Search ±days_window for the anchor image; fill slot 8 on success.

        Returns (found_date, ok): found_date is None when no tiles exist in
        the window; ok is True only when the loaded image passed validation.
        """
        tile_paths, found_date, days_diff = find_overlapping_tiles(
            harvest_date, tiff_dir, field_boundary,
            days_window=days_window, exclude_dates=exclude, debug=False
        )
        if not (tile_paths and found_date):
            return None, False
        rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary)
        if rgb is not None and has_valid_rgb_data(rgb):
            rgb_images[anchor_idx] = rgb
            days_offsets[anchor_idx] = 0  # Anchor is the reference point
            actual_dates[anchor_idx] = found_date
            used_dates.add(found_date)
            print(f"    ✓ ANCHOR FOUND ({window_label}): {found_date.strftime('%Y-%m-%d')} ({days_diff:+d}d from predicted harvest)")
            return found_date, True
        return found_date, False

    def _fetch_valid_image(search_target_date):
        """Find tiles near search_target_date (±60d, skipping used dates) and
        return (actual_date, rgb) when a valid image loads, else None."""
        tile_paths, found_date, _ = find_overlapping_tiles(
            search_target_date, tiff_dir, field_boundary,
            days_window=60, exclude_dates=used_dates, debug=False
        )
        if tile_paths and found_date:
            rgb = load_and_composite_tiles_rgb(tile_paths, field_boundary)
            if rgb is not None and has_valid_rgb_data(rgb):
                return found_date, rgb
        return None

    # --- STEP 1: find the anchor (image closest to the harvest date) ---
    # Try ±14 days first; on failure widen to ±60 days, excluding any date
    # whose imagery was found but failed validation.
    print(f"  [STEP 1] Finding anchor (closest to harvest {harvest_date.strftime('%Y-%m-%d')}, searching ±14 days)...")
    failed_anchor_dates = []  # Dates whose imagery failed validation
    anchor_date, anchor_ok = _attempt_anchor(14, [], '±14d')
    if not anchor_ok:
        if anchor_date is not None:
            failed_anchor_dates.append(anchor_date)
            print(f"    ⚠ Found date {anchor_date.strftime('%Y-%m-%d')} within ±14d, but image has no valid data")
            print(f"    [RETRY] Expanding anchor search to ±60 days (excluding failed dates)...")
        else:
            print(f"    ⚠ No tiles found within ±14 days, expanding search...")
        anchor_date, anchor_ok = _attempt_anchor(60, list(failed_anchor_dates), '±60d')
        if not anchor_ok:
            if anchor_date is not None:
                failed_anchor_dates.append(anchor_date)
                print(f"    ✗ No valid anchor found even within ±60 days")
            else:
                print(f"    ✗ No tiles found for any date within ±60 days")

    # Even when the anchor image failed validation, anchor_date (if any) still
    # serves as the temporal reference for the before/after walks below.

    # Progressive spacing between consecutive images (days)
    search_offsets = [7, 10, 15, 20, 30, 40, 60, 90, 120]

    # --- STEP 2: collect images BEFORE the anchor, walking backwards ---
    print(f"  [STEP 2] Collecting images BEFORE anchor (going backwards, flexible spacing)...")
    before_positions = [7, 6, 5, 4, 3, 2, 1, 0]  # Fill nearest-to-anchor first
    before_images = []  # (slot, date, rgb, offset_from_anchor)

    # Guard: previously a missing anchor crashed here (None.year AttributeError)
    if anchor_date is not None:
        pos_idx = 0
        last_found_date = anchor_date

        # NOTE(review): the year >= 2024 stop looks like a guard for the
        # current campaign season — confirm before reusing on other seasons.
        while pos_idx < len(before_positions) and last_found_date.year >= 2024:
            hit = None
            for days_offset in search_offsets:
                hit = _fetch_valid_image(last_found_date - timedelta(days=days_offset))
                if hit is not None:
                    break
            if hit is None:
                break  # No valid image at any spacing: stop collecting

            found_date, rgb = hit
            offset_from_anchor = (found_date - anchor_date).days
            before_images.append((before_positions[pos_idx], found_date, rgb, offset_from_anchor))
            used_dates.add(found_date)
            last_found_date = found_date  # Continue backwards from here
            print(f"    ✓ Before[{pos_idx}]: {found_date.strftime('%Y-%m-%d')} ({offset_from_anchor:+d}d from anchor) - RGB: {np.nanmin(rgb):.4f}-{np.nanmax(rgb):.4f}")
            pos_idx += 1

    # Store collected before images
    for pos, found_date, rgb, offset in before_images:
        rgb_images[pos] = rgb
        actual_dates[pos] = found_date
        days_offsets[pos] = offset

    # --- STEP 3: collect images AFTER the anchor, walking forwards ---
    print(f"  [STEP 3] Collecting images AFTER anchor (going forwards, flexible spacing)...")
    after_positions = [9, 10, 11, 12, 13, 14]  # Fill in order (9→14)
    after_images = []  # (slot, date, rgb, offset_from_anchor)

    if anchor_date is not None:
        pos_idx = 0
        last_found_date = anchor_date
        max_search_date = anchor_date + timedelta(days=200)  # Hard forward limit

        while pos_idx < len(after_positions) and last_found_date < max_search_date:
            hit = None
            for days_offset in search_offsets:
                search_target_date = last_found_date + timedelta(days=days_offset)
                if search_target_date > max_search_date:
                    break  # Don't search beyond the forward limit
                hit = _fetch_valid_image(search_target_date)
                if hit is not None:
                    break
            if hit is None:
                break  # No valid image at any spacing: stop collecting

            found_date, rgb = hit
            offset_from_anchor = (found_date - anchor_date).days
            after_images.append((after_positions[pos_idx], found_date, rgb, offset_from_anchor))
            used_dates.add(found_date)
            last_found_date = found_date  # Continue forwards from here
            print(f"    ✓ After[{pos_idx}]: {found_date.strftime('%Y-%m-%d')} ({offset_from_anchor:+d}d from anchor) - RGB: {np.nanmin(rgb):.4f}-{np.nanmax(rgb):.4f}")
            pos_idx += 1

    # Store collected after images
    for pos, found_date, rgb, offset in after_images:
        rgb_images[pos] = rgb
        actual_dates[pos] = found_date
        days_offsets[pos] = offset

    # --- Render the 5x3 grid (15 images) ---
    fig, axes = plt.subplots(3, 5, figsize=(25, 15))

    # Build title with anchor offset information
    anchor_offset_from_harvest = (actual_dates[8] - harvest_date).days if actual_dates[8] is not None else None
    if anchor_offset_from_harvest is not None and anchor_offset_from_harvest != 0:
        anchor_info = f"(Anchor: {actual_dates[8].strftime('%Y-%m-%d')}, {anchor_offset_from_harvest:+d}d from predicted harvest)"
    else:
        anchor_info = f"(Exact match with anchor: {actual_dates[8].strftime('%Y-%m-%d')})" if actual_dates[8] is not None else ""

    fig.suptitle(f'{title}\nField {field_id} - {harvest_type.upper()} Harvest: {harvest_date.strftime("%Y-%m-%d")} {anchor_info}',
                 fontsize=16, fontweight='bold')

    # Nominal grid labels (5 columns x 3 rows); actual offsets shown per tile
    positions = [
        ('T-56d', 0, 0), ('T-49d', 0, 1), ('T-42d', 0, 2), ('T-35d', 0, 3), ('T-28d', 0, 4),
        ('T-21d', 1, 0), ('T-14d', 1, 1), ('T-7d', 1, 2), ('HARVEST', 1, 3), ('T+7d', 1, 4),
        ('T+14d', 2, 0), ('T+21d', 2, 1), ('T+28d', 2, 2), ('T+35d', 2, 3), ('T+42d', 2, 4),
    ]

    for idx, (label, row, col) in enumerate(positions):
        ax = axes[row, col]

        if idx < len(rgb_images) and rgb_images[idx] is not None:
            rgb_data = rgb_images[idx]
            # Debug: log data range overall and per band
            data_min = np.nanmin(rgb_data)
            data_max = np.nanmax(rgb_data)
            data_mean = np.nanmean(rgb_data)
            data_std = np.nanstd(rgb_data)
            r_min, r_max, r_mean = np.nanmin(rgb_data[:, :, 0]), np.nanmax(rgb_data[:, :, 0]), np.nanmean(rgb_data[:, :, 0])
            g_min, g_max, g_mean = np.nanmin(rgb_data[:, :, 1]), np.nanmax(rgb_data[:, :, 1]), np.nanmean(rgb_data[:, :, 1])
            b_min, b_max, b_mean = np.nanmin(rgb_data[:, :, 2]), np.nanmax(rgb_data[:, :, 2]), np.nanmean(rgb_data[:, :, 2])

            print(f"    DEBUG VALID {label} ({actual_dates[idx].strftime('%Y-%m-%d')}): RGB overall {data_min:.4f}-{data_max:.4f} (mean={data_mean:.4f}, std={data_std:.4f})")
            print(f"      R: {r_min:.4f}-{r_max:.4f} (μ={r_mean:.4f}), G: {g_min:.4f}-{g_max:.4f} (μ={g_mean:.4f}), B: {b_min:.4f}-{b_max:.4f} (μ={b_mean:.4f})")

            # Explicit vmin/vmax since data is already normalized to 0-1
            ax.imshow(rgb_data, vmin=0, vmax=1)

            # Tile title: show offsets from BOTH the anchor and the harvest date
            if days_offsets[idx] is not None:
                offset_from_anchor = days_offsets[idx]
                offset_from_harvest = (actual_dates[idx] - harvest_date).days

                if idx == 8:  # ANCHOR/HARVEST position
                    if offset_from_harvest == 0:
                        offset_str = f"HARVEST\n(Image: {actual_dates[idx].strftime('%Y-%m-%d')})"
                    else:
                        offset_str = f"HARVEST\n(Image: {actual_dates[idx].strftime('%Y-%m-%d')}, {offset_from_harvest:+d}d from predicted)"
                else:
                    offset_str = f"{offset_from_anchor:+d}d from anchor\n{offset_from_harvest:+d}d from harvest\n{actual_dates[idx].strftime('%Y-%m-%d')}"
            else:
                offset_str = "No Data"

            ax.set_title(offset_str, fontsize=9, fontweight='bold')
        else:
            ax.text(0.5, 0.5, 'No Data', ha='center', va='center', fontsize=12, color='gray')
            ax.set_title('No Data', fontsize=10)
            print(f"    DEBUG EMPTY {label}: No image data collected")

        # Red border marks the anchor slot, with or without data
        # (previously duplicated in both branches)
        if idx == 8:
            for spine in ax.spines.values():
                spine.set_edgecolor('red')
                spine.set_linewidth(4)

        ax.set_xticks([])
        ax.set_yticks([])

    plt.tight_layout()

    # Save with descriptive naming:
    # field_<id>_<date>_<model|registered>_harvest[N]_rgb.png
    harvest_date_str = harvest_date.strftime('%Y%m%d')
    if harvest_type == 'registered':
        filename = f'field_{field_id}_{harvest_date_str}_registered_harvest_rgb.png'
    elif harvest_index is not None and harvest_index > 0:
        # Multiple predicted harvests from the same model get an index
        filename = f'field_{field_id}_{harvest_date_str}_{model_name}_harvest{harvest_index}_rgb.png'
    else:
        filename = f'field_{field_id}_{harvest_date_str}_{model_name}_harvest_rgb.png'

    output_path = Path(output_dir) / filename
    try:
        plt.savefig(output_path, dpi=100, format='png')
        plt.close()
        # Report the file actually written (previously printed a placeholder)
        print(f"    ✓ Saved: {filename}")
        return output_path
    except Exception as e:
        plt.close()
        print(f"    ✗ Error saving PNG: {e}")
        return None
|
|
|
|
|
|
def generate_rgb_grids(field_data, field_id, registered_harvest_dates, predicted_harvest_dates,
                       output_dir, tiff_dir, geojson_path):
    """
    Orchestrate RGB grid generation for one field.

    Builds a temporal grid for every registered harvest date, then for every
    predicted harvest date (grouped per model so repeat harvests from the same
    model get an index in the filename).

    Args:
        field_data (pd.DataFrame): Field data with Date, CI columns
        field_id (str): Field identifier
        registered_harvest_dates (list): Registered harvest dates (pd.Timestamp)
        predicted_harvest_dates (list): Predicted harvest dates; each entry is
            either a dict with 'harvest_date'/'model_name' keys or a timestamp
        output_dir (Path): Output directory for plots
        tiff_dir (Path): Directory containing TIFF files
        geojson_path (Path): Path to pivot.geojson

    Returns:
        dict: {'registered': [paths], 'predicted': [paths]} of generated PNGs.
    """
    if rasterio is None:
        print("  ⚠ Rasterio not available - skipping RGB visualization")
        return {'registered': [], 'predicted': []}

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    tiff_dir = Path(tiff_dir)
    geojson_path = Path(geojson_path)

    if not tiff_dir.exists():
        print(f"  ✗ TIFF directory not found: {tiff_dir}")
        return {'registered': [], 'predicted': []}

    if not geojson_path.exists():
        print(f"  ✗ GeoJSON not found: {geojson_path}")
        return {'registered': [], 'predicted': []}

    # Load the clipping boundary for this field
    print(f"  Loading field boundary for {field_id}...")
    feature, field_boundary = load_field_boundaries(geojson_path, field_id)
    if field_boundary is None:
        print(f"  ✗ Could not load field boundary for {field_id}")
        return {'registered': [], 'predicted': []}

    results = {'registered': [], 'predicted': []}

    # --- Registered harvest dates -------------------------------------------
    if registered_harvest_dates and len(registered_harvest_dates) > 0:
        print(f"  Processing {len(registered_harvest_dates)} registered harvest dates...")
        for i, harvest_date in enumerate(registered_harvest_dates):
            if pd.isna(harvest_date):
                continue
            print(f"    [{i+1}/{len(registered_harvest_dates)}] {harvest_date.strftime('%Y-%m-%d')}")
            png_path = create_temporal_rgb_grid(
                harvest_date, field_data, field_id, tiff_dir, field_boundary,
                title='Registered Harvest Validation',
                output_dir=output_dir,
                harvest_type='registered',
                model_name=None,
                harvest_index=i
            )
            if png_path:
                results['registered'].append(png_path)

    # --- Predicted harvest dates, grouped per model -------------------------
    if predicted_harvest_dates and len(predicted_harvest_dates) > 0:
        print(f"  Processing {len(predicted_harvest_dates)} predicted harvest dates...")

        # Normalize entries (dict or bare timestamp) and bucket per model so
        # the per-model harvest index can be tracked
        harvest_by_model = {}
        for harvest_info in predicted_harvest_dates:
            if isinstance(harvest_info, dict):
                harvest_date = harvest_info.get('harvest_date')
                model_name = harvest_info.get('model_name', 'predicted')
            else:
                harvest_date, model_name = harvest_info, 'predicted'
            harvest_by_model.setdefault(model_name, []).append(harvest_date)

        overall_index = 1
        for model_name, model_dates in harvest_by_model.items():
            for model_harvest_idx, harvest_date in enumerate(model_dates):
                if pd.isna(harvest_date):
                    continue
                print(f"    [{overall_index}/{len(predicted_harvest_dates)}] {harvest_date.strftime('%Y-%m-%d')} ({model_name})")
                png_path = create_temporal_rgb_grid(
                    harvest_date, field_data, field_id, tiff_dir, field_boundary,
                    title=f'Predicted Harvest Validation ({model_name})',
                    output_dir=output_dir,
                    harvest_type='predicted',
                    model_name=model_name,
                    harvest_index=model_harvest_idx
                )
                if png_path:
                    results['predicted'].append(png_path)
                overall_index += 1

    return results
|
|
|
|
|
|
if __name__ == '__main__':
    # Example usage
    # Running this module directly only prints usage hints; the real entry
    # point is generate_rgb_grids(), imported by the driver script.
    print("RGB Visualization Tool")
    print("This module is intended to be imported and called from compare_307_models_production.py")
    print("\nExample:")
    print("  from rgb_visualization import generate_rgb_grids")
    print("  generate_rgb_grids(field_data, field_id, registered_dates, predicted_dates, output_dir, tiff_dir, geojson_path)")
|