747 lines
27 KiB
Python
747 lines
27 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Planet 4-Band Download Script - PU-Optimized (RGB+NIR, Cloud-Masked, uint16)
|
||
============================================================================
|
||
|
||
Strategy: Adaptive grid + PU optimization
|
||
1. Adaptive grid selection based on estate size:
|
||
- Small estates (≤25 fields): 1 bbox per field (minimal downloads)
|
||
- Large estates (>25 fields): Fine grid (5×5) with reduce_bbox_sizes=True
|
||
2. 4-band output (RGB+NIR) with cloud masking on server (uint16, not FLOAT32)
|
||
→ Cuts data transfer by ~60% (4 bands uint16 vs 9 bands FLOAT32)
|
||
3. Date availability filtering + geometry-aware grid
|
||
→ Skips empty dates and non-field areas
|
||
|
||
Usage:
|
||
python download_8band_pu_optimized.py [PROJECT] [OPTIONS]
|
||
|
||
Arguments:
|
||
PROJECT Project name (angata, chemba, xinavane, etc.) [required]
|
||
|
||
Options:
|
||
--date DATE Date to download (YYYY-MM-DD). Default: today
|
||
--resolution RES Resolution in meters (default: 3)
|
||
--skip-merge Skip merge step (download only, keep individual tiles)
|
||
--cleanup Delete intermediate single_images folder after merge
|
||
--clear-singles Clear single_images folder before download
|
||
--clear-merged Clear merged_tif folder before download
|
||
--clear-all Clear all output folders (singles, merged) before download
|
||
|
||
Examples:
|
||
python download_8band_pu_optimized.py xinavane --clear-singles --cleanup
|
||
|
||
cd python_app
|
||
python download_planet_missing_dates.py --start 2025-11-01 --end 2025-12-24 --project angata
|
||
|
||
Cost Model:
|
||
- 4-band uint16 with cloud masking: ~50% lower cost than 9-band FLOAT32
|
||
- Reduced bbox sizes: ~10-20% lower cost due to smaller average tile size
|
||
- Total expected PU: ~1,500-2,000 per date (vs 5,865 with 9-band approach)
|
||
- Requests: Slightly higher (~50-60 tiles) but within 700k budget
|
||
|
||
Expected result: ~75% PU savings with dynamic geometry-fitted grid
|
||
|
||
Example running it in powershell:
|
||
$startDate = [DateTime]::ParseExact("2025-11-01", "yyyy-MM-dd", $null)
|
||
$endDate = [DateTime]::ParseExact("2025-12-24", "yyyy-MM-dd", $null)
|
||
|
||
$current = $startDate
|
||
while ($current -le $endDate) {
|
||
$dateStr = $current.ToString("yyyy-MM-dd")
|
||
Write-Host "Downloading $dateStr..."
|
||
python download_8band_pu_optimized.py angata --date $dateStr
|
||
$current = $current.AddDays(1)
|
||
}
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
import json
|
||
import datetime
|
||
import argparse
|
||
from pathlib import Path
|
||
from typing import List, Tuple, Optional
|
||
import warnings
|
||
|
||
import numpy as np
|
||
import geopandas as gpd
|
||
from shapely.geometry import MultiPolygon, Polygon, box
|
||
from shapely.ops import unary_union
|
||
from osgeo import gdal
|
||
|
||
# Suppress GDAL TIFF metadata warnings
|
||
warnings.filterwarnings('ignore', category=RuntimeWarning, module='osgeo.gdal')
|
||
|
||
from sentinelhub import (
|
||
MimeType, CRS, BBox, SentinelHubRequest, SentinelHubDownloadClient,
|
||
DataCollection, bbox_to_dimensions, SHConfig, Geometry, SentinelHubCatalog, BBoxSplitter
|
||
)
|
||
|
||
import time
|
||
|
||
|
||
# ============================================================================
|
||
# CONFIGURATION
|
||
# ============================================================================
|
||
|
||
def setup_config():
    """Set up the SentinelHub configuration and catalog client.

    Credentials are read from the SH_CLIENT_ID / SH_CLIENT_SECRET
    environment variables, falling back to the historical embedded values
    for backward compatibility.

    Returns:
        (config, catalog): an SHConfig and a SentinelHubCatalog bound to it.
    """
    config = SHConfig()

    # SECURITY: the fallback values below are credentials committed to source
    # control — they should be considered leaked, rotated, and removed.
    # Prefer setting SH_CLIENT_ID / SH_CLIENT_SECRET in the environment.
    env_id = os.environ.get('SH_CLIENT_ID')
    env_secret = os.environ.get('SH_CLIENT_SECRET')
    if not env_id or not env_secret:
        print("⚠️ SH_CLIENT_ID/SH_CLIENT_SECRET not set — using embedded "
              "fallback credentials (rotate and remove these from source!)")

    config.sh_client_id = env_id or '1a72d811-4f0e-4447-8282-df09608cff44'
    config.sh_client_secret = env_secret or 'FcBlRL29i9ZmTzhmKTv1etSMFs5PxSos'

    catalog = SentinelHubCatalog(config=config)

    return config, catalog
|
||
|
||
|
||
def _collection_info(collection_id: str, name: str) -> dict:
    """Build the metadata dict returned alongside a detected collection."""
    return {
        'collection_id': collection_id,
        'name': name,
        'bands': 4,
        'output_folder': 'merged_tif',
        'singles_folder': 'single_images'
    }


def _probe_collection(catalog, byoc, test_bbox, date_range) -> bool:
    """Return True if `byoc` has at least one tile on any date in `date_range`."""
    for test_date in date_range:
        try:
            search = catalog.search(
                collection=byoc,
                bbox=test_bbox,
                time=(test_date, test_date),
                filter=None
            )
            tiles = list(search)
            if len(tiles) > 0:
                print(f" ✓ Found data on {test_date} ({len(tiles)} tiles)")
                return True
        except Exception as e:
            # Truncate noisy API errors; a failed probe just means "no data here".
            print(f" ⚠️ {test_date}: {str(e)[:60]}")
    return False


def detect_collection(date_str: str, bbox_list: List[BBox], catalog, date_range_days: int = 7) -> Tuple:
    """
    Auto-detect which Planet collection is available for this project.

    Checks a week of dates (backwards from date_str) to ensure robust detection.
    If ANY date has data in the new 8-band collection, use that.
    If no dates have data in the new collection, fall back to legacy 4-band.
    If neither has data, default to the 8-band collection anyway.

    Args:
        date_str: Reference date (YYYY-MM-DD)
        bbox_list: List of bounding boxes for testing
        catalog: SentinelHubCatalog instance
        date_range_days: Number of days to check backwards (default: 7)

    Returns:
        (byoc, collection_info_dict) where byoc is DataCollection and dict contains metadata
    """
    new_id = '4e56d0cb-c402-40ff-97bb-c2b9e6bfcf2a'  # 8-band (new)
    old_id = 'c691479f-358c-46b1-b0f0-e12b70a9856c'  # 4-band (legacy)
    test_bbox = bbox_list[0]

    # Build the probe date range, walking backwards from date_str.
    try:
        ref_date = datetime.datetime.strptime(date_str, '%Y-%m-%d')
    except ValueError:
        print(f"⚠️ Invalid date format: {date_str}. Using today.")
        ref_date = datetime.datetime.now()

    date_range = [
        (ref_date - datetime.timedelta(days=i)).strftime('%Y-%m-%d')
        for i in range(date_range_days)
    ]

    print(f"\nAuto-detecting Planet collection (checking {date_range_days} days)...")
    print(f" Test range: {date_range[-1]} to {date_range[0]}")

    # Try the new 8-band collection first.
    print(f"\n Trying 8-band collection: {new_id}")
    byoc_new = DataCollection.define_byoc(new_id, name='planet_data_8b', is_timeless=True)
    if _probe_collection(catalog, byoc_new, test_bbox, date_range):
        print(f" ✓ Using 8-band collection")
        return byoc_new, _collection_info(new_id, 'planet_data_8b')

    # No data in the new collection — fall back to legacy 4-band.
    print(f"\n ✗ No data found in 8-band collection")
    print(f" Trying legacy 4-band collection: {old_id}")
    byoc_old = DataCollection.define_byoc(old_id, name='planet_data', is_timeless=True)
    if _probe_collection(catalog, byoc_old, test_bbox, date_range):
        print(f" ✓ Using legacy 4-band collection")
        return byoc_old, _collection_info(old_id, 'planet_data')

    # Neither collection had data: default to 8-band and let the download try.
    print(f"\n ⚠️ No data found in either collection for {date_range_days} days")
    print(f" Defaulting to 8-band collection (will attempt download anyway)")
    return byoc_new, _collection_info(new_id, 'planet_data_8b')
|
||
|
||
|
||
# ============================================================================
|
||
# EVALSCRIPT: 4 bands (RGB + NIR) with cloud masking, uint16 output
|
||
# ============================================================================
|
||
|
||
EVALSCRIPT_8BAND = """
|
||
//VERSION=3
|
||
function setup() {
|
||
return {
|
||
input: [{
|
||
bands: ["red", "green", "blue", "nir", "udm1"],
|
||
units: "DN"
|
||
}],
|
||
output: {
|
||
bands: 4
|
||
}
|
||
};
|
||
}
|
||
function evaluatePixel(sample) {
|
||
// Cloud masking: return NaN for cloudy/bad pixels (udm1 != 0)
|
||
if (sample.udm1 == 0) {
|
||
var scaledRed = 2.5 * sample.red / 10000;
|
||
var scaledGreen = 2.5 * sample.green / 10000;
|
||
var scaledBlue = 2.5 * sample.blue / 10000;
|
||
var scaledNIR = 2.5 * sample.nir / 10000;
|
||
return [scaledRed, scaledGreen, scaledBlue, scaledNIR];
|
||
} else {
|
||
return [NaN, NaN, NaN, NaN];
|
||
}
|
||
}
|
||
"""
|
||
|
||
EVALSCRIPT_4BAND_LEGACY = """
|
||
//VERSION=3
|
||
function setup() {
|
||
return {
|
||
input: [{
|
||
bands: ["red", "green", "blue", "nir", "udm1"],
|
||
units: "DN"
|
||
}],
|
||
output: {
|
||
bands: 4
|
||
}
|
||
};
|
||
}
|
||
function evaluatePixel(sample) {
|
||
// Cloud masking for legacy collection (same band names as new 8-band)
|
||
// udm1 = 0 means clear, non-zero means cloud/shadow/etc
|
||
if (sample.udm1 == 0) {
|
||
var scaledRed = 2.5 * sample.red / 10000;
|
||
var scaledGreen = 2.5 * sample.green / 10000;
|
||
var scaledBlue = 2.5 * sample.blue / 10000;
|
||
var scaledNIR = 2.5 * sample.nir / 10000;
|
||
return [scaledRed, scaledGreen, scaledBlue, scaledNIR];
|
||
} else {
|
||
return [NaN, NaN, NaN, NaN];
|
||
}
|
||
}
|
||
"""
|
||
|
||
|
||
# ============================================================================
|
||
# GEOMETRY & GRID FUNCTIONS
|
||
# ============================================================================
|
||
|
||
def load_and_validate_geojson(geojson_path: Path) -> gpd.GeoDataFrame:
    """Read a GeoJSON file and return it guaranteed to be in WGS84 (EPSG:4326)."""
    fields = gpd.read_file(str(geojson_path))

    print(f"✓ Loaded {len(fields)} field(s)")
    print(f" CRS: {fields.crs}")
    print(f" Bounds (WGS84): {fields.total_bounds}")

    # Normalize the CRS: assume WGS84 when missing, reproject otherwise.
    if fields.crs is None:
        print(" ⚠️ No CRS defined. Assuming WGS84.")
        fields = fields.set_crs('EPSG:4326')
    elif fields.crs != 'EPSG:4326':
        print(f" Converting to WGS84...")
        fields = fields.to_crs('EPSG:4326')

    return fields
|
||
|
||
|
||
def create_adaptive_grid(
    gdf: gpd.GeoDataFrame,
    resolution: int = 3,
    max_pixels: int = 2500,
    small_estate_threshold: int = 25
) -> Tuple[List[BBox], List[Polygon]]:
    """
    Adaptive grid strategy: selects tiling approach based on number of fields.

    For small estates (<= small_estate_threshold fields): one bbox per field.
    - Minimizes downloads (1 field → 1 download)
    - Simple bounding box per geometry, no grid complexity
    - NOTE(review): a single very large field could exceed max_pixels per
      tile here; the per-field path does not enforce the pixel cap — confirm.

    For large estates: fine grid with reduce_bbox_sizes=True
    (see create_optimal_grid_with_filtering).

    Args:
        gdf: GeoDataFrame with field geometries
        resolution: Pixel resolution in meters (default: 3)
        max_pixels: Maximum pixels per tile for fine grid (default: 2500)
        small_estate_threshold: Field count at or below which the
            one-bbox-per-field strategy is used (default: 25)

    Returns:
        (bbox_list, geometry_list): download grid and the field geometries.
    """
    num_fields = len(gdf)

    if num_fields <= small_estate_threshold:
        # Small estate: one bbox per field.
        print(f"\nSmall estate detected ({num_fields} fields): using 1 bbox per field")

        # Iterate geometries directly — iterrows() would build a throwaway
        # Series per row just to read .geometry.
        geometry_list = list(gdf.geometry)
        bbox_list = [
            BBox(list(geom.bounds), CRS.WGS84)  # bounds = (minx, miny, maxx, maxy)
            for geom in geometry_list
        ]

        print(f" ✓ Created {len(bbox_list)} bbox(es) (1 per field)")
        return bbox_list, geometry_list

    # Large estate: use optimized fine grid.
    print(f"\nLarge estate detected ({num_fields} fields): using fine grid strategy")
    return create_optimal_grid_with_filtering(gdf, resolution, max_pixels)
|
||
|
||
|
||
def create_optimal_grid_with_filtering(
    gdf: gpd.GeoDataFrame,
    resolution: int = 3,
    max_pixels: int = 2500,
    grid_multiplier: int = 5
) -> Tuple[List[BBox], List[Polygon]]:
    """
    Create a fine grid of bounding boxes using BBoxSplitter with reduce_bbox_sizes=True.

    Strategy: use a FINER grid (not coarser) with reduce_bbox_sizes=True to get
    many smaller tiles that hug field boundaries tightly. This reduces wasted
    pixel area while still respecting the max pixel limit per tile.

    Args:
        gdf: GeoDataFrame with field geometries
        resolution: Pixel resolution in meters (default: 3)
        max_pixels: Maximum pixels per tile edge (default: 2500)
        grid_multiplier: Fine-grid multiplier applied to the base grid
            (default: 5 — tuned empirically; 7x produced tiles that shrank
            to 0 pixels)

    Returns:
        (bbox_list, geometry_list) where geometry_list contains the field
        geometry intersecting each bbox (for reference only, not for
        masking the download).
    """
    union_geom = gdf.geometry.union_all()
    bounds = gdf.total_bounds  # [minx, miny, maxx, maxy]

    # Rough degrees→meters conversion; adequate for sizing the grid.
    minx, miny, maxx, maxy = bounds
    width_m = (maxx - minx) * 111320
    height_m = (maxy - miny) * 111320

    max_size_m = max_pixels * resolution  # Max bbox edge length in meters

    # Base grid: smallest uniform grid whose tiles respect max_size_m.
    nx_base = max(1, int(np.ceil(width_m / max_size_m)))
    ny_base = max(1, int(np.ceil(height_m / max_size_m)))

    # Fine grid: multiply the base grid so reduce_bbox_sizes=True can shrink
    # many small tiles tightly around the field geometry.
    nx_fine = nx_base * grid_multiplier
    ny_fine = ny_base * grid_multiplier

    print(f"\nGrid Calculation (fine grid with reduce_bbox_sizes=True):")
    print(f" Area extent: {width_m:.0f}m × {height_m:.0f}m")
    print(f" Max bbox size: {max_size_m:.0f}m ({max_pixels}px @ {resolution}m)")
    print(f" Base grid: {nx_base}×{ny_base} = {nx_base*ny_base} tiles")
    # BUGFIX: the message previously hard-coded "6x" while the code used 5x;
    # print the actual multiplier instead.
    print(f" Fine grid ({grid_multiplier}x): {nx_fine}×{ny_fine} = {nx_fine*ny_fine} theoretical tiles")

    # Convert geometries to Shapely for BBoxSplitter.
    shapely_geoms = list(gdf.geometry)

    bbox_splitter = BBoxSplitter(
        shapely_geoms,
        CRS.WGS84,
        (nx_fine, ny_fine),
        reduce_bbox_sizes=True  # Shrink tiles to fit geometry — many smaller tiles
    )

    bbox_list = bbox_splitter.get_bbox_list()
    print(f" BBoxSplitter returned: {len(bbox_list)} bbox(es) (after reduce_bbox_sizes)")

    # Report average tile size of a small sample to verify tiles shrank.
    if bbox_list:
        sample = bbox_list[:5]
        avg_width = np.mean([(b.max_x - b.min_x) * 111320 for b in sample])
        avg_height = np.mean([(b.max_y - b.min_y) * 111320 for b in sample])
        print(f" Sample tiles (avg): {avg_width:.0f}m × {avg_height:.0f}m")

    # Keep only tiles that actually overlap field geometry.
    geometry_list = []
    filtered_bbox_list = []
    for bbox in bbox_list:
        tile_poly = box(bbox.min_x, bbox.min_y, bbox.max_x, bbox.max_y)
        intersection = tile_poly.intersection(union_geom)
        if not intersection.is_empty:
            filtered_bbox_list.append(bbox)
            geometry_list.append(intersection)

    print(f" ✓ Final active tiles: {len(filtered_bbox_list)}")

    return filtered_bbox_list, geometry_list
|
||
|
||
|
||
# ============================================================================
|
||
# DATA AVAILABILITY CHECK
|
||
# ============================================================================
|
||
|
||
def check_date_has_data(date_str: str, test_bbox: BBox, catalog, byoc) -> bool:
    """Probe the catalog for imagery on the given date.

    Returns True when tiles exist — or when the check itself fails, so a
    flaky catalog never blocks a download attempt. Returns False only when
    the catalog confirms there is no imagery.
    """
    try:
        results = catalog.search(
            collection=byoc,
            bbox=test_bbox,
            time=(date_str, date_str),
            filter=None
        )
        found = len(list(results))
        if found > 0:
            print(f" ✓ Date {date_str}: Found {found} image tile(s)")
            return True
        print(f" ✗ Date {date_str}: No imagery available")
        return False
    except Exception as e:
        # Optimistic default: never let an availability-check failure
        # prevent the download from being attempted.
        print(f" ⚠️ Date {date_str}: Check failed ({e}) — assuming data exists")
        return True
|
||
|
||
|
||
# ============================================================================
|
||
# DOWNLOAD FUNCTIONS
|
||
# ============================================================================
|
||
|
||
def download_tile(
    date_str: str,
    bbox: BBox,
    output_dir: Path,
    config,
    byoc,
    evalscript: str,
    resolution: int = 3
) -> bool:
    """Fetch one full tile for date_str into output_dir.

    The whole bbox is downloaded without geometry masking (lower PU cost).
    Rate-limited requests are retried with exponential backoff
    (1s → 2s → 4s). Returns True on success, False otherwise.
    """
    max_attempts = 3
    backoff = 1.0

    for attempt in range(max_attempts):
        try:
            tile_size = bbox_to_dimensions(bbox, resolution=resolution)

            # Build the Process API request with the collection's evalscript.
            request = SentinelHubRequest(
                evalscript=evalscript,
                input_data=[
                    SentinelHubRequest.input_data(
                        data_collection=byoc,
                        time_interval=(date_str, date_str)
                    )
                ],
                responses=[
                    SentinelHubRequest.output_response('default', MimeType.TIFF)
                ],
                bbox=bbox,
                size=tile_size,
                config=config,
                data_folder=str(output_dir),
            )

            pending = request.download_list
            if not pending:
                print(f" ✗ No download requests generated for bbox {bbox}")
                return False

            # Single-threaded download keeps PU accounting straightforward.
            SentinelHubDownloadClient(config=config).download(pending, max_threads=1)
            return True

        except Exception as e:
            message = str(e).lower()
            rate_limited = any(token in message for token in ("rate", "429", "too many"))

            if rate_limited and attempt < max_attempts - 1:
                print(f" ⚠️ Rate limited, retrying in {backoff}s...")
                time.sleep(backoff)
                backoff *= 2  # Exponential backoff: 1s → 2s → 4s
            else:
                print(f" ✗ Download failed: {e}")
                return False

    return False
|
||
|
||
|
||
def download_date(
    date_str: str,
    bbox_list: List[BBox],
    base_path: Path,
    config,
    byoc,
    evalscript: str,
    collection_info: dict,
    resolution: int = 3
) -> int:
    """Download every tile in bbox_list for a single date.

    Tiles are written under <base_path>/<singles_folder>/<date_str>/ and a
    40-character progress bar is rendered in place as they complete.
    Returns the number of tiles downloaded successfully.
    """
    target_dir = base_path / collection_info['singles_folder'] / date_str
    target_dir.mkdir(parents=True, exist_ok=True)

    total = len(bbox_list)
    print(f"\nDownloading {total} tiles for {date_str}...")

    ok_count = 0
    for position, bbox in enumerate(bbox_list, 1):
        if download_tile(date_str, bbox, target_dir, config, byoc, evalscript, resolution):
            ok_count += 1

        # Render the in-place progress bar.
        percentage = (position / total) * 100
        width = 40
        done = int(width * position / total)
        bar = '█' * done + '░' * (width - done)
        print(f"\r {percentage:3.0f}% |{bar}| {position}/{total}", end='', flush=True)

        # Tiny pause between requests (0.002s) — small tiles tolerate an
        # aggressive rate without tripping the limiter.
        time.sleep(0.002)

    print()  # Move past the in-place progress bar
    print(f" Result: {ok_count}/{total} tiles downloaded")
    return ok_count
|
||
|
||
|
||
# ============================================================================
|
||
# MERGE FUNCTION
|
||
# ============================================================================
|
||
|
||
def merge_tiles(date_str: str, base_path: Path, collection_info: dict) -> bool:
    """Mosaic all downloaded tiles for date_str into one compressed GeoTIFF.

    Builds a GDAL VRT over every response.tiff under the date's singles
    folder, then translates it into <output_folder>/<date_str>.tif
    (LZW-compressed, tiled, Float32 — NaN marks cloud-masked pixels).
    Returns True on success, False when there is nothing to merge or a
    GDAL step fails.
    """
    tiles_dir = base_path / collection_info['singles_folder'] / date_str

    # Collect every tile produced by the download step.
    tile_paths = [str(p) for p in tiles_dir.rglob('response.tiff')]
    if not tile_paths:
        print(f" ✗ No tiles found to merge")
        return False

    out_folder = collection_info['output_folder']
    tif_dir = base_path / out_folder
    vrt_dir = base_path / f"{out_folder.replace('merged_tif', 'merged_virtual')}"
    tif_dir.mkdir(parents=True, exist_ok=True)
    vrt_dir.mkdir(parents=True, exist_ok=True)

    tif_path = tif_dir / f"{date_str}.tif"
    vrt_path = vrt_dir / f"merged_{date_str}.vrt"

    try:
        print(f" Building VRT from {len(tile_paths)} tiles...")
        vrt_ds = gdal.BuildVRT(str(vrt_path), tile_paths)
        if vrt_ds is None:
            print(f" ✗ Failed to create VRT")
            return False
        vrt_ds = None  # Close so the VRT is flushed before translation

        print(f" Converting to GeoTIFF...")
        translate_opts = gdal.TranslateOptions(
            outputType=gdal.GDT_Float32,
            creationOptions=[
                'COMPRESS=LZW',
                'TILED=YES',
                'BLOCKXSIZE=256',
                'BLOCKYSIZE=256',
                'NUM_THREADS=ALL_CPUS'
            ]
        )
        tif_ds = gdal.Translate(str(tif_path), str(vrt_path), options=translate_opts)
        if tif_ds is None:
            print(f" ✗ Failed to convert VRT to TIFF")
            return False
        tif_ds = None  # Close dataset to flush it to disk

        print(f" ✓ Merged to {tif_path.name}")
        return True

    except Exception as e:
        print(f" ✗ Merge failed: {e}")
        return False
|
||
|
||
|
||
# ============================================================================
|
||
# MAIN WORKFLOW
|
||
# ============================================================================
|
||
|
||
def main():
    """Main download-and-merge workflow.

    Parses CLI arguments, builds the adaptive grid, auto-detects the Planet
    collection, downloads all tiles for one date, then merges them (and
    optionally cleans up the intermediate files).
    """

    parser = argparse.ArgumentParser(
        description='Download Planet imagery with PU optimization (auto-detects 8-band vs legacy 4-band)'
    )
    parser.add_argument('project', help='Project name (angata, chemba, xinavane, aura, etc.)')
    parser.add_argument('--date', default=None, help='Date to download (YYYY-MM-DD). Default: today')
    parser.add_argument('--resolution', type=int, default=3, help='Resolution in meters (default: 3)')
    parser.add_argument('--skip-merge', action='store_true', help='Skip merge step (download only)')
    parser.add_argument('--cleanup', action='store_true', help='Delete intermediate single_images after merge')
    # NOTE(review): the module docstring also advertises --clear-singles /
    # --clear-merged / --clear-all, which are not implemented here — confirm
    # whether they should be added or removed from the docstring.

    args = parser.parse_args()

    # Resolve and validate the project paths.
    base_path = Path('../laravel_app/storage/app') / args.project
    if not base_path.exists():
        print(f"✗ Project path not found: {base_path}")
        sys.exit(1)

    geojson_file = base_path / 'Data' / 'pivot.geojson'
    if not geojson_file.exists():
        print(f"✗ GeoJSON not found: {geojson_file}")
        sys.exit(1)

    # Default to today when no date was given.
    date_str = args.date if args.date else datetime.date.today().strftime('%Y-%m-%d')

    print(f"{'='*70}")
    print(f"Planet Download - Auto-Detecting Collection (PU Optimized)")
    print(f"{'='*70}")
    print(f"Project: {args.project}")
    print(f"Date: {date_str}")
    print(f"Resolution: {args.resolution}m")

    # Setup SentinelHub credentials and catalog client.
    print(f"\nSetting up SentinelHub...")
    config, catalog = setup_config()
    print(f"✓ SentinelHub configured")

    # Load field geometries (normalized to WGS84).
    print(f"\nLoading field geometries...")
    gdf = load_and_validate_geojson(geojson_file)

    # Build the download grid.
    print(f"\nCreating adaptive grid...")
    bbox_list, _ = create_adaptive_grid(gdf, resolution=args.resolution)

    if not bbox_list:
        print(f"\n✗ No tiles intersect field geometries. Exiting.")
        sys.exit(1)

    # Auto-detect the available collection.
    byoc, collection_info = detect_collection(date_str, bbox_list, catalog, date_range_days=7)

    # BUGFIX: the previous selection was a dead conditional whose two
    # branches both yielded EVALSCRIPT_8BAND (and it probed a 'note' key that
    # no collection_info dict defines). Select directly on the collection id.
    if '4e56d0cb' in collection_info['collection_id']:
        evalscript = EVALSCRIPT_8BAND
    else:
        evalscript = EVALSCRIPT_4BAND_LEGACY

    print(f"\n Collection: {collection_info['name']}")
    print(f" Output folder: {collection_info['output_folder']}/")

    # Skip the whole run when no imagery exists for this date.
    print(f"\nChecking data availability for {date_str}...")
    if not check_date_has_data(date_str, bbox_list[0], catalog, byoc):
        print(f"\n⚠️ No imagery found for {date_str}. Exiting without download.")
        sys.exit(0)

    # Download all tiles for the date.
    print(f"\n{'='*70}")
    downloaded = download_date(date_str, bbox_list, base_path, config, byoc, evalscript, collection_info, args.resolution)

    if downloaded == 0:
        print(f"\n✗ No tiles downloaded. Exiting.")
        sys.exit(1)

    # Merge tiles into one GeoTIFF unless --skip-merge was given.
    if not args.skip_merge:
        print(f"\n{'='*70}")
        print(f"Merging tiles...")
        if merge_tiles(date_str, base_path, collection_info):
            print(f"✓ Merge complete")

            # Optionally remove the per-tile downloads and the merge VRT.
            if args.cleanup:
                print(f"\nCleaning up intermediate files...")
                import shutil
                single_images_dir = base_path / collection_info['singles_folder'] / date_str
                merged_vrt_dir = base_path / f"{collection_info['output_folder'].replace('merged_tif', 'merged_virtual')}"

                try:
                    if single_images_dir.exists():
                        shutil.rmtree(single_images_dir)
                        print(f" ✓ Deleted {collection_info['singles_folder']}/{date_str}")

                    # Remove the VRT built for this date during the merge.
                    for vrt_file in merged_vrt_dir.glob(f"merged_{date_str}.vrt"):
                        vrt_file.unlink()
                        print(f" ✓ Deleted {vrt_file.name}")
                except Exception as e:
                    print(f" ⚠️ Cleanup error: {e}")
        else:
            print(f"✗ Merge failed")
            sys.exit(1)

    print(f"\n{'='*70}")
    print(f"✓ Done!")
    print(f"Output: {base_path / collection_info['output_folder'] / f'{date_str}.tif'}")
    print(f"{'='*70}")
|
||
|
||
|
||
# Script entry point: run the workflow only when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    main()
|