# SmartCane/r_app/experiments/interactive_sar_visualization/download_s1_aura.py
# Last modified: 2025-09-05 15:23:41 +02:00
# 372 lines, 13 KiB, Python
#!/usr/bin/env python3
"""
Sentinel-1 SAR Data Download Script for Aura Fields
===================================================
This script downloads Sentinel-1 SAR data (VV and VH polarizations) using the SentinelHub API
for the last 8 weeks, focusing on the Aura estate fields.
Requirements:
- sentinelhub-py library
- Valid SentinelHub account with credentials
- Field boundaries (geojson file)
Usage:
python download_s1_aura.py
The script will:
1. Load field boundaries from geojson
2. Calculate date range for last 8 weeks
3. Download Sentinel-1 VV and VH data
4. Save as weekly mosaics in GeoTIFF format
5. Apply basic preprocessing (speckle filtering, calibration)
Author: Timon
Date: August 2025
"""
import os
import sys
import json
import logging
from datetime import datetime, timedelta
from pathlib import Path
import numpy as np
import geopandas as gpd
from shapely.geometry import box
import rasterio
from rasterio.transform import from_bounds
from rasterio.crs import CRS
# SentinelHub imports
try:
from sentinelhub import (
SHConfig,
BBox,
CRS as SH_CRS,
DataCollection,
SentinelHubRequest,
MimeType,
bbox_to_dimensions
)
except ImportError as e:
print("Error: sentinelhub-py library not installed.")
print("Please install it using: pip install sentinelhub")
sys.exit(1)
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler('sar_download.log'),
logging.StreamHandler()
]
)
logger = logging.getLogger(__name__)
class SentinelHubConfig:
    """Configuration wrapper for SentinelHub API credentials.

    Credentials are read from the ``SH_CLIENT_ID`` / ``SH_CLIENT_SECRET``
    environment variables when present; otherwise the historical hard-coded
    values are used so existing deployments keep working.
    """

    # NOTE(review): secrets should not live in source control — these are
    # kept only as a backward-compatible fallback. Rotate them and rely on
    # the environment variables instead.
    _FALLBACK_CLIENT_ID = '1a72d811-4f0e-4447-8282-df09608cff44'
    _FALLBACK_CLIENT_SECRET = 'FcBlRL29i9ZmTzhmKTv1etSMFs5PxSos'

    def __init__(self):
        """Build an SHConfig and populate it with credentials."""
        self.config = SHConfig()
        self.setup_credentials()

    def setup_credentials(self):
        """Populate and persist SentinelHub OAuth client credentials.

        Environment variables take precedence over the in-source fallback
        values. The configuration is saved to the sentinelhub-py config
        store so subsequent requests can authenticate.
        """
        self.config.sh_client_id = os.environ.get(
            'SH_CLIENT_ID', self._FALLBACK_CLIENT_ID)
        self.config.sh_client_secret = os.environ.get(
            'SH_CLIENT_SECRET', self._FALLBACK_CLIENT_SECRET)
        # Persist so the sentinelhub library picks the credentials up.
        self.config.save()
        logger.info("SentinelHub credentials configured successfully")
class SARDownloader:
    """Download and post-process weekly Sentinel-1 SAR mosaics for the Aura fields.

    Workflow: load field boundaries -> compute a weekly date range ->
    request VV/VH backscatter from SentinelHub -> save per-band GeoTIFFs
    (linear and dB) -> apply a simple median speckle filter to the dB
    products.
    """

    def __init__(self, output_dir="data/aura/weekly_SAR_mosaic"):
        """Initialize the downloader and ensure the output directory exists.

        Args:
            output_dir: Directory where weekly GeoTIFFs are written.
        """
        self.config = SentinelHubConfig()
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        # SAR processing parameters
        self.resolution = 10  # target ground resolution in metres
        # SAR is cloud-independent; attribute kept for interface consistency
        # with companion optical-download scripts.
        self.max_cloud_coverage = 1.0
        logger.info(f"Initialized SAR downloader with output directory: {self.output_dir}")

    def load_field_boundaries(self, geojson_path="data/aura/field_boundaries/aura_fields.geojson"):
        """Load field boundaries and set ``self.bbox`` (WGS84).

        Tries several known file locations in priority order; on any failure
        falls back to a hard-coded bounding box for the Aura area.

        Args:
            geojson_path: Preferred path to the boundaries GeoJSON.

        Returns:
            GeoDataFrame of field polygons, or ``None`` when the fallback
            bounding box was used.
        """
        try:
            # Candidate locations for the boundaries file, in priority order.
            possible_paths = [
                geojson_path,
                "pivot.geojson",
                "pivot_20210625.geojson",
                "data/aura_fields.geojson",
                "../pivot.geojson"
            ]
            gdf = None
            for path in possible_paths:
                if os.path.exists(path):
                    logger.info(f"Loading field boundaries from: {path}")
                    gdf = gpd.read_file(path)
                    break
            if gdf is None:
                raise FileNotFoundError("Could not find field boundaries file")
            # Overall bounding box of all fields: [min_x, min_y, max_x, max_y].
            bounds = gdf.total_bounds
            self.bbox = BBox(bbox=[bounds[0], bounds[1], bounds[2], bounds[3]], crs=SH_CRS.WGS84)
            logger.info(f"Loaded {len(gdf)} field boundaries")
            logger.info(f"Bounding box: {bounds}")
            return gdf
        except Exception as e:
            logger.error(f"Error loading field boundaries: {e}")
            # Deliberate best-effort fallback: known Aura-area bounds so the
            # download can still proceed without the GeoJSON file.
            logger.warning("Using default field area coordinates from pivot.geojson bounds")
            default_bounds = [34.510012, -0.96665732, 34.57719348, -0.88375534]  # [min_lon, min_lat, max_lon, max_lat]
            self.bbox = BBox(bbox=default_bounds, crs=SH_CRS.WGS84)
            return None

    def calculate_date_range(self, weeks_back=8):
        """Calculate the date range covering the last ``weeks_back`` weeks.

        The end date is aligned to the Sunday of the current ISO week
        (Monday-to-Sunday intervals), then the start date is that Sunday
        minus ``weeks_back`` whole weeks.

        Args:
            weeks_back: Number of weeks to look back.

        Returns:
            Tuple ``(start_date, end_date)`` of ``datetime`` objects.
        """
        end_date = datetime.now()
        # Round forward to this week's Sunday so intervals are whole weeks.
        days_since_monday = end_date.weekday()
        end_date = end_date - timedelta(days=days_since_monday) + timedelta(days=6)  # Sunday
        start_date = end_date - timedelta(weeks=weeks_back)
        logger.info(f"Date range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
        return start_date, end_date

    def create_evalscript(self):
        """Return the SentinelHub evalscript for Sentinel-1 VV/VH + data mask.

        The script outputs three FLOAT32 bands per pixel: VV, VH and
        ``dataMask`` (1 where valid data exists, 0 otherwise).
        """
        evalscript = """
        //VERSION=3
        function setup() {
            return {
                input: [{
                    bands: ["VV", "VH", "dataMask"]
                }],
                output: {
                    bands: 3,
                    sampleType: "FLOAT32"
                }
            };
        }
        function evaluatePixel(sample) {
            // Return VV, VH and data mask
            return [sample.VV, sample.VH, sample.dataMask];
        }
        """
        return evalscript

    def save_geotiff(self, data_array, filepath, bbox):
        """Save a single-band 2D array as an LZW-compressed GeoTIFF (EPSG:4326).

        Args:
            data_array: 2D numpy array (height x width) of pixel values.
            filepath: Destination path for the GeoTIFF.
            bbox: SentinelHub BBox providing the georeferencing extent.
        """
        height, width = data_array.shape
        # Affine transform mapping pixel grid to the bounding box extent.
        transform = from_bounds(
            bbox.min_x, bbox.min_y, bbox.max_x, bbox.max_y,
            width, height
        )
        profile = {
            'driver': 'GTiff',
            'dtype': rasterio.float32,
            'nodata': np.nan,
            'width': width,
            'height': height,
            'count': 1,
            'crs': 'EPSG:4326',
            'transform': transform,
            'compress': 'lzw'
        }
        with rasterio.open(filepath, 'w', **profile) as dst:
            dst.write(data_array.astype(rasterio.float32), 1)

    def download_weekly_sar(self, start_date, end_date):
        """Download SAR data in weekly intervals over ``self.bbox``.

        For each week a single mosaic request is made; the returned array
        (bands VV, VH, dataMask) is split and saved as five GeoTIFFs:
        VV, VH, VV_dB, VH_dB and mask. Errors in one week are logged and
        do not abort the remaining weeks.

        Args:
            start_date: First day of the range (datetime).
            end_date: Last day of the range (datetime).
        """
        # Pixel dimensions of the request at the configured resolution.
        bbox_size = bbox_to_dimensions(self.bbox, resolution=self.resolution)
        logger.info(f"Image dimensions: {bbox_size}")
        evalscript = self.create_evalscript()
        current_date = start_date
        week_num = 1
        while current_date < end_date:
            week_start = current_date
            week_end = min(current_date + timedelta(days=6), end_date)
            logger.info(f"Downloading week {week_num}: {week_start.strftime('%Y-%m-%d')} to {week_end.strftime('%Y-%m-%d')}")
            try:
                request = SentinelHubRequest(
                    evalscript=evalscript,
                    input_data=[
                        SentinelHubRequest.input_data(
                            data_collection=DataCollection.SENTINEL1_IW,
                            time_interval=(week_start.strftime('%Y-%m-%d'), week_end.strftime('%Y-%m-%d')),
                        )
                    ],
                    responses=[
                        SentinelHubRequest.output_response('default', MimeType.TIFF)
                    ],
                    bbox=self.bbox,
                    size=bbox_size,
                    config=self.config.config
                )
                response = request.get_data(save_data=False)
                if response and len(response) > 0:
                    # Array layout: (height, width, 3) = VV, VH, dataMask.
                    data_array = response[0]
                    vv_band = data_array[:, :, 0]
                    vh_band = data_array[:, :, 1]
                    mask_band = data_array[:, :, 2]
                    # dB conversion; epsilon avoids log10(0) on nodata pixels.
                    vv_db = 10 * np.log10(vv_band + 1e-10)
                    vh_db = 10 * np.log10(vh_band + 1e-10)
                    # ISO week number + year identify the output files.
                    week_str = f"{current_date.isocalendar()[1]:02d}"
                    year = current_date.year
                    bands_to_save = {
                        'VV': vv_band,
                        'VH': vh_band,
                        'VV_dB': vv_db,
                        'VH_dB': vh_db,
                        'mask': mask_band
                    }
                    for band_name, band_data in bands_to_save.items():
                        filename = f"week_{week_str}_{year}_{band_name}.tif"
                        filepath = self.output_dir / filename
                        self.save_geotiff(band_data, filepath, self.bbox)
                        # Fixed: previous log line had a corrupted placeholder
                        # instead of the interpolated filename.
                        logger.info(f"Saved: {filename}")
                    week_num += 1
                else:
                    logger.warning(f"No data found for week {week_start.strftime('%Y-%m-%d')}")
            except Exception as e:
                # Best-effort: log the failure and continue with later weeks.
                logger.error(f"Error downloading week {week_start.strftime('%Y-%m-%d')}: {e}")
            # Advance to the Monday after this interval.
            current_date = week_end + timedelta(days=1)

    def process_downloaded_data(self):
        """Apply basic post-processing to downloaded SAR data.

        Runs a 3x3 median filter (simple speckle reduction) over every dB
        GeoTIFF in the output directory, writing a ``*_filtered.tif``
        sibling next to each. Per-file errors are logged and skipped.
        """
        logger.info("Starting post-processing of downloaded SAR data...")
        tif_files = list(self.output_dir.glob("*.tif"))
        for tif_file in tif_files:
            try:
                # Skip outputs of previous runs so re-running the script
                # does not produce *_filtered_filtered.tif files.
                if tif_file.name.endswith('_filtered.tif'):
                    continue
                logger.info(f"Processing: {tif_file.name}")
                with rasterio.open(tif_file) as src:
                    data = src.read(1)
                    profile = src.profile
                # Only dB products get speckle filtering.
                if '_dB' in tif_file.name:
                    # Local import: scipy is only required when dB files exist.
                    from scipy.ndimage import median_filter
                    filtered_data = median_filter(data, size=3)
                    filtered_name = tif_file.name.replace('.tif', '_filtered.tif')
                    filtered_path = self.output_dir / filtered_name
                    profile.update(dtype=rasterio.float32, count=1)
                    with rasterio.open(filtered_path, 'w', **profile) as dst:
                        dst.write(filtered_data.astype(rasterio.float32), 1)
                    logger.info(f"Created filtered version: {filtered_name}")
            except Exception as e:
                logger.error(f"Error processing {tif_file.name}: {e}")
def main():
    """Execute the end-to-end SAR download workflow.

    Stages: initialize the downloader, load field boundaries, compute the
    8-week date range, download weekly mosaics, then post-process them.

    Returns:
        bool: True when every stage completed, False on any error.
    """
    logger.info("Starting Sentinel-1 SAR download for Aura fields...")
    try:
        downloader = SARDownloader()
        # Sets downloader.bbox (falls back to defaults if the file is missing).
        downloader.load_field_boundaries()
        date_span = downloader.calculate_date_range(weeks_back=8)
        downloader.download_weekly_sar(*date_span)
        downloader.process_downloaded_data()
        logger.info("SAR download completed successfully!")
        logger.info(f"Data saved to: {downloader.output_dir}")
        # Summary of everything written to disk.
        downloaded = list(downloader.output_dir.glob("*.tif"))
        logger.info(f"Total files downloaded: {len(downloaded)}")
    except Exception as e:
        logger.error(f"Error in main execution: {e}")
        return False
    return True
if __name__ == "__main__":
    # Process exit code mirrors workflow success: 0 = ok, 1 = failure.
    sys.exit(0 if main() else 1)