Sat downloader only starts tiling when the GeoJSON has more than 25 fields

This commit is contained in:
Timon 2026-02-10 19:51:02 +01:00
parent e5a48ca447
commit cfd29fa84d

View file

@ -3,11 +3,12 @@
Planet 4-Band Download Script - PU-Optimized (RGB+NIR, Cloud-Masked, uint16)
============================================================================
Strategy: Minimize Processing Units using three techniques:
1. 4-band output (RGB+NIR) with cloud masking on server (uint16, not FLOAT32)
Strategy: Adaptive grid + PU optimization
1. Adaptive grid selection based on estate size:
- Small estates (≤25 fields): 1 bbox per field (minimal downloads)
- Large estates (>25 fields): Fine grid (5×5) with reduce_bbox_sizes=True
2. 4-band output (RGB+NIR) with cloud masking on server (uint16, not FLOAT32)
Cuts data transfer by ~60% (4 bands uint16 vs 9 bands FLOAT32)
3. Dynamically reduced bounding boxes (reduce_bbox_sizes=True)
Shrinks tiles to fit field geometry boundaries, reducing wasted pixels
4. Date availability filtering + geometry-aware grid
Skips empty dates and non-field areas
@ -279,6 +280,56 @@ def load_and_validate_geojson(geojson_path: Path) -> gpd.GeoDataFrame:
return gdf
def create_adaptive_grid(
    gdf: gpd.GeoDataFrame,
    resolution: int = 3,
    max_pixels: int = 2500,
    *,
    small_estate_threshold: int = 25,
) -> Tuple[List[BBox], List[Polygon]]:
    """
    Adaptive grid strategy: selects tiling approach based on number of fields.

    For small estates (<= small_estate_threshold fields, 25 by default):
    one bbox per field.
    - Minimizes downloads (John's 1 field → 1 download)
    - Simple bounding box per geometry
    - No grid complexity

    For large estates (> small_estate_threshold fields): delegates to
    create_optimal_grid_with_filtering (fine grid, 5×5 multiplier, with
    reduce_bbox_sizes=True).
    - Optimized for scattered fields (Angata: 1,185 fields ~120-150 downloads)
    - Balances PU efficiency and download count

    Args:
        gdf: GeoDataFrame with field geometries
        resolution: Pixel resolution in meters (default: 3); only forwarded
            to the fine-grid strategy
        max_pixels: Maximum pixels per tile for fine grid (default: 2500);
            only forwarded to the fine-grid strategy
        small_estate_threshold: Largest number of fields still handled with
            one bbox per field (default: 25)

    Returns:
        (bbox_list, geometry_list). For small estates, bbox_list holds one
        bbox per usable field and geometry_list the matching field
        geometries, in the same order. For large estates, whatever
        create_optimal_grid_with_filtering returns.
    """
    num_fields = len(gdf)

    # Large estate: hand over to the optimized fine grid.
    if num_fields > small_estate_threshold:
        print(f"\nLarge estate detected ({num_fields} fields): using fine grid strategy")
        return create_optimal_grid_with_filtering(gdf, resolution, max_pixels)

    # Small estate: one bbox per field.
    print(f"\nSmall estate detected ({num_fields} fields): using 1 bbox per field")

    # A missing geometry has no .bounds and an empty one has no usable
    # extent, so neither can produce a valid download bbox; drop them
    # instead of crashing or requesting a meaningless tile.
    geometry_list = [
        geom for geom in gdf.geometry
        if geom is not None and not geom.is_empty
    ]
    skipped = num_fields - len(geometry_list)
    if skipped:
        print(f" ⚠ Skipped {skipped} field(s) with missing or empty geometry")

    # NOTE(review): bounds are tagged as WGS84 without checking gdf.crs —
    # presumably load_and_validate_geojson guarantees EPSG:4326; confirm.
    # NOTE(review): resolution/max_pixels are not applied on this path, so a
    # single very large field is not split into tiles — confirm that request
    # size limits are handled downstream.
    bbox_list = [
        BBox(list(geom.bounds), CRS.WGS84)  # bounds = (minx, miny, maxx, maxy)
        for geom in geometry_list
    ]

    print(f" ✓ Created {len(bbox_list)} bbox(es) (1 per field)")
    return bbox_list, geometry_list
def create_optimal_grid_with_filtering(
gdf: gpd.GeoDataFrame,
resolution: int = 3,
@ -623,9 +674,9 @@ def main():
print(f"\nLoading field geometries...")
gdf = load_and_validate_geojson(geojson_file)
# Create optimal grid
print(f"\nCreating optimal grid...")
bbox_list, _ = create_optimal_grid_with_filtering(gdf, resolution=args.resolution)
# Create adaptive grid
print(f"\nCreating adaptive grid...")
bbox_list, _ = create_adaptive_grid(gdf, resolution=args.resolution)
if not bbox_list:
print(f"\n✗ No tiles intersect field geometries. Exiting.")