From cfd29fa84dd2f917cc037df5b9ba134ce4e3670a Mon Sep 17 00:00:00 2001
From: Timon
Date: Tue, 10 Feb 2026 19:51:02 +0100
Subject: [PATCH] sat downloader only starts tiling above 25 fields in geojson

---
 python_app/00_download_8band_pu_optimized.py | 75 ++++++++++++++++++--
 1 file changed, 68 insertions(+), 7 deletions(-)

diff --git a/python_app/00_download_8band_pu_optimized.py b/python_app/00_download_8band_pu_optimized.py
index 1059ae9..5450944 100644
--- a/python_app/00_download_8band_pu_optimized.py
+++ b/python_app/00_download_8band_pu_optimized.py
@@ -3,11 +3,12 @@
 Planet 4-Band Download Script - PU-Optimized (RGB+NIR, Cloud-Masked, uint16)
 ============================================================================
 
-Strategy: Minimize Processing Units using three techniques:
-  1. 4-band output (RGB+NIR) with cloud masking on server (uint16, not FLOAT32)
+Strategy: Adaptive grid + PU optimization
+  1. Adaptive grid selection based on estate size:
+     - Small estates (≤25 fields): 1 bbox per field (minimal downloads)
+     - Large estates (>25 fields): Fine grid (5×5) with reduce_bbox_sizes=True
+  2. 4-band output (RGB+NIR) with cloud masking on server (uint16, not FLOAT32)
      → Cuts data transfer by ~60% (4 bands uint16 vs 9 bands FLOAT32)
-  2. Dynamically reduced bounding boxes (reduce_bbox_sizes=True)
-     → Shrinks tiles to fit field geometry boundaries, reducing wasted pixels
   3. Date availability filtering + geometry-aware grid
      → Skips empty dates and non-field areas
 
@@ -279,6 +280,66 @@ def load_and_validate_geojson(geojson_path: Path) -> gpd.GeoDataFrame:
     return gdf
 
 
+def create_adaptive_grid(
+    gdf: gpd.GeoDataFrame,
+    resolution: int = 3,
+    max_pixels: int = 2500,
+    small_estate_threshold: int = 25
+) -> Tuple[List[BBox], List[Polygon]]:
+    """
+    Adaptive grid strategy: selects tiling approach based on number of fields.
+
+    For small estates (≤small_estate_threshold fields, default 25): One bbox per field.
+      - Minimizes downloads (John's 1 field → 1 download)
+      - Simple bounding box per geometry
+      - No grid complexity
+
+    For large estates (>small_estate_threshold fields): Fine grid (5×5 multiplier) with
+    reduce_bbox_sizes=True, delegated to create_optimal_grid_with_filtering().
+      - Optimized for scattered fields (Angata: 1,185 fields → ~120-150 downloads)
+      - Balances PU efficiency and download count
+      - Proven tuning from iterative testing
+
+    Args:
+        gdf: GeoDataFrame with field geometries
+        resolution: Pixel resolution in meters (default: 3); only used by the fine grid
+        max_pixels: Maximum pixels per tile for fine grid (default: 2500)
+        small_estate_threshold: Largest field count still handled with 1 bbox per field
+            (default: 25)
+
+    Returns:
+        (bbox_list, geometry_list) where bbox_list is download grid, geometry_list is field geometries
+    """
+    num_fields = len(gdf)
+
+    if num_fields > small_estate_threshold:
+        # Large estate: use optimized fine grid
+        print(f"\nLarge estate detected ({num_fields} fields): using fine grid strategy")
+        return create_optimal_grid_with_filtering(gdf, resolution, max_pixels)
+
+    # Small estate: one bbox per field
+    print(f"\nSmall estate detected ({num_fields} fields): using 1 bbox per field")
+
+    bbox_list = []
+    geometry_list = []
+    skipped = 0
+
+    # Iterate the active geometry column directly; the row index is not needed
+    for geom in gdf.geometry:
+        # A missing or empty geometry has no usable bounds, so it cannot become a bbox
+        if geom is None or geom.is_empty:
+            skipped += 1
+            continue
+        # NOTE(review): bounds are tagged as WGS84 as-is - assumes gdf is already in EPSG:4326
+        bbox_list.append(BBox(list(geom.bounds), CRS.WGS84))  # (minx, miny, maxx, maxy)
+        geometry_list.append(geom)
+
+    if skipped:
+        print(f" ⚠ Skipped {skipped} field(s) with missing or empty geometry")
+    print(f" ✓ Created {len(bbox_list)} bbox(es) (1 per field)")
+    return bbox_list, geometry_list
+
+
 def create_optimal_grid_with_filtering(
     gdf: gpd.GeoDataFrame,
     resolution: int = 3,
@@ -623,9 +684,9 @@ def main():
     print(f"\nLoading field geometries...")
     gdf = load_and_validate_geojson(geojson_file)
 
-    # Create optimal grid
-    print(f"\nCreating optimal grid...")
-    bbox_list, _ = create_optimal_grid_with_filtering(gdf, resolution=args.resolution)
+    # Create adaptive grid
+    print(f"\nCreating adaptive grid...")
+    bbox_list, _ = create_adaptive_grid(gdf, resolution=args.resolution)
 
     if not bbox_list:
         print(f"\n✗ No tiles intersect field geometries. Exiting.")