{ "cells": [ { "cell_type": "markdown", "id": "31cf9b13", "metadata": {}, "source": [ "#### Load packages and connect to SentinelHub" ] }, { "cell_type": "code", "execution_count": 43, "id": "bc73a8d4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "✓ Packages loaded\n", "✓ GDAL warnings suppressed\n" ] } ], "source": [ "import os\n", "import json\n", "import datetime\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from pathlib import Path\n", "from osgeo import gdal\n", "from concurrent.futures import ThreadPoolExecutor, as_completed\n", "from typing import List, Tuple\n", "\n", "from sentinelhub import (\n", " MimeType, CRS, BBox, SentinelHubRequest, SentinelHubDownloadClient,\n", " DataCollection, bbox_to_dimensions, SHConfig, Geometry, SentinelHubCatalog\n", ")\n", "\n", "import time\n", "import shutil\n", "import geopandas as gpd\n", "from shapely.geometry import MultiPolygon, Polygon, box\n", "from shapely.ops import unary_union\n", "\n", "# Configure GDAL to suppress TIFF metadata warnings\n", "gdal.SetConfigOption('CPL_LOG', 'NUL') # Suppress all GDAL warnings on Windows\n", "# Alternative: Only suppress specific warnings\n", "# import warnings\n", "# warnings.filterwarnings('ignore', message='.*TIFFReadDirectory.*')\n", "\n", "config = SHConfig()\n", "catalog = SentinelHubCatalog(config=config)\n", "\n", "print(\"✓ Packages loaded\")\n", "print(\"✓ GDAL warnings suppressed\")" ] }, { "cell_type": "markdown", "id": "9592f960", "metadata": {}, "source": [ "#### Configure credentials" ] }, { "cell_type": "code", "execution_count": 44, "id": "1f1c42ed", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "✓ Credentials configured\n" ] } ], "source": [ "config.sh_client_id = '1a72d811-4f0e-4447-8282-df09608cff44'\n", "config.sh_client_secret = 'FcBlRL29i9ZmTzhmKTv1etSMFs5PxSos'\n", "\n", "collection_id = '4e56d0cb-c402-40ff-97bb-c2b9e6bfcf2a'\n", "byoc = DataCollection.define_byoc(\n", " collection_id,\n", " name='planet_data_8b',\n", " is_timeless=True\n", ")\n", "\n", "print(\"✓ Credentials configured\")" ] }, { "cell_type": "markdown", "id": "ac09862e", "metadata": {}, "source": [ "#### Set project variables" ] }, { "cell_type": "code", "execution_count": 45, "id": "c09088cf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "✓ Project: angata\n", "✓ Days: 7\n", "✓ Resolution: 3m\n", "✓ Max workers (suggested): 5\n", "✓ Download mode: full (geometry=masked, full=full tiles then local clip)\n" ] } ], "source": [ "project = 'angata' # Options: 'chemba', 'xinavane', 'angata'\n", "days = 7 # Number of days to download\n", "empty_folder_question = True # Delete intermediate files after processing\n", "resolution = 3 # Spatial resolution in meters\n", "max_workers = 5 # Number of concurrent downloads\n", "\n", "# New download-mode controls to manage Processing Units (PUs)\n", "# - 'geometry' = use geometry masks for each tile (saves transfer but costs PUs)\n", "# - 'full' = download whole tiles and clip locally (lower PUs, larger transfer)\n", "download_mode = os.environ.get('DOWNLOAD_MODE','full') # 'geometry' or 'full'\n", "# optional: simplify geometries locally before requests (meters)\n", "geometry_simplify_tolerance_m = float(os.environ.get('GEOM_SIMPLIFY_M', 0.0)) # set in meters, 0=off\n", "\n", "print(f\"✓ Project: {project}\")\n", "print(f\"✓ Days: {days}\")\n", "print(f\"✓ Resolution: {resolution}m\")\n", "print(f\"✓ Max workers (suggested): {max_workers}\")\n", "print(f\"✓ Download mode: {download_mode} (geometry=masked, full=full tiles then local clip)\")\n", "if geometry_simplify_tolerance_m and geometry_simplify_tolerance_m > 0:\n", " print(f\"✓ Geometry simplification enabled: {geometry_simplify_tolerance_m} m\")" ] }, { "cell_type": "markdown", "id": "513c186d", "metadata": {}, "source": [ "#### Setup paths" ] }, { "cell_type": "code", "execution_count": 46, "id": "7643c990", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "✓ Paths configured\n", " GeoJSON: ..\\laravel_app\\storage\\app\\angata\\Data\\pivot.geojson\n", " Output: ..\\laravel_app\\storage\\app\\angata\\merged_tif_8b_opt\n" ] } ], "source": [ "BASE_PATH = Path('../laravel_app/storage/app') / os.getenv('PROJECT_DIR', project)\n", "BASE_PATH_SINGLE_IMAGES = Path(BASE_PATH / 'single_images_8b_opt')\n", "folder_for_merged_tifs = str(BASE_PATH / 'merged_tif_8b_opt')\n", "folder_for_virtual_raster = str(BASE_PATH / 'merged_virtual_8b_opt')\n", "geojson_file = Path(BASE_PATH / 'Data' / 'pivot.geojson')\n", "\n", "# Create directories\n", "for path in [BASE_PATH_SINGLE_IMAGES, folder_for_merged_tifs, folder_for_virtual_raster]:\n", " Path(path).mkdir(parents=True, exist_ok=True)\n", "\n", "print(f\"✓ Paths configured\")\n", "print(f\" GeoJSON: {geojson_file}\")\n", "print(f\" Output: {folder_for_merged_tifs}\")" ] }, { "cell_type": "markdown", "id": "c152f197", "metadata": {}, "source": [ "#### Define date range" ] }, { "cell_type": "code", "execution_count": 47, "id": "ef3d779a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Date range: 2025-11-21 to 2025-11-27\n", "Slots (7): ['2025-11-21', '2025-11-22', '2025-11-23']...['2025-11-25', '2025-11-26', '2025-11-27']\n" ] } ], "source": [ "days_needed = int(os.environ.get(\"DAYS\", days))\n", "date_str = os.environ.get(\"DATE\")\n", "\n", "if date_str:\n", " end = datetime.datetime.strptime(date_str, \"%Y-%m-%d\").date()\n", "else:\n", " end = datetime.date.today()\n", "\n", "start = end - datetime.timedelta(days=days_needed - 1)\n", "slots = [(start + datetime.timedelta(days=i)).strftime('%Y-%m-%d') for i in range(days_needed)]\n", "\n", "print(f\"Date range: {start} to {end}\")\n", "print(f\"Slots ({len(slots)}): {slots[:3]}...{slots[-3:]}\" if len(slots) > 6 else f\"Slots: {slots}\")" ] }, { "cell_type": "markdown", "id": "ef20b6b1", "metadata": {}, "source": [ "#### Define evalscript (9-band output)" ] }, { "cell_type": "code", "execution_count": 48, "id": "ec14e2e2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "✓ Evalscript: 8 spectral bands + UDM1\n" ] } ], "source": [ "evalscript_with_udm = \"\"\"\n", " //VERSION=3\n", " function setup() {\n", " return {\n", " input: [{\n", " bands: [\"coastal_blue\", \"blue\", \"green_i\", \"green\", \"yellow\", \"red\", \"rededge\", \"nir\", \"udm1\"],\n", " units: \"DN\"\n", " }],\n", " output: {\n", " bands: 9,\n", " sampleType: \"FLOAT32\"\n", " }\n", " };\n", " }\n", " function evaluatePixel(sample) {\n", " var scaledCoastalBlue = 2.5 * sample.coastal_blue / 10000;\n", " var scaledBlue = 2.5 * sample.blue / 10000;\n", " var scaledGreenI = 2.5 * sample.green_i / 10000;\n", " var scaledGreen = 2.5 * sample.green / 10000;\n", " var scaledYellow = 2.5 * sample.yellow / 10000;\n", " var scaledRed = 2.5 * sample.red / 10000;\n", " var scaledRedEdge = 2.5 * sample.rededge / 10000;\n", " var scaledNIR = 2.5 * sample.nir / 10000;\n", " var udm1 = sample.udm1;\n", " \n", " return [scaledCoastalBlue, scaledBlue, scaledGreenI, scaledGreen, \n", " scaledYellow, scaledRed, scaledRedEdge, scaledNIR, udm1];\n", " }\n", "\"\"\"\n", "\n", "print(\"✓ Evalscript: 8 spectral bands + UDM1\")" ] }, { "cell_type": "markdown", "id": "2bb94ece", "metadata": {}, "source": [ "#### Load and optimize field geometries" ] }, { "cell_type": "code", "execution_count": 49, "id": "570428d5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "✓ Loaded 74 field(s) from GeoJSON\n", " CRS: EPSG:4326\n", " Total area: 43.30 hectares\n", " Bounds: [34.43225471 -1.06123327 34.54932756 -0.75046289]\n" ] } ], "source": [ "# Load GeoJSON\n", "geo_json = gpd.read_file(str(geojson_file))\n", "\n", "# Optionally simplify geometry to reduce server processing units (specified in meters)\n", "if geometry_simplify_tolerance_m and geometry_simplify_tolerance_m > 0:\n", " # approximate meter->degree conversion (valid for small areas)\n", " tol_deg = geometry_simplify_tolerance_m / 111320.0\n", " geo_json['geometry'] = geo_json.geometry.simplify(tol_deg, preserve_topology=True)\n", " print(f\"✓ Simplified geometries by ~{geometry_simplify_tolerance_m} m (≈{tol_deg:.6f}°)\")\n", "\n", "# Calculate area in projected CRS (UTM) for accurate measurement\n", "geo_json_projected = geo_json.to_crs('EPSG:32736') # UTM Zone 36S for Kenya\n", "total_area_ha = geo_json_projected.geometry.area.sum() / 10000\n", "\n", "print(f\"✓ Loaded {len(geo_json)} field(s) from GeoJSON\")\n", "print(f\" CRS: {geo_json.crs}\")\n", "print(f\" Total area: {total_area_ha:.2f} hectares\")\n", "\n", "# Calculate overall bounding box\n", "overall_bounds = geo_json.total_bounds # [minx, miny, maxx, maxy]\n", "print(f\" Bounds: {overall_bounds}\")" ] }, { "cell_type": "markdown", "id": "004f6767", "metadata": {}, "source": [ "#### Create optimized bbox strategy\n", "**Strategy:** Instead of uniform grid, create minimal bboxes that:\n", "1. Cover actual field geometries with small buffer\n", "2. Respect SentinelHub size limits (~2500x2500 px)\n", "3. Minimize overlap and empty space" ] }, { "cell_type": "code", "execution_count": 50, "id": "e095786f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "⚠ Area too large (13033m x 34595m), splitting...\n", " Creating 2x5 grid (10 tiles)\n", "✓ Optimized to 5 tiles (skipped 5 empty tiles)\n", " + Geometry masks save quota by downloading only field pixels\n", "\n", "✓ Using full-tile downloads for 5 tiles — will clip locally to preserve PUs.\n" ] } ], "source": [ "def create_optimal_bboxes(gdf: gpd.GeoDataFrame, resolution: int, max_pixels: int = 2500) -> Tuple[List[BBox], List[Geometry]]:\n", " \"\"\"\n", " Create optimized bounding boxes AND geometries based on actual field polygons.\n", " Using Geometry parameter saves API quota by only downloading field areas.\n", " \n", " Args:\n", " gdf: GeoDataFrame with field geometries\n", " resolution: Target resolution in meters\n", " max_pixels: Maximum image dimension (SentinelHub limit)\n", " \n", " Returns:\n", " Tuple of (bbox_list, geometry_list) - paired for each tile\n", " \"\"\"\n", " bboxes = []\n", " geometries = []\n", " max_size_m = max_pixels * resolution # Maximum bbox size in meters\n", " \n", " # Strategy 1: Try single bbox if area is small enough\n", " total_bounds = gdf.total_bounds\n", " width_m = (total_bounds[2] - total_bounds[0]) * 111320 # Rough conversion to meters\n", " height_m = (total_bounds[3] - total_bounds[1]) * 111320\n", " \n", " # Union all geometries\n", " union_geom = gdf.geometry.union_all()\n", " \n", " if width_m <= max_size_m and height_m <= max_size_m:\n", " # Single bbox covers everything\n", " bbox = BBox(bbox=total_bounds, crs=CRS.WGS84)\n", " bboxes.append(bbox)\n", " # Use actual geometry to mask download area\n", " geometries.append(Geometry(union_geom, crs=CRS.WGS84))\n", " print(f\"✓ Using single bbox: {width_m:.0f}m x {height_m:.0f}m\")\n", " print(f\" + Geometry mask to download only field pixels\")\n", " else:\n", " # Strategy 2: Split into optimal tiles\n", " print(f\"⚠ Area too large ({width_m:.0f}m x {height_m:.0f}m), splitting...\")\n", " \n", " # Calculate grid size needed\n", " nx = int(np.ceil(width_m / max_size_m))\n", " ny = int(np.ceil(height_m / max_size_m))\n", " \n", " print(f\" Creating {nx}x{ny} grid ({nx*ny} tiles)\")\n", " \n", " # Create grid tiles\n", " minx, miny, maxx, maxy = total_bounds\n", " dx = (maxx - minx) / nx\n", " dy = (maxy - miny) / ny\n", " \n", " for i in range(nx):\n", " for j in range(ny):\n", " tile_bbox = [\n", " minx + i * dx,\n", " miny + j * dy,\n", " minx + (i + 1) * dx,\n", " miny + (j + 1) * dy\n", " ]\n", " \n", " # Check if this tile intersects with any field\n", " tile_poly = box(*tile_bbox)\n", " intersection = tile_poly.intersection(union_geom)\n", " \n", " if not intersection.is_empty:\n", " bboxes.append(BBox(bbox=tile_bbox, crs=CRS.WGS84))\n", " # Only download pixels within actual fields\n", " geometries.append(Geometry(intersection, crs=CRS.WGS84))\n", " \n", " print(f\"✓ Optimized to {len(bboxes)} tiles (skipped {nx*ny - len(bboxes)} empty tiles)\")\n", " print(f\" + Geometry masks save quota by downloading only field pixels\")\n", " \n", " return bboxes, geometries\n", "\n", "# Create optimized bboxes with geometry masks\n", "bbox_list, geometry_list = create_optimal_bboxes(geo_json, resolution)\n", "# If user selected full-tile downloads, drop geometry masks and download full tiles then clip locally\n", "if download_mode.lower() in ['full','tile','full_tile'] or download_mode.lower().startswith('f') :\n", " geometry_list = [None] * len(bbox_list)\n", " print(f\"\\n✓ Using full-tile downloads for {len(bbox_list)} tiles — will clip locally to preserve PUs.\")\n", "else:\n", " print(f\"\\n✓ Created {len(bbox_list)} optimized bbox(es) with geometry masks\")\n", " print(f\" This approach downloads ONLY field pixels (saves transfer bandwidth) — but costs PUs!\")" ] }, { "cell_type": "markdown", "id": "c737f54c", "metadata": {}, "source": [ "#### Check image availability (with caching)" ] }, { "cell_type": "code", "execution_count": 51, "id": "c5695e6e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Checking image availability...\n", "✗ 2025-11-21: No images\n", "✓ 2025-11-22: 11 image(s)\n", "✓ 2025-11-23: 7 image(s)\n", "✗ 2025-11-24: No images\n", "✓ 2025-11-22: 11 image(s)\n", "✓ 2025-11-23: 7 image(s)\n", "✗ 2025-11-24: No images\n", "✓ 2025-11-25: 15 image(s)\n", "✗ 2025-11-26: No images\n", "✗ 2025-11-27: No images\n", "\n", "✓ Available: 3/7 dates\n", " Will download: ['2025-11-22', '2025-11-23', '2025-11-25']\n", "✓ 2025-11-25: 15 image(s)\n", "✗ 2025-11-26: No images\n", "✗ 2025-11-27: No images\n", "\n", "✓ Available: 3/7 dates\n", " Will download: ['2025-11-22', '2025-11-23', '2025-11-25']\n" ] } ], "source": [ "def check_availability_batch(slots: List[str], bbox: BBox) -> List[str]:\n", " \"\"\"\n", " Check availability for multiple dates at once (more efficient than one-by-one).\n", " \"\"\"\n", " available = []\n", " \n", " for slot in slots:\n", " try:\n", " search_results = catalog.search(\n", " collection=byoc,\n", " bbox=bbox,\n", " time=(slot, slot),\n", " filter=None\n", " )\n", " \n", " tiles = list(search_results)\n", " if len(tiles) > 0:\n", " available.append(slot)\n", " print(f\"✓ {slot}: {len(tiles)} image(s)\")\n", " else:\n", " print(f\"✗ {slot}: No images\")\n", " except Exception as e:\n", " print(f\"⚠ {slot}: Error - {e}\")\n", " available.append(slot) # Include anyway on error\n", " \n", " return available\n", "\n", "# Check availability using first bbox as representative\n", "print(\"Checking image availability...\")\n", "available_slots = check_availability_batch(slots, bbox_list[0])\n", "\n", "print(f\"\\n✓ Available: {len(available_slots)}/{len(slots)} dates\")\n", "print(f\" Will download: {available_slots}\")" ] }, { "cell_type": "markdown", "id": "71d0cc76", "metadata": {}, "source": [ "#### Batch download with concurrency" ] }, { "cell_type": "code", "execution_count": 52, "id": "10b4f572", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Starting batch downloads (max 3 concurrent to respect rate limits)...\n", "Mode: full (geometry-masked vs full tiles). Use DOWNLOAD_MODE env var to control.)\n", "\n", "📥 Downloading 2025-11-22 (5 tiles) using mode: full-tile...\n", "✓ 2025-11-22: Downloaded 5 tiles (full-tile)\n", "✓ 2025-11-22: Downloaded 5 tiles (full-tile)\n", "\n", "📥 Downloading 2025-11-23 (5 tiles) using mode: full-tile...\n", "\n", "📥 Downloading 2025-11-23 (5 tiles) using mode: full-tile...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\sentinelhub\\download\\sentinelhub_client.py:93: SHRateLimitWarning: Download rate limit hit\n", " warnings.warn(\"Download rate limit hit\", category=SHRateLimitWarning)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "✓ 2025-11-23: Downloaded 5 tiles (full-tile)\n", "\n", "📥 Downloading 2025-11-25 (5 tiles) using mode: full-tile...\n", "\n", "📥 Downloading 2025-11-25 (5 tiles) using mode: full-tile...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\sentinelhub\\download\\sentinelhub_client.py:93: SHRateLimitWarning: Download rate limit hit\n", " warnings.warn(\"Download rate limit hit\", category=SHRateLimitWarning)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\sentinelhub\\download\\sentinelhub_client.py:93: SHRateLimitWarning: Download rate limit hit\n", " warnings.warn(\"Download rate limit hit\", category=SHRateLimitWarning)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\sentinelhub\\download\\sentinelhub_client.py:93: SHRateLimitWarning: Download rate limit hit\n", " warnings.warn(\"Download rate limit hit\", category=SHRateLimitWarning)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\sentinelhub\\download\\sentinelhub_client.py:93: SHRateLimitWarning: Download rate limit hit\n", " warnings.warn(\"Download rate limit hit\", category=SHRateLimitWarning)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "✓ 2025-11-25: Downloaded 5 tiles (full-tile)\n", "\n", "✓ All downloads complete in 33.2s\n", " Average: 11.1s per date\n", "\n", "============================================================\n", "MERGING TILES\n", "============================================================\n", " → Performing local cutline (clip to pivot.geojson) to remove off-field pixels and reduce final size\n", " → Created local cut VRT: ..\\laravel_app\\storage\\app\\angata\\merged_virtual_8b_opt\\merged2025-11-22_cut.vrt\n", "\n", "✓ All downloads complete in 33.2s\n", " Average: 11.1s per date\n", "\n", "============================================================\n", "MERGING TILES\n", "============================================================\n", " → Performing local cutline (clip to pivot.geojson) to remove off-field pixels and reduce final size\n", " → Created local cut VRT: ..\\laravel_app\\storage\\app\\angata\\merged_virtual_8b_opt\\merged2025-11-22_cut.vrt\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4939: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-22\\44c37a99fe5bb747706a10658affb6de\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.BuildVRTInternalNames(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4939: RuntimeWarning: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.BuildVRTInternalNames(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4939: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-22\\855986baec9161308ff918f62349c6e1\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.BuildVRTInternalNames(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4939: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-22\\abe6b242852711e380bc99123a30da99\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.BuildVRTInternalNames(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4939: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-22\\cac714f168e2c449c0a0ae86a38eb088\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.BuildVRTInternalNames(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4939: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-22\\e63429e25464dbe2a484c1ba97145bf1\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.BuildVRTInternalNames(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-22\\44c37a99fe5bb747706a10658affb6de\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-22\\abe6b242852711e380bc99123a30da99\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-22\\e63429e25464dbe2a484c1ba97145bf1\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-22\\e63429e25464dbe2a484c1ba97145bf1\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-22\\855986baec9161308ff918f62349c6e1\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-22\\855986baec9161308ff918f62349c6e1\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-22\\cac714f168e2c449c0a0ae86a38eb088\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-22\\cac714f168e2c449c0a0ae86a38eb088\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "✓ 2025-11-22: Merged 5 tiles → ..\\laravel_app\\storage\\app\\angata\\merged_tif_8b_opt\\2025-11-22.tif\n", " → Performing local cutline (clip to pivot.geojson) to remove off-field pixels and reduce final size\n", " → Created local cut VRT: ..\\laravel_app\\storage\\app\\angata\\merged_virtual_8b_opt\\merged2025-11-23_cut.vrt\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4939: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-23\\04ac50fcaf11336cf94b97345d2f5f9d\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.BuildVRTInternalNames(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4939: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-23\\0dca2be3716bffba34ddd46edb4b4a7c\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.BuildVRTInternalNames(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4939: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-23\\4ae0861e880f110e4e8ba0c715a9986c\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.BuildVRTInternalNames(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4939: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-23\\cfe08c62e52e535a87531f498aeca288\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.BuildVRTInternalNames(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4939: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-23\\d6d988f08279d7e39f8dfe4f93690348\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.BuildVRTInternalNames(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-23\\04ac50fcaf11336cf94b97345d2f5f9d\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-23\\cfe08c62e52e535a87531f498aeca288\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-23\\d6d988f08279d7e39f8dfe4f93690348\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-23\\d6d988f08279d7e39f8dfe4f93690348\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-23\\4ae0861e880f110e4e8ba0c715a9986c\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-23\\4ae0861e880f110e4e8ba0c715a9986c\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-23\\0dca2be3716bffba34ddd46edb4b4a7c\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-23\\0dca2be3716bffba34ddd46edb4b4a7c\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "✓ 2025-11-23: Merged 5 tiles → ..\\laravel_app\\storage\\app\\angata\\merged_tif_8b_opt\\2025-11-23.tif\n", " → Performing local cutline (clip to pivot.geojson) to remove off-field pixels and reduce final size\n", " → Created local cut VRT: ..\\laravel_app\\storage\\app\\angata\\merged_virtual_8b_opt\\merged2025-11-25_cut.vrt\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4939: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-25\\9ed3e91999bc22bc1762eb6a9a4e1a11\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.BuildVRTInternalNames(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4939: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-25\\a6497176b35c645a5b85eaa393ab68a5\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.BuildVRTInternalNames(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4939: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-25\\bc02ebb51bca73bdcd030d7584f37756\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.BuildVRTInternalNames(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4939: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-25\\c72a637c832702d2168ca36935ba79be\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.BuildVRTInternalNames(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4939: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-25\\e8ae64313158e859e50286e563b48fc9\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.BuildVRTInternalNames(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-25\\c72a637c832702d2168ca36935ba79be\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-25\\bc02ebb51bca73bdcd030d7584f37756\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-25\\9ed3e91999bc22bc1762eb6a9a4e1a11\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-25\\9ed3e91999bc22bc1762eb6a9a4e1a11\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-25\\e8ae64313158e859e50286e563b48fc9\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-25\\e8ae64313158e859e50286e563b48fc9\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-25\\a6497176b35c645a5b85eaa393ab68a5\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n", "c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\osgeo\\gdal.py:4793: RuntimeWarning: ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt\\2025-11-25\\a6497176b35c645a5b85eaa393ab68a5\\response.tiff: TIFFReadDirectory:Sum of Photometric type-related color channels and ExtraSamples doesn't match SamplesPerPixel. Defining non-color channels as ExtraSamples.\n", " return _gdal.TranslateInternal(*args)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "✓ 2025-11-25: Merged 5 tiles → ..\\laravel_app\\storage\\app\\angata\\merged_tif_8b_opt\\2025-11-25.tif\n", "\n", "✓ Merged 3/3 dates in 21.0s\n" ] } ], "source": [ "def create_download_request(slot: str, bbox: BBox, geometry: Geometry, resolution: int) -> SentinelHubRequest:\n", " \"\"\"\n", " Create a download request for a specific date, bbox, and geometry.\n", " If `geometry` is provided it will be used as a mask; if None, the full bbox tile is requested.\n", " \"\"\"\n", " size = bbox_to_dimensions(bbox, resolution=resolution)\n", " \n", " # Build the base kwargs - optionally include the geometry only when requested\n", " req_kwargs = dict(\n", " evalscript=evalscript_with_udm,\n", " input_data=[\n", " SentinelHubRequest.input_data(\n", " data_collection=byoc,\n", " time_interval=(slot, slot)\n", " )\n", " ],\n", " responses=[\n", " SentinelHubRequest.output_response('default', MimeType.TIFF)\n", " ],\n", " bbox=bbox,\n", " size=size,\n", " config=config,\n", " data_folder=str(BASE_PATH_SINGLE_IMAGES / slot),\n", " )\n", " # Only pass geometry when it's provided AND we're in geometry-mask mode (not full-tile mode)\n", " if geometry is not None and not download_mode.lower().startswith('f'):\n", " req_kwargs['geometry'] = geometry\n", " \n", " request = SentinelHubRequest(**req_kwargs)\n", " return request\n", "\n", "def download_batch(slot: str, bboxes: List[BBox], geometries: List[Geometry], resolution: int, max_workers: int = 3):\n", " \"\"\"\n", " Download all tiles for a date using batch requests.\n", " If geometries contain Geometry objects -> masked downloads (cost PUs).\n", " If geometries contain None -> full-tile downloads (lower PU, larger transfer)\n", " \"\"\"\n", " mode = 'geometry-masked' if any(geom is not None for geom in geometries) else 'full-tile'\n", " print(f\"\\n📥 Downloading {slot} ({len(bboxes)} tiles) using mode: {mode}...\")\n", " \n", " # Create all requests with geometry masks when present (or full tiles when geometry is None)\n", " requests = [create_download_request(slot, bbox, geom, resolution) \n", " for bbox, geom in zip(bboxes, geometries)]\n", " \n", " # Flatten download lists\n", " download_list = []\n", " for req in requests:\n", " download_list.extend(req.download_list)\n", " \n", " # Batch download with rate limit handling\n", " try:\n", " client = SentinelHubDownloadClient(config=config)\n", " # Reduce concurrent threads to respect rate limits and reduce temporary parallel PU spikes\n", " data = client.download(download_list, max_threads=max_workers)\n", " print(f\"✓ {slot}: Downloaded {len(data)} tiles ({mode})\")\n", " time.sleep(0.5) # Small pause between slot downloads\n", " return True\n", " except Exception as e:\n", " print(f\"✗ {slot}: Error - {e}\")\n", " return False\n", "\n", "# Download all dates\n", "# Allow user to influence concurrency via `max_workers` above\n", "adjusted_max_workers = max(1, min(max_workers, 3))\n", "print(f\"Starting batch downloads (max {adjusted_max_workers} concurrent to respect rate limits)...\")\n", "print(f\"Mode: {download_mode} (geometry-masked vs full tiles). Use DOWNLOAD_MODE env var to control.)\")\n", "start_time = time.time()\n", "\n", "for slot in available_slots:\n", " download_batch(slot, bbox_list, geometry_list, resolution, adjusted_max_workers)\n", " time.sleep(1.0) # Increased pause between dates to avoid rate limits\n", "\n", "elapsed = time.time() - start_time\n", "print(f\"\\n✓ All downloads complete in {elapsed:.1f}s\")\n", "if len(available_slots) > 0:\n", " print(f\" Average: {elapsed/len(available_slots):.1f}s per date\")\n", "\n", "# Now merge all downloaded tiles\n", "print(\"\\n\" + \"=\"*60)\n", "print(\"MERGING TILES\")\n", "print(\"=\"*60)\n", "\n", "merge_start = time.time()\n", "success_count = 0\n", "for slot in available_slots:\n", " if merge_files_optimized(slot):\n", " success_count += 1\n", "\n", "merge_elapsed = time.time() - merge_start\n", "print(f\"\\n✓ Merged {success_count}/{len(available_slots)} dates in {merge_elapsed:.1f}s\")" ] }, { "cell_type": "markdown", "id": "ab730d4c", "metadata": {}, "source": [ "#### Efficient merge using streaming VRT" ] }, { "cell_type": "code", "execution_count": 53, "id": "969c34f4", "metadata": {}, "outputs": [], "source": [ "def merge_files_optimized(slot: str):\n", " \"\"\"\n", " Merge tiles efficiently using streaming VRT → TIFF pipeline.\n", " If `download_mode` == 'full' this function will locally crop the VRT using the pivot.geojson cutline\n", " so the final TIFF only contains field pixels (no additional PUs used).\n", " \"\"\"\n", " slot_dir = Path(BASE_PATH_SINGLE_IMAGES / slot)\n", " file_list = [str(p) for p in slot_dir.rglob('response.tiff') if p.is_file()]\n", "\n", " if not file_list:\n", " print(f\"⚠ No files for {slot}\")\n", " return False\n", "\n", " merged_tif_path = str(Path(folder_for_merged_tifs) / f\"{slot}.tif\")\n", " merged_vrt_path = str(Path(folder_for_virtual_raster) / f\"merged{slot}.vrt\")\n", "\n", " try:\n", " # Build VRT\n", " vrt = gdal.BuildVRT(merged_vrt_path, file_list)\n", " if vrt is None:\n", " print(f\"✗ {slot}: VRT build failed\")\n", " return False\n", " vrt = None # Close\n", "\n", " # If we did full-tile downloads, cut locally using the geojson pivot to avoid paying PUs\n", " if download_mode.lower().startswith('f'):\n", " cut_vrt = str(Path(folder_for_virtual_raster) / f\"merged{slot}_cut.vrt\")\n", " try:\n", " print(f\" → Performing local cutline (clip to pivot.geojson) to remove off-field pixels and reduce final size\")\n", " # Use gdal.Warp with cutline to clip to geojson\n", " gdal.Warp(\n", " cut_vrt, \n", " merged_vrt_path, \n", " format='VRT',\n", " cutlineDSName=str(geojson_file),\n", " cropToCutline=True,\n", " dstNodata=0\n", " )\n", " merged_vrt_path = cut_vrt\n", " print(f\" → Created local cut VRT: {cut_vrt}\")\n", " except Exception as e:\n", " print(f\" ⚠ Local cutline warning: {e}\")\n", " print(f\" → Continuing with full VRT (will include off-field pixels)\")\n", "\n", " # Translate to TIFF with optimizations\n", " options = gdal.TranslateOptions(\n", " outputType=gdal.GDT_Float32,\n", " creationOptions=[\n", " 'COMPRESS=LZW',\n", " 'TILED=YES',\n", " 'BLOCKXSIZE=256',\n", " 'BLOCKYSIZE=256',\n", " 'NUM_THREADS=ALL_CPUS'\n", " ]\n", " )\n", " result = gdal.Translate(merged_tif_path, merged_vrt_path, options=options)\n", " \n", " if result is None:\n", " print(f\"✗ {slot}: TIFF conversion failed\")\n", " return False\n", " \n", " result = None # Close\n", " print(f\"✓ {slot}: Merged {len(file_list)} tiles → {merged_tif_path}\")\n", " return True\n", " \n", " except Exception as e:\n", " print(f\"✗ {slot}: Exception - {e}\")\n", " import traceback\n", " traceback.print_exc()\n", " return False" ] }, { "cell_type": "markdown", "id": "3a18f161", "metadata": {}, "source": [ "#### Cleanup intermediate files" ] }, { "cell_type": "code", "execution_count": 54, "id": "55b40c9b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "✗ Error cleaning ..\\laravel_app\\storage\\app\\angata\\merged_virtual_8b_opt: [WinError 5] Toegang geweigerd: '..\\\\laravel_app\\\\storage\\\\app\\\\angata\\\\merged_virtual_8b_opt'\n", "✗ Error cleaning ..\\laravel_app\\storage\\app\\angata\\single_images_8b_opt: [WinError 5] Toegang geweigerd: '..\\\\laravel_app\\\\storage\\\\app\\\\angata\\\\single_images_8b_opt\\\\2025-11-22\\\\44c37a99fe5bb747706a10658affb6de'\n", "\n", "✓ Cleanup complete\n" ] } ], "source": [ "def cleanup_folders(folders: List[Path], run: bool = True):\n", " \"\"\"\n", " Remove intermediate files to save disk space.\n", " \"\"\"\n", " if not run:\n", " print(\"⊘ Skipping cleanup\")\n", " return\n", " \n", " for folder in folders:\n", " folder = Path(folder)\n", " if not folder.exists():\n", " continue\n", " \n", " try:\n", " # Count before\n", " files_before = sum(1 for _ in folder.rglob('*') if _.is_file())\n", " \n", " # Remove\n", " shutil.rmtree(folder)\n", " folder.mkdir(parents=True, exist_ok=True)\n", " \n", " print(f\"✓ Cleaned {folder.name}: removed {files_before} files\")\n", " except Exception as e:\n", " print(f\"✗ Error cleaning {folder}: {e}\")\n", "\n", "# Cleanup\n", "folders_to_clean = [Path(folder_for_virtual_raster), BASE_PATH_SINGLE_IMAGES]\n", "cleanup_folders(folders_to_clean, run=empty_folder_question)\n", "\n", "print(\"\\n✓ Cleanup complete\")" ] }, { "cell_type": "markdown", "id": "20856a16", "metadata": {}, "source": [ "#### Summary statistics" ] }, { "cell_type": "code", "execution_count": 55, "id": "02b63a4a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "============================================================\n", "DOWNLOAD SUMMARY\n", "============================================================\n", "Project: angata\n", "Date range: 2025-11-21 to 2025-11-27\n", "Requested dates: 7\n", "Available dates: 3\n", "Downloaded dates: 3\n", "Bboxes used: 5\n", "Total tiles: 15\n", "Output size: 9.5 MB\n", "Avg per date: 3.2 MB\n", "============================================================\n", "Output directory: ..\\laravel_app\\storage\\app\\angata\\merged_tif_8b_opt\n", "============================================================\n", "\n", "EFFICIENCY GAINS vs. Original BBoxSplitter:\n", " Original tiles: 25 (5x5 uniform grid)\n", " Optimized tiles: 5 (geometry-based)\n", " Reduction: 80.0%\n", " Fewer requests: 60\n", "\n", " Additional optimizations:\n", " ✓ Batch concurrent downloads\n", " ✓ Streaming VRT → TIFF merge\n", " ✓ Tiled TIFF output with LZW compression\n", " ✓ Multi-threaded GDAL operations\n" ] } ], "source": [ "# Count output files\n", "output_tifs = list(Path(folder_for_merged_tifs).glob('*.tif'))\n", "total_size_mb = sum(f.stat().st_size for f in output_tifs) / (1024 * 1024)\n", "\n", "print(\"=\"*60)\n", "print(\"DOWNLOAD SUMMARY\")\n", "print(\"=\"*60)\n", "print(f\"Project: {project}\")\n", "print(f\"Date range: {start} to {end}\")\n", "print(f\"Requested dates: {len(slots)}\")\n", "print(f\"Available dates: {len(available_slots)}\")\n", "print(f\"Downloaded dates: {len(output_tifs)}\")\n", "print(f\"Bboxes used: {len(bbox_list)}\")\n", "print(f\"Total tiles: {len(available_slots) * len(bbox_list)}\")\n", "print(f\"Output size: {total_size_mb:.1f} MB\")\n", "print(f\"Avg per date: {total_size_mb/len(output_tifs):.1f} MB\")\n", "print(\"=\"*60)\n", "print(f\"Output directory: {folder_for_merged_tifs}\")\n", "print(\"=\"*60)\n", "\n", "# Efficiency comparison\n", "original_tiles = 25 # 5x5 grid from original\n", "optimized_tiles = len(bbox_list)\n", "reduction_pct = (1 - optimized_tiles/original_tiles) * 100 if original_tiles > 0 else 0\n", "\n", "print(\"\\nEFFICIENCY GAINS vs. Original BBoxSplitter:\")\n", "print(f\" Original tiles: {original_tiles} (5x5 uniform grid)\")\n", "print(f\" Optimized tiles: {optimized_tiles} (geometry-based)\")\n", "print(f\" Reduction: {reduction_pct:.1f}%\")\n", "print(f\" Fewer requests: {(original_tiles - optimized_tiles) * len(available_slots)}\")\n", "print(\"\\n Additional optimizations:\")\n", "print(\" ✓ Batch concurrent downloads\")\n", "print(\" ✓ Streaming VRT → TIFF merge\")\n", "print(\" ✓ Tiled TIFF output with LZW compression\")\n", "print(\" ✓ Multi-threaded GDAL operations\")" ] }, { "cell_type": "code", "execution_count": null, "id": "a6d377fb", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 5 }