SmartCane/python_app/experiments/omnicloud/cloud_detection_esa.ipynb
2026-01-06 14:17:37 +01:00

1071 lines
36 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"id": "bd2b8431",
"metadata": {},
"source": [
"# Cloud Detection - ESA Project (August-September 2025)\n",
"\n",
"Download Planet imagery for **ESA** project from **Aug 21 - Sep 5, 2025** to test OmniCloudMask.\n",
"\n",
"**Known cloudy dates:**\n",
"- Aug 25, 2025 (cloudy)\n",
"- Aug 28, 2025 (cloudy)\n",
"- Aug 31, 2025 (possibly cloudy)\n",
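"- Sep 5, 2025 (clear)\n",
"\n",
"> **Note:** the date-range cell in section 4 and the saved outputs currently cover Dec 1 - Dec 15, 2024, so none of the known dates listed above were part of the last run.\n",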
"\n",
"**Workflow:**\n",
"1. Download images for specified date range\n",
"2. Analyze cloud coverage\n",
"3. Test OmniCloudMask on cloudy images"
]
},
{
"cell_type": "markdown",
"id": "a6ad8657",
"metadata": {},
"source": [
"## 1. Setup and Imports"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "67bde229",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import json\n",
"import datetime\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from pathlib import Path\n",
"from osgeo import gdal\n",
"\n",
"from sentinelhub import (\n",
" MimeType, CRS, BBox, SentinelHubRequest, SentinelHubDownloadClient,\n",
" DataCollection, bbox_to_dimensions, SHConfig, BBoxSplitter, Geometry, SentinelHubCatalog\n",
")\n",
"\n",
"import time\n",
"import shutil\n",
"import geopandas as gpd\n",
"from shapely.geometry import box"
]
},
{
"cell_type": "markdown",
"id": "5f446e5c",
"metadata": {},
"source": [
"## 2. Configure SentinelHub"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "e9c63d3b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✓ SentinelHub configured\n"
]
}
],
"source": [
"# Credentials are taken from the environment so that they never live in the notebook.\n",
"# SECURITY: a client id/secret pair used to be hard-coded in this cell. Treat that\n",
"# pair as compromised and rotate it in the Sentinel Hub dashboard.\n",
"config = SHConfig()\n",
"config.sh_client_id = os.environ.get('SH_CLIENT_ID', config.sh_client_id)\n",
"config.sh_client_secret = os.environ.get('SH_CLIENT_SECRET', config.sh_client_secret)\n",
"\n",
"# Fail early with a readable message instead of an authentication error later on\n",
"if not config.sh_client_id or not config.sh_client_secret:\n",
"    raise RuntimeError(\n",
"        \"Sentinel Hub credentials not found. Set the SH_CLIENT_ID and \"\n",
"        \"SH_CLIENT_SECRET environment variables (or store them in the \"\n",
"        \"sentinelhub configuration profile) before running this notebook.\"\n",
"    )\n",
"\n",
"catalog = SentinelHubCatalog(config=config)\n",
"\n",
"# Define BYOC collection. The name given here is what makes\n",
"# DataCollection.planet_data2 resolvable in the download request further down.\n",
"collection_id = 'c691479f-358c-46b1-b0f0-e12b70a9856c'\n",
"byoc = DataCollection.define_byoc(\n",
"    collection_id,\n",
"    name='planet_data2',\n",
"    is_timeless=True\n",
")\n",
"\n",
"print(\"✓ SentinelHub configured\")"
]
},
{
"cell_type": "markdown",
"id": "9cd89952",
"metadata": {},
"source": [
"## 3. Define Project and Paths"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "f57795c1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Project: esa\n",
"Base path: ..\\laravel_app\\storage\\app\\esa\n",
"GeoJSON: ..\\laravel_app\\storage\\app\\esa\\Data\\pivot_2.geojson\n",
"✓ Folders created/verified\n"
]
}
],
"source": [
"# Project settings\n",
"project = 'esa'\n",
"resolution = 3  # 3m resolution for Planet\n",
"\n",
"# Every input/output location hangs off the project's storage folder\n",
"BASE_PATH = Path('../laravel_app/storage/app', project)\n",
"BASE_PATH_SINGLE_IMAGES = BASE_PATH.joinpath('cloud_test_single_images')\n",
"folder_for_merged_tifs = BASE_PATH.joinpath('cloud_test_merged_tif')\n",
"folder_for_virtual_raster = BASE_PATH.joinpath('cloud_test_merged_virtual')\n",
"geojson_file = BASE_PATH.joinpath('Data', 'pivot_2.geojson')  # ESA uses pivot_2.geojson\n",
"\n",
"# Make sure the three output folders exist (no-op when they already do)\n",
"output_folders = (BASE_PATH_SINGLE_IMAGES, folder_for_merged_tifs, folder_for_virtual_raster)\n",
"for folder in output_folders:\n",
"    folder.mkdir(parents=True, exist_ok=True)\n",
"\n",
"print(f\"Project: {project}\")\n",
"print(f\"Base path: {BASE_PATH}\")\n",
"print(f\"GeoJSON: {geojson_file}\")\n",
"print(\"✓ Folders created/verified\")"
]
},
{
"cell_type": "markdown",
"id": "32f683b1",
"metadata": {},
"source": [
"## 4. Define Date Range (Dec 1 - Dec 15, 2024)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "8b0bbe50",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Date range: 2024-12-01 to 2024-12-15\n",
"Total days: 15\n",
"\n",
"All dates:\n",
" - 2024-12-01\n",
" - 2024-12-02\n",
" - 2024-12-03\n",
" - 2024-12-04\n",
" - 2024-12-05\n",
" - 2024-12-06\n",
" - 2024-12-07\n",
" - 2024-12-08\n",
" - 2024-12-09\n",
" - 2024-12-10\n",
" - 2024-12-11\n",
" - 2024-12-12\n",
" - 2024-12-13\n",
" - 2024-12-14\n",
" - 2024-12-15\n"
]
}
],
"source": [
"# Specific date range for ESA\n",
"# NOTE(review): the notebook title and the known-date markers in later cells refer to\n",
"# Aug 21 - Sep 5, 2025, while this cell (and the saved outputs) use Dec 1 - 15, 2024.\n",
"# Confirm which range is intended.\n",
"start_date = datetime.date(2024, 12, 1)\n",
"end_date = datetime.date(2024, 12, 15)\n",
"\n",
"# One 'YYYY-MM-DD' slot per calendar day, both end points included\n",
"days_needed = (end_date - start_date).days + 1\n",
"slots = [\n",
"    (start_date + datetime.timedelta(days=offset)).isoformat()\n",
"    for offset in range(days_needed)\n",
"]\n",
"\n",
"print(f\"Date range: {start_date} to {end_date}\")\n",
"print(f\"Total days: {len(slots)}\")\n",
"print(\"\\nAll dates:\")\n",
"for slot in slots:\n",
"    print(f\" - {slot}\")"
]
},
{
"cell_type": "markdown",
"id": "dbb3847b",
"metadata": {},
"source": [
"## 5. Load Field Boundaries"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "481be17b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loaded 23 field polygons\n",
"\n",
"Total bounds: [ 31.74544579 -26.83848466 31.87946612 -26.70413176]\n",
"Single bbox would create image of: 4395 x 5002 pixels\n",
"⚠️ Image too large for single download (max 2500x2500)\n",
" Using 2x2 grid to split into smaller tiles...\n",
" Split into 3 tiles\n",
"\n",
"Verifying tile sizes:\n",
" Tile 1: 2213 x 770 pixels ✓\n",
" Tile 2: 918 x 2273 pixels ✓\n",
" Tile 3: 2199 x 2367 pixels ✓\n"
]
}
],
"source": [
"# Load GeoJSON\n",
"geo_json = gpd.read_file(str(geojson_file))\n",
"print(f\"Loaded {len(geo_json)} field polygons\")\n",
"if geo_json.empty:\n",
"    raise ValueError(f\"No field polygons found in {geojson_file}\")\n",
"\n",
"# Create geometries (coordinates are passed on as WGS84)\n",
"geometries = [Geometry(geometry, crs=CRS.WGS84) for geometry in geo_json.geometry]\n",
"shapely_geometries = [geometry.geometry for geometry in geometries]\n",
"\n",
"# Get total bounds\n",
"total_bounds = geo_json.total_bounds  # [minx, miny, maxx, maxy]\n",
"print(f\"\\nTotal bounds: {total_bounds}\")\n",
"\n",
"# SentinelHub limit is 2500x2500 pixels per request\n",
"MAX_TILE_PIXELS = 2500\n",
"# Largest NxN grid that is tried before giving up\n",
"MAX_GRID_SIZE = 8\n",
"\n",
"\n",
"def tile_fits(tile_bbox):\n",
"    \"\"\"Return True when tile_bbox stays within MAX_TILE_PIXELS at `resolution`.\"\"\"\n",
"    width, height = bbox_to_dimensions(tile_bbox, resolution=resolution)\n",
"    return width <= MAX_TILE_PIXELS and height <= MAX_TILE_PIXELS\n",
"\n",
"\n",
"# Calculate approximate image size for single bbox\n",
"single_bbox_test = BBox(bbox=tuple(total_bounds), crs=CRS.WGS84)\n",
"single_size = bbox_to_dimensions(single_bbox_test, resolution=resolution)\n",
"print(f\"Single bbox would create image of: {single_size[0]} x {single_size[1]} pixels\")\n",
"\n",
"if tile_fits(single_bbox_test):\n",
"    print(\"✓ Single bbox works - using 1 tile per date\")\n",
"    bbox_list = [single_bbox_test]\n",
"else:\n",
"    print(f\"⚠️ Image too large for single download (max {MAX_TILE_PIXELS}x{MAX_TILE_PIXELS})\")\n",
"\n",
"    # Start with a 2x2 grid and only refine it when a tile is still too large,\n",
"    # so oversized tiles are never handed to the download step.\n",
"    for grid_size in range(2, MAX_GRID_SIZE + 1):\n",
"        bbox_splitter = BBoxSplitter(\n",
"            shapely_geometries, CRS.WGS84, (grid_size, grid_size), reduce_bbox_sizes=True\n",
"        )\n",
"        bbox_list = bbox_splitter.get_bbox_list()\n",
"        if all(tile_fits(tile_bbox) for tile_bbox in bbox_list):\n",
"            break\n",
"    else:\n",
"        raise RuntimeError(\n",
"            f\"Tiles still exceed {MAX_TILE_PIXELS}x{MAX_TILE_PIXELS} pixels with a \"\n",
"            f\"{MAX_GRID_SIZE}x{MAX_GRID_SIZE} grid; use a coarser resolution or raise MAX_GRID_SIZE\"\n",
"        )\n",
"\n",
"    print(f\" Using {grid_size}x{grid_size} grid to split into smaller tiles...\")\n",
"    print(f\" Split into {len(bbox_list)} tiles\")\n",
"\n",
"# Verify tile sizes\n",
"print(\"\\nVerifying tile sizes:\")\n",
"for i, bbox in enumerate(bbox_list, 1):\n",
"    size = bbox_to_dimensions(bbox, resolution=resolution)\n",
"    status = \"✓\" if tile_fits(bbox) else \"✗\"\n",
"    print(f\" Tile {i}: {size[0]} x {size[1]} pixels {status}\")"
]
},
{
"cell_type": "markdown",
"id": "b52639b9",
"metadata": {},
"source": [
"## 6. Check Image Availability"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "88750e5d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Checking image availability...\n",
"\n",
"============================================================\n",
"Total requested dates: 15\n",
"Available dates: 14\n",
"Excluded (no data): 1\n",
"============================================================\n",
"\n",
"Available dates:\n",
" - 2024-12-01 \n",
" - 2024-12-02 \n",
" - 2024-12-03 \n",
" - 2024-12-04 \n",
" - 2024-12-05 \n",
" - 2024-12-06 \n",
" - 2024-12-07 \n",
" - 2024-12-08 \n",
" - 2024-12-09 \n",
" - 2024-12-10 \n",
" - 2024-12-11 \n",
" - 2024-12-12 \n",
" - 2024-12-13 \n",
" - 2024-12-15 \n",
"\n",
"============================================================\n",
"Total requested dates: 15\n",
"Available dates: 14\n",
"Excluded (no data): 1\n",
"============================================================\n",
"\n",
"Available dates:\n",
" - 2024-12-01 \n",
" - 2024-12-02 \n",
" - 2024-12-03 \n",
" - 2024-12-04 \n",
" - 2024-12-05 \n",
" - 2024-12-06 \n",
" - 2024-12-07 \n",
" - 2024-12-08 \n",
" - 2024-12-09 \n",
" - 2024-12-10 \n",
" - 2024-12-11 \n",
" - 2024-12-12 \n",
" - 2024-12-13 \n",
" - 2024-12-15 \n"
]
}
],
"source": [
"def is_image_available(date):\n",
"    \"\"\"Check if Planet images are available for a given date.\n",
"\n",
"    Args:\n",
"        date: Day to look up, as a 'YYYY-MM-DD' string.\n",
"\n",
"    Returns:\n",
"        True as soon as the catalog reports at least one item for any tile in\n",
"        ``bbox_list`` on that day, otherwise False.\n",
"    \"\"\"\n",
"    for bbox in bbox_list:\n",
"        search_iterator = catalog.search(\n",
"            collection=byoc,\n",
"            bbox=bbox,\n",
"            time=(date, date)\n",
"        )\n",
"        # Only existence matters, so stop at the first hit instead of\n",
"        # materialising the complete result list.\n",
"        if next(iter(search_iterator), None) is not None:\n",
"            return True\n",
"    return False\n",
"\n",
"\n",
"# Dates with a known cloud situation; anything else gets no marker\n",
"KNOWN_DATE_MARKERS = {\n",
"    '2025-08-25': \"☁️ (known cloudy)\",\n",
"    '2025-08-28': \"☁️ (known cloudy)\",\n",
"    '2025-08-31': \"⛅ (possibly cloudy)\",\n",
"    '2025-09-05': \"☀️ (known clear)\",\n",
"}\n",
"\n",
"# Filter to available dates only\n",
"print(\"Checking image availability...\")\n",
"available_slots = [slot for slot in slots if is_image_available(slot)]\n",
"\n",
"print(f\"\\n{'='*60}\")\n",
"print(f\"Total requested dates: {len(slots)}\")\n",
"print(f\"Available dates: {len(available_slots)}\")\n",
"print(f\"Excluded (no data): {len(slots) - len(available_slots)}\")\n",
"print(f\"{'='*60}\")\n",
"print(\"\\nAvailable dates:\")\n",
"for slot in available_slots:\n",
"    # Mark known cloudy dates\n",
"    marker = KNOWN_DATE_MARKERS.get(slot, \"\")\n",
"    print(f\" - {slot} {marker}\")"
]
},
{
"cell_type": "markdown",
"id": "8f62e152",
"metadata": {},
"source": [
"## 7. Define Download Functions"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "86a4761c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✓ Download functions defined\n"
]
}
],
"source": [
"# Evalscript returning five bands: red, green, blue, nir and the UDM1 mask.\n",
"# NOTE: the output sampleType is deliberately left out, so SentinelHub auto-converts\n",
"# the 0-1 float values to 0-255 bytes.\n",
"evalscript_with_udm = \"\"\"\n",
" //VERSION=3\n",
"\n",
" function setup() {\n",
" return {\n",
" input: [{\n",
" bands: [\"red\", \"green\", \"blue\", \"nir\", \"udm1\"]\n",
" }],\n",
" output: {\n",
" bands: 5\n",
" // sampleType not specified -> auto-converts to 0-255 byte\n",
" }\n",
" };\n",
" }\n",
"\n",
" function evaluatePixel(sample) {\n",
" // Return all bands including udm1 (last band)\n",
" return [\n",
" 2.5 * sample.red / 10000,\n",
" 2.5 * sample.green / 10000,\n",
" 2.5 * sample.blue / 10000,\n",
" 2.5 * sample.nir / 10000,\n",
" sample.udm1 // 0 = usable, 1 = unusable (clouds, shadows, etc.)\n",
" ];\n",
" }\n",
"\"\"\"\n",
"\n",
"def get_download_request(time_interval, bbox, size):\n",
"    \"\"\"Build the SentinelHub request for one date and one bounding box.\n",
"\n",
"    Args:\n",
"        time_interval: Date string; used as both start and end of the request\n",
"            interval and as the name of the per-date download folder.\n",
"        bbox: Bounding box of the tile to request.\n",
"        size: Output size in pixels for that tile.\n",
"\n",
"    Returns:\n",
"        A SentinelHubRequest producing a TIFF with the bands of\n",
"        ``evalscript_with_udm``.\n",
"    \"\"\"\n",
"    planet_input = SentinelHubRequest.input_data(\n",
"        data_collection=DataCollection.planet_data2,\n",
"        time_interval=(time_interval, time_interval),\n",
"    )\n",
"    tiff_response = SentinelHubRequest.output_response('default', MimeType.TIFF)\n",
"    date_folder = BASE_PATH_SINGLE_IMAGES / time_interval\n",
"\n",
"    return SentinelHubRequest(\n",
"        evalscript=evalscript_with_udm,\n",
"        input_data=[planet_input],\n",
"        responses=[tiff_response],\n",
"        bbox=bbox,\n",
"        size=size,\n",
"        config=config,\n",
"        data_folder=str(date_folder),\n",
"    )\n",
"\n",
"def download_for_date_and_bbox(slot, bbox, size):\n",
"    \"\"\"Download the image of one tile for one date.\n",
"\n",
"    Args:\n",
"        slot: Date string of the acquisition day.\n",
"        bbox: Bounding box of the tile.\n",
"        size: Output size in pixels for that tile.\n",
"\n",
"    Returns:\n",
"        Whatever SentinelHubDownloadClient.download returns for the single\n",
"        request that was submitted.\n",
"    \"\"\"\n",
"    request = get_download_request(slot, bbox, size)\n",
"    download_client = SentinelHubDownloadClient(config=config)\n",
"\n",
"    data = download_client.download([request.download_list[0]], max_threads=5)\n",
"    time.sleep(0.1)  # short pause between consecutive requests\n",
"    return data\n",
"\n",
"def merge_tiles_for_date(slot):\n",
"    \"\"\"Merge all tiles for a given date into one GeoTIFF.\n",
"\n",
"    Args:\n",
"        slot: Date string; names both the tile folder below\n",
"            BASE_PATH_SINGLE_IMAGES and the output files.\n",
"\n",
"    Returns:\n",
"        Path of the merged GeoTIFF as a string, or None when no tile is\n",
"        available or GDAL could not build the mosaic.\n",
"    \"\"\"\n",
"    tiles_dir = Path(BASE_PATH_SINGLE_IMAGES / slot)\n",
"\n",
"    # The folder does not exist when every download for this date failed\n",
"    if not tiles_dir.is_dir():\n",
"        print(f\" No tiles found for {slot}\")\n",
"        return None\n",
"\n",
"    # Keep only request folders that really contain a response; sorting makes\n",
"    # the mosaic order independent of the directory listing order.\n",
"    file_list = sorted(\n",
"        str(request_dir / \"response.tiff\")\n",
"        for request_dir in tiles_dir.iterdir()\n",
"        if request_dir.is_dir() and (request_dir / \"response.tiff\").is_file()\n",
"    )\n",
"\n",
"    if not file_list:\n",
"        print(f\" No tiles found for {slot}\")\n",
"        return None\n",
"\n",
"    vrt_path = str(folder_for_virtual_raster / f\"merged_{slot}.vrt\")\n",
"    output_path = str(folder_for_merged_tifs / f\"{slot}.tif\")\n",
"\n",
"    # Create virtual raster\n",
"    vrt_options = gdal.BuildVRTOptions(\n",
"        resolution='highest',\n",
"        separate=False,\n",
"        addAlpha=False\n",
"    )\n",
"    vrt = gdal.BuildVRT(vrt_path, file_list, options=vrt_options)\n",
"    if vrt is None:\n",
"        # GDAL signals failure with None when Python exceptions are not enabled\n",
"        print(f\" Could not build virtual raster for {slot}\")\n",
"        return None\n",
"    vrt = None  # release the dataset so the .vrt file is written out\n",
"\n",
"    # Convert to GeoTIFF\n",
"    translate_options = gdal.TranslateOptions(\n",
"        creationOptions=['COMPRESS=LZW', 'TILED=YES', 'BIGTIFF=IF_SAFER']\n",
"    )\n",
"    merged = gdal.Translate(output_path, vrt_path, options=translate_options)\n",
"    if merged is None:\n",
"        print(f\" Could not write merged GeoTIFF for {slot}\")\n",
"        return None\n",
"    merged = None  # release the dataset so the GeoTIFF is flushed to disk\n",
"\n",
"    return output_path\n",
"\n",
"print(\"✓ Download functions defined\")"
]
},
{
"cell_type": "markdown",
"id": "7daf4805",
"metadata": {},
"source": [
"## 8. Download Images"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "8c48541f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting download for 14 dates...\n",
"\n",
"[1/14] Downloading 2024-12-01...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\timon\\anaconda3\\Lib\\site-packages\\sentinelhub\\geometry.py:137: SHDeprecationWarning: Initializing `BBox` objects from `BBox` objects will no longer be possible in future versions.\n",
" return cls._tuple_from_bbox(bbox)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[2/14] Downloading 2024-12-02...\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[2/14] Downloading 2024-12-02...\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[3/14] Downloading 2024-12-03...\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[3/14] Downloading 2024-12-03...\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[4/14] Downloading 2024-12-04...\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[4/14] Downloading 2024-12-04...\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[5/14] Downloading 2024-12-05...\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[5/14] Downloading 2024-12-05...\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[6/14] Downloading 2024-12-06...\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[6/14] Downloading 2024-12-06...\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[7/14] Downloading 2024-12-07...\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[7/14] Downloading 2024-12-07...\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[8/14] Downloading 2024-12-08...\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[8/14] Downloading 2024-12-08...\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[9/14] Downloading 2024-12-09...\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[9/14] Downloading 2024-12-09...\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[10/14] Downloading 2024-12-10...\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[10/14] Downloading 2024-12-10...\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[11/14] Downloading 2024-12-11...\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[11/14] Downloading 2024-12-11...\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[12/14] Downloading 2024-12-12...\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[12/14] Downloading 2024-12-12...\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[13/14] Downloading 2024-12-13...\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[13/14] Downloading 2024-12-13...\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[14/14] Downloading 2024-12-15...\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"[14/14] Downloading 2024-12-15...\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 1/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 2/3 downloaded\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"\n",
"✓ All downloads complete!\n",
" ✓ Tile 3/3 downloaded\n",
"\n",
"\n",
"✓ All downloads complete!\n"
]
}
],
"source": [
"print(f\"Starting download for {len(available_slots)} dates...\\n\")\n",
"\n",
"# (date, tile number, error message) for every tile that could not be downloaded\n",
"failed_downloads = []\n",
"\n",
"for i, slot in enumerate(available_slots, 1):\n",
"    print(f\"[{i}/{len(available_slots)}] Downloading {slot}...\")\n",
"\n",
"    for j, bbox in enumerate(bbox_list, 1):\n",
"        # bbox_list already holds BBox objects; wrapping them in BBox(...) again\n",
"        # only triggers sentinelhub's SHDeprecationWarning.\n",
"        size = bbox_to_dimensions(bbox, resolution=resolution)\n",
"\n",
"        try:\n",
"            download_for_date_and_bbox(slot, bbox, size)\n",
"            print(f\" ✓ Tile {j}/{len(bbox_list)} downloaded\")\n",
"        except Exception as e:\n",
"            # Best effort: remember the failure and carry on with the next tile\n",
"            failed_downloads.append((slot, j, str(e)))\n",
"            print(f\" ✗ Tile {j}/{len(bbox_list)} failed: {e}\")\n",
"\n",
"        time.sleep(0.2)\n",
"\n",
"    print()\n",
"\n",
"if failed_downloads:\n",
"    print(f\"\\n⚠️ Downloads finished with {len(failed_downloads)} failed tile(s):\")\n",
"    for failed_slot, failed_tile, failed_reason in failed_downloads:\n",
"        print(f\" - {failed_slot} tile {failed_tile}: {failed_reason}\")\n",
"else:\n",
"    print(\"\\n✓ All downloads complete!\")"
]
},
{
"cell_type": "markdown",
"id": "d6bae285",
"metadata": {},
"source": [
"## 9. Merge Tiles"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "e6fb1492",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Merging tiles for each date...\n",
"\n",
"Merging 2024-12-01...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-01.tif\n",
"Merging 2024-12-02...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-01.tif\n",
"Merging 2024-12-02...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-02.tif\n",
"Merging 2024-12-03...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-02.tif\n",
"Merging 2024-12-03...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-03.tif\n",
"Merging 2024-12-04...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-03.tif\n",
"Merging 2024-12-04...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-04.tif\n",
"Merging 2024-12-05...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-04.tif\n",
"Merging 2024-12-05...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-05.tif\n",
"Merging 2024-12-06...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-05.tif\n",
"Merging 2024-12-06...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-06.tif\n",
"Merging 2024-12-07...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-06.tif\n",
"Merging 2024-12-07...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-07.tif\n",
"Merging 2024-12-08...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-07.tif\n",
"Merging 2024-12-08...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-08.tif\n",
"Merging 2024-12-09...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-08.tif\n",
"Merging 2024-12-09...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-09.tif\n",
"Merging 2024-12-10...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-09.tif\n",
"Merging 2024-12-10...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-10.tif\n",
"Merging 2024-12-11...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-10.tif\n",
"Merging 2024-12-11...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-11.tif\n",
"Merging 2024-12-12...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-11.tif\n",
"Merging 2024-12-12...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-12.tif\n",
"Merging 2024-12-13...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-12.tif\n",
"Merging 2024-12-13...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-13.tif\n",
"Merging 2024-12-15...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-13.tif\n",
"Merging 2024-12-15...\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-15.tif\n",
"\n",
"✓ Successfully merged 14 images\n",
" ✓ Saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_test_merged_tif\\2024-12-15.tif\n",
"\n",
"✓ Successfully merged 14 images\n"
]
}
],
"source": [
"print(\"Merging tiles for each date...\\n\")\n",
"\n",
"# date string -> path of the merged GeoTIFF (only dates that merged successfully)\n",
"merged_files = {}\n",
"for slot in available_slots:\n",
"    print(f\"Merging {slot}...\")\n",
"    output_path = merge_tiles_for_date(slot)\n",
"\n",
"    if not output_path:\n",
"        print(\" ✗ Failed to merge\")\n",
"        continue\n",
"\n",
"    merged_files[slot] = output_path\n",
"    print(f\" ✓ Saved to: {output_path}\")\n",
"\n",
"print(f\"\\n✓ Successfully merged {len(merged_files)} images\")"
]
},
{
"cell_type": "markdown",
"id": "e497dc04",
"metadata": {},
"source": [
"## 10. Analyze Cloud Coverage Using UDM1"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "8d69405d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Analyzing cloud coverage...\n",
"\n",
"Date Cloud % Status Note\n",
"----------------------------------------------------------------------\n",
"2024-12-01 0.39% ☀️ Clear \n",
"2024-12-02 0.30% ☀️ Clear \n",
"2024-12-03 0.54% ☀️ Clear \n",
"2024-12-04 0.12% ☀️ Clear \n",
"2024-12-05 0.00% ☀️ Clear \n",
"2024-12-03 0.54% ☀️ Clear \n",
"2024-12-04 0.12% ☀️ Clear \n",
"2024-12-05 0.00% ☀️ Clear \n",
"2024-12-06 0.07% ☀️ Clear \n",
"2024-12-07 2.38% ☀️ Clear \n",
"2024-12-06 0.07% ☀️ Clear \n",
"2024-12-07 2.38% ☀️ Clear \n",
"2024-12-08 0.78% ☀️ Clear \n",
"2024-12-09 2.30% ☀️ Clear \n",
"2024-12-08 0.78% ☀️ Clear \n",
"2024-12-09 2.30% ☀️ Clear \n",
"2024-12-10 0.00% ☀️ Clear \n",
"2024-12-11 0.00% ☀️ Clear \n",
"2024-12-10 0.00% ☀️ Clear \n",
"2024-12-11 0.00% ☀️ Clear \n",
"2024-12-12 0.00% ☀️ Clear \n",
"2024-12-13 0.06% ☀️ Clear \n",
"2024-12-15 0.60% ☀️ Clear \n",
"\n",
"✓ Analysis complete for 14 images\n",
"2024-12-12 0.00% ☀️ Clear \n",
"2024-12-13 0.06% ☀️ Clear \n",
"2024-12-15 0.60% ☀️ Clear \n",
"\n",
"✓ Analysis complete for 14 images\n"
]
}
],
"source": [
"def analyze_cloud_coverage(tif_path, udm_band_index=5):\n",
"    \"\"\"Calculate cloud coverage percentage using the UDM1 band.\n",
"\n",
"    Args:\n",
"        tif_path: Path of the merged GeoTIFF.\n",
"        udm_band_index: 1-based index of the UDM1 band; defaults to 5, the\n",
"            last band written by the download evalscript.\n",
"\n",
"    Returns:\n",
"        Tuple ``(cloud_percentage, udm_band)``: the share of non-zero mask\n",
"        pixels in percent (plain float) and the mask array itself.\n",
"        ``(None, None)`` when the file cannot be opened, has fewer bands than\n",
"        ``udm_band_index`` or the band cannot be read.\n",
"    \"\"\"\n",
"    ds = gdal.Open(tif_path)\n",
"    if ds is None:\n",
"        return None, None\n",
"\n",
"    try:\n",
"        # A raster without the mask band would otherwise fail on None.ReadAsArray()\n",
"        if ds.RasterCount < udm_band_index:\n",
"            return None, None\n",
"\n",
"        # UDM1: 0 = clear, anything else = cloudy/unusable\n",
"        udm_band = ds.GetRasterBand(udm_band_index).ReadAsArray()\n",
"    finally:\n",
"        ds = None  # drop the dataset reference so GDAL can close the file\n",
"\n",
"    if udm_band is None or udm_band.size == 0:\n",
"        return None, None\n",
"\n",
"    # NOTE(review): every pixel of the merged raster is counted, including any\n",
"    # area not covered by a tile - confirm this is the intended denominator.\n",
"    total_pixels = udm_band.size\n",
"    cloudy_pixels = np.count_nonzero(udm_band > 0)  # > 0 to catch any non-zero values\n",
"    cloud_percentage = float(cloudy_pixels / total_pixels * 100)\n",
"\n",
"    return cloud_percentage, udm_band\n",
"\n",
"# Analyze all images\n",
"cloud_stats = {}\n",
"print(\"Analyzing cloud coverage...\\n\")\n",
"print(f\"{'Date':<14} {'Cloud %':<10} {'Status':<20} {'Note'}\")\n",
"print(\"-\" * 70)\n",
"\n",
"# (exclusive upper bound in %, label); the first bound above the value wins\n",
"status_by_upper_bound = [\n",
"    (5, \"☀️ Clear\"),\n",
"    (20, \"🌤️ Mostly clear\"),\n",
"    (50, \"⛅ Partly cloudy\"),\n",
"]\n",
"\n",
"# Expectations for the dates with a known cloud situation\n",
"expected_notes = {\n",
"    '2025-08-25': \"(expected cloudy)\",\n",
"    '2025-08-28': \"(expected cloudy)\",\n",
"    '2025-09-05': \"(expected clear)\",\n",
"}\n",
"\n",
"for date, path in sorted(merged_files.items()):\n",
"    cloud_pct, _ = analyze_cloud_coverage(path)\n",
"    if cloud_pct is None:\n",
"        # File could not be analysed; leave it out of the statistics\n",
"        continue\n",
"\n",
"    cloud_stats[date] = cloud_pct\n",
"\n",
"    # Categorize\n",
"    status = next(\n",
"        (label for upper_bound, label in status_by_upper_bound if cloud_pct < upper_bound),\n",
"        \"☁️ Very cloudy\",\n",
"    )\n",
"\n",
"    # Add known status\n",
"    note = expected_notes.get(date, \"\")\n",
"\n",
"    print(f\"{date:<14} {cloud_pct:>6.2f}% {status:<20} {note}\")\n",
"\n",
"print(f\"\\n✓ Analysis complete for {len(cloud_stats)} images\")"
]
},
{
"cell_type": "markdown",
"id": "9a7b1152",
"metadata": {},
"source": [
"## 11. Visualize Cloudy Images"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "2e9f40a3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generating visualizations for key dates...\n",
"\n",
"\n",
"✓ Visualizations complete\n"
]
}
],
"source": [
"def create_quicklook(tif_path, date, cloud_pct):\n",
"    \"\"\"Build a two-panel figure: RGB image next to the same image with the UDM1 mask in red.\n",
"\n",
"    Args:\n",
"        tif_path: Path of the merged GeoTIFF (bands 1-3 = R, G, B; band 5 = UDM1).\n",
"        date: Date label used in the left panel title.\n",
"        cloud_pct: Cloud percentage shown in the right panel title.\n",
"\n",
"    Returns:\n",
"        The matplotlib figure, or None when the file cannot be opened.\n",
"    \"\"\"\n",
"    dataset = gdal.Open(tif_path)\n",
"    if dataset is None:\n",
"        return None\n",
"\n",
"    # Band values are in the 0-255 range\n",
"    bands = {index: dataset.GetRasterBand(index).ReadAsArray() for index in (1, 2, 3, 5)}\n",
"    udm = bands[5]\n",
"\n",
"    # Scale to 0-1 for display\n",
"    rgb = np.clip(\n",
"        np.dstack([bands[1] / 255.0, bands[2] / 255.0, bands[3] / 255.0]),\n",
"        0,\n",
"        1,\n",
"    )\n",
"\n",
"    fig, (ax_rgb, ax_mask) = plt.subplots(1, 2, figsize=(14, 6))\n",
"\n",
"    # Left panel: plain RGB\n",
"    ax_rgb.imshow(rgb)\n",
"    ax_rgb.set_title(f\"RGB - {date}\", fontsize=14, fontweight='bold')\n",
"    ax_rgb.axis('off')\n",
"\n",
"    # Right panel: every masked pixel painted red\n",
"    cloud_overlay = rgb.copy()\n",
"    cloud_overlay[udm > 0] = [1, 0, 0]\n",
"    ax_mask.imshow(cloud_overlay)\n",
"    ax_mask.set_title(f\"Cloud Mask (UDM1) - {cloud_pct:.1f}% cloudy\", fontsize=14, fontweight='bold')\n",
"    ax_mask.axis('off')\n",
"\n",
"    plt.tight_layout()\n",
"    dataset = None  # drop the dataset reference\n",
"    return fig\n",
"\n",
"# Display known cloudy dates\n",
"target_dates = ['2025-08-21','2025-08-25', '2025-08-28', '2025-08-31', '2025-09-01', '2025-09-05']\n",
"print(\"Generating visualizations for key dates...\\n\")\n",
"\n",
"shown_dates = []\n",
"for date in target_dates:\n",
"    if date in merged_files and date in cloud_stats:\n",
"        print(f\"Visualizing {date} ({cloud_stats[date]:.1f}% cloudy)\")\n",
"        fig = create_quicklook(merged_files[date], date, cloud_stats[date])\n",
"        if fig is not None:\n",
"            plt.show()\n",
"            plt.close(fig)  # close this figure explicitly to free its memory\n",
"            shown_dates.append(date)\n",
"\n",
"# Say so explicitly instead of silently producing no figures, e.g. when the\n",
"# configured date range does not contain any of the target dates.\n",
"if not shown_dates:\n",
"    print(\"⚠️ None of the target dates were downloaded and analysed - nothing to visualize.\")\n",
"    print(f\" Target dates: {', '.join(target_dates)}\")\n",
"    print(f\" Analysed dates: {', '.join(sorted(cloud_stats)) or 'none'}\")\n",
"\n",
"print(\"\\n✓ Visualizations complete\")"
]
},
{
"cell_type": "markdown",
"id": "67d213ab",
"metadata": {},
"source": [
"## 12. Export Summary for OmniCloudMask Testing"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "2414f82a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"======================================================================\n",
"IMAGES FOR OMNICLOUDMASK TESTING (ESA)\n",
"======================================================================\n",
"\n",
"Rank Date Cloud % Path\n",
"----------------------------------------------------------------------------------------------------\n",
"\n",
"✓ Summary saved to: ..\\laravel_app\\storage\\app\\esa\\cloud_detection_summary_esa.json\n",
"\n",
"======================================================================\n",
"NEXT STEP: Use cloud_detection_step2_test_omnicloudmask.ipynb\n",
"Update it to use ESA project and these cloudy images\n",
"======================================================================\n"
]
}
],
"source": [
"# Select cloudy images for testing: more than 10% clouds, cloudiest first\n",
"test_candidates = sorted(\n",
"    (\n",
"        (date, cloud_pct, merged_files[date])\n",
"        for date, cloud_pct in cloud_stats.items()\n",
"        if cloud_pct > 10\n",
"    ),\n",
"    key=lambda candidate: candidate[1],\n",
"    reverse=True,\n",
")\n",
"\n",
"separator = \"=\" * 70\n",
"print(\"\\n\" + separator)\n",
"print(\"IMAGES FOR OMNICLOUDMASK TESTING (ESA)\")\n",
"print(separator)\n",
"print(f\"\\n{'Rank':<6} {'Date':<14} {'Cloud %':<10} {'Path'}\")\n",
"print(\"-\" * 100)\n",
"\n",
"for i, (date, cloud_pct, path) in enumerate(test_candidates, 1):\n",
"    print(f\"{i:<6} {date:<14} {cloud_pct:>6.2f}% {path}\")\n",
"\n",
"# Save summary\n",
"candidate_records = [\n",
"    {\"date\": date, \"cloud_percentage\": cloud_pct, \"path\": path}\n",
"    for date, cloud_pct, path in test_candidates\n",
"]\n",
"summary = {\n",
"    \"project\": project,\n",
"    \"date_range\": f\"{start_date} to {end_date}\",\n",
"    \"total_dates\": len(slots),\n",
"    \"available_dates\": len(available_slots),\n",
"    \"cloud_statistics\": cloud_stats,\n",
"    \"test_candidates\": candidate_records,\n",
"    \"merged_files\": merged_files\n",
"}\n",
"\n",
"summary_path = BASE_PATH / 'cloud_detection_summary_esa.json'\n",
"with open(summary_path, 'w') as f:\n",
"    f.write(json.dumps(summary, indent=2))\n",
"\n",
"print(f\"\\n✓ Summary saved to: {summary_path}\")\n",
"print(\"\\n\" + separator)\n",
"print(\"NEXT STEP: Use cloud_detection_step2_test_omnicloudmask.ipynb\")\n",
"print(\"Update it to use ESA project and these cloudy images\")\n",
"print(separator)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}