726 lines
23 KiB
Plaintext
726 lines
23 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "5ea10771",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Cloud Detection - Step 1: Identify Cloudy Images\n",
|
||
"\n",
|
||
"This notebook downloads Planet imagery for the **Aura** project (last 3 weeks) and helps identify which images contain clouds.\n",
|
||
"\n",
|
||
"**Workflow:**\n",
|
||
"1. Connect to SentinelHub\n",
|
||
"2. Define Aura project area\n",
|
||
"3. Download images from last 3 weeks\n",
|
||
"4. Generate quick-look visualizations\n",
|
||
"5. Identify cloudy images for testing with OmniCloudMask"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "4f43a8b9",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 1. Setup and Imports"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "1b300ebc",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Install required packages (uncomment if needed)\n",
|
||
"# !pip install sentinelhub\n",
|
||
"# !pip install geopandas matplotlib pillow\n",
|
||
"\n",
|
||
"import os\n",
|
||
"import json\n",
|
||
"import datetime\n",
|
||
"import numpy as np\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"from pathlib import Path\n",
|
||
"from osgeo import gdal\n",
|
||
"\n",
|
||
"from sentinelhub import (\n",
|
||
" MimeType, CRS, BBox, SentinelHubRequest, SentinelHubDownloadClient,\n",
|
||
" DataCollection, bbox_to_dimensions, SHConfig, BBoxSplitter, Geometry, SentinelHubCatalog\n",
|
||
")\n",
|
||
"\n",
|
||
"import time\n",
|
||
"import shutil\n",
|
||
"import geopandas as gpd\n",
|
||
"from shapely.geometry import MultiLineString, MultiPolygon, Polygon\n",
|
||
"from PIL import Image"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "6b0d9534",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 2. Configure SentinelHub"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "72a2d6ca",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Read credentials from the environment instead of hardcoding them in the\n",
"# notebook (secrets committed to version control must be rotated).\n",
"# Set SH_CLIENT_ID / SH_CLIENT_SECRET before launching Jupyter, or enter\n",
"# them interactively when prompted.\n",
"from getpass import getpass\n",
"\n",
"config = SHConfig()\n",
"config.sh_client_id = os.environ.get('SH_CLIENT_ID') or getpass('SentinelHub client id: ')\n",
"config.sh_client_secret = os.environ.get('SH_CLIENT_SECRET') or getpass('SentinelHub client secret: ')\n",
"\n",
"# Catalog API client — used later to check per-date image availability\n",
"catalog = SentinelHubCatalog(config=config)\n",
"\n",
"# Define BYOC (Bring Your Own Collection) holding the ingested Planet imagery.\n",
"# The collection id is not a secret; it identifies the tenant's collection.\n",
"collection_id = 'c691479f-358c-46b1-b0f0-e12b70a9856c'\n",
"byoc = DataCollection.define_byoc(\n",
"    collection_id,\n",
"    name='planet_data2',\n",
"    is_timeless=True\n",
")\n",
"\n",
"print(\"✓ SentinelHub configured\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "b43e776d",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 3. Define Project and Paths"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "595021b5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Project identifier — selects the storage sub-tree under laravel_app/storage/app\n",
"project = 'aura'\n",
"resolution = 3 # 3m resolution for Planet\n",
"\n",
"# Define paths\n",
"# NOTE(review): the relative base path assumes the notebook is launched from\n",
"# a folder that sits next to laravel_app/ — confirm the working directory\n",
"# before Restart & Run All.\n",
"BASE_PATH = Path('../laravel_app/storage/app') / project\n",
"BASE_PATH_SINGLE_IMAGES = BASE_PATH / 'cloud_test_single_images'  # per-date tile downloads\n",
"folder_for_merged_tifs = BASE_PATH / 'cloud_test_merged_tif'  # one merged GeoTIFF per date\n",
"folder_for_virtual_raster = BASE_PATH / 'cloud_test_merged_virtual'  # intermediate .vrt files\n",
"geojson_file = BASE_PATH / 'Data' / 'pivot.geojson'  # field boundary polygons\n",
"\n",
"# Create folders if they don't exist\n",
"for folder in [BASE_PATH_SINGLE_IMAGES, folder_for_merged_tifs, folder_for_virtual_raster]:\n",
"    folder.mkdir(parents=True, exist_ok=True)\n",
"\n",
"print(f\"Project: {project}\")\n",
"print(f\"Base path: {BASE_PATH}\")\n",
"print(f\"GeoJSON: {geojson_file}\")\n",
"print(f\"✓ Folders created/verified\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "ca46160a",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 4. Define Time Period (Last 3 Weeks)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "1e6d4013",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Last 3 weeks, inclusive of today (start_date .. end_date)\n",
"end_date = datetime.date.today()\n",
"start_date = end_date - datetime.timedelta(days=21)\n",
"\n",
"# One slot per calendar day. The +1 makes the range inclusive of end_date,\n",
"# matching the printed \"Date range\" below (range(21) would stop at yesterday).\n",
"num_days = (end_date - start_date).days + 1\n",
"slots = [(start_date + datetime.timedelta(days=i)).strftime('%Y-%m-%d') for i in range(num_days)]\n",
"\n",
"print(f\"Date range: {start_date} to {end_date}\")\n",
"print(f\"Total days: {len(slots)}\")\n",
"print(f\"\\nFirst 5 dates: {slots[:5]}\")\n",
"print(f\"Last 5 dates: {slots[-5:]}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "df16c395",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 5. Load Field Boundaries and Create BBox Grid"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "cf88f697",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Load GeoJSON with the field (pivot) boundary polygons\n",
"geo_json = gpd.read_file(str(geojson_file))\n",
"print(f\"Loaded {len(geo_json)} field polygons\")\n",
"\n",
"# Wrap each polygon as a sentinelhub Geometry; keep shapely copies for the splitter\n",
"geometries = [Geometry(geometry, crs=CRS.WGS84) for geometry in geo_json.geometry]\n",
"shapely_geometries = [geometry.geometry for geometry in geometries]\n",
"\n",
"# Get total bounds\n",
"from shapely.geometry import box\n",
"total_bounds = geo_json.total_bounds # [minx, miny, maxx, maxy]\n",
"print(f\"\\nTotal bounds: {total_bounds}\")\n",
"\n",
"# Calculate approximate image size for single bbox at the chosen resolution\n",
"single_bbox_test = BBox(bbox=tuple(total_bounds), crs=CRS.WGS84)\n",
"single_size = bbox_to_dimensions(single_bbox_test, resolution=resolution)\n",
"print(f\"Single bbox would create image of: {single_size[0]} x {single_size[1]} pixels\")\n",
"\n",
"# SentinelHub Process API limit is 2500x2500 pixels per request\n",
"if single_size[0] > 2500 or single_size[1] > 2500:\n",
"    print(f\"⚠️ Image too large for single download (max 2500x2500)\")\n",
"    print(f\"   Using 2x2 grid to split into smaller tiles...\")\n",
"    \n",
"    # Use BBoxSplitter with a fixed 2x2 grid.\n",
"    # NOTE(review): a 2x2 split may still exceed 2500px for very large AOIs —\n",
"    # the verification loop below prints ✗ in that case; increase the grid then.\n",
"    bbox_splitter = BBoxSplitter(\n",
"        shapely_geometries, CRS.WGS84, (2, 2), reduce_bbox_sizes=True\n",
"    )\n",
"    bbox_list = bbox_splitter.get_bbox_list()\n",
"    print(f\"   Split into {len(bbox_list)} tiles\")\n",
"else:\n",
"    print(f\"✓ Single bbox works - using 1 tile per date\")\n",
"    bbox_list = [single_bbox_test]\n",
"\n",
"# Verify every tile respects the 2500x2500 request limit\n",
"print(f\"\\nVerifying tile sizes:\")\n",
"for i, bbox in enumerate(bbox_list, 1):\n",
"    size = bbox_to_dimensions(bbox, resolution=resolution)\n",
"    status = \"✓\" if size[0] <= 2500 and size[1] <= 2500 else \"✗\"\n",
"    print(f\"  Tile {i}: {size[0]} x {size[1]} pixels {status}\")\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "f78964df",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 6. Check Image Availability"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "09c2fcc6",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 6.1. Visualize Download Grid (Optional)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "1e1a7660",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Visualize the download grid to ensure good coverage:\n",
"# red dashed boxes (download tiles) should fully cover the green field outlines\n",
"fig, ax = plt.subplots(1, 1, figsize=(12, 12))\n",
"\n",
"# Plot field boundaries\n",
"geo_json.boundary.plot(ax=ax, color='green', linewidth=2, label='Fields')\n",
"\n",
"# Plot bboxes — sentinelhub BBox supports [minx, miny, maxx, maxy] indexing\n",
"for i, bbox in enumerate(bbox_list):\n",
"    bbox_geom = box(bbox[0], bbox[1], bbox[2], bbox[3])\n",
"    x, y = bbox_geom.exterior.xy\n",
"    ax.plot(x, y, 'r--', linewidth=1, alpha=0.7)\n",
"    # Label each tile with its 1-based number (matches the download log)\n",
"    centroid = bbox_geom.centroid\n",
"    ax.text(centroid.x, centroid.y, str(i+1), fontsize=10, ha='center', \n",
"            bbox=dict(boxstyle='round', facecolor='yellow', alpha=0.5))\n",
"\n",
"ax.set_xlabel('Longitude')\n",
"ax.set_ylabel('Latitude')\n",
"ax.set_title('Download Grid (Red) vs Field Boundaries (Green)', fontsize=14, fontweight='bold')\n",
"ax.legend()\n",
"ax.grid(True, alpha=0.3)\n",
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"print(f\"✓ Visualization complete - verify that red boxes cover green field boundaries\")\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "2fcded08",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def is_image_available(date):\n",
"    \"\"\"Return True if at least one Planet scene covers any tile on `date`.\n",
"\n",
"    Pulls a single feature from the Catalog iterator instead of\n",
"    materialising the whole result set, so each date/tile check does\n",
"    the minimum amount of API work.\n",
"    \"\"\"\n",
"    for bbox in bbox_list:\n",
"        search_iterator = catalog.search(\n",
"            collection=byoc,\n",
"            bbox=bbox,\n",
"            time=(date, date)\n",
"        )\n",
"        # next() with a None default avoids StopIteration on empty results\n",
"        if next(iter(search_iterator), None) is not None:\n",
"            return True\n",
"    return False\n",
"\n",
"# Keep only the dates for which imagery actually exists\n",
"print(\"Checking image availability...\")\n",
"available_slots = [slot for slot in slots if is_image_available(slot)]\n",
"\n",
"print(f\"\\n{'='*60}\")\n",
"print(f\"Total requested dates: {len(slots)}\")\n",
"print(f\"Available dates: {len(available_slots)}\")\n",
"print(f\"Excluded (no data): {len(slots) - len(available_slots)}\")\n",
"print(f\"{'='*60}\")\n",
"print(f\"\\nAvailable dates:\")\n",
"for slot in available_slots:\n",
"    print(f\"  - {slot}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "b67f5deb",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 7. Define Download Functions"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "26cd367f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Evalscript to get RGB + NIR + UDM1 mask\n",
"# NOTE: Not specifying sampleType makes SentinelHub auto-convert 0-1 float to 0-255 byte\n",
"# This matches the production script behavior\n",
"evalscript_with_udm = \"\"\"\n",
"    //VERSION=3\n",
"\n",
"    function setup() {\n",
"        return {\n",
"            input: [{\n",
"                bands: [\"red\", \"green\", \"blue\", \"nir\", \"udm1\"]\n",
"            }],\n",
"            output: {\n",
"                bands: 5\n",
"                // sampleType: \"FLOAT32\" - commented out to get 0-255 byte output like production\n",
"            }\n",
"        };\n",
"    }\n",
"\n",
"    function evaluatePixel(sample) {\n",
"        // Return all bands including udm1 (last band)\n",
"        return [\n",
"            2.5 * sample.red / 10000,\n",
"            2.5 * sample.green / 10000,\n",
"            2.5 * sample.blue / 10000,\n",
"            2.5 * sample.nir / 10000,\n",
"            sample.udm1 // 0 = usable, 1 = unusable (clouds, shadows, etc.)\n",
"        ];\n",
"    }\n",
"\"\"\"\n",
"\n",
"def get_download_request(time_interval, bbox, size):\n",
"    \"\"\"Build a SentinelHubRequest for one date (time_interval) and one tile (bbox).\n",
"\n",
"    Responses land under BASE_PATH_SINGLE_IMAGES/<date>/ so all tiles for\n",
"    the same date can be merged later by merge_tiles_for_date().\n",
"    \"\"\"\n",
"    return SentinelHubRequest(\n",
"        evalscript=evalscript_with_udm,\n",
"        input_data=[\n",
"            SentinelHubRequest.input_data(\n",
"                data_collection=DataCollection.planet_data2,\n",
"                time_interval=(time_interval, time_interval)\n",
"            )\n",
"        ],\n",
"        responses=[\n",
"            SentinelHubRequest.output_response('default', MimeType.TIFF)\n",
"        ],\n",
"        bbox=bbox,\n",
"        size=size,\n",
"        config=config,\n",
"        data_folder=str(BASE_PATH_SINGLE_IMAGES / time_interval),\n",
"    )\n",
"\n",
"def download_for_date_and_bbox(slot, bbox, size):\n",
"    \"\"\"Download the image for one date/bbox pair; returns the raw response data.\"\"\"\n",
"    requests = [get_download_request(slot, bbox, size)]\n",
"    download_list = [request.download_list[0] for request in requests]\n",
"\n",
"    data = SentinelHubDownloadClient(config=config).download(download_list, max_threads=5)\n",
"    time.sleep(0.1)  # brief pause to stay friendly to the rate limiter\n",
"    return data\n",
"\n",
"def merge_tiles_for_date(slot):\n",
"    \"\"\"Merge all downloaded tiles for `slot` into one compressed GeoTIFF.\n",
"\n",
"    Returns the output path as a string, or None when no usable tiles\n",
"    exist for the date.\n",
"    \"\"\"\n",
"    tile_dir = BASE_PATH_SINGLE_IMAGES / slot\n",
"    if not tile_dir.exists():\n",
"        print(f\"  No tiles found for {slot}\")\n",
"        return None\n",
"\n",
"    # Only keep tiles whose response.tiff actually exists — a failed\n",
"    # download can leave a request folder without a response file,\n",
"    # which would make gdal.BuildVRT fail on the whole date.\n",
"    file_list = [\n",
"        str(x / \"response.tiff\")\n",
"        for x in tile_dir.iterdir()\n",
"        if x.is_dir() and (x / \"response.tiff\").exists()\n",
"    ]\n",
"\n",
"    if not file_list:\n",
"        print(f\"  No tiles found for {slot}\")\n",
"        return None\n",
"\n",
"    vrt_path = str(folder_for_virtual_raster / f\"merged_{slot}.vrt\")\n",
"    output_path = str(folder_for_merged_tifs / f\"{slot}.tif\")\n",
"\n",
"    # Create virtual raster mosaicking all tiles of the date\n",
"    vrt_options = gdal.BuildVRTOptions(\n",
"        resolution='highest',\n",
"        separate=False,\n",
"        addAlpha=False\n",
"    )\n",
"    vrt = gdal.BuildVRT(vrt_path, file_list, options=vrt_options)\n",
"    vrt = None  # Close the dataset so the VRT is flushed to disk\n",
"\n",
"    # Convert to GeoTIFF: LZW saves space, TILED improves access patterns,\n",
"    # BIGTIFF=IF_SAFER guards against >4GB outputs\n",
"    translate_options = gdal.TranslateOptions(\n",
"        creationOptions=['COMPRESS=LZW', 'TILED=YES', 'BIGTIFF=IF_SAFER']\n",
"    )\n",
"    gdal.Translate(output_path, vrt_path, options=translate_options)\n",
"\n",
"    return output_path\n",
"\n",
"print(\"✓ Download functions defined\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "e9f17ba8",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 8. Download Images"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "e66173ea",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(f\"Starting download for {len(available_slots)} dates...\\n\")\n",
"\n",
"for i, slot in enumerate(available_slots, 1):\n",
"    print(f\"[{i}/{len(available_slots)}] Downloading {slot}...\")\n",
"\n",
"    for j, bbox in enumerate(bbox_list, 1):\n",
"        # bbox_list already holds BBox objects (from BBoxSplitter or the\n",
"        # single-bbox fallback) — re-wrapping with BBox(..., crs=CRS.WGS84)\n",
"        # is redundant and would silently override a non-WGS84 CRS.\n",
"        size = bbox_to_dimensions(bbox, resolution=resolution)\n",
"\n",
"        try:\n",
"            download_for_date_and_bbox(slot, bbox, size)\n",
"            print(f\"  ✓ Tile {j}/{len(bbox_list)} downloaded\")\n",
"        except Exception as e:\n",
"            # keep going: one failed tile should not abort the whole date range\n",
"            print(f\"  ✗ Tile {j}/{len(bbox_list)} failed: {e}\")\n",
"\n",
"        time.sleep(0.2)  # throttle between tile requests\n",
"\n",
"    print()\n",
"\n",
"print(\"\\n✓ All downloads complete!\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "e4bec74c",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 9. Merge Tiles into Single Images"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "e9b270be",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(\"Merging tiles for each date...\\n\")\n",
"\n",
"# merged_files maps date string -> merged GeoTIFF path; it is consumed by\n",
"# the cloud-coverage analysis and quicklook cells below.\n",
"merged_files = {}\n",
"for slot in available_slots:\n",
"    print(f\"Merging {slot}...\")\n",
"    output_path = merge_tiles_for_date(slot)\n",
"    if output_path:\n",
"        merged_files[slot] = output_path\n",
"        print(f\"  ✓ Saved to: {output_path}\")\n",
"    else:\n",
"        # merge_tiles_for_date returns None when no tiles exist for the date\n",
"        print(f\"  ✗ Failed to merge\")\n",
"\n",
"print(f\"\\n✓ Successfully merged {len(merged_files)} images\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "ec3f1a6d",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 10. Analyze Cloud Coverage Using UDM1"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "9f4047e5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def analyze_cloud_coverage(tif_path):\n",
"    \"\"\"Return (cloud percentage, udm array) for a merged GeoTIFF.\n",
"\n",
"    Returns (None, None) when the file cannot be opened.\n",
"\n",
"    Band 5 is the Planet UDM1 mask. Because the evalscript omits\n",
"    sampleType, SentinelHub auto-scales outputs to 0-255 bytes, so an\n",
"    'unusable' UDM value of 1 is stored as 255 in the TIFF — we must\n",
"    test for nonzero, not == 1, or every image reads as 0% cloudy.\n",
"    \"\"\"\n",
"    ds = gdal.Open(tif_path)\n",
"    if ds is None:\n",
"        return None, None\n",
"\n",
"    # Band 5 is UDM1 (0 = clear, nonzero = cloudy/unusable)\n",
"    udm_band = ds.GetRasterBand(5).ReadAsArray()\n",
"\n",
"    total_pixels = udm_band.size\n",
"    cloudy_pixels = np.sum(udm_band > 0)  # nonzero handles the 0-255 byte scaling\n",
"    cloud_percentage = (cloudy_pixels / total_pixels) * 100\n",
"\n",
"    ds = None  # close the GDAL dataset\n",
"    return cloud_percentage, udm_band\n",
"\n",
"# Analyze all merged images and print a per-date summary table\n",
"cloud_stats = {}\n",
"print(\"Analyzing cloud coverage...\\n\")\n",
"print(f\"{'Date':<12} {'Cloud %':<10} {'Status'}\")\n",
"print(\"-\" * 40)\n",
"\n",
"for date, path in sorted(merged_files.items()):\n",
"    cloud_pct, _ = analyze_cloud_coverage(path)\n",
"    if cloud_pct is not None:\n",
"        cloud_stats[date] = cloud_pct\n",
"\n",
"        # Categorize for quick visual scanning of the table\n",
"        if cloud_pct < 5:\n",
"            status = \"☀️ Clear\"\n",
"        elif cloud_pct < 20:\n",
"            status = \"🌤️ Mostly clear\"\n",
"        elif cloud_pct < 50:\n",
"            status = \"⛅ Partly cloudy\"\n",
"        else:\n",
"            status = \"☁️ Very cloudy\"\n",
"\n",
"        print(f\"{date:<12} {cloud_pct:>6.2f}%   {status}\")\n",
"\n",
"print(f\"\\n✓ Analysis complete for {len(cloud_stats)} images\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "3d966858",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 11. Visualize Images with Cloud Coverage"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "f8b2b2fc",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def create_quicklook(tif_path, date, cloud_pct):\n",
"    \"\"\"Create a side-by-side RGB quicklook and UDM1 cloud-overlay figure.\n",
"\n",
"    Returns the matplotlib figure, or None if the file cannot be opened.\n",
"\n",
"    Pixel values are 0-255 bytes (the evalscript omits sampleType, so\n",
"    SentinelHub auto-scales 0-1 floats to bytes). RGB bands are therefore\n",
"    normalised by 255 before display — clipping raw bytes to [0, 1] would\n",
"    render an almost entirely white image. Likewise, UDM1 stores\n",
"    'unusable' as 255, so clouds are flagged with a nonzero test.\n",
"    \"\"\"\n",
"    ds = gdal.Open(tif_path)\n",
"    if ds is None:\n",
"        return None\n",
"\n",
"    # Read RGB bands (1=R, 2=G, 3=B) and the UDM1 mask (band 5)\n",
"    red = ds.GetRasterBand(1).ReadAsArray()\n",
"    green = ds.GetRasterBand(2).ReadAsArray()\n",
"    blue = ds.GetRasterBand(3).ReadAsArray()\n",
"    udm = ds.GetRasterBand(5).ReadAsArray()\n",
"\n",
"    # Normalise 0-255 byte values into the 0-1 range imshow expects\n",
"    rgb = np.dstack([red, green, blue]).astype(np.float32) / 255.0\n",
"    rgb = np.clip(rgb, 0, 1)\n",
"\n",
"    # Create figure\n",
"    fig, axes = plt.subplots(1, 2, figsize=(14, 6))\n",
"\n",
"    # RGB image\n",
"    axes[0].imshow(rgb)\n",
"    axes[0].set_title(f\"RGB - {date}\", fontsize=14, fontweight='bold')\n",
"    axes[0].axis('off')\n",
"\n",
"    # UDM1 mask overlay (clouds painted red)\n",
"    cloud_overlay = rgb.copy()\n",
"    cloud_overlay[udm > 0] = [1, 0, 0]  # nonzero handles the 0-255 byte scaling\n",
"    axes[1].imshow(cloud_overlay)\n",
"    axes[1].set_title(f\"Cloud Mask (UDM1) - {cloud_pct:.1f}% cloudy\", fontsize=14, fontweight='bold')\n",
"    axes[1].axis('off')\n",
"\n",
"    plt.tight_layout()\n",
"    ds = None  # close the GDAL dataset\n",
"    return fig\n",
"\n",
"# Display images sorted by cloud coverage (most cloudy first)\n",
"sorted_by_clouds = sorted(cloud_stats.items(), key=lambda x: x[1], reverse=True)\n",
"\n",
"print(\"Generating visualizations...\\n\")\n",
"for date, cloud_pct in sorted_by_clouds[:5]:  # Show top 5 cloudiest\n",
"    if date in merged_files:\n",
"        fig = create_quicklook(merged_files[date], date, cloud_pct)\n",
"        if fig:\n",
"            plt.show()\n",
"            plt.close()  # free figure memory inside the loop\n",
"\n",
"print(\"✓ Visualizations complete\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "94de1b4b",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 12. Select Candidate Images for OmniCloudMask Testing"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "4ae8c727",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Select images with moderate to high cloud coverage (20-70%)\n",
"# These are good candidates for testing cloud detection: enough clouds to\n",
"# exercise the model, but not so overcast that nothing else is visible.\n",
"test_candidates = [\n",
"    (date, cloud_pct, merged_files[date]) \n",
"    for date, cloud_pct in cloud_stats.items() \n",
"    if 20 <= cloud_pct <= 70\n",
"]\n",
"\n",
"# Rank cloudiest-first so the top candidate stresses the detector hardest\n",
"test_candidates.sort(key=lambda x: x[1], reverse=True)\n",
"\n",
"print(\"\\n\" + \"=\"*60)\n",
"print(\"RECOMMENDED IMAGES FOR OMNICLOUDMASK TESTING\")\n",
"print(\"=\"*60)\n",
"print(f\"\\n{'Rank':<6} {'Date':<12} {'Cloud %':<10} {'Path'}\")\n",
"print(\"-\" * 80)\n",
"\n",
"for i, (date, cloud_pct, path) in enumerate(test_candidates[:5], 1):\n",
"    print(f\"{i:<6} {date:<12} {cloud_pct:>6.2f}%   {path}\")\n",
"\n",
"if test_candidates:\n",
"    print(f\"\\n✓ Top candidate: {test_candidates[0][0]} ({test_candidates[0][1]:.1f}% cloudy)\")\n",
"    print(f\"  Path: {test_candidates[0][2]}\")\n",
"    print(\"\\n👉 Use this image in Step 2 (cloud_detection_step2_test_omnicloudmask.ipynb)\")\n",
"else:\n",
"    print(\"\\n⚠️ No suitable cloudy images found in this period.\")\n",
"    print(\"   Try extending the date range or select any available image.\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "ea103951",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 13. Export Summary"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "b5c78310",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Save summary to JSON so Step 2 can pick up results without re-running\n",
"# this notebook. All values below are str/int/float (paths are stored as\n",
"# strings), so the dict is directly JSON-serializable.\n",
"summary = {\n",
"    \"project\": project,\n",
"    \"date_range\": f\"{start_date} to {end_date}\",\n",
"    \"total_dates\": len(slots),\n",
"    \"available_dates\": len(available_slots),\n",
"    \"cloud_statistics\": cloud_stats,\n",
"    \"test_candidates\": [\n",
"        {\"date\": date, \"cloud_percentage\": cloud_pct, \"path\": path}\n",
"        for date, cloud_pct, path in test_candidates[:5]\n",
"    ],\n",
"    \"merged_files\": merged_files\n",
"}\n",
"\n",
"summary_path = BASE_PATH / 'cloud_detection_summary.json'\n",
"with open(summary_path, 'w') as f:\n",
"    json.dump(summary, f, indent=2)\n",
"\n",
"print(f\"✓ Summary saved to: {summary_path}\")\n",
"print(\"\\n\" + \"=\"*60)\n",
"print(\"NEXT STEP: Open cloud_detection_step2_test_omnicloudmask.ipynb\")\n",
"print(\"=\"*60)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "f6f6d142",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 14. Cleanup (Optional)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "88a775f8",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Optional cleanup: deletes intermediate files (single tiles and virtual\n",
"# rasters) while keeping the merged GeoTIFFs that Step 2 needs.\n",
"# Guarded by a flag so an accidental Run All cannot destroy downloads.\n",
"\n",
"cleanup = False # Set to True to enable cleanup\n",
"\n",
"if cleanup:\n",
"    # folder_for_merged_tifs is intentionally NOT in this list\n",
"    folders_to_clean = [BASE_PATH_SINGLE_IMAGES, folder_for_virtual_raster]\n",
"    \n",
"    for folder in folders_to_clean:\n",
"        if folder.exists():\n",
"            # remove the whole tree, then recreate an empty folder so later\n",
"            # re-runs of the download cells still have a target directory\n",
"            shutil.rmtree(folder)\n",
"            folder.mkdir()\n",
"            print(f\"✓ Cleaned: {folder}\")\n",
"    \n",
"    print(\"\\n✓ Cleanup complete - merged GeoTIFFs preserved\")\n",
"else:\n",
"    print(\"Cleanup disabled. Set cleanup=True to remove intermediate files.\")"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "base",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.12.3"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|