From 073f1567d96fae5f998780805bb63c87a17ee234 Mon Sep 17 00:00:00 2001
From: Timon <timon@resiliencebv.com>
Date: Mon, 16 Mar 2026 16:16:16 +0100
Subject: [PATCH] wip

---
 python_app/weather_api_comparison.py | 648 ++++++++++++++++++++++-----
 1 file changed, 527 insertions(+), 121 deletions(-)
diff --git a/python_app/weather_api_comparison.py b/python_app/weather_api_comparison.py
index d8cb238..821de8e 100644
--- a/python_app/weather_api_comparison.py
+++ b/python_app/weather_api_comparison.py
@@ -12,34 +12,42 @@ ARCHIVE providers (no API key required):
   4. NASA POWER               — completely independent source (0.50°, global)
 
 FORECAST providers (no API key required):
-  5. Open-Meteo Forecast      — ensemble of NWP models      (global)
-  6. YR.no LocationForecast   — Norwegian Met Institute     (~10 days, global)
+  5. Open-Meteo Forecast      — deterministic NWP (global)
+  6. Open-Meteo Ensemble      — ECMWF IFS 51-member ensemble; gives probability bands
+  7. YR.no LocationForecast   — Norwegian Met Institute (~10 days, global)
 
 FORECAST providers (API key required — set in CONFIG below, leave "" to skip):
-  7. OpenWeatherMap            — free tier, 1000 calls/day
-  8. WeatherAPI.com            — free tier
+  8. OpenWeatherMap            — free tier, 1000 calls/day
+  9. WeatherAPI.com            — free tier
 
 OUTPUT:
   Plots saved to: weather_comparison_plots/
-  Summary stats printed to console.
+    archive_<loc>.png              — daily lines + multi-source spread band + agreement signal
+    archive_rolling_<loc>.png      — 30-day rolling mean comparison (original style)
+    cumulative_<loc>.png           — cumulative annual precipitation
+    vs_era5_<loc>.png              — each provider vs ERA5 scatter (note: ERA5 is not ground truth)
+    pairwise_<loc>.png             — pairwise Pearson r and RMSE heatmaps (unbiased)
+    wetdry_agreement_<loc>.png     — % of days providers agree on wet vs dry
+    forecast_ensemble_<loc>.png    — ensemble uncertainty bands + exceedance probability
+    forecast_<loc>.png             — deterministic forecast bars (original style)
 
 Usage:
   python weather_api_comparison.py
 """
 
-import os
-import json
-import time
 import datetime
-import requests
-import pandas as pd
+import time
+
 import matplotlib
 matplotlib.use("Agg")
-import matplotlib.pyplot as plt
 import matplotlib.dates as mdates
+import matplotlib.pyplot as plt
 import numpy as np
+import pandas as pd
+import requests
 from pathlib import Path
 
+
 # ============================================================
 # CONFIG
 # ============================================================
@@ -74,7 +82,6 @@ def fetch_openmeteo_archive(lat, lon, start, end, model="era5"):
     """Open-Meteo ERA5 / ERA5-Land / CERRA archive.
     ERA5 is the default (no models param needed). ERA5-Land and CERRA use lowercase names.
     """
-    # ERA5 is the default model — adding models param with wrong casing causes 400
     model_suffix = "" if model == "era5" else f"&models={model}"
     url = (
         f"https://archive-api.open-meteo.com/v1/archive"
@@ -128,7 +135,6 @@ def fetch_nasa_power(lat, lon, start, end):
 
 def fetch_openmeteo_forecast(lat, lon, days=8):
     """Open-Meteo NWP forecast — default best_match model."""
-    end = datetime.date.today() + datetime.timedelta(days=days)
     url = (
         f"https://api.open-meteo.com/v1/forecast"
         f"?latitude={lat}&longitude={lon}"
@@ -147,8 +153,57 @@ def fetch_openmeteo_forecast(lat, lon, days=8):
     return df
 
 
+def fetch_openmeteo_ensemble_forecast(lat, lon, days=8):
+    """Open-Meteo ECMWF IFS ensemble forecast — 51 members at 0.4° resolution.
+
+    Returns a DataFrame with daily percentile bands and exceedance probabilities:
+      p10, p25, p50 (median), p75, p90  — mm/day
+      prob_gt_1mm, prob_gt_5mm          — % of members exceeding threshold
+      n_members                          — number of members in response
+    """
+    url = (
+        f"https://ensemble-api.open-meteo.com/v1/ensemble"
+        f"?latitude={lat}&longitude={lon}"
+        f"&daily=precipitation_sum"
+        f"&models=ecmwf_ifs04"
+        f"&forecast_days={days}"
+        f"&timezone=UTC"
+    )
+    r = requests.get(url, timeout=60)
+    r.raise_for_status()
+    body = r.json()
+    daily = body["daily"]
+    dates = pd.to_datetime(daily["time"])
+
+    # Member columns are named like "precipitation_sum_member01", etc.
+    member_keys = [k for k in daily.keys() if k.startswith("precipitation_sum")]
+    if not member_keys:
+        print("    ⚠ No member columns found in ensemble response")
+        return None
+
+    # Shape: (n_members, n_days)
+    members = np.array([daily[k] for k in member_keys], dtype=float)
+    members = np.where(members < 0, 0, members)  # clip negatives
+
+    df = pd.DataFrame({
+        "date":        dates,
+        "p10":         np.nanpercentile(members, 10, axis=0),
+        "p25":         np.nanpercentile(members, 25, axis=0),
+        "p50":         np.nanpercentile(members, 50, axis=0),
+        "p75":         np.nanpercentile(members, 75, axis=0),
+        "p90":         np.nanpercentile(members, 90, axis=0),
+        "mean":        np.nanmean(members, axis=0),
+        "prob_gt_1mm": np.mean(members > 1.0, axis=0) * 100,
+        "prob_gt_5mm": np.mean(members > 5.0, axis=0) * 100,
+        "n_members":   members.shape[0],
+    })
+    return df
+
+
 def fetch_yr_forecast(lat, lon):
-    """YR.no LocationForecast 2.0 — hourly precip aggregated to daily."""
+    """YR.no LocationForecast 2.0 — hourly precip aggregated to daily.
+    Note: forecast-only service; no historical archive is available.
+    """
     url = f"https://api.met.no/weatherapi/locationforecast/2.0/compact?lat={lat}&lon={lon}"
     headers = {"User-Agent": USER_AGENT}
     r = requests.get(url, headers=headers, timeout=30)
@@ -234,88 +289,209 @@ def compare_stats(df, ref_col, other_col):
             "Bias": round(bias,2), "r": round(r,3)}
 
 
+def compute_pairwise_stats(data_dict):
+    """Compute pairwise Pearson r and RMSE for all archive provider pairs.
+
+    Returns:
+        names       — list of provider names (same order as matrices)
+        r_matrix    — (n x n) Pearson r array, NaN where not computable
+        rmse_matrix — (n x n) RMSE array (mm/day), 0 on diagonal
+    """
+    providers = [(name, df) for name, df in data_dict.items()
+                 if df is not None and len(df) > 5]
+    names = [p[0] for p in providers]
+    n = len(names)
+
+    r_matrix    = np.full((n, n), np.nan)
+    rmse_matrix = np.full((n, n), np.nan)
+
+    for i in range(n):
+        r_matrix[i, i]    = 1.0
+        rmse_matrix[i, i] = 0.0
+        _, df_i = providers[i]
+        for j in range(i + 1, n):
+            _, df_j = providers[j]
+            merged = df_i.merge(df_j, on="date", suffixes=("_i", "_j"))
+            valid = merged[["rain_mm_i", "rain_mm_j"]].dropna()
+            if len(valid) < 5:
+                continue
+            r    = valid["rain_mm_i"].corr(valid["rain_mm_j"])
+            rmse = ((valid["rain_mm_i"] - valid["rain_mm_j"]) ** 2).mean() ** 0.5
+            r_matrix[i, j]    = r_matrix[j, i]    = round(r, 3)
+            rmse_matrix[i, j] = rmse_matrix[j, i] = round(rmse, 2)
+
+    return names, r_matrix, rmse_matrix
+
+
+def wetdry_agreement(data_dict, threshold=1.0):
+    """For each provider pair, compute % of days both-dry / both-wet / disagree.
+
+    A day is 'dry' if rain_mm < threshold (default 1 mm).
+    Returns a list of dicts: pair, both_dry, both_wet, disagree, n.
+    """
+    providers = [(name, df) for name, df in data_dict.items()
+                 if df is not None and len(df) > 5]
+    results = []
+    for i, (name_i, df_i) in enumerate(providers):
+        for j in range(i + 1, len(providers)):
+            name_j, df_j = providers[j]
+            merged = df_i.merge(df_j, on="date", suffixes=("_i", "_j"))
+            valid = merged[["rain_mm_i", "rain_mm_j"]].dropna()
+            if len(valid) < 5:
+                continue
+            dry_i = valid["rain_mm_i"] < threshold
+            dry_j = valid["rain_mm_j"] < threshold
+            both_dry = (dry_i  &  dry_j).mean() * 100
+            both_wet = (~dry_i & ~dry_j).mean() * 100
+            disagree = 100 - both_dry - both_wet
+            results.append({
+                "pair":      f"{name_i}\nvs\n{name_j}",
+                "both_dry":  round(both_dry, 1),
+                "both_wet":  round(both_wet, 1),
+                "disagree":  round(disagree, 1),
+                "n":         len(valid),
+            })
+    return results
+
+
 # ============================================================
-# PLOTTING
+# PLOTTING — ARCHIVE
 # ============================================================
 
-def plot_archive(data_dict, location_name, start, end, output_dir):
-    """Line plot of all archive providers for one location."""
-    fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)
+ARCHIVE_COLORS = {
+    "ERA5 (Open-Meteo)":      "#1f77b4",
+    "ERA5-Land (Open-Meteo)": "#ff7f0e",
+    "CERRA (Open-Meteo)":     "#2ca02c",
+    "NASA POWER":             "#d62728",
+}
 
-    colors = {
-        "ERA5 (Open-Meteo)":      "#1f77b4",
-        "ERA5-Land (Open-Meteo)": "#ff7f0e",
-        "CERRA (Open-Meteo)":     "#2ca02c",
-        "NASA POWER":             "#d62728",
-    }
 
-    # Top: daily raw
+def _build_spread_frame(data_dict):
+    """Merge all valid archive providers onto a common date axis.
+    Returns a DataFrame with one column per provider + _mean and _std columns.
+    """
+    valid = {name: df for name, df in data_dict.items()
+             if df is not None and len(df) > 0}
+    if not valid:
+        return None, []
+
+    merged = None
+    for name, df in valid.items():
+        tmp = df[["date", "rain_mm"]].rename(columns={"rain_mm": name})
+        merged = tmp if merged is None else merged.merge(tmp, on="date", how="outer")
+
+    cols = list(valid.keys())
+    merged[cols]   = merged[cols].clip(lower=0)
+    merged["_mean"] = merged[cols].mean(axis=1)
+    merged["_std"]  = merged[cols].std(axis=1)
+    return merged.sort_values("date").reset_index(drop=True), cols
+
+
+def plot_archive_with_spread(data_dict, location_name, start, end, output_dir):
+    """Three-panel archive plot:
+      Top:    Individual provider lines + multi-source mean±std shading
+      Middle: 30-day rolling mean
+      Bottom: Inter-source std (agreement signal — used to flag uncertain periods)
+    """
+    spread, _ = _build_spread_frame(data_dict)
+    if spread is None:
+        return
+
+    valid_providers = {name: df for name, df in data_dict.items()
+                       if df is not None and len(df) > 0}
+
+    fig, axes = plt.subplots(3, 1, figsize=(14, 12), sharex=True)
+
+    # --- Top: daily raw + spread band ---
     ax1 = axes[0]
-    for name, df in data_dict.items():
-        if df is not None and len(df) > 0:
-            ax1.plot(df["date"], df["rain_mm"], label=name,
-                     color=colors.get(name), linewidth=0.8, alpha=0.85)
+    ax1.fill_between(
+        spread["date"],
+        (spread["_mean"] - spread["_std"]).clip(lower=0),
+        spread["_mean"] + spread["_std"],
+        alpha=0.15, color="gray", label="Multi-source ±1 std"
+    )
+    ax1.plot(spread["date"], spread["_mean"], color="black", linewidth=1.0,
+             linestyle="--", alpha=0.6, label="Multi-source mean", zorder=5)
+    for name, df in valid_providers.items():
+        ax1.plot(df["date"], df["rain_mm"],
+                 label=name, color=ARCHIVE_COLORS.get(name, "gray"),
+                 linewidth=0.7, alpha=0.85)
     ax1.set_ylabel("Precipitation (mm/day)")
-    ax1.set_title(f"{location_name} — Daily Precipitation (archive {start} → {end})")
-    ax1.legend(fontsize=8)
+    ax1.set_title(
+        f"{location_name} — Daily Precipitation with Multi-Source Spread\n"
+        f"{start} → {end}  |  Grey band = ±1 std across all providers"
+    )
+    ax1.legend(fontsize=8, ncol=2)
     ax1.grid(True, alpha=0.3)
 
-    # Bottom: 30-day rolling mean
+    # --- Middle: 30-day rolling mean ---
     ax2 = axes[1]
-    for name, df in data_dict.items():
-        if df is not None and len(df) > 0:
-            rolled = df.set_index("date")["rain_mm"].rolling(30, min_periods=15).mean()
-            ax2.plot(rolled.index, rolled.values, label=name,
-                     color=colors.get(name), linewidth=1.5)
+    roll_mean = spread.set_index("date")["_mean"].rolling(30, min_periods=15).mean()
+    roll_std  = spread.set_index("date")["_std"].rolling(30, min_periods=15).mean()
+    ax2.fill_between(
+        roll_mean.index,
+        (roll_mean - roll_std).clip(lower=0),
+        roll_mean + roll_std,
+        alpha=0.15, color="gray"
+    )
+    for name, df in valid_providers.items():
+        rolled = df.set_index("date")["rain_mm"].rolling(30, min_periods=15).mean()
+        ax2.plot(rolled.index, rolled.values,
+                 label=name, color=ARCHIVE_COLORS.get(name, "gray"), linewidth=1.5)
     ax2.set_ylabel("30-day rolling mean (mm/day)")
-    ax2.set_xlabel("Date")
     ax2.legend(fontsize=8)
     ax2.grid(True, alpha=0.3)
-    ax2.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
-    fig.autofmt_xdate()
 
+    # --- Bottom: inter-source std (agreement signal) ---
+    ax3 = axes[2]
+    ax3.fill_between(
+        spread["date"], 0, spread["_std"],
+        color="purple", alpha=0.35,
+        label="Std across providers (higher = less agreement)"
+    )
+    median_std = spread["_std"].median()
+    ax3.axhline(median_std, color="purple", linestyle=":", linewidth=1,
+                label=f"Median std = {median_std:.2f} mm/day")
+    ax3.set_ylabel("Std dev across\nproviders (mm)")
+    ax3.set_xlabel("Date")
+    ax3.legend(fontsize=8)
+    ax3.grid(True, alpha=0.3)
+    ax3.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
+
+    fig.autofmt_xdate()
     plt.tight_layout()
     path = output_dir / f"archive_{location_name}.png"
-    plt.savefig(path, dpi=150)
+    plt.savefig(path, dpi=150, bbox_inches="tight")
     plt.close()
     print(f"  Saved: {path}")
 
 
-def plot_forecast(data_dict, location_name, output_dir):
-    """Bar chart comparing 7-day forecasts across providers."""
-    fig, ax = plt.subplots(figsize=(12, 5))
+def plot_cumulative(data_dict, location_name, output_dir):
+    """Cumulative annual precipitation — most relevant for crop/irrigation context."""
+    fig, ax = plt.subplots(figsize=(14, 5))
 
-    providers = [(name, df) for name, df in data_dict.items() if df is not None and len(df) > 0]
-    n = len(providers)
-    if n == 0:
-        plt.close()
-        return
+    for name, df in data_dict.items():
+        if df is None or len(df) == 0:
+            continue
+        s = df.set_index("date")["rain_mm"].sort_index().cumsum()
+        total = s.iloc[-1]
+        ax.plot(s.index, s.values,
+                label=f"{name}  (total: {total:.0f} mm)",
+                color=ARCHIVE_COLORS.get(name, "gray"), linewidth=1.8)
 
-    # Collect all forecast dates
-    all_dates = sorted(set(
-        d for _, df in providers
-        for d in df["date"].dt.date.tolist()
-    ))
-    x = np.arange(len(all_dates))
-    width = 0.8 / n
-
-    cmap = matplotlib.colormaps["tab10"].resampled(n)
-    for i, (name, df) in enumerate(providers):
-        vals = []
-        date_map = dict(zip(df["date"].dt.date, df["rain_mm"]))
-        for d in all_dates:
-            vals.append(date_map.get(d, 0.0))
-        ax.bar(x + i * width, vals, width, label=name, color=cmap(i), alpha=0.85)
-
-    ax.set_xticks(x + width * (n - 1) / 2)
-    ax.set_xticklabels([d.strftime("%d %b") for d in all_dates], rotation=45, ha="right")
-    ax.set_ylabel("Precipitation (mm/day)")
-    ax.set_title(f"{location_name} — 7-Day Forecast Comparison")
+    ax.set_ylabel("Cumulative precipitation (mm)")
+    ax.set_xlabel("Date")
+    ax.set_title(
+        f"{location_name} — Cumulative Annual Precipitation by Provider\n"
+        "Divergence = sources disagree on total seasonal rainfall"
+    )
     ax.legend(fontsize=9)
-    ax.grid(True, axis="y", alpha=0.3)
+    ax.grid(True, alpha=0.3)
+    ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
+    fig.autofmt_xdate()
     plt.tight_layout()
 
-    path = output_dir / f"forecast_{location_name}.png"
+    path = output_dir / f"cumulative_{location_name}.png"
     plt.savefig(path, dpi=150)
     plt.close()
     print(f"  Saved: {path}")
@@ -324,6 +500,10 @@ def plot_forecast(data_dict, location_name, output_dir):
 def plot_vs_era5(data_dict, location_name, output_dir):
     """Each provider vs ERA5 reference: scatter + regression line.
 
+    NOTE: ERA5 is used as reference here for visual comparison only.
+    It is a reanalysis product, not a ground-truth measurement.
+    Use the pairwise heatmap (plot_pairwise_heatmap) for an unbiased comparison.
+
     How to read:
       - Each panel shows one provider (y-axis) vs ERA5 (x-axis) for daily precip.
       - Points on the red diagonal = perfect agreement.
@@ -344,26 +524,18 @@ def plot_vs_era5(data_dict, location_name, output_dir):
     n = len(others)
     fig, axes = plt.subplots(1, n, figsize=(5 * n, 5), squeeze=False)
 
-    colors = {
-        "ERA5-Land (Open-Meteo)": "#ff7f0e",
-        "CERRA (Open-Meteo)":     "#2ca02c",
-        "NASA POWER":             "#d62728",
-    }
-
     for i, (name, df) in enumerate(others):
         ax = axes[0][i]
         merged = ref_df.merge(df, on="date", suffixes=("_ref", "_cmp"))
         valid  = merged[["rain_mm_ref", "rain_mm_cmp"]].dropna()
 
-        color = colors.get(name, "steelblue")
+        color = ARCHIVE_COLORS.get(name, "steelblue")
         ax.scatter(valid["rain_mm_ref"], valid["rain_mm_cmp"],
                    s=4, alpha=0.35, color=color)
 
-        # Perfect-agreement diagonal
         lim = max(valid.max().max(), 1) * 1.05
         ax.plot([0, lim], [0, lim], "r--", linewidth=1, label="Perfect agreement")
 
-        # Linear regression line
         if len(valid) > 5:
             coeffs = np.polyfit(valid["rain_mm_ref"], valid["rain_mm_cmp"], 1)
             x_fit  = np.linspace(0, lim, 100)
@@ -382,8 +554,8 @@ def plot_vs_era5(data_dict, location_name, output_dir):
         ax.grid(True, alpha=0.3)
 
     fig.suptitle(
-        f"{location_name} — Daily Precip vs ERA5 Reference\n"
-        "Red dashed = perfect agreement. Points above line = provider wetter than ERA5.",
+        f"{location_name} — Daily Precip vs ERA5 (reference only — ERA5 is not ground truth)\n"
+        "Red dashed = perfect agreement. See pairwise heatmap for unbiased comparison.",
         fontsize=10
     )
     plt.tight_layout()
@@ -393,38 +565,262 @@ def plot_vs_era5(data_dict, location_name, output_dir):
     print(f"  Saved: {path}")
 
 
-def plot_cumulative(data_dict, location_name, output_dir):
-    """Cumulative annual precipitation — most relevant for crop/irrigation context."""
-    fig, ax = plt.subplots(figsize=(14, 5))
+def plot_pairwise_heatmap(data_dict, location_name, output_dir):
+    """Two heatmaps side by side: pairwise Pearson r and pairwise RMSE.
 
-    colors = {
-        "ERA5 (Open-Meteo)":      "#1f77b4",
-        "ERA5-Land (Open-Meteo)": "#ff7f0e",
-        "CERRA (Open-Meteo)":     "#2ca02c",
-        "NASA POWER":             "#d62728",
-    }
+    No provider is treated as reference — all pairs compared equally.
+    High r + low RMSE = sources agree. Where they diverge, neither is ground truth.
+    """
+    names, r_matrix, rmse_matrix = compute_pairwise_stats(data_dict)
+    if len(names) < 2:
+        return
 
-    for name, df in data_dict.items():
-        if df is None or len(df) == 0:
-            continue
-        s = df.set_index("date")["rain_mm"].sort_index().cumsum()
-        total = s.iloc[-1]
-        ax.plot(s.index, s.values, label=f"{name}  (total: {total:.0f} mm)",
-                color=colors.get(name), linewidth=1.8)
+    n = len(names)
+    short = [nm.replace(" (Open-Meteo)", "\n(OM)") for nm in names]
 
-    ax.set_ylabel("Cumulative precipitation (mm)")
-    ax.set_xlabel("Date")
-    ax.set_title(
-        f"{location_name} — Cumulative Annual Precipitation by Provider\n"
-        "Divergence = sources disagree on total seasonal rainfall"
+    fig, axes = plt.subplots(1, 2, figsize=(max(10, n * 2.5), max(5, n * 1.5 + 1)))
+
+    # Pearson r
+    ax1 = axes[0]
+    im1 = ax1.imshow(r_matrix, vmin=0, vmax=1, cmap="YlGn", aspect="auto")
+    plt.colorbar(im1, ax=ax1, label="Pearson r")
+    ax1.set_xticks(range(n)); ax1.set_yticks(range(n))
+    ax1.set_xticklabels(short, fontsize=8, rotation=45, ha="right")
+    ax1.set_yticklabels(short, fontsize=8)
+    for i in range(n):
+        for j in range(n):
+            val = r_matrix[i, j]
+            if not np.isnan(val):
+                text_color = "white" if val > 0.8 else "black"
+                ax1.text(j, i, f"{val:.2f}", ha="center", va="center",
+                         fontsize=9, color=text_color)
+    ax1.set_title("Pairwise Pearson r\n(1.0 = perfect agreement, no reference)")
+
+    # RMSE
+    ax2 = axes[1]
+    off_diag = rmse_matrix[rmse_matrix > 0]
+    vmax_rmse = float(np.nanmax(off_diag)) if len(off_diag) > 0 else 10
+    im2 = ax2.imshow(rmse_matrix, vmin=0, vmax=vmax_rmse, cmap="YlOrRd_r", aspect="auto")
+    plt.colorbar(im2, ax=ax2, label="RMSE (mm/day)")
+    ax2.set_xticks(range(n)); ax2.set_yticks(range(n))
+    ax2.set_xticklabels(short, fontsize=8, rotation=45, ha="right")
+    ax2.set_yticklabels(short, fontsize=8)
+    for i in range(n):
+        for j in range(n):
+            val = rmse_matrix[i, j]
+            if not np.isnan(val):
+                ax2.text(j, i, f"{val:.1f}", ha="center", va="center", fontsize=9)
+    ax2.set_title("Pairwise RMSE (mm/day)\n(0 = perfect agreement)")
+
+    fig.suptitle(
+        f"{location_name} — Archive Provider Agreement (unbiased — no single reference)\n"
+        "Pairs with high r + low RMSE are consistent. Divergent pairs reveal dataset uncertainty.",
+        fontsize=10
     )
-    ax.legend(fontsize=9)
-    ax.grid(True, alpha=0.3)
-    ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
+    plt.tight_layout()
+    path = output_dir / f"pairwise_{location_name}.png"
+    plt.savefig(path, dpi=150, bbox_inches="tight")
+    plt.close()
+    print(f"  Saved: {path}")
+
+
+def plot_wetdry_agreement(data_dict, location_name, output_dir, threshold=1.0):
+    """Stacked bar chart: for each provider pair, % of days both-dry / both-wet / disagree.
+
+    High 'disagree' % means one source says rain, the other says dry on the same day.
+    This is the most practically relevant divergence for SmartCane crop monitoring.
+    """
+    results = wetdry_agreement(data_dict, threshold)
+    if not results:
+        return
+
+    pairs    = [r["pair"]     for r in results]
+    both_dry = [r["both_dry"] for r in results]
+    both_wet = [r["both_wet"] for r in results]
+    disagree = [r["disagree"] for r in results]
+
+    fig, ax = plt.subplots(figsize=(max(8, len(pairs) * 2.8), 6))
+    x = np.arange(len(pairs))
+
+    ax.bar(x, both_dry, label=f"Both dry (<{threshold} mm)", color="#a8d5e2", edgecolor="white")
+    ax.bar(x, both_wet, bottom=both_dry,
+           label=f"Both wet (≥{threshold} mm)", color="#3a7ebf", edgecolor="white")
+    bottom2 = [d + w for d, w in zip(both_dry, both_wet)]
+    ax.bar(x, disagree, bottom=bottom2,
+           label="Disagree (one wet, one dry)", color="#e07b54", edgecolor="white")
+
+    # Add % labels inside bars
+    for i, r in enumerate(results):
+        if r["both_dry"] > 4:
+            ax.text(i, r["both_dry"] / 2, f"{r['both_dry']:.0f}%",
+                    ha="center", va="center", fontsize=7, color="black")
+        if r["both_wet"] > 4:
+            ax.text(i, r["both_dry"] + r["both_wet"] / 2, f"{r['both_wet']:.0f}%",
+                    ha="center", va="center", fontsize=7, color="white")
+        if r["disagree"] > 4:
+            ax.text(i, bottom2[i] + r["disagree"] / 2, f"{r['disagree']:.0f}%",
+                    ha="center", va="center", fontsize=7, color="white")
+
+    ax.set_xticks(x)
+    ax.set_xticklabels(pairs, fontsize=8)
+    ax.set_ylim(0, 108)
+    ax.set_ylabel("% of days in archive period")
+    ax.set_title(
+        f"{location_name} — Provider Agreement on Wet vs Dry Days\n"
+        f"Threshold = {threshold} mm/day  |  Orange = providers disagree on whether it rained"
+    )
+    ax.legend(loc="upper right", fontsize=9)
+    ax.grid(True, axis="y", alpha=0.3)
+    plt.tight_layout()
+
+    path = output_dir / f"wetdry_agreement_{location_name}.png"
+    plt.savefig(path, dpi=150, bbox_inches="tight")
+    plt.close()
+    print(f"  Saved: {path}")
+
+
+# ============================================================
+# PLOTTING — FORECAST
+# ============================================================
+
+def plot_forecast_with_ensemble(forecast_data, ensemble_df, location_name, output_dir):
+    """Two-panel forecast comparison:
+      Top:    Deterministic bars (YR.no, Open-Meteo) + ECMWF ensemble percentile shading
+      Bottom: Exceedance probabilities P(rain > 1 mm) and P(rain > 5 mm)
+
+    Reading guide:
+      - Dark blue shading = where 50% of ensemble members agree (25th–75th %ile)
+      - Light blue shading = where 80% of ensemble members agree (10th–90th %ile)
+      - Bars = deterministic (single-value) forecast from each provider
+      - Bottom panel: at 50% on the dashed line = coin-flip uncertainty about rain event
+    """
+    has_ensemble = ensemble_df is not None and len(ensemble_df) > 0
+    det_providers = [(name, df) for name, df in forecast_data.items()
+                     if df is not None and len(df) > 0]
+
+    if not has_ensemble and not det_providers:
+        return
+
+    fig, axes = plt.subplots(
+        2, 1, figsize=(12, 8), sharex=True,
+        gridspec_kw={"height_ratios": [3, 1.5]}
+    )
+
+    # ---- TOP: deterministic bars + ensemble shading ----
+    ax1 = axes[0]
+
+    if has_ensemble:
+        ax1.fill_between(
+            ensemble_df["date"], ensemble_df["p10"], ensemble_df["p90"],
+            alpha=0.12, color="steelblue", label="Ensemble 10th–90th %ile"
+        )
+        ax1.fill_between(
+            ensemble_df["date"], ensemble_df["p25"], ensemble_df["p75"],
+            alpha=0.28, color="steelblue", label="Ensemble 25th–75th %ile"
+        )
+        ax1.plot(
+            ensemble_df["date"], ensemble_df["p50"],
+            color="steelblue", linewidth=1.8, linestyle="-",
+            label=f"Ensemble median ({int(ensemble_df['n_members'].iloc[0])} members)",
+            zorder=5
+        )
+
+    det_colors = {"Open-Meteo Forecast": "#1f77b4", "YR.no": "#e8882a"}
+    bar_half   = datetime.timedelta(hours=10)  # offset so bars don't overlap
+    n_det = len(det_providers)
+    offsets = np.linspace(-bar_half.total_seconds() / 3600 * (n_det - 1) / 2,
+                           bar_half.total_seconds() / 3600 * (n_det - 1) / 2,
+                           n_det)
+    bar_width_days = 0.3
+
+    for k, (name, df) in enumerate(det_providers):
+        offset_td = datetime.timedelta(hours=offsets[k])
+        shifted_dates = df["date"] + offset_td
+        ax1.bar(
+            shifted_dates, df["rain_mm"],
+            width=bar_width_days,
+            label=name,
+            color=det_colors.get(name, f"C{k}"),
+            alpha=0.75,
+            zorder=4,
+        )
+
+    ax1.set_ylabel("Precipitation (mm/day)")
+    ax1.set_title(
+        f"{location_name} — 7-Day Forecast\n"
+        "Shading = Open-Meteo ECMWF ensemble spread  |  Bars = deterministic forecasts"
+    )
+    ax1.legend(fontsize=8, loc="upper right")
+    ax1.grid(True, axis="y", alpha=0.3)
+    ax1.set_ylim(bottom=0)
+
+    # ---- BOTTOM: exceedance probabilities ----
+    ax2 = axes[1]
+    if has_ensemble:
+        ax2.step(
+            ensemble_df["date"], ensemble_df["prob_gt_1mm"],
+            where="mid", color="steelblue", linewidth=2.0,
+            label="P(rain > 1 mm)"
+        )
+        ax2.step(
+            ensemble_df["date"], ensemble_df["prob_gt_5mm"],
+            where="mid", color="darkblue", linewidth=2.0, linestyle="--",
+            label="P(rain > 5 mm)"
+        )
+        ax2.fill_between(
+            ensemble_df["date"], 0, ensemble_df["prob_gt_5mm"],
+            step="mid", alpha=0.18, color="darkblue"
+        )
+        ax2.axhline(50, color="gray", linestyle=":", linewidth=0.9, alpha=0.8,
+                    label="50% (coin-flip)")
+        ax2.set_ylim(0, 108)
+        ax2.set_ylabel("Exceedance\nprobability (%)")
+        ax2.legend(fontsize=8, loc="upper right")
+        ax2.grid(True, alpha=0.3)
+    ax2.set_xlabel("Date")
+    ax2.xaxis.set_major_formatter(mdates.DateFormatter("%d %b"))
+
     fig.autofmt_xdate()
     plt.tight_layout()
 
-    path = output_dir / f"cumulative_{location_name}.png"
+    path = output_dir / f"forecast_ensemble_{location_name}.png"
+    plt.savefig(path, dpi=150, bbox_inches="tight")
+    plt.close()
+    print(f"  Saved: {path}")
+
+
+def plot_forecast(data_dict, location_name, output_dir):
+    """Bar chart comparing deterministic 7-day forecasts (original style, kept for reference)."""
+    _, ax = plt.subplots(figsize=(12, 5))
+
+    providers = [(name, df) for name, df in data_dict.items() if df is not None and len(df) > 0]
+    n = len(providers)
+    if n == 0:
+        plt.close()
+        return
+
+    all_dates = sorted(set(
+        d for _, df in providers
+        for d in df["date"].dt.date.tolist()
+    ))
+    x = np.arange(len(all_dates))
+    width = 0.8 / n
+
+    cmap = matplotlib.colormaps["tab10"].resampled(n)
+    for i, (name, df) in enumerate(providers):
+        date_map = dict(zip(df["date"].dt.date, df["rain_mm"]))
+        vals = [date_map.get(d, 0.0) for d in all_dates]
+        ax.bar(x + i * width, vals, width, label=name, color=cmap(i), alpha=0.85)
+
+    ax.set_xticks(x + width * (n - 1) / 2)
+    ax.set_xticklabels([d.strftime("%d %b") for d in all_dates], rotation=45, ha="right")
+    ax.set_ylabel("Precipitation (mm/day)")
+    ax.set_title(f"{location_name} — 7-Day Deterministic Forecast Comparison")
+    ax.legend(fontsize=9)
+    ax.grid(True, axis="y", alpha=0.3)
+    plt.tight_layout()
+
+    path = output_dir / f"forecast_{location_name}.png"
     plt.savefig(path, dpi=150)
     plt.close()
     print(f"  Saved: {path}")
@@ -489,28 +885,26 @@ def run_location(loc_name, lat, lon):
         print(f"    ✗ NASA POWER failed: {e}")
         archive_data["NASA POWER"] = None
 
-    # Stats vs ERA5 reference
-    print("\n  Stats vs ERA5 (Open-Meteo) reference:")
-    ref_df = archive_data.get("ERA5 (Open-Meteo)")
-    for name, df in archive_data.items():
-        if name == "ERA5 (Open-Meteo)" or df is None:
-            continue
-        if ref_df is None:
-            continue
-        merged = ref_df.merge(df, on="date", suffixes=("_ref", "_cmp"))
-        stats = compare_stats(merged, "rain_mm_ref", "rain_mm_cmp")
-        print(f"    {name:30s}  n={stats['n']:4d}  MAE={stats['MAE']}  "
-              f"RMSE={stats['RMSE']}  Bias={stats['Bias']}  r={stats['r']}")
+    # Pairwise stats (no ERA5 reference bias)
+    print("\n  Pairwise archive stats (Pearson r | RMSE mm/day | Bias mm/day):")
+    names, r_mat, rmse_mat = compute_pairwise_stats(archive_data)
+    for i in range(len(names)):
+        for j in range(i + 1, len(names)):
+            if not np.isnan(r_mat[i, j]):
+                print(f"    {names[i]:30s} vs {names[j]:30s}  "
+                      f"r={r_mat[i,j]:.3f}  RMSE={rmse_mat[i,j]:.2f} mm")
 
-    plot_archive(archive_data, loc_name, ARCHIVE_START, ARCHIVE_END, OUTPUT_DIR)
+    plot_archive_with_spread(archive_data, loc_name, ARCHIVE_START, ARCHIVE_END, OUTPUT_DIR)
     plot_cumulative(archive_data, loc_name, OUTPUT_DIR)
     plot_vs_era5(archive_data, loc_name, OUTPUT_DIR)
+    plot_pairwise_heatmap(archive_data, loc_name, OUTPUT_DIR)
+    plot_wetdry_agreement(archive_data, loc_name, OUTPUT_DIR)
 
     # ---- FORECAST ----
     print("\n[Forecast]")
     forecast_data = {}
 
-    print("  Fetching Open-Meteo forecast...")
+    print("  Fetching Open-Meteo deterministic forecast...")
     try:
         forecast_data["Open-Meteo Forecast"] = fetch_openmeteo_forecast(lat, lon)
         print(f"    → {len(forecast_data['Open-Meteo Forecast'])} days")
@@ -518,6 +912,17 @@ def run_location(loc_name, lat, lon):
         print(f"    ✗ Open-Meteo forecast failed: {e}")
         forecast_data["Open-Meteo Forecast"] = None
 
+    time.sleep(0.5)
+    print("  Fetching Open-Meteo ECMWF ensemble forecast...")
+    ensemble_df = None
+    try:
+        ensemble_df = fetch_openmeteo_ensemble_forecast(lat, lon)
+        if ensemble_df is not None:
+            print(f"    → {len(ensemble_df)} days  "
+                  f"({int(ensemble_df['n_members'].iloc[0])} ensemble members)")
+    except Exception as e:
+        print(f"    ✗ Open-Meteo ensemble failed: {e}")
+
     time.sleep(0.5)
     print("  Fetching YR.no LocationForecast...")
     try:
@@ -551,6 +956,7 @@ def run_location(loc_name, lat, lon):
             print(f"    ✗ WeatherAPI.com failed: {e}")
             forecast_data["WeatherAPI.com"] = None
 
+    plot_forecast_with_ensemble(forecast_data, ensemble_df, loc_name, OUTPUT_DIR)
     plot_forecast(forecast_data, loc_name, OUTPUT_DIR)