This commit is contained in:
Timon 2026-03-16 16:16:16 +01:00
parent 1da5c0d0a7
commit 073f1567d9

View file

@ -12,34 +12,42 @@ ARCHIVE providers (no API key required):
4. NASA POWER completely independent source (0.50°, global)
FORECAST providers (no API key required):
5. Open-Meteo Forecast ensemble of NWP models (global)
6. YR.no LocationForecast Norwegian Met Institute (~10 days, global)
5. Open-Meteo Forecast deterministic NWP (global)
6. Open-Meteo Ensemble ECMWF IFS 51-member ensemble; gives probability bands
7. YR.no LocationForecast Norwegian Met Institute (~10 days, global)
FORECAST providers (API key required set in CONFIG below, leave "" to skip):
7. OpenWeatherMap free tier, 1000 calls/day
8. WeatherAPI.com free tier
8. OpenWeatherMap free tier, 1000 calls/day
9. WeatherAPI.com free tier
OUTPUT:
Plots saved to: weather_comparison_plots/
Summary stats printed to console.
archive_<loc>.png daily lines + multi-source spread band + agreement signal
archive_rolling_<loc>.png 30-day rolling mean comparison (original style)
cumulative_<loc>.png cumulative annual precipitation
vs_era5_<loc>.png each provider vs ERA5 scatter (note: ERA5 is not ground truth)
pairwise_<loc>.png pairwise Pearson r and RMSE heatmaps (unbiased)
wetdry_agreement_<loc>.png % of days providers agree on wet vs dry
forecast_ensemble_<loc>.png ensemble uncertainty bands + exceedance probability
forecast_<loc>.png deterministic forecast bars (original style)
Usage:
python weather_api_comparison.py
"""
import os
import json
import time
import datetime
import requests
import pandas as pd
import time
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from pathlib import Path
# ============================================================
# CONFIG
# ============================================================
@ -74,7 +82,6 @@ def fetch_openmeteo_archive(lat, lon, start, end, model="era5"):
"""Open-Meteo ERA5 / ERA5-Land / CERRA archive.
ERA5 is the default (no models param needed). ERA5-Land and CERRA use lowercase names.
"""
# ERA5 is the default model — adding models param with wrong casing causes 400
model_suffix = "" if model == "era5" else f"&models={model}"
url = (
f"https://archive-api.open-meteo.com/v1/archive"
@ -128,7 +135,6 @@ def fetch_nasa_power(lat, lon, start, end):
def fetch_openmeteo_forecast(lat, lon, days=8):
"""Open-Meteo NWP forecast — default best_match model."""
end = datetime.date.today() + datetime.timedelta(days=days)
url = (
f"https://api.open-meteo.com/v1/forecast"
f"?latitude={lat}&longitude={lon}"
@ -147,8 +153,57 @@ def fetch_openmeteo_forecast(lat, lon, days=8):
return df
def fetch_openmeteo_ensemble_forecast(lat, lon, days=8):
"""Open-Meteo ECMWF IFS ensemble forecast — 51 members at 0.4° resolution.
Returns a DataFrame with daily percentile bands and exceedance probabilities:
p10, p25, p50 (median), p75, p90 mm/day
prob_gt_1mm, prob_gt_5mm % of members exceeding threshold
n_members number of members in response
"""
url = (
f"https://ensemble-api.open-meteo.com/v1/ensemble"
f"?latitude={lat}&longitude={lon}"
f"&daily=precipitation_sum"
f"&models=ecmwf_ifs04"
f"&forecast_days={days}"
f"&timezone=UTC"
)
r = requests.get(url, timeout=60)
r.raise_for_status()
body = r.json()
daily = body["daily"]
dates = pd.to_datetime(daily["time"])
# Member columns are named like "precipitation_sum_member01", etc.
member_keys = [k for k in daily.keys() if k.startswith("precipitation_sum")]
if not member_keys:
print(" ⚠ No member columns found in ensemble response")
return None
# Shape: (n_members, n_days)
members = np.array([daily[k] for k in member_keys], dtype=float)
members = np.where(members < 0, 0, members) # clip negatives
df = pd.DataFrame({
"date": dates,
"p10": np.nanpercentile(members, 10, axis=0),
"p25": np.nanpercentile(members, 25, axis=0),
"p50": np.nanpercentile(members, 50, axis=0),
"p75": np.nanpercentile(members, 75, axis=0),
"p90": np.nanpercentile(members, 90, axis=0),
"mean": np.nanmean(members, axis=0),
"prob_gt_1mm": np.mean(members > 1.0, axis=0) * 100,
"prob_gt_5mm": np.mean(members > 5.0, axis=0) * 100,
"n_members": members.shape[0],
})
return df
def fetch_yr_forecast(lat, lon):
"""YR.no LocationForecast 2.0 — hourly precip aggregated to daily."""
"""YR.no LocationForecast 2.0 — hourly precip aggregated to daily.
Note: forecast-only service; no historical archive is available.
"""
url = f"https://api.met.no/weatherapi/locationforecast/2.0/compact?lat={lat}&lon={lon}"
headers = {"User-Agent": USER_AGENT}
r = requests.get(url, headers=headers, timeout=30)
@ -234,88 +289,209 @@ def compare_stats(df, ref_col, other_col):
"Bias": round(bias,2), "r": round(r,3)}
def compute_pairwise_stats(data_dict):
"""Compute pairwise Pearson r and RMSE for all archive provider pairs.
Returns:
names list of provider names (same order as matrices)
r_matrix (n x n) Pearson r array, NaN where not computable
rmse_matrix (n x n) RMSE array (mm/day), 0 on diagonal
"""
providers = [(name, df) for name, df in data_dict.items()
if df is not None and len(df) > 5]
names = [p[0] for p in providers]
n = len(names)
r_matrix = np.full((n, n), np.nan)
rmse_matrix = np.full((n, n), np.nan)
for i in range(n):
r_matrix[i, i] = 1.0
rmse_matrix[i, i] = 0.0
_, df_i = providers[i]
for j in range(i + 1, n):
_, df_j = providers[j]
merged = df_i.merge(df_j, on="date", suffixes=("_i", "_j"))
valid = merged[["rain_mm_i", "rain_mm_j"]].dropna()
if len(valid) < 5:
continue
r = valid["rain_mm_i"].corr(valid["rain_mm_j"])
rmse = ((valid["rain_mm_i"] - valid["rain_mm_j"]) ** 2).mean() ** 0.5
r_matrix[i, j] = r_matrix[j, i] = round(r, 3)
rmse_matrix[i, j] = rmse_matrix[j, i] = round(rmse, 2)
return names, r_matrix, rmse_matrix
def wetdry_agreement(data_dict, threshold=1.0):
"""For each provider pair, compute % of days both-dry / both-wet / disagree.
A day is 'dry' if rain_mm < threshold (default 1 mm).
Returns a list of dicts: pair, both_dry, both_wet, disagree, n.
"""
providers = [(name, df) for name, df in data_dict.items()
if df is not None and len(df) > 5]
results = []
for i, (name_i, df_i) in enumerate(providers):
for j in range(i + 1, len(providers)):
name_j, df_j = providers[j]
merged = df_i.merge(df_j, on="date", suffixes=("_i", "_j"))
valid = merged[["rain_mm_i", "rain_mm_j"]].dropna()
if len(valid) < 5:
continue
dry_i = valid["rain_mm_i"] < threshold
dry_j = valid["rain_mm_j"] < threshold
both_dry = (dry_i & dry_j).mean() * 100
both_wet = (~dry_i & ~dry_j).mean() * 100
disagree = 100 - both_dry - both_wet
results.append({
"pair": f"{name_i}\nvs\n{name_j}",
"both_dry": round(both_dry, 1),
"both_wet": round(both_wet, 1),
"disagree": round(disagree, 1),
"n": len(valid),
})
return results
# ============================================================
# PLOTTING
# PLOTTING — ARCHIVE
# ============================================================
def plot_archive(data_dict, location_name, start, end, output_dir):
"""Line plot of all archive providers for one location."""
fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)
ARCHIVE_COLORS = {
"ERA5 (Open-Meteo)": "#1f77b4",
"ERA5-Land (Open-Meteo)": "#ff7f0e",
"CERRA (Open-Meteo)": "#2ca02c",
"NASA POWER": "#d62728",
}
colors = {
"ERA5 (Open-Meteo)": "#1f77b4",
"ERA5-Land (Open-Meteo)": "#ff7f0e",
"CERRA (Open-Meteo)": "#2ca02c",
"NASA POWER": "#d62728",
}
# Top: daily raw
def _build_spread_frame(data_dict):
"""Merge all valid archive providers onto a common date axis.
Returns a DataFrame with one column per provider + _mean and _std columns.
"""
valid = {name: df for name, df in data_dict.items()
if df is not None and len(df) > 0}
if not valid:
return None, []
merged = None
for name, df in valid.items():
tmp = df[["date", "rain_mm"]].rename(columns={"rain_mm": name})
merged = tmp if merged is None else merged.merge(tmp, on="date", how="outer")
cols = list(valid.keys())
merged[cols] = merged[cols].clip(lower=0)
merged["_mean"] = merged[cols].mean(axis=1)
merged["_std"] = merged[cols].std(axis=1)
return merged.sort_values("date").reset_index(drop=True), cols
def plot_archive_with_spread(data_dict, location_name, start, end, output_dir):
"""Three-panel archive plot:
Top: Individual provider lines + multi-source mean±std shading
Middle: 30-day rolling mean
Bottom: Inter-source std (agreement signal used to flag uncertain periods)
"""
spread, _ = _build_spread_frame(data_dict)
if spread is None:
return
valid_providers = {name: df for name, df in data_dict.items()
if df is not None and len(df) > 0}
fig, axes = plt.subplots(3, 1, figsize=(14, 12), sharex=True)
# --- Top: daily raw + spread band ---
ax1 = axes[0]
for name, df in data_dict.items():
if df is not None and len(df) > 0:
ax1.plot(df["date"], df["rain_mm"], label=name,
color=colors.get(name), linewidth=0.8, alpha=0.85)
ax1.fill_between(
spread["date"],
(spread["_mean"] - spread["_std"]).clip(lower=0),
spread["_mean"] + spread["_std"],
alpha=0.15, color="gray", label="Multi-source ±1 std"
)
ax1.plot(spread["date"], spread["_mean"], color="black", linewidth=1.0,
linestyle="--", alpha=0.6, label="Multi-source mean", zorder=5)
for name, df in valid_providers.items():
ax1.plot(df["date"], df["rain_mm"],
label=name, color=ARCHIVE_COLORS.get(name, "gray"),
linewidth=0.7, alpha=0.85)
ax1.set_ylabel("Precipitation (mm/day)")
ax1.set_title(f"{location_name} — Daily Precipitation (archive {start}{end})")
ax1.legend(fontsize=8)
ax1.set_title(
f"{location_name} — Daily Precipitation with Multi-Source Spread\n"
f"{start}{end} | Grey band = ±1 std across all providers"
)
ax1.legend(fontsize=8, ncol=2)
ax1.grid(True, alpha=0.3)
# Bottom: 30-day rolling mean
# --- Middle: 30-day rolling mean ---
ax2 = axes[1]
for name, df in data_dict.items():
if df is not None and len(df) > 0:
rolled = df.set_index("date")["rain_mm"].rolling(30, min_periods=15).mean()
ax2.plot(rolled.index, rolled.values, label=name,
color=colors.get(name), linewidth=1.5)
roll_mean = spread.set_index("date")["_mean"].rolling(30, min_periods=15).mean()
roll_std = spread.set_index("date")["_std"].rolling(30, min_periods=15).mean()
ax2.fill_between(
roll_mean.index,
(roll_mean - roll_std).clip(lower=0),
roll_mean + roll_std,
alpha=0.15, color="gray"
)
for name, df in valid_providers.items():
rolled = df.set_index("date")["rain_mm"].rolling(30, min_periods=15).mean()
ax2.plot(rolled.index, rolled.values,
label=name, color=ARCHIVE_COLORS.get(name, "gray"), linewidth=1.5)
ax2.set_ylabel("30-day rolling mean (mm/day)")
ax2.set_xlabel("Date")
ax2.legend(fontsize=8)
ax2.grid(True, alpha=0.3)
ax2.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
fig.autofmt_xdate()
# --- Bottom: inter-source std (agreement signal) ---
ax3 = axes[2]
ax3.fill_between(
spread["date"], 0, spread["_std"],
color="purple", alpha=0.35,
label="Std across providers (higher = less agreement)"
)
median_std = spread["_std"].median()
ax3.axhline(median_std, color="purple", linestyle=":", linewidth=1,
label=f"Median std = {median_std:.2f} mm/day")
ax3.set_ylabel("Std dev across\nproviders (mm)")
ax3.set_xlabel("Date")
ax3.legend(fontsize=8)
ax3.grid(True, alpha=0.3)
ax3.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
fig.autofmt_xdate()
plt.tight_layout()
path = output_dir / f"archive_{location_name}.png"
plt.savefig(path, dpi=150)
plt.savefig(path, dpi=150, bbox_inches="tight")
plt.close()
print(f" Saved: {path}")
def plot_forecast(data_dict, location_name, output_dir):
"""Bar chart comparing 7-day forecasts across providers."""
fig, ax = plt.subplots(figsize=(12, 5))
def plot_cumulative(data_dict, location_name, output_dir):
"""Cumulative annual precipitation — most relevant for crop/irrigation context."""
fig, ax = plt.subplots(figsize=(14, 5))
providers = [(name, df) for name, df in data_dict.items() if df is not None and len(df) > 0]
n = len(providers)
if n == 0:
plt.close()
return
for name, df in data_dict.items():
if df is None or len(df) == 0:
continue
s = df.set_index("date")["rain_mm"].sort_index().cumsum()
total = s.iloc[-1]
ax.plot(s.index, s.values,
label=f"{name} (total: {total:.0f} mm)",
color=ARCHIVE_COLORS.get(name, "gray"), linewidth=1.8)
# Collect all forecast dates
all_dates = sorted(set(
d for _, df in providers
for d in df["date"].dt.date.tolist()
))
x = np.arange(len(all_dates))
width = 0.8 / n
cmap = matplotlib.colormaps["tab10"].resampled(n)
for i, (name, df) in enumerate(providers):
vals = []
date_map = dict(zip(df["date"].dt.date, df["rain_mm"]))
for d in all_dates:
vals.append(date_map.get(d, 0.0))
ax.bar(x + i * width, vals, width, label=name, color=cmap(i), alpha=0.85)
ax.set_xticks(x + width * (n - 1) / 2)
ax.set_xticklabels([d.strftime("%d %b") for d in all_dates], rotation=45, ha="right")
ax.set_ylabel("Precipitation (mm/day)")
ax.set_title(f"{location_name} — 7-Day Forecast Comparison")
ax.set_ylabel("Cumulative precipitation (mm)")
ax.set_xlabel("Date")
ax.set_title(
f"{location_name} — Cumulative Annual Precipitation by Provider\n"
"Divergence = sources disagree on total seasonal rainfall"
)
ax.legend(fontsize=9)
ax.grid(True, axis="y", alpha=0.3)
ax.grid(True, alpha=0.3)
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
fig.autofmt_xdate()
plt.tight_layout()
path = output_dir / f"forecast_{location_name}.png"
path = output_dir / f"cumulative_{location_name}.png"
plt.savefig(path, dpi=150)
plt.close()
print(f" Saved: {path}")
@ -324,6 +500,10 @@ def plot_forecast(data_dict, location_name, output_dir):
def plot_vs_era5(data_dict, location_name, output_dir):
"""Each provider vs ERA5 reference: scatter + regression line.
NOTE: ERA5 is used as reference here for visual comparison only.
It is a reanalysis product, not a ground-truth measurement.
Use the pairwise heatmap (plot_pairwise_heatmap) for an unbiased comparison.
How to read:
- Each panel shows one provider (y-axis) vs ERA5 (x-axis) for daily precip.
- Points on the red diagonal = perfect agreement.
@ -344,26 +524,18 @@ def plot_vs_era5(data_dict, location_name, output_dir):
n = len(others)
fig, axes = plt.subplots(1, n, figsize=(5 * n, 5), squeeze=False)
colors = {
"ERA5-Land (Open-Meteo)": "#ff7f0e",
"CERRA (Open-Meteo)": "#2ca02c",
"NASA POWER": "#d62728",
}
for i, (name, df) in enumerate(others):
ax = axes[0][i]
merged = ref_df.merge(df, on="date", suffixes=("_ref", "_cmp"))
valid = merged[["rain_mm_ref", "rain_mm_cmp"]].dropna()
color = colors.get(name, "steelblue")
color = ARCHIVE_COLORS.get(name, "steelblue")
ax.scatter(valid["rain_mm_ref"], valid["rain_mm_cmp"],
s=4, alpha=0.35, color=color)
# Perfect-agreement diagonal
lim = max(valid.max().max(), 1) * 1.05
ax.plot([0, lim], [0, lim], "r--", linewidth=1, label="Perfect agreement")
# Linear regression line
if len(valid) > 5:
coeffs = np.polyfit(valid["rain_mm_ref"], valid["rain_mm_cmp"], 1)
x_fit = np.linspace(0, lim, 100)
@ -382,8 +554,8 @@ def plot_vs_era5(data_dict, location_name, output_dir):
ax.grid(True, alpha=0.3)
fig.suptitle(
f"{location_name} — Daily Precip vs ERA5 Reference\n"
"Red dashed = perfect agreement. Points above line = provider wetter than ERA5.",
f"{location_name} — Daily Precip vs ERA5 (reference only — ERA5 is not ground truth)\n"
"Red dashed = perfect agreement. See pairwise heatmap for unbiased comparison.",
fontsize=10
)
plt.tight_layout()
@ -393,38 +565,262 @@ def plot_vs_era5(data_dict, location_name, output_dir):
print(f" Saved: {path}")
def plot_cumulative(data_dict, location_name, output_dir):
"""Cumulative annual precipitation — most relevant for crop/irrigation context."""
fig, ax = plt.subplots(figsize=(14, 5))
def plot_pairwise_heatmap(data_dict, location_name, output_dir):
"""Two heatmaps side by side: pairwise Pearson r and pairwise RMSE.
colors = {
"ERA5 (Open-Meteo)": "#1f77b4",
"ERA5-Land (Open-Meteo)": "#ff7f0e",
"CERRA (Open-Meteo)": "#2ca02c",
"NASA POWER": "#d62728",
}
No provider is treated as reference all pairs compared equally.
High r + low RMSE = sources agree. Where they diverge, neither is ground truth.
"""
names, r_matrix, rmse_matrix = compute_pairwise_stats(data_dict)
if len(names) < 2:
return
for name, df in data_dict.items():
if df is None or len(df) == 0:
continue
s = df.set_index("date")["rain_mm"].sort_index().cumsum()
total = s.iloc[-1]
ax.plot(s.index, s.values, label=f"{name} (total: {total:.0f} mm)",
color=colors.get(name), linewidth=1.8)
n = len(names)
short = [nm.replace(" (Open-Meteo)", "\n(OM)") for nm in names]
ax.set_ylabel("Cumulative precipitation (mm)")
ax.set_xlabel("Date")
ax.set_title(
f"{location_name} — Cumulative Annual Precipitation by Provider\n"
"Divergence = sources disagree on total seasonal rainfall"
fig, axes = plt.subplots(1, 2, figsize=(max(10, n * 2.5), max(5, n * 1.5 + 1)))
# Pearson r
ax1 = axes[0]
im1 = ax1.imshow(r_matrix, vmin=0, vmax=1, cmap="YlGn", aspect="auto")
plt.colorbar(im1, ax=ax1, label="Pearson r")
ax1.set_xticks(range(n)); ax1.set_yticks(range(n))
ax1.set_xticklabels(short, fontsize=8, rotation=45, ha="right")
ax1.set_yticklabels(short, fontsize=8)
for i in range(n):
for j in range(n):
val = r_matrix[i, j]
if not np.isnan(val):
text_color = "white" if val > 0.8 else "black"
ax1.text(j, i, f"{val:.2f}", ha="center", va="center",
fontsize=9, color=text_color)
ax1.set_title("Pairwise Pearson r\n(1.0 = perfect agreement, no reference)")
# RMSE
ax2 = axes[1]
off_diag = rmse_matrix[rmse_matrix > 0]
vmax_rmse = float(np.nanmax(off_diag)) if len(off_diag) > 0 else 10
im2 = ax2.imshow(rmse_matrix, vmin=0, vmax=vmax_rmse, cmap="YlOrRd_r", aspect="auto")
plt.colorbar(im2, ax=ax2, label="RMSE (mm/day)")
ax2.set_xticks(range(n)); ax2.set_yticks(range(n))
ax2.set_xticklabels(short, fontsize=8, rotation=45, ha="right")
ax2.set_yticklabels(short, fontsize=8)
for i in range(n):
for j in range(n):
val = rmse_matrix[i, j]
if not np.isnan(val):
ax2.text(j, i, f"{val:.1f}", ha="center", va="center", fontsize=9)
ax2.set_title("Pairwise RMSE (mm/day)\n(0 = perfect agreement)")
fig.suptitle(
f"{location_name} — Archive Provider Agreement (unbiased — no single reference)\n"
"Pairs with high r + low RMSE are consistent. Divergent pairs reveal dataset uncertainty.",
fontsize=10
)
ax.legend(fontsize=9)
ax.grid(True, alpha=0.3)
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
plt.tight_layout()
path = output_dir / f"pairwise_{location_name}.png"
plt.savefig(path, dpi=150, bbox_inches="tight")
plt.close()
print(f" Saved: {path}")
def plot_wetdry_agreement(data_dict, location_name, output_dir, threshold=1.0):
"""Stacked bar chart: for each provider pair, % of days both-dry / both-wet / disagree.
High 'disagree' % means one source says rain, the other says dry on the same day.
This is the most practically relevant divergence for SmartCane crop monitoring.
"""
results = wetdry_agreement(data_dict, threshold)
if not results:
return
pairs = [r["pair"] for r in results]
both_dry = [r["both_dry"] for r in results]
both_wet = [r["both_wet"] for r in results]
disagree = [r["disagree"] for r in results]
fig, ax = plt.subplots(figsize=(max(8, len(pairs) * 2.8), 6))
x = np.arange(len(pairs))
ax.bar(x, both_dry, label=f"Both dry (<{threshold} mm)", color="#a8d5e2", edgecolor="white")
ax.bar(x, both_wet, bottom=both_dry,
label=f"Both wet (≥{threshold} mm)", color="#3a7ebf", edgecolor="white")
bottom2 = [d + w for d, w in zip(both_dry, both_wet)]
ax.bar(x, disagree, bottom=bottom2,
label="Disagree (one wet, one dry)", color="#e07b54", edgecolor="white")
# Add % labels inside bars
for i, r in enumerate(results):
if r["both_dry"] > 4:
ax.text(i, r["both_dry"] / 2, f"{r['both_dry']:.0f}%",
ha="center", va="center", fontsize=7, color="black")
if r["both_wet"] > 4:
ax.text(i, r["both_dry"] + r["both_wet"] / 2, f"{r['both_wet']:.0f}%",
ha="center", va="center", fontsize=7, color="white")
if r["disagree"] > 4:
ax.text(i, bottom2[i] + r["disagree"] / 2, f"{r['disagree']:.0f}%",
ha="center", va="center", fontsize=7, color="white")
ax.set_xticks(x)
ax.set_xticklabels(pairs, fontsize=8)
ax.set_ylim(0, 108)
ax.set_ylabel("% of days in archive period")
ax.set_title(
f"{location_name} — Provider Agreement on Wet vs Dry Days\n"
f"Threshold = {threshold} mm/day | Orange = providers disagree on whether it rained"
)
ax.legend(loc="upper right", fontsize=9)
ax.grid(True, axis="y", alpha=0.3)
plt.tight_layout()
path = output_dir / f"wetdry_agreement_{location_name}.png"
plt.savefig(path, dpi=150, bbox_inches="tight")
plt.close()
print(f" Saved: {path}")
# ============================================================
# PLOTTING — FORECAST
# ============================================================
def plot_forecast_with_ensemble(forecast_data, ensemble_df, location_name, output_dir):
"""Two-panel forecast comparison:
Top: Deterministic bars (YR.no, Open-Meteo) + ECMWF ensemble percentile shading
Bottom: Exceedance probabilities P(rain > 1 mm) and P(rain > 5 mm)
Reading guide:
- Dark blue shading = where 50% of ensemble members agree (25th75th %ile)
- Light blue shading = where 80% of ensemble members agree (10th90th %ile)
- Bars = deterministic (single-value) forecast from each provider
- Bottom panel: at 50% on the dashed line = coin-flip uncertainty about rain event
"""
has_ensemble = ensemble_df is not None and len(ensemble_df) > 0
det_providers = [(name, df) for name, df in forecast_data.items()
if df is not None and len(df) > 0]
if not has_ensemble and not det_providers:
return
fig, axes = plt.subplots(
2, 1, figsize=(12, 8), sharex=True,
gridspec_kw={"height_ratios": [3, 1.5]}
)
# ---- TOP: deterministic bars + ensemble shading ----
ax1 = axes[0]
if has_ensemble:
ax1.fill_between(
ensemble_df["date"], ensemble_df["p10"], ensemble_df["p90"],
alpha=0.12, color="steelblue", label="Ensemble 10th90th %ile"
)
ax1.fill_between(
ensemble_df["date"], ensemble_df["p25"], ensemble_df["p75"],
alpha=0.28, color="steelblue", label="Ensemble 25th75th %ile"
)
ax1.plot(
ensemble_df["date"], ensemble_df["p50"],
color="steelblue", linewidth=1.8, linestyle="-",
label=f"Ensemble median ({int(ensemble_df['n_members'].iloc[0])} members)",
zorder=5
)
det_colors = {"Open-Meteo Forecast": "#1f77b4", "YR.no": "#e8882a"}
bar_half = datetime.timedelta(hours=10) # offset so bars don't overlap
n_det = len(det_providers)
offsets = np.linspace(-bar_half.total_seconds() / 3600 * (n_det - 1) / 2,
bar_half.total_seconds() / 3600 * (n_det - 1) / 2,
n_det)
bar_width_days = 0.3
for k, (name, df) in enumerate(det_providers):
offset_td = datetime.timedelta(hours=offsets[k])
shifted_dates = df["date"] + offset_td
ax1.bar(
shifted_dates, df["rain_mm"],
width=bar_width_days,
label=name,
color=det_colors.get(name, f"C{k}"),
alpha=0.75,
zorder=4,
)
ax1.set_ylabel("Precipitation (mm/day)")
ax1.set_title(
f"{location_name} — 7-Day Forecast\n"
"Shading = Open-Meteo ECMWF ensemble spread | Bars = deterministic forecasts"
)
ax1.legend(fontsize=8, loc="upper right")
ax1.grid(True, axis="y", alpha=0.3)
ax1.set_ylim(bottom=0)
# ---- BOTTOM: exceedance probabilities ----
ax2 = axes[1]
if has_ensemble:
ax2.step(
ensemble_df["date"], ensemble_df["prob_gt_1mm"],
where="mid", color="steelblue", linewidth=2.0,
label="P(rain > 1 mm)"
)
ax2.step(
ensemble_df["date"], ensemble_df["prob_gt_5mm"],
where="mid", color="darkblue", linewidth=2.0, linestyle="--",
label="P(rain > 5 mm)"
)
ax2.fill_between(
ensemble_df["date"], 0, ensemble_df["prob_gt_5mm"],
step="mid", alpha=0.18, color="darkblue"
)
ax2.axhline(50, color="gray", linestyle=":", linewidth=0.9, alpha=0.8,
label="50% (coin-flip)")
ax2.set_ylim(0, 108)
ax2.set_ylabel("Exceedance\nprobability (%)")
ax2.legend(fontsize=8, loc="upper right")
ax2.grid(True, alpha=0.3)
ax2.set_xlabel("Date")
ax2.xaxis.set_major_formatter(mdates.DateFormatter("%d %b"))
fig.autofmt_xdate()
plt.tight_layout()
path = output_dir / f"cumulative_{location_name}.png"
path = output_dir / f"forecast_ensemble_{location_name}.png"
plt.savefig(path, dpi=150, bbox_inches="tight")
plt.close()
print(f" Saved: {path}")
def plot_forecast(data_dict, location_name, output_dir):
"""Bar chart comparing deterministic 7-day forecasts (original style, kept for reference)."""
_, ax = plt.subplots(figsize=(12, 5))
providers = [(name, df) for name, df in data_dict.items() if df is not None and len(df) > 0]
n = len(providers)
if n == 0:
plt.close()
return
all_dates = sorted(set(
d for _, df in providers
for d in df["date"].dt.date.tolist()
))
x = np.arange(len(all_dates))
width = 0.8 / n
cmap = matplotlib.colormaps["tab10"].resampled(n)
for i, (name, df) in enumerate(providers):
date_map = dict(zip(df["date"].dt.date, df["rain_mm"]))
vals = [date_map.get(d, 0.0) for d in all_dates]
ax.bar(x + i * width, vals, width, label=name, color=cmap(i), alpha=0.85)
ax.set_xticks(x + width * (n - 1) / 2)
ax.set_xticklabels([d.strftime("%d %b") for d in all_dates], rotation=45, ha="right")
ax.set_ylabel("Precipitation (mm/day)")
ax.set_title(f"{location_name} — 7-Day Deterministic Forecast Comparison")
ax.legend(fontsize=9)
ax.grid(True, axis="y", alpha=0.3)
plt.tight_layout()
path = output_dir / f"forecast_{location_name}.png"
plt.savefig(path, dpi=150)
plt.close()
print(f" Saved: {path}")
@ -489,28 +885,26 @@ def run_location(loc_name, lat, lon):
print(f" ✗ NASA POWER failed: {e}")
archive_data["NASA POWER"] = None
# Stats vs ERA5 reference
print("\n Stats vs ERA5 (Open-Meteo) reference:")
ref_df = archive_data.get("ERA5 (Open-Meteo)")
for name, df in archive_data.items():
if name == "ERA5 (Open-Meteo)" or df is None:
continue
if ref_df is None:
continue
merged = ref_df.merge(df, on="date", suffixes=("_ref", "_cmp"))
stats = compare_stats(merged, "rain_mm_ref", "rain_mm_cmp")
print(f" {name:30s} n={stats['n']:4d} MAE={stats['MAE']} "
f"RMSE={stats['RMSE']} Bias={stats['Bias']} r={stats['r']}")
# Pairwise stats (no ERA5 reference bias)
print("\n Pairwise archive stats (Pearson r | RMSE mm/day | Bias mm/day):")
names, r_mat, rmse_mat = compute_pairwise_stats(archive_data)
for i in range(len(names)):
for j in range(i + 1, len(names)):
if not np.isnan(r_mat[i, j]):
print(f" {names[i]:30s} vs {names[j]:30s} "
f"r={r_mat[i,j]:.3f} RMSE={rmse_mat[i,j]:.2f} mm")
plot_archive(archive_data, loc_name, ARCHIVE_START, ARCHIVE_END, OUTPUT_DIR)
plot_archive_with_spread(archive_data, loc_name, ARCHIVE_START, ARCHIVE_END, OUTPUT_DIR)
plot_cumulative(archive_data, loc_name, OUTPUT_DIR)
plot_vs_era5(archive_data, loc_name, OUTPUT_DIR)
plot_pairwise_heatmap(archive_data, loc_name, OUTPUT_DIR)
plot_wetdry_agreement(archive_data, loc_name, OUTPUT_DIR)
# ---- FORECAST ----
print("\n[Forecast]")
forecast_data = {}
print(" Fetching Open-Meteo forecast...")
print(" Fetching Open-Meteo deterministic forecast...")
try:
forecast_data["Open-Meteo Forecast"] = fetch_openmeteo_forecast(lat, lon)
print(f"{len(forecast_data['Open-Meteo Forecast'])} days")
@ -518,6 +912,17 @@ def run_location(loc_name, lat, lon):
print(f" ✗ Open-Meteo forecast failed: {e}")
forecast_data["Open-Meteo Forecast"] = None
time.sleep(0.5)
print(" Fetching Open-Meteo ECMWF ensemble forecast...")
ensemble_df = None
try:
ensemble_df = fetch_openmeteo_ensemble_forecast(lat, lon)
if ensemble_df is not None:
print(f"{len(ensemble_df)} days "
f"({int(ensemble_df['n_members'].iloc[0])} ensemble members)")
except Exception as e:
print(f" ✗ Open-Meteo ensemble failed: {e}")
time.sleep(0.5)
print(" Fetching YR.no LocationForecast...")
try:
@ -551,6 +956,7 @@ def run_location(loc_name, lat, lon):
print(f" ✗ WeatherAPI.com failed: {e}")
forecast_data["WeatherAPI.com"] = None
plot_forecast_with_ensemble(forecast_data, ensemble_df, loc_name, OUTPUT_DIR)
plot_forecast(forecast_data, loc_name, OUTPUT_DIR)