SmartCane/python_app/weather_api_comparison.py
2026-03-16 16:16:16 +01:00

974 lines
37 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
weather_api_comparison.py
=========================
Compare daily precipitation from multiple free weather APIs across two locations:
- Arnhem, Netherlands (51.985°N, 5.899°E) — European climate
- Angata, Kenya ( 1.330°S, 34.738°E) — tropical / sugarcane context
ARCHIVE providers (no API key required):
1. Open-Meteo ERA5 — current SmartCane provider (0.25°, global)
2. Open-Meteo ERA5-Land — higher resolution variant (0.10°, global)
3. Open-Meteo CERRA — EU regional reanalysis (0.05°, EU only)
4. NASA POWER — completely independent source (0.50°, global)
FORECAST providers (no API key required):
5. Open-Meteo Forecast — deterministic NWP (global)
6. Open-Meteo Ensemble — ECMWF IFS 51-member ensemble; gives probability bands
7. YR.no LocationForecast — Norwegian Met Institute (~10 days, global)
FORECAST providers (API key required — set in CONFIG below, leave "" to skip):
8. OpenWeatherMap — free tier, 1000 calls/day
9. WeatherAPI.com — free tier
OUTPUT:
Plots saved to: weather_comparison_plots/
archive_<loc>.png — daily lines + multi-source spread band + agreement signal
archive_rolling_<loc>.png — 30-day rolling mean comparison (original style)
cumulative_<loc>.png — cumulative annual precipitation
vs_era5_<loc>.png — each provider vs ERA5 scatter (note: ERA5 is not ground truth)
pairwise_<loc>.png — pairwise Pearson r and RMSE heatmaps (unbiased)
wetdry_agreement_<loc>.png — % of days providers agree on wet vs dry
forecast_ensemble_<loc>.png — ensemble uncertainty bands + exceedance probability
forecast_<loc>.png — deterministic forecast bars (original style)
Usage:
python weather_api_comparison.py
"""
import datetime
import time
import matplotlib
matplotlib.use("Agg")
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from pathlib import Path
# ============================================================
# CONFIG
# ============================================================
LOCATIONS = {
"Arnhem_NL": {"lat": 51.985, "lon": 5.899},
"Angata_KE": {"lat": -1.330, "lon": 34.738},
}
# Archive: last 12 months
ARCHIVE_END = datetime.date.today() - datetime.timedelta(days=2) # ERA5 lags ~2 days
ARCHIVE_START = ARCHIVE_END - datetime.timedelta(days=365)
# Forecast: today + 7 days
FORECAST_START = datetime.date.today()
FORECAST_END = FORECAST_START + datetime.timedelta(days=7)
# Optional API keys — leave "" to skip that provider
OPENWEATHERMAP_KEY = "" # https://openweathermap.org/api
WEATHERAPI_KEY = "" # https://www.weatherapi.com/
OUTPUT_DIR = Path("weather_comparison_plots")
OUTPUT_DIR.mkdir(exist_ok=True)
USER_AGENT = "SmartCane-WeatherComparison/1.0 (research; contact via github)"
# ============================================================
# ARCHIVE FETCHERS
# ============================================================
def fetch_openmeteo_archive(lat, lon, start, end, model="era5"):
"""Open-Meteo ERA5 / ERA5-Land / CERRA archive.
ERA5 is the default (no models param needed). ERA5-Land and CERRA use lowercase names.
"""
model_suffix = "" if model == "era5" else f"&models={model}"
url = (
f"https://archive-api.open-meteo.com/v1/archive"
f"?latitude={lat}&longitude={lon}"
f"&daily=precipitation_sum"
f"&start_date={start}&end_date={end}"
f"{model_suffix}"
f"&timezone=UTC"
)
r = requests.get(url, timeout=30)
r.raise_for_status()
body = r.json()
df = pd.DataFrame({
"date": pd.to_datetime(body["daily"]["time"]),
"rain_mm": body["daily"]["precipitation_sum"],
})
df["rain_mm"] = pd.to_numeric(df["rain_mm"], errors="coerce").clip(lower=0).fillna(0)
# ERA5-Land sometimes returns values in meters (Open-Meteo API quirk).
# Auto-detect: if annual total < 50mm for any non-polar location, assume m → convert.
if df["rain_mm"].sum() < 50 and len(df) > 30:
df["rain_mm"] = df["rain_mm"] * 1000
print(f" ⚠ Unit auto-converted m→mm (values were implausibly small)")
return df
def fetch_nasa_power(lat, lon, start, end):
"""NASA POWER — daily PRECTOTCORR (precipitation corrected), 0.5° grid."""
url = (
"https://power.larc.nasa.gov/api/temporal/daily/point"
f"?parameters=PRECTOTCORR"
f"&community=AG"
f"&longitude={lon}&latitude={lat}"
f"&start={start.strftime('%Y%m%d')}&end={end.strftime('%Y%m%d')}"
f"&format=JSON"
)
r = requests.get(url, timeout=60)
r.raise_for_status()
body = r.json()
raw = body["properties"]["parameter"]["PRECTOTCORR"]
df = pd.DataFrame([
{"date": pd.to_datetime(k, format="%Y%m%d"), "rain_mm": max(v, 0)}
for k, v in raw.items()
if v != -999 # NASA POWER fill value
])
return df.sort_values("date").reset_index(drop=True)
# ============================================================
# FORECAST FETCHERS
# ============================================================
def fetch_openmeteo_forecast(lat, lon, days=8):
"""Open-Meteo NWP forecast — default best_match model."""
url = (
f"https://api.open-meteo.com/v1/forecast"
f"?latitude={lat}&longitude={lon}"
f"&daily=precipitation_sum"
f"&forecast_days={days + 1}"
f"&timezone=UTC"
)
r = requests.get(url, timeout=30)
r.raise_for_status()
body = r.json()
df = pd.DataFrame({
"date": pd.to_datetime(body["daily"]["time"]),
"rain_mm": body["daily"]["precipitation_sum"],
})
df["rain_mm"] = pd.to_numeric(df["rain_mm"], errors="coerce").fillna(0)
return df
def fetch_openmeteo_ensemble_forecast(lat, lon, days=8):
"""Open-Meteo ECMWF IFS ensemble forecast — 51 members at 0.4° resolution.
Returns a DataFrame with daily percentile bands and exceedance probabilities:
p10, p25, p50 (median), p75, p90 — mm/day
prob_gt_1mm, prob_gt_5mm — % of members exceeding threshold
n_members — number of members in response
"""
url = (
f"https://ensemble-api.open-meteo.com/v1/ensemble"
f"?latitude={lat}&longitude={lon}"
f"&daily=precipitation_sum"
f"&models=ecmwf_ifs04"
f"&forecast_days={days}"
f"&timezone=UTC"
)
r = requests.get(url, timeout=60)
r.raise_for_status()
body = r.json()
daily = body["daily"]
dates = pd.to_datetime(daily["time"])
# Member columns are named like "precipitation_sum_member01", etc.
member_keys = [k for k in daily.keys() if k.startswith("precipitation_sum")]
if not member_keys:
print(" ⚠ No member columns found in ensemble response")
return None
# Shape: (n_members, n_days)
members = np.array([daily[k] for k in member_keys], dtype=float)
members = np.where(members < 0, 0, members) # clip negatives
df = pd.DataFrame({
"date": dates,
"p10": np.nanpercentile(members, 10, axis=0),
"p25": np.nanpercentile(members, 25, axis=0),
"p50": np.nanpercentile(members, 50, axis=0),
"p75": np.nanpercentile(members, 75, axis=0),
"p90": np.nanpercentile(members, 90, axis=0),
"mean": np.nanmean(members, axis=0),
"prob_gt_1mm": np.mean(members > 1.0, axis=0) * 100,
"prob_gt_5mm": np.mean(members > 5.0, axis=0) * 100,
"n_members": members.shape[0],
})
return df
def fetch_yr_forecast(lat, lon):
"""YR.no LocationForecast 2.0 — hourly precip aggregated to daily.
Note: forecast-only service; no historical archive is available.
"""
url = f"https://api.met.no/weatherapi/locationforecast/2.0/compact?lat={lat}&lon={lon}"
headers = {"User-Agent": USER_AGENT}
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()
body = r.json()
records = []
for entry in body["properties"]["timeseries"]:
ts = pd.to_datetime(entry["time"])
data = entry["data"]
precip = 0.0
if "next_1_hours" in data:
precip = data["next_1_hours"]["details"].get("precipitation_amount", 0.0)
elif "next_6_hours" in data:
precip = data["next_6_hours"]["details"].get("precipitation_amount", 0.0) / 6
records.append({"datetime": ts, "precip_hour": precip})
hourly = pd.DataFrame(records)
hourly["date"] = hourly["datetime"].dt.date
daily = (
hourly.groupby("date")["precip_hour"]
.sum()
.reset_index()
.rename(columns={"precip_hour": "rain_mm"})
)
daily["date"] = pd.to_datetime(daily["date"])
return daily
def fetch_openweathermap_forecast(lat, lon, api_key):
"""OpenWeatherMap One Call 3.0 — daily forecast (needs paid/free key)."""
url = (
f"https://api.openweathermap.org/data/3.0/onecall"
f"?lat={lat}&lon={lon}"
f"&exclude=current,minutely,hourly,alerts"
f"&appid={api_key}&units=metric"
)
r = requests.get(url, timeout=30)
r.raise_for_status()
body = r.json()
records = []
for day in body.get("daily", []):
records.append({
"date": pd.to_datetime(day["dt"], unit="s").normalize(),
"rain_mm": day.get("rain", 0.0),
})
return pd.DataFrame(records)
def fetch_weatherapi_forecast(lat, lon, api_key, days=7):
"""WeatherAPI.com free forecast (up to 3 days on free tier, 14 on paid)."""
url = (
f"https://api.weatherapi.com/v1/forecast.json"
f"?key={api_key}&q={lat},{lon}&days={days}&aqi=no&alerts=no"
)
r = requests.get(url, timeout=30)
r.raise_for_status()
body = r.json()
records = []
for day in body["forecast"]["forecastday"]:
records.append({
"date": pd.to_datetime(day["date"]),
"rain_mm": day["day"].get("totalprecip_mm", 0.0),
})
return pd.DataFrame(records)
# ============================================================
# STATS
# ============================================================
def compare_stats(df, ref_col, other_col):
"""Compute MAE, RMSE, bias, Pearson r between two columns."""
valid = df[[ref_col, other_col]].dropna()
if len(valid) < 5:
return {"n": len(valid), "MAE": None, "RMSE": None, "Bias": None, "r": None}
diff = valid[other_col] - valid[ref_col]
mae = diff.abs().mean()
rmse = (diff**2).mean()**0.5
bias = diff.mean()
r = valid[ref_col].corr(valid[other_col])
return {"n": len(valid), "MAE": round(mae,2), "RMSE": round(rmse,2),
"Bias": round(bias,2), "r": round(r,3)}
def compute_pairwise_stats(data_dict):
"""Compute pairwise Pearson r and RMSE for all archive provider pairs.
Returns:
names — list of provider names (same order as matrices)
r_matrix — (n x n) Pearson r array, NaN where not computable
rmse_matrix — (n x n) RMSE array (mm/day), 0 on diagonal
"""
providers = [(name, df) for name, df in data_dict.items()
if df is not None and len(df) > 5]
names = [p[0] for p in providers]
n = len(names)
r_matrix = np.full((n, n), np.nan)
rmse_matrix = np.full((n, n), np.nan)
for i in range(n):
r_matrix[i, i] = 1.0
rmse_matrix[i, i] = 0.0
_, df_i = providers[i]
for j in range(i + 1, n):
_, df_j = providers[j]
merged = df_i.merge(df_j, on="date", suffixes=("_i", "_j"))
valid = merged[["rain_mm_i", "rain_mm_j"]].dropna()
if len(valid) < 5:
continue
r = valid["rain_mm_i"].corr(valid["rain_mm_j"])
rmse = ((valid["rain_mm_i"] - valid["rain_mm_j"]) ** 2).mean() ** 0.5
r_matrix[i, j] = r_matrix[j, i] = round(r, 3)
rmse_matrix[i, j] = rmse_matrix[j, i] = round(rmse, 2)
return names, r_matrix, rmse_matrix
def wetdry_agreement(data_dict, threshold=1.0):
"""For each provider pair, compute % of days both-dry / both-wet / disagree.
A day is 'dry' if rain_mm < threshold (default 1 mm).
Returns a list of dicts: pair, both_dry, both_wet, disagree, n.
"""
providers = [(name, df) for name, df in data_dict.items()
if df is not None and len(df) > 5]
results = []
for i, (name_i, df_i) in enumerate(providers):
for j in range(i + 1, len(providers)):
name_j, df_j = providers[j]
merged = df_i.merge(df_j, on="date", suffixes=("_i", "_j"))
valid = merged[["rain_mm_i", "rain_mm_j"]].dropna()
if len(valid) < 5:
continue
dry_i = valid["rain_mm_i"] < threshold
dry_j = valid["rain_mm_j"] < threshold
both_dry = (dry_i & dry_j).mean() * 100
both_wet = (~dry_i & ~dry_j).mean() * 100
disagree = 100 - both_dry - both_wet
results.append({
"pair": f"{name_i}\nvs\n{name_j}",
"both_dry": round(both_dry, 1),
"both_wet": round(both_wet, 1),
"disagree": round(disagree, 1),
"n": len(valid),
})
return results
# ============================================================
# PLOTTING — ARCHIVE
# ============================================================
ARCHIVE_COLORS = {
"ERA5 (Open-Meteo)": "#1f77b4",
"ERA5-Land (Open-Meteo)": "#ff7f0e",
"CERRA (Open-Meteo)": "#2ca02c",
"NASA POWER": "#d62728",
}
def _build_spread_frame(data_dict):
"""Merge all valid archive providers onto a common date axis.
Returns a DataFrame with one column per provider + _mean and _std columns.
"""
valid = {name: df for name, df in data_dict.items()
if df is not None and len(df) > 0}
if not valid:
return None, []
merged = None
for name, df in valid.items():
tmp = df[["date", "rain_mm"]].rename(columns={"rain_mm": name})
merged = tmp if merged is None else merged.merge(tmp, on="date", how="outer")
cols = list(valid.keys())
merged[cols] = merged[cols].clip(lower=0)
merged["_mean"] = merged[cols].mean(axis=1)
merged["_std"] = merged[cols].std(axis=1)
return merged.sort_values("date").reset_index(drop=True), cols
def plot_archive_with_spread(data_dict, location_name, start, end, output_dir):
"""Three-panel archive plot:
Top: Individual provider lines + multi-source mean±std shading
Middle: 30-day rolling mean
Bottom: Inter-source std (agreement signal — used to flag uncertain periods)
"""
spread, _ = _build_spread_frame(data_dict)
if spread is None:
return
valid_providers = {name: df for name, df in data_dict.items()
if df is not None and len(df) > 0}
fig, axes = plt.subplots(3, 1, figsize=(14, 12), sharex=True)
# --- Top: daily raw + spread band ---
ax1 = axes[0]
ax1.fill_between(
spread["date"],
(spread["_mean"] - spread["_std"]).clip(lower=0),
spread["_mean"] + spread["_std"],
alpha=0.15, color="gray", label="Multi-source ±1 std"
)
ax1.plot(spread["date"], spread["_mean"], color="black", linewidth=1.0,
linestyle="--", alpha=0.6, label="Multi-source mean", zorder=5)
for name, df in valid_providers.items():
ax1.plot(df["date"], df["rain_mm"],
label=name, color=ARCHIVE_COLORS.get(name, "gray"),
linewidth=0.7, alpha=0.85)
ax1.set_ylabel("Precipitation (mm/day)")
ax1.set_title(
f"{location_name} — Daily Precipitation with Multi-Source Spread\n"
f"{start}{end} | Grey band = ±1 std across all providers"
)
ax1.legend(fontsize=8, ncol=2)
ax1.grid(True, alpha=0.3)
# --- Middle: 30-day rolling mean ---
ax2 = axes[1]
roll_mean = spread.set_index("date")["_mean"].rolling(30, min_periods=15).mean()
roll_std = spread.set_index("date")["_std"].rolling(30, min_periods=15).mean()
ax2.fill_between(
roll_mean.index,
(roll_mean - roll_std).clip(lower=0),
roll_mean + roll_std,
alpha=0.15, color="gray"
)
for name, df in valid_providers.items():
rolled = df.set_index("date")["rain_mm"].rolling(30, min_periods=15).mean()
ax2.plot(rolled.index, rolled.values,
label=name, color=ARCHIVE_COLORS.get(name, "gray"), linewidth=1.5)
ax2.set_ylabel("30-day rolling mean (mm/day)")
ax2.legend(fontsize=8)
ax2.grid(True, alpha=0.3)
# --- Bottom: inter-source std (agreement signal) ---
ax3 = axes[2]
ax3.fill_between(
spread["date"], 0, spread["_std"],
color="purple", alpha=0.35,
label="Std across providers (higher = less agreement)"
)
median_std = spread["_std"].median()
ax3.axhline(median_std, color="purple", linestyle=":", linewidth=1,
label=f"Median std = {median_std:.2f} mm/day")
ax3.set_ylabel("Std dev across\nproviders (mm)")
ax3.set_xlabel("Date")
ax3.legend(fontsize=8)
ax3.grid(True, alpha=0.3)
ax3.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
fig.autofmt_xdate()
plt.tight_layout()
path = output_dir / f"archive_{location_name}.png"
plt.savefig(path, dpi=150, bbox_inches="tight")
plt.close()
print(f" Saved: {path}")
def plot_cumulative(data_dict, location_name, output_dir):
"""Cumulative annual precipitation — most relevant for crop/irrigation context."""
fig, ax = plt.subplots(figsize=(14, 5))
for name, df in data_dict.items():
if df is None or len(df) == 0:
continue
s = df.set_index("date")["rain_mm"].sort_index().cumsum()
total = s.iloc[-1]
ax.plot(s.index, s.values,
label=f"{name} (total: {total:.0f} mm)",
color=ARCHIVE_COLORS.get(name, "gray"), linewidth=1.8)
ax.set_ylabel("Cumulative precipitation (mm)")
ax.set_xlabel("Date")
ax.set_title(
f"{location_name} — Cumulative Annual Precipitation by Provider\n"
"Divergence = sources disagree on total seasonal rainfall"
)
ax.legend(fontsize=9)
ax.grid(True, alpha=0.3)
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
fig.autofmt_xdate()
plt.tight_layout()
path = output_dir / f"cumulative_{location_name}.png"
plt.savefig(path, dpi=150)
plt.close()
print(f" Saved: {path}")
def plot_vs_era5(data_dict, location_name, output_dir):
"""Each provider vs ERA5 reference: scatter + regression line.
NOTE: ERA5 is used as reference here for visual comparison only.
It is a reanalysis product, not a ground-truth measurement.
Use the pairwise heatmap (plot_pairwise_heatmap) for an unbiased comparison.
How to read:
- Each panel shows one provider (y-axis) vs ERA5 (x-axis) for daily precip.
- Points on the red diagonal = perfect agreement.
- Points above = provider wetter than ERA5 on that day.
- r = Pearson correlation (1 = perfect). MAE = mean absolute error in mm/day.
- Bias = provider minus ERA5 on average (positive = provider wetter).
"""
ref_name = "ERA5 (Open-Meteo)"
ref_df = data_dict.get(ref_name)
if ref_df is None:
return
others = [(n, df) for n, df in data_dict.items()
if n != ref_name and df is not None and len(df) > 0]
if not others:
return
n = len(others)
fig, axes = plt.subplots(1, n, figsize=(5 * n, 5), squeeze=False)
for i, (name, df) in enumerate(others):
ax = axes[0][i]
merged = ref_df.merge(df, on="date", suffixes=("_ref", "_cmp"))
valid = merged[["rain_mm_ref", "rain_mm_cmp"]].dropna()
color = ARCHIVE_COLORS.get(name, "steelblue")
ax.scatter(valid["rain_mm_ref"], valid["rain_mm_cmp"],
s=4, alpha=0.35, color=color)
lim = max(valid.max().max(), 1) * 1.05
ax.plot([0, lim], [0, lim], "r--", linewidth=1, label="Perfect agreement")
if len(valid) > 5:
coeffs = np.polyfit(valid["rain_mm_ref"], valid["rain_mm_cmp"], 1)
x_fit = np.linspace(0, lim, 100)
ax.plot(x_fit, np.polyval(coeffs, x_fit), "k-", linewidth=1,
alpha=0.6, label=f"Regression (slope={coeffs[0]:.2f})")
stats = compare_stats(merged, "rain_mm_ref", "rain_mm_cmp")
ax.set_xlim(0, lim); ax.set_ylim(0, lim)
ax.set_xlabel("ERA5 (Open-Meteo) mm/day", fontsize=9)
ax.set_ylabel(f"{name} mm/day", fontsize=9)
ax.set_title(
f"{name}\nr={stats['r']} MAE={stats['MAE']} mm Bias={stats['Bias']:+.2f} mm",
fontsize=9
)
ax.legend(fontsize=7)
ax.grid(True, alpha=0.3)
fig.suptitle(
f"{location_name} — Daily Precip vs ERA5 (reference only — ERA5 is not ground truth)\n"
"Red dashed = perfect agreement. See pairwise heatmap for unbiased comparison.",
fontsize=10
)
plt.tight_layout()
path = output_dir / f"vs_era5_{location_name}.png"
plt.savefig(path, dpi=150)
plt.close()
print(f" Saved: {path}")
def plot_pairwise_heatmap(data_dict, location_name, output_dir):
"""Two heatmaps side by side: pairwise Pearson r and pairwise RMSE.
No provider is treated as reference — all pairs compared equally.
High r + low RMSE = sources agree. Where they diverge, neither is ground truth.
"""
names, r_matrix, rmse_matrix = compute_pairwise_stats(data_dict)
if len(names) < 2:
return
n = len(names)
short = [nm.replace(" (Open-Meteo)", "\n(OM)") for nm in names]
fig, axes = plt.subplots(1, 2, figsize=(max(10, n * 2.5), max(5, n * 1.5 + 1)))
# Pearson r
ax1 = axes[0]
im1 = ax1.imshow(r_matrix, vmin=0, vmax=1, cmap="YlGn", aspect="auto")
plt.colorbar(im1, ax=ax1, label="Pearson r")
ax1.set_xticks(range(n)); ax1.set_yticks(range(n))
ax1.set_xticklabels(short, fontsize=8, rotation=45, ha="right")
ax1.set_yticklabels(short, fontsize=8)
for i in range(n):
for j in range(n):
val = r_matrix[i, j]
if not np.isnan(val):
text_color = "white" if val > 0.8 else "black"
ax1.text(j, i, f"{val:.2f}", ha="center", va="center",
fontsize=9, color=text_color)
ax1.set_title("Pairwise Pearson r\n(1.0 = perfect agreement, no reference)")
# RMSE
ax2 = axes[1]
off_diag = rmse_matrix[rmse_matrix > 0]
vmax_rmse = float(np.nanmax(off_diag)) if len(off_diag) > 0 else 10
im2 = ax2.imshow(rmse_matrix, vmin=0, vmax=vmax_rmse, cmap="YlOrRd_r", aspect="auto")
plt.colorbar(im2, ax=ax2, label="RMSE (mm/day)")
ax2.set_xticks(range(n)); ax2.set_yticks(range(n))
ax2.set_xticklabels(short, fontsize=8, rotation=45, ha="right")
ax2.set_yticklabels(short, fontsize=8)
for i in range(n):
for j in range(n):
val = rmse_matrix[i, j]
if not np.isnan(val):
ax2.text(j, i, f"{val:.1f}", ha="center", va="center", fontsize=9)
ax2.set_title("Pairwise RMSE (mm/day)\n(0 = perfect agreement)")
fig.suptitle(
f"{location_name} — Archive Provider Agreement (unbiased — no single reference)\n"
"Pairs with high r + low RMSE are consistent. Divergent pairs reveal dataset uncertainty.",
fontsize=10
)
plt.tight_layout()
path = output_dir / f"pairwise_{location_name}.png"
plt.savefig(path, dpi=150, bbox_inches="tight")
plt.close()
print(f" Saved: {path}")
def plot_wetdry_agreement(data_dict, location_name, output_dir, threshold=1.0):
"""Stacked bar chart: for each provider pair, % of days both-dry / both-wet / disagree.
High 'disagree' % means one source says rain, the other says dry on the same day.
This is the most practically relevant divergence for SmartCane crop monitoring.
"""
results = wetdry_agreement(data_dict, threshold)
if not results:
return
pairs = [r["pair"] for r in results]
both_dry = [r["both_dry"] for r in results]
both_wet = [r["both_wet"] for r in results]
disagree = [r["disagree"] for r in results]
fig, ax = plt.subplots(figsize=(max(8, len(pairs) * 2.8), 6))
x = np.arange(len(pairs))
ax.bar(x, both_dry, label=f"Both dry (<{threshold} mm)", color="#a8d5e2", edgecolor="white")
ax.bar(x, both_wet, bottom=both_dry,
label=f"Both wet (≥{threshold} mm)", color="#3a7ebf", edgecolor="white")
bottom2 = [d + w for d, w in zip(both_dry, both_wet)]
ax.bar(x, disagree, bottom=bottom2,
label="Disagree (one wet, one dry)", color="#e07b54", edgecolor="white")
# Add % labels inside bars
for i, r in enumerate(results):
if r["both_dry"] > 4:
ax.text(i, r["both_dry"] / 2, f"{r['both_dry']:.0f}%",
ha="center", va="center", fontsize=7, color="black")
if r["both_wet"] > 4:
ax.text(i, r["both_dry"] + r["both_wet"] / 2, f"{r['both_wet']:.0f}%",
ha="center", va="center", fontsize=7, color="white")
if r["disagree"] > 4:
ax.text(i, bottom2[i] + r["disagree"] / 2, f"{r['disagree']:.0f}%",
ha="center", va="center", fontsize=7, color="white")
ax.set_xticks(x)
ax.set_xticklabels(pairs, fontsize=8)
ax.set_ylim(0, 108)
ax.set_ylabel("% of days in archive period")
ax.set_title(
f"{location_name} — Provider Agreement on Wet vs Dry Days\n"
f"Threshold = {threshold} mm/day | Orange = providers disagree on whether it rained"
)
ax.legend(loc="upper right", fontsize=9)
ax.grid(True, axis="y", alpha=0.3)
plt.tight_layout()
path = output_dir / f"wetdry_agreement_{location_name}.png"
plt.savefig(path, dpi=150, bbox_inches="tight")
plt.close()
print(f" Saved: {path}")
# ============================================================
# PLOTTING — FORECAST
# ============================================================
def plot_forecast_with_ensemble(forecast_data, ensemble_df, location_name, output_dir):
"""Two-panel forecast comparison:
Top: Deterministic bars (YR.no, Open-Meteo) + ECMWF ensemble percentile shading
Bottom: Exceedance probabilities P(rain > 1 mm) and P(rain > 5 mm)
Reading guide:
- Dark blue shading = where 50% of ensemble members agree (25th75th %ile)
- Light blue shading = where 80% of ensemble members agree (10th90th %ile)
- Bars = deterministic (single-value) forecast from each provider
- Bottom panel: at 50% on the dashed line = coin-flip uncertainty about rain event
"""
has_ensemble = ensemble_df is not None and len(ensemble_df) > 0
det_providers = [(name, df) for name, df in forecast_data.items()
if df is not None and len(df) > 0]
if not has_ensemble and not det_providers:
return
fig, axes = plt.subplots(
2, 1, figsize=(12, 8), sharex=True,
gridspec_kw={"height_ratios": [3, 1.5]}
)
# ---- TOP: deterministic bars + ensemble shading ----
ax1 = axes[0]
if has_ensemble:
ax1.fill_between(
ensemble_df["date"], ensemble_df["p10"], ensemble_df["p90"],
alpha=0.12, color="steelblue", label="Ensemble 10th90th %ile"
)
ax1.fill_between(
ensemble_df["date"], ensemble_df["p25"], ensemble_df["p75"],
alpha=0.28, color="steelblue", label="Ensemble 25th75th %ile"
)
ax1.plot(
ensemble_df["date"], ensemble_df["p50"],
color="steelblue", linewidth=1.8, linestyle="-",
label=f"Ensemble median ({int(ensemble_df['n_members'].iloc[0])} members)",
zorder=5
)
det_colors = {"Open-Meteo Forecast": "#1f77b4", "YR.no": "#e8882a"}
bar_half = datetime.timedelta(hours=10) # offset so bars don't overlap
n_det = len(det_providers)
offsets = np.linspace(-bar_half.total_seconds() / 3600 * (n_det - 1) / 2,
bar_half.total_seconds() / 3600 * (n_det - 1) / 2,
n_det)
bar_width_days = 0.3
for k, (name, df) in enumerate(det_providers):
offset_td = datetime.timedelta(hours=offsets[k])
shifted_dates = df["date"] + offset_td
ax1.bar(
shifted_dates, df["rain_mm"],
width=bar_width_days,
label=name,
color=det_colors.get(name, f"C{k}"),
alpha=0.75,
zorder=4,
)
ax1.set_ylabel("Precipitation (mm/day)")
ax1.set_title(
f"{location_name} — 7-Day Forecast\n"
"Shading = Open-Meteo ECMWF ensemble spread | Bars = deterministic forecasts"
)
ax1.legend(fontsize=8, loc="upper right")
ax1.grid(True, axis="y", alpha=0.3)
ax1.set_ylim(bottom=0)
# ---- BOTTOM: exceedance probabilities ----
ax2 = axes[1]
if has_ensemble:
ax2.step(
ensemble_df["date"], ensemble_df["prob_gt_1mm"],
where="mid", color="steelblue", linewidth=2.0,
label="P(rain > 1 mm)"
)
ax2.step(
ensemble_df["date"], ensemble_df["prob_gt_5mm"],
where="mid", color="darkblue", linewidth=2.0, linestyle="--",
label="P(rain > 5 mm)"
)
ax2.fill_between(
ensemble_df["date"], 0, ensemble_df["prob_gt_5mm"],
step="mid", alpha=0.18, color="darkblue"
)
ax2.axhline(50, color="gray", linestyle=":", linewidth=0.9, alpha=0.8,
label="50% (coin-flip)")
ax2.set_ylim(0, 108)
ax2.set_ylabel("Exceedance\nprobability (%)")
ax2.legend(fontsize=8, loc="upper right")
ax2.grid(True, alpha=0.3)
ax2.set_xlabel("Date")
ax2.xaxis.set_major_formatter(mdates.DateFormatter("%d %b"))
fig.autofmt_xdate()
plt.tight_layout()
path = output_dir / f"forecast_ensemble_{location_name}.png"
plt.savefig(path, dpi=150, bbox_inches="tight")
plt.close()
print(f" Saved: {path}")
def plot_forecast(data_dict, location_name, output_dir):
"""Bar chart comparing deterministic 7-day forecasts (original style, kept for reference)."""
_, ax = plt.subplots(figsize=(12, 5))
providers = [(name, df) for name, df in data_dict.items() if df is not None and len(df) > 0]
n = len(providers)
if n == 0:
plt.close()
return
all_dates = sorted(set(
d for _, df in providers
for d in df["date"].dt.date.tolist()
))
x = np.arange(len(all_dates))
width = 0.8 / n
cmap = matplotlib.colormaps["tab10"].resampled(n)
for i, (name, df) in enumerate(providers):
date_map = dict(zip(df["date"].dt.date, df["rain_mm"]))
vals = [date_map.get(d, 0.0) for d in all_dates]
ax.bar(x + i * width, vals, width, label=name, color=cmap(i), alpha=0.85)
ax.set_xticks(x + width * (n - 1) / 2)
ax.set_xticklabels([d.strftime("%d %b") for d in all_dates], rotation=45, ha="right")
ax.set_ylabel("Precipitation (mm/day)")
ax.set_title(f"{location_name} — 7-Day Deterministic Forecast Comparison")
ax.legend(fontsize=9)
ax.grid(True, axis="y", alpha=0.3)
plt.tight_layout()
path = output_dir / f"forecast_{location_name}.png"
plt.savefig(path, dpi=150)
plt.close()
print(f" Saved: {path}")
# ============================================================
# MAIN
# ============================================================
def run_location(loc_name, lat, lon):
print(f"\n{'='*60}")
print(f" {loc_name} ({lat}°, {lon}°)")
print(f"{'='*60}")
# ---- ARCHIVE ----
print("\n[Archive]")
archive_data = {}
print(" Fetching Open-Meteo ERA5...")
try:
archive_data["ERA5 (Open-Meteo)"] = fetch_openmeteo_archive(
lat, lon, ARCHIVE_START, ARCHIVE_END, model="era5"
)
print(f"{len(archive_data['ERA5 (Open-Meteo)'])} days")
except Exception as e:
print(f" ✗ ERA5 failed: {e}")
archive_data["ERA5 (Open-Meteo)"] = None
time.sleep(0.5)
print(" Fetching Open-Meteo ERA5-Land...")
try:
archive_data["ERA5-Land (Open-Meteo)"] = fetch_openmeteo_archive(
lat, lon, ARCHIVE_START, ARCHIVE_END, model="era5_land"
)
print(f"{len(archive_data['ERA5-Land (Open-Meteo)'])} days")
except Exception as e:
print(f" ✗ ERA5-Land failed: {e}")
archive_data["ERA5-Land (Open-Meteo)"] = None
time.sleep(0.5)
# CERRA only covers Europe (roughly 20°W45°E, 30°N80°N)
if -20 <= lon <= 45 and 30 <= lat <= 80:
print(" Fetching Open-Meteo CERRA (EU only)...")
try:
archive_data["CERRA (Open-Meteo)"] = fetch_openmeteo_archive(
lat, lon, ARCHIVE_START, ARCHIVE_END, model="cerra"
)
print(f"{len(archive_data['CERRA (Open-Meteo)'])} days")
except Exception as e:
print(f" ✗ CERRA failed: {e}")
archive_data["CERRA (Open-Meteo)"] = None
else:
print(" Skipping CERRA (outside EU coverage)")
archive_data["CERRA (Open-Meteo)"] = None
time.sleep(0.5)
print(" Fetching NASA POWER...")
try:
archive_data["NASA POWER"] = fetch_nasa_power(lat, lon, ARCHIVE_START, ARCHIVE_END)
print(f"{len(archive_data['NASA POWER'])} days")
except Exception as e:
print(f" ✗ NASA POWER failed: {e}")
archive_data["NASA POWER"] = None
# Pairwise stats (no ERA5 reference bias)
print("\n Pairwise archive stats (Pearson r | RMSE mm/day | Bias mm/day):")
names, r_mat, rmse_mat = compute_pairwise_stats(archive_data)
for i in range(len(names)):
for j in range(i + 1, len(names)):
if not np.isnan(r_mat[i, j]):
print(f" {names[i]:30s} vs {names[j]:30s} "
f"r={r_mat[i,j]:.3f} RMSE={rmse_mat[i,j]:.2f} mm")
plot_archive_with_spread(archive_data, loc_name, ARCHIVE_START, ARCHIVE_END, OUTPUT_DIR)
plot_cumulative(archive_data, loc_name, OUTPUT_DIR)
plot_vs_era5(archive_data, loc_name, OUTPUT_DIR)
plot_pairwise_heatmap(archive_data, loc_name, OUTPUT_DIR)
plot_wetdry_agreement(archive_data, loc_name, OUTPUT_DIR)
# ---- FORECAST ----
print("\n[Forecast]")
forecast_data = {}
print(" Fetching Open-Meteo deterministic forecast...")
try:
forecast_data["Open-Meteo Forecast"] = fetch_openmeteo_forecast(lat, lon)
print(f"{len(forecast_data['Open-Meteo Forecast'])} days")
except Exception as e:
print(f" ✗ Open-Meteo forecast failed: {e}")
forecast_data["Open-Meteo Forecast"] = None
time.sleep(0.5)
print(" Fetching Open-Meteo ECMWF ensemble forecast...")
ensemble_df = None
try:
ensemble_df = fetch_openmeteo_ensemble_forecast(lat, lon)
if ensemble_df is not None:
print(f"{len(ensemble_df)} days "
f"({int(ensemble_df['n_members'].iloc[0])} ensemble members)")
except Exception as e:
print(f" ✗ Open-Meteo ensemble failed: {e}")
time.sleep(0.5)
print(" Fetching YR.no LocationForecast...")
try:
forecast_data["YR.no"] = fetch_yr_forecast(lat, lon)
print(f"{len(forecast_data['YR.no'])} days")
except Exception as e:
print(f" ✗ YR.no failed: {e}")
forecast_data["YR.no"] = None
if OPENWEATHERMAP_KEY:
time.sleep(0.5)
print(" Fetching OpenWeatherMap forecast...")
try:
forecast_data["OpenWeatherMap"] = fetch_openweathermap_forecast(
lat, lon, OPENWEATHERMAP_KEY
)
print(f"{len(forecast_data['OpenWeatherMap'])} days")
except Exception as e:
print(f" ✗ OpenWeatherMap failed: {e}")
forecast_data["OpenWeatherMap"] = None
if WEATHERAPI_KEY:
time.sleep(0.5)
print(" Fetching WeatherAPI.com forecast...")
try:
forecast_data["WeatherAPI.com"] = fetch_weatherapi_forecast(
lat, lon, WEATHERAPI_KEY
)
print(f"{len(forecast_data['WeatherAPI.com'])} days")
except Exception as e:
print(f" ✗ WeatherAPI.com failed: {e}")
forecast_data["WeatherAPI.com"] = None
plot_forecast_with_ensemble(forecast_data, ensemble_df, loc_name, OUTPUT_DIR)
plot_forecast(forecast_data, loc_name, OUTPUT_DIR)
if __name__ == "__main__":
print(f"Weather API Comparison — {datetime.date.today()}")
print(f"Archive: {ARCHIVE_START}{ARCHIVE_END}")
print(f"Forecast: {FORECAST_START}{FORECAST_END}")
print(f"Output: {OUTPUT_DIR.resolve()}")
for loc_name, coords in LOCATIONS.items():
run_location(loc_name, coords["lat"], coords["lon"])
time.sleep(1)
print(f"\nDone. Plots saved to: {OUTPUT_DIR.resolve()}")