""" weather_api_comparison.py ========================= Compare daily precipitation from multiple free weather APIs across two locations: - Arnhem, Netherlands (51.985°N, 5.899°E) — European climate - Angata, Kenya ( 1.330°S, 34.738°E) — tropical / sugarcane context ARCHIVE providers (no API key required): 1. Open-Meteo ERA5 — current SmartCane provider (0.25°, global) 2. Open-Meteo ERA5-Land — higher resolution variant (0.10°, global) 3. Open-Meteo CERRA — EU regional reanalysis (0.05°, EU only) 4. NASA POWER — completely independent source (0.50°, global) FORECAST providers (no API key required): 5. Open-Meteo Forecast — deterministic NWP (global) 6. Open-Meteo Ensemble — ECMWF IFS 51-member ensemble; gives probability bands 7. YR.no LocationForecast — Norwegian Met Institute (~10 days, global) FORECAST providers (API key required — set in CONFIG below, leave "" to skip): 8. OpenWeatherMap — free tier, 1000 calls/day 9. WeatherAPI.com — free tier OUTPUT: Plots saved to: weather_comparison_plots/ archive_.png — daily lines + multi-source spread band + agreement signal archive_rolling_.png — 30-day rolling mean comparison (original style) cumulative_.png — cumulative annual precipitation vs_era5_.png — each provider vs ERA5 scatter (note: ERA5 is not ground truth) pairwise_.png — pairwise Pearson r and RMSE heatmaps (unbiased) wetdry_agreement_.png — % of days providers agree on wet vs dry forecast_ensemble_.png — ensemble uncertainty bands + exceedance probability forecast_.png — deterministic forecast bars (original style) Usage: python weather_api_comparison.py """ import datetime import time import matplotlib matplotlib.use("Agg") import matplotlib.dates as mdates import matplotlib.pyplot as plt import numpy as np import pandas as pd import requests from pathlib import Path # ============================================================ # CONFIG # ============================================================ LOCATIONS = { "Arnhem_NL": {"lat": 51.985, "lon": 5.899}, "Angata_KE": {"lat": -1.330, "lon": 34.738}, } # Archive: last 12 months ARCHIVE_END = datetime.date.today() - datetime.timedelta(days=2) # ERA5 lags ~2 days ARCHIVE_START = ARCHIVE_END - datetime.timedelta(days=365) # Forecast: today + 7 days FORECAST_START = datetime.date.today() FORECAST_END = FORECAST_START + datetime.timedelta(days=7) # Optional API keys — leave "" to skip that provider OPENWEATHERMAP_KEY = "" # https://openweathermap.org/api WEATHERAPI_KEY = "" # https://www.weatherapi.com/ OUTPUT_DIR = Path("weather_comparison_plots") OUTPUT_DIR.mkdir(exist_ok=True) USER_AGENT = "SmartCane-WeatherComparison/1.0 (research; contact via github)" # ============================================================ # ARCHIVE FETCHERS # ============================================================ def fetch_openmeteo_archive(lat, lon, start, end, model="era5"): """Open-Meteo ERA5 / ERA5-Land / CERRA archive. ERA5 is the default (no models param needed). ERA5-Land and CERRA use lowercase names. """ model_suffix = "" if model == "era5" else f"&models={model}" url = ( f"https://archive-api.open-meteo.com/v1/archive" f"?latitude={lat}&longitude={lon}" f"&daily=precipitation_sum" f"&start_date={start}&end_date={end}" f"{model_suffix}" f"&timezone=UTC" ) r = requests.get(url, timeout=30) r.raise_for_status() body = r.json() df = pd.DataFrame({ "date": pd.to_datetime(body["daily"]["time"]), "rain_mm": body["daily"]["precipitation_sum"], }) df["rain_mm"] = pd.to_numeric(df["rain_mm"], errors="coerce").clip(lower=0).fillna(0) # ERA5-Land sometimes returns values in meters (Open-Meteo API quirk). # Auto-detect: if annual total < 50mm for any non-polar location, assume m → convert. if df["rain_mm"].sum() < 50 and len(df) > 30: df["rain_mm"] = df["rain_mm"] * 1000 print(f" ⚠ Unit auto-converted m→mm (values were implausibly small)") return df def fetch_nasa_power(lat, lon, start, end): """NASA POWER — daily PRECTOTCORR (precipitation corrected), 0.5° grid.""" url = ( "https://power.larc.nasa.gov/api/temporal/daily/point" f"?parameters=PRECTOTCORR" f"&community=AG" f"&longitude={lon}&latitude={lat}" f"&start={start.strftime('%Y%m%d')}&end={end.strftime('%Y%m%d')}" f"&format=JSON" ) r = requests.get(url, timeout=60) r.raise_for_status() body = r.json() raw = body["properties"]["parameter"]["PRECTOTCORR"] df = pd.DataFrame([ {"date": pd.to_datetime(k, format="%Y%m%d"), "rain_mm": max(v, 0)} for k, v in raw.items() if v != -999 # NASA POWER fill value ]) return df.sort_values("date").reset_index(drop=True) # ============================================================ # FORECAST FETCHERS # ============================================================ def fetch_openmeteo_forecast(lat, lon, days=8): """Open-Meteo NWP forecast — default best_match model.""" url = ( f"https://api.open-meteo.com/v1/forecast" f"?latitude={lat}&longitude={lon}" f"&daily=precipitation_sum" f"&forecast_days={days + 1}" f"&timezone=UTC" ) r = requests.get(url, timeout=30) r.raise_for_status() body = r.json() df = pd.DataFrame({ "date": pd.to_datetime(body["daily"]["time"]), "rain_mm": body["daily"]["precipitation_sum"], }) df["rain_mm"] = pd.to_numeric(df["rain_mm"], errors="coerce").fillna(0) return df def fetch_openmeteo_ensemble_forecast(lat, lon, days=8): """Open-Meteo ECMWF IFS ensemble forecast — 51 members at 0.4° resolution. Returns a DataFrame with daily percentile bands and exceedance probabilities: p10, p25, p50 (median), p75, p90 — mm/day prob_gt_1mm, prob_gt_5mm — % of members exceeding threshold n_members — number of members in response """ url = ( f"https://ensemble-api.open-meteo.com/v1/ensemble" f"?latitude={lat}&longitude={lon}" f"&daily=precipitation_sum" f"&models=ecmwf_ifs04" f"&forecast_days={days}" f"&timezone=UTC" ) r = requests.get(url, timeout=60) r.raise_for_status() body = r.json() daily = body["daily"] dates = pd.to_datetime(daily["time"]) # Member columns are named like "precipitation_sum_member01", etc. member_keys = [k for k in daily.keys() if k.startswith("precipitation_sum")] if not member_keys: print(" ⚠ No member columns found in ensemble response") return None # Shape: (n_members, n_days) members = np.array([daily[k] for k in member_keys], dtype=float) members = np.where(members < 0, 0, members) # clip negatives df = pd.DataFrame({ "date": dates, "p10": np.nanpercentile(members, 10, axis=0), "p25": np.nanpercentile(members, 25, axis=0), "p50": np.nanpercentile(members, 50, axis=0), "p75": np.nanpercentile(members, 75, axis=0), "p90": np.nanpercentile(members, 90, axis=0), "mean": np.nanmean(members, axis=0), "prob_gt_1mm": np.mean(members > 1.0, axis=0) * 100, "prob_gt_5mm": np.mean(members > 5.0, axis=0) * 100, "n_members": members.shape[0], }) return df def fetch_yr_forecast(lat, lon): """YR.no LocationForecast 2.0 — hourly precip aggregated to daily. Note: forecast-only service; no historical archive is available. """ url = f"https://api.met.no/weatherapi/locationforecast/2.0/compact?lat={lat}&lon={lon}" headers = {"User-Agent": USER_AGENT} r = requests.get(url, headers=headers, timeout=30) r.raise_for_status() body = r.json() records = [] for entry in body["properties"]["timeseries"]: ts = pd.to_datetime(entry["time"]) data = entry["data"] precip = 0.0 if "next_1_hours" in data: precip = data["next_1_hours"]["details"].get("precipitation_amount", 0.0) elif "next_6_hours" in data: precip = data["next_6_hours"]["details"].get("precipitation_amount", 0.0) / 6 records.append({"datetime": ts, "precip_hour": precip}) hourly = pd.DataFrame(records) hourly["date"] = hourly["datetime"].dt.date daily = ( hourly.groupby("date")["precip_hour"] .sum() .reset_index() .rename(columns={"precip_hour": "rain_mm"}) ) daily["date"] = pd.to_datetime(daily["date"]) return daily def fetch_openweathermap_forecast(lat, lon, api_key): """OpenWeatherMap One Call 3.0 — daily forecast (needs paid/free key).""" url = ( f"https://api.openweathermap.org/data/3.0/onecall" f"?lat={lat}&lon={lon}" f"&exclude=current,minutely,hourly,alerts" f"&appid={api_key}&units=metric" ) r = requests.get(url, timeout=30) r.raise_for_status() body = r.json() records = [] for day in body.get("daily", []): records.append({ "date": pd.to_datetime(day["dt"], unit="s").normalize(), "rain_mm": day.get("rain", 0.0), }) return pd.DataFrame(records) def fetch_weatherapi_forecast(lat, lon, api_key, days=7): """WeatherAPI.com free forecast (up to 3 days on free tier, 14 on paid).""" url = ( f"https://api.weatherapi.com/v1/forecast.json" f"?key={api_key}&q={lat},{lon}&days={days}&aqi=no&alerts=no" ) r = requests.get(url, timeout=30) r.raise_for_status() body = r.json() records = [] for day in body["forecast"]["forecastday"]: records.append({ "date": pd.to_datetime(day["date"]), "rain_mm": day["day"].get("totalprecip_mm", 0.0), }) return pd.DataFrame(records) # ============================================================ # STATS # ============================================================ def compare_stats(df, ref_col, other_col): """Compute MAE, RMSE, bias, Pearson r between two columns.""" valid = df[[ref_col, other_col]].dropna() if len(valid) < 5: return {"n": len(valid), "MAE": None, "RMSE": None, "Bias": None, "r": None} diff = valid[other_col] - valid[ref_col] mae = diff.abs().mean() rmse = (diff**2).mean()**0.5 bias = diff.mean() r = valid[ref_col].corr(valid[other_col]) return {"n": len(valid), "MAE": round(mae,2), "RMSE": round(rmse,2), "Bias": round(bias,2), "r": round(r,3)} def compute_pairwise_stats(data_dict): """Compute pairwise Pearson r and RMSE for all archive provider pairs. Returns: names — list of provider names (same order as matrices) r_matrix — (n x n) Pearson r array, NaN where not computable rmse_matrix — (n x n) RMSE array (mm/day), 0 on diagonal """ providers = [(name, df) for name, df in data_dict.items() if df is not None and len(df) > 5] names = [p[0] for p in providers] n = len(names) r_matrix = np.full((n, n), np.nan) rmse_matrix = np.full((n, n), np.nan) for i in range(n): r_matrix[i, i] = 1.0 rmse_matrix[i, i] = 0.0 _, df_i = providers[i] for j in range(i + 1, n): _, df_j = providers[j] merged = df_i.merge(df_j, on="date", suffixes=("_i", "_j")) valid = merged[["rain_mm_i", "rain_mm_j"]].dropna() if len(valid) < 5: continue r = valid["rain_mm_i"].corr(valid["rain_mm_j"]) rmse = ((valid["rain_mm_i"] - valid["rain_mm_j"]) ** 2).mean() ** 0.5 r_matrix[i, j] = r_matrix[j, i] = round(r, 3) rmse_matrix[i, j] = rmse_matrix[j, i] = round(rmse, 2) return names, r_matrix, rmse_matrix def wetdry_agreement(data_dict, threshold=1.0): """For each provider pair, compute % of days both-dry / both-wet / disagree. A day is 'dry' if rain_mm < threshold (default 1 mm). Returns a list of dicts: pair, both_dry, both_wet, disagree, n. """ providers = [(name, df) for name, df in data_dict.items() if df is not None and len(df) > 5] results = [] for i, (name_i, df_i) in enumerate(providers): for j in range(i + 1, len(providers)): name_j, df_j = providers[j] merged = df_i.merge(df_j, on="date", suffixes=("_i", "_j")) valid = merged[["rain_mm_i", "rain_mm_j"]].dropna() if len(valid) < 5: continue dry_i = valid["rain_mm_i"] < threshold dry_j = valid["rain_mm_j"] < threshold both_dry = (dry_i & dry_j).mean() * 100 both_wet = (~dry_i & ~dry_j).mean() * 100 disagree = 100 - both_dry - both_wet results.append({ "pair": f"{name_i}\nvs\n{name_j}", "both_dry": round(both_dry, 1), "both_wet": round(both_wet, 1), "disagree": round(disagree, 1), "n": len(valid), }) return results # ============================================================ # PLOTTING — ARCHIVE # ============================================================ ARCHIVE_COLORS = { "ERA5 (Open-Meteo)": "#1f77b4", "ERA5-Land (Open-Meteo)": "#ff7f0e", "CERRA (Open-Meteo)": "#2ca02c", "NASA POWER": "#d62728", } def _build_spread_frame(data_dict): """Merge all valid archive providers onto a common date axis. Returns a DataFrame with one column per provider + _mean and _std columns. """ valid = {name: df for name, df in data_dict.items() if df is not None and len(df) > 0} if not valid: return None, [] merged = None for name, df in valid.items(): tmp = df[["date", "rain_mm"]].rename(columns={"rain_mm": name}) merged = tmp if merged is None else merged.merge(tmp, on="date", how="outer") cols = list(valid.keys()) merged[cols] = merged[cols].clip(lower=0) merged["_mean"] = merged[cols].mean(axis=1) merged["_std"] = merged[cols].std(axis=1) return merged.sort_values("date").reset_index(drop=True), cols def plot_archive_with_spread(data_dict, location_name, start, end, output_dir): """Three-panel archive plot: Top: Individual provider lines + multi-source mean±std shading Middle: 30-day rolling mean Bottom: Inter-source std (agreement signal — used to flag uncertain periods) """ spread, _ = _build_spread_frame(data_dict) if spread is None: return valid_providers = {name: df for name, df in data_dict.items() if df is not None and len(df) > 0} fig, axes = plt.subplots(3, 1, figsize=(14, 12), sharex=True) # --- Top: daily raw + spread band --- ax1 = axes[0] ax1.fill_between( spread["date"], (spread["_mean"] - spread["_std"]).clip(lower=0), spread["_mean"] + spread["_std"], alpha=0.15, color="gray", label="Multi-source ±1 std" ) ax1.plot(spread["date"], spread["_mean"], color="black", linewidth=1.0, linestyle="--", alpha=0.6, label="Multi-source mean", zorder=5) for name, df in valid_providers.items(): ax1.plot(df["date"], df["rain_mm"], label=name, color=ARCHIVE_COLORS.get(name, "gray"), linewidth=0.7, alpha=0.85) ax1.set_ylabel("Precipitation (mm/day)") ax1.set_title( f"{location_name} — Daily Precipitation with Multi-Source Spread\n" f"{start} → {end} | Grey band = ±1 std across all providers" ) ax1.legend(fontsize=8, ncol=2) ax1.grid(True, alpha=0.3) # --- Middle: 30-day rolling mean --- ax2 = axes[1] roll_mean = spread.set_index("date")["_mean"].rolling(30, min_periods=15).mean() roll_std = spread.set_index("date")["_std"].rolling(30, min_periods=15).mean() ax2.fill_between( roll_mean.index, (roll_mean - roll_std).clip(lower=0), roll_mean + roll_std, alpha=0.15, color="gray" ) for name, df in valid_providers.items(): rolled = df.set_index("date")["rain_mm"].rolling(30, min_periods=15).mean() ax2.plot(rolled.index, rolled.values, label=name, color=ARCHIVE_COLORS.get(name, "gray"), linewidth=1.5) ax2.set_ylabel("30-day rolling mean (mm/day)") ax2.legend(fontsize=8) ax2.grid(True, alpha=0.3) # --- Bottom: inter-source std (agreement signal) --- ax3 = axes[2] ax3.fill_between( spread["date"], 0, spread["_std"], color="purple", alpha=0.35, label="Std across providers (higher = less agreement)" ) median_std = spread["_std"].median() ax3.axhline(median_std, color="purple", linestyle=":", linewidth=1, label=f"Median std = {median_std:.2f} mm/day") ax3.set_ylabel("Std dev across\nproviders (mm)") ax3.set_xlabel("Date") ax3.legend(fontsize=8) ax3.grid(True, alpha=0.3) ax3.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y")) fig.autofmt_xdate() plt.tight_layout() path = output_dir / f"archive_{location_name}.png" plt.savefig(path, dpi=150, bbox_inches="tight") plt.close() print(f" Saved: {path}") def plot_cumulative(data_dict, location_name, output_dir): """Cumulative annual precipitation — most relevant for crop/irrigation context.""" fig, ax = plt.subplots(figsize=(14, 5)) for name, df in data_dict.items(): if df is None or len(df) == 0: continue s = df.set_index("date")["rain_mm"].sort_index().cumsum() total = s.iloc[-1] ax.plot(s.index, s.values, label=f"{name} (total: {total:.0f} mm)", color=ARCHIVE_COLORS.get(name, "gray"), linewidth=1.8) ax.set_ylabel("Cumulative precipitation (mm)") ax.set_xlabel("Date") ax.set_title( f"{location_name} — Cumulative Annual Precipitation by Provider\n" "Divergence = sources disagree on total seasonal rainfall" ) ax.legend(fontsize=9) ax.grid(True, alpha=0.3) ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y")) fig.autofmt_xdate() plt.tight_layout() path = output_dir / f"cumulative_{location_name}.png" plt.savefig(path, dpi=150) plt.close() print(f" Saved: {path}") def plot_vs_era5(data_dict, location_name, output_dir): """Each provider vs ERA5 reference: scatter + regression line. NOTE: ERA5 is used as reference here for visual comparison only. It is a reanalysis product, not a ground-truth measurement. Use the pairwise heatmap (plot_pairwise_heatmap) for an unbiased comparison. How to read: - Each panel shows one provider (y-axis) vs ERA5 (x-axis) for daily precip. - Points on the red diagonal = perfect agreement. - Points above = provider wetter than ERA5 on that day. - r = Pearson correlation (1 = perfect). MAE = mean absolute error in mm/day. - Bias = provider minus ERA5 on average (positive = provider wetter). """ ref_name = "ERA5 (Open-Meteo)" ref_df = data_dict.get(ref_name) if ref_df is None: return others = [(n, df) for n, df in data_dict.items() if n != ref_name and df is not None and len(df) > 0] if not others: return n = len(others) fig, axes = plt.subplots(1, n, figsize=(5 * n, 5), squeeze=False) for i, (name, df) in enumerate(others): ax = axes[0][i] merged = ref_df.merge(df, on="date", suffixes=("_ref", "_cmp")) valid = merged[["rain_mm_ref", "rain_mm_cmp"]].dropna() color = ARCHIVE_COLORS.get(name, "steelblue") ax.scatter(valid["rain_mm_ref"], valid["rain_mm_cmp"], s=4, alpha=0.35, color=color) lim = max(valid.max().max(), 1) * 1.05 ax.plot([0, lim], [0, lim], "r--", linewidth=1, label="Perfect agreement") if len(valid) > 5: coeffs = np.polyfit(valid["rain_mm_ref"], valid["rain_mm_cmp"], 1) x_fit = np.linspace(0, lim, 100) ax.plot(x_fit, np.polyval(coeffs, x_fit), "k-", linewidth=1, alpha=0.6, label=f"Regression (slope={coeffs[0]:.2f})") stats = compare_stats(merged, "rain_mm_ref", "rain_mm_cmp") ax.set_xlim(0, lim); ax.set_ylim(0, lim) ax.set_xlabel("ERA5 (Open-Meteo) mm/day", fontsize=9) ax.set_ylabel(f"{name} mm/day", fontsize=9) ax.set_title( f"{name}\nr={stats['r']} MAE={stats['MAE']} mm Bias={stats['Bias']:+.2f} mm", fontsize=9 ) ax.legend(fontsize=7) ax.grid(True, alpha=0.3) fig.suptitle( f"{location_name} — Daily Precip vs ERA5 (reference only — ERA5 is not ground truth)\n" "Red dashed = perfect agreement. See pairwise heatmap for unbiased comparison.", fontsize=10 ) plt.tight_layout() path = output_dir / f"vs_era5_{location_name}.png" plt.savefig(path, dpi=150) plt.close() print(f" Saved: {path}") def plot_pairwise_heatmap(data_dict, location_name, output_dir): """Two heatmaps side by side: pairwise Pearson r and pairwise RMSE. No provider is treated as reference — all pairs compared equally. High r + low RMSE = sources agree. Where they diverge, neither is ground truth. """ names, r_matrix, rmse_matrix = compute_pairwise_stats(data_dict) if len(names) < 2: return n = len(names) short = [nm.replace(" (Open-Meteo)", "\n(OM)") for nm in names] fig, axes = plt.subplots(1, 2, figsize=(max(10, n * 2.5), max(5, n * 1.5 + 1))) # Pearson r ax1 = axes[0] im1 = ax1.imshow(r_matrix, vmin=0, vmax=1, cmap="YlGn", aspect="auto") plt.colorbar(im1, ax=ax1, label="Pearson r") ax1.set_xticks(range(n)); ax1.set_yticks(range(n)) ax1.set_xticklabels(short, fontsize=8, rotation=45, ha="right") ax1.set_yticklabels(short, fontsize=8) for i in range(n): for j in range(n): val = r_matrix[i, j] if not np.isnan(val): text_color = "white" if val > 0.8 else "black" ax1.text(j, i, f"{val:.2f}", ha="center", va="center", fontsize=9, color=text_color) ax1.set_title("Pairwise Pearson r\n(1.0 = perfect agreement, no reference)") # RMSE ax2 = axes[1] off_diag = rmse_matrix[rmse_matrix > 0] vmax_rmse = float(np.nanmax(off_diag)) if len(off_diag) > 0 else 10 im2 = ax2.imshow(rmse_matrix, vmin=0, vmax=vmax_rmse, cmap="YlOrRd_r", aspect="auto") plt.colorbar(im2, ax=ax2, label="RMSE (mm/day)") ax2.set_xticks(range(n)); ax2.set_yticks(range(n)) ax2.set_xticklabels(short, fontsize=8, rotation=45, ha="right") ax2.set_yticklabels(short, fontsize=8) for i in range(n): for j in range(n): val = rmse_matrix[i, j] if not np.isnan(val): ax2.text(j, i, f"{val:.1f}", ha="center", va="center", fontsize=9) ax2.set_title("Pairwise RMSE (mm/day)\n(0 = perfect agreement)") fig.suptitle( f"{location_name} — Archive Provider Agreement (unbiased — no single reference)\n" "Pairs with high r + low RMSE are consistent. Divergent pairs reveal dataset uncertainty.", fontsize=10 ) plt.tight_layout() path = output_dir / f"pairwise_{location_name}.png" plt.savefig(path, dpi=150, bbox_inches="tight") plt.close() print(f" Saved: {path}") def plot_wetdry_agreement(data_dict, location_name, output_dir, threshold=1.0): """Stacked bar chart: for each provider pair, % of days both-dry / both-wet / disagree. High 'disagree' % means one source says rain, the other says dry on the same day. This is the most practically relevant divergence for SmartCane crop monitoring. """ results = wetdry_agreement(data_dict, threshold) if not results: return pairs = [r["pair"] for r in results] both_dry = [r["both_dry"] for r in results] both_wet = [r["both_wet"] for r in results] disagree = [r["disagree"] for r in results] fig, ax = plt.subplots(figsize=(max(8, len(pairs) * 2.8), 6)) x = np.arange(len(pairs)) ax.bar(x, both_dry, label=f"Both dry (<{threshold} mm)", color="#a8d5e2", edgecolor="white") ax.bar(x, both_wet, bottom=both_dry, label=f"Both wet (≥{threshold} mm)", color="#3a7ebf", edgecolor="white") bottom2 = [d + w for d, w in zip(both_dry, both_wet)] ax.bar(x, disagree, bottom=bottom2, label="Disagree (one wet, one dry)", color="#e07b54", edgecolor="white") # Add % labels inside bars for i, r in enumerate(results): if r["both_dry"] > 4: ax.text(i, r["both_dry"] / 2, f"{r['both_dry']:.0f}%", ha="center", va="center", fontsize=7, color="black") if r["both_wet"] > 4: ax.text(i, r["both_dry"] + r["both_wet"] / 2, f"{r['both_wet']:.0f}%", ha="center", va="center", fontsize=7, color="white") if r["disagree"] > 4: ax.text(i, bottom2[i] + r["disagree"] / 2, f"{r['disagree']:.0f}%", ha="center", va="center", fontsize=7, color="white") ax.set_xticks(x) ax.set_xticklabels(pairs, fontsize=8) ax.set_ylim(0, 108) ax.set_ylabel("% of days in archive period") ax.set_title( f"{location_name} — Provider Agreement on Wet vs Dry Days\n" f"Threshold = {threshold} mm/day | Orange = providers disagree on whether it rained" ) ax.legend(loc="upper right", fontsize=9) ax.grid(True, axis="y", alpha=0.3) plt.tight_layout() path = output_dir / f"wetdry_agreement_{location_name}.png" plt.savefig(path, dpi=150, bbox_inches="tight") plt.close() print(f" Saved: {path}") # ============================================================ # PLOTTING — FORECAST # ============================================================ def plot_forecast_with_ensemble(forecast_data, ensemble_df, location_name, output_dir): """Two-panel forecast comparison: Top: Deterministic bars (YR.no, Open-Meteo) + ECMWF ensemble percentile shading Bottom: Exceedance probabilities P(rain > 1 mm) and P(rain > 5 mm) Reading guide: - Dark blue shading = where 50% of ensemble members agree (25th–75th %ile) - Light blue shading = where 80% of ensemble members agree (10th–90th %ile) - Bars = deterministic (single-value) forecast from each provider - Bottom panel: at 50% on the dashed line = coin-flip uncertainty about rain event """ has_ensemble = ensemble_df is not None and len(ensemble_df) > 0 det_providers = [(name, df) for name, df in forecast_data.items() if df is not None and len(df) > 0] if not has_ensemble and not det_providers: return fig, axes = plt.subplots( 2, 1, figsize=(12, 8), sharex=True, gridspec_kw={"height_ratios": [3, 1.5]} ) # ---- TOP: deterministic bars + ensemble shading ---- ax1 = axes[0] if has_ensemble: ax1.fill_between( ensemble_df["date"], ensemble_df["p10"], ensemble_df["p90"], alpha=0.12, color="steelblue", label="Ensemble 10th–90th %ile" ) ax1.fill_between( ensemble_df["date"], ensemble_df["p25"], ensemble_df["p75"], alpha=0.28, color="steelblue", label="Ensemble 25th–75th %ile" ) ax1.plot( ensemble_df["date"], ensemble_df["p50"], color="steelblue", linewidth=1.8, linestyle="-", label=f"Ensemble median ({int(ensemble_df['n_members'].iloc[0])} members)", zorder=5 ) det_colors = {"Open-Meteo Forecast": "#1f77b4", "YR.no": "#e8882a"} bar_half = datetime.timedelta(hours=10) # offset so bars don't overlap n_det = len(det_providers) offsets = np.linspace(-bar_half.total_seconds() / 3600 * (n_det - 1) / 2, bar_half.total_seconds() / 3600 * (n_det - 1) / 2, n_det) bar_width_days = 0.3 for k, (name, df) in enumerate(det_providers): offset_td = datetime.timedelta(hours=offsets[k]) shifted_dates = df["date"] + offset_td ax1.bar( shifted_dates, df["rain_mm"], width=bar_width_days, label=name, color=det_colors.get(name, f"C{k}"), alpha=0.75, zorder=4, ) ax1.set_ylabel("Precipitation (mm/day)") ax1.set_title( f"{location_name} — 7-Day Forecast\n" "Shading = Open-Meteo ECMWF ensemble spread | Bars = deterministic forecasts" ) ax1.legend(fontsize=8, loc="upper right") ax1.grid(True, axis="y", alpha=0.3) ax1.set_ylim(bottom=0) # ---- BOTTOM: exceedance probabilities ---- ax2 = axes[1] if has_ensemble: ax2.step( ensemble_df["date"], ensemble_df["prob_gt_1mm"], where="mid", color="steelblue", linewidth=2.0, label="P(rain > 1 mm)" ) ax2.step( ensemble_df["date"], ensemble_df["prob_gt_5mm"], where="mid", color="darkblue", linewidth=2.0, linestyle="--", label="P(rain > 5 mm)" ) ax2.fill_between( ensemble_df["date"], 0, ensemble_df["prob_gt_5mm"], step="mid", alpha=0.18, color="darkblue" ) ax2.axhline(50, color="gray", linestyle=":", linewidth=0.9, alpha=0.8, label="50% (coin-flip)") ax2.set_ylim(0, 108) ax2.set_ylabel("Exceedance\nprobability (%)") ax2.legend(fontsize=8, loc="upper right") ax2.grid(True, alpha=0.3) ax2.set_xlabel("Date") ax2.xaxis.set_major_formatter(mdates.DateFormatter("%d %b")) fig.autofmt_xdate() plt.tight_layout() path = output_dir / f"forecast_ensemble_{location_name}.png" plt.savefig(path, dpi=150, bbox_inches="tight") plt.close() print(f" Saved: {path}") def plot_forecast(data_dict, location_name, output_dir): """Bar chart comparing deterministic 7-day forecasts (original style, kept for reference).""" _, ax = plt.subplots(figsize=(12, 5)) providers = [(name, df) for name, df in data_dict.items() if df is not None and len(df) > 0] n = len(providers) if n == 0: plt.close() return all_dates = sorted(set( d for _, df in providers for d in df["date"].dt.date.tolist() )) x = np.arange(len(all_dates)) width = 0.8 / n cmap = matplotlib.colormaps["tab10"].resampled(n) for i, (name, df) in enumerate(providers): date_map = dict(zip(df["date"].dt.date, df["rain_mm"])) vals = [date_map.get(d, 0.0) for d in all_dates] ax.bar(x + i * width, vals, width, label=name, color=cmap(i), alpha=0.85) ax.set_xticks(x + width * (n - 1) / 2) ax.set_xticklabels([d.strftime("%d %b") for d in all_dates], rotation=45, ha="right") ax.set_ylabel("Precipitation (mm/day)") ax.set_title(f"{location_name} — 7-Day Deterministic Forecast Comparison") ax.legend(fontsize=9) ax.grid(True, axis="y", alpha=0.3) plt.tight_layout() path = output_dir / f"forecast_{location_name}.png" plt.savefig(path, dpi=150) plt.close() print(f" Saved: {path}") # ============================================================ # MAIN # ============================================================ def run_location(loc_name, lat, lon): print(f"\n{'='*60}") print(f" {loc_name} ({lat}°, {lon}°)") print(f"{'='*60}") # ---- ARCHIVE ---- print("\n[Archive]") archive_data = {} print(" Fetching Open-Meteo ERA5...") try: archive_data["ERA5 (Open-Meteo)"] = fetch_openmeteo_archive( lat, lon, ARCHIVE_START, ARCHIVE_END, model="era5" ) print(f" → {len(archive_data['ERA5 (Open-Meteo)'])} days") except Exception as e: print(f" ✗ ERA5 failed: {e}") archive_data["ERA5 (Open-Meteo)"] = None time.sleep(0.5) print(" Fetching Open-Meteo ERA5-Land...") try: archive_data["ERA5-Land (Open-Meteo)"] = fetch_openmeteo_archive( lat, lon, ARCHIVE_START, ARCHIVE_END, model="era5_land" ) print(f" → {len(archive_data['ERA5-Land (Open-Meteo)'])} days") except Exception as e: print(f" ✗ ERA5-Land failed: {e}") archive_data["ERA5-Land (Open-Meteo)"] = None time.sleep(0.5) # CERRA only covers Europe (roughly 20°W–45°E, 30°N–80°N) if -20 <= lon <= 45 and 30 <= lat <= 80: print(" Fetching Open-Meteo CERRA (EU only)...") try: archive_data["CERRA (Open-Meteo)"] = fetch_openmeteo_archive( lat, lon, ARCHIVE_START, ARCHIVE_END, model="cerra" ) print(f" → {len(archive_data['CERRA (Open-Meteo)'])} days") except Exception as e: print(f" ✗ CERRA failed: {e}") archive_data["CERRA (Open-Meteo)"] = None else: print(" Skipping CERRA (outside EU coverage)") archive_data["CERRA (Open-Meteo)"] = None time.sleep(0.5) print(" Fetching NASA POWER...") try: archive_data["NASA POWER"] = fetch_nasa_power(lat, lon, ARCHIVE_START, ARCHIVE_END) print(f" → {len(archive_data['NASA POWER'])} days") except Exception as e: print(f" ✗ NASA POWER failed: {e}") archive_data["NASA POWER"] = None # Pairwise stats (no ERA5 reference bias) print("\n Pairwise archive stats (Pearson r | RMSE mm/day | Bias mm/day):") names, r_mat, rmse_mat = compute_pairwise_stats(archive_data) for i in range(len(names)): for j in range(i + 1, len(names)): if not np.isnan(r_mat[i, j]): print(f" {names[i]:30s} vs {names[j]:30s} " f"r={r_mat[i,j]:.3f} RMSE={rmse_mat[i,j]:.2f} mm") plot_archive_with_spread(archive_data, loc_name, ARCHIVE_START, ARCHIVE_END, OUTPUT_DIR) plot_cumulative(archive_data, loc_name, OUTPUT_DIR) plot_vs_era5(archive_data, loc_name, OUTPUT_DIR) plot_pairwise_heatmap(archive_data, loc_name, OUTPUT_DIR) plot_wetdry_agreement(archive_data, loc_name, OUTPUT_DIR) # ---- FORECAST ---- print("\n[Forecast]") forecast_data = {} print(" Fetching Open-Meteo deterministic forecast...") try: forecast_data["Open-Meteo Forecast"] = fetch_openmeteo_forecast(lat, lon) print(f" → {len(forecast_data['Open-Meteo Forecast'])} days") except Exception as e: print(f" ✗ Open-Meteo forecast failed: {e}") forecast_data["Open-Meteo Forecast"] = None time.sleep(0.5) print(" Fetching Open-Meteo ECMWF ensemble forecast...") ensemble_df = None try: ensemble_df = fetch_openmeteo_ensemble_forecast(lat, lon) if ensemble_df is not None: print(f" → {len(ensemble_df)} days " f"({int(ensemble_df['n_members'].iloc[0])} ensemble members)") except Exception as e: print(f" ✗ Open-Meteo ensemble failed: {e}") time.sleep(0.5) print(" Fetching YR.no LocationForecast...") try: forecast_data["YR.no"] = fetch_yr_forecast(lat, lon) print(f" → {len(forecast_data['YR.no'])} days") except Exception as e: print(f" ✗ YR.no failed: {e}") forecast_data["YR.no"] = None if OPENWEATHERMAP_KEY: time.sleep(0.5) print(" Fetching OpenWeatherMap forecast...") try: forecast_data["OpenWeatherMap"] = fetch_openweathermap_forecast( lat, lon, OPENWEATHERMAP_KEY ) print(f" → {len(forecast_data['OpenWeatherMap'])} days") except Exception as e: print(f" ✗ OpenWeatherMap failed: {e}") forecast_data["OpenWeatherMap"] = None if WEATHERAPI_KEY: time.sleep(0.5) print(" Fetching WeatherAPI.com forecast...") try: forecast_data["WeatherAPI.com"] = fetch_weatherapi_forecast( lat, lon, WEATHERAPI_KEY ) print(f" → {len(forecast_data['WeatherAPI.com'])} days") except Exception as e: print(f" ✗ WeatherAPI.com failed: {e}") forecast_data["WeatherAPI.com"] = None plot_forecast_with_ensemble(forecast_data, ensemble_df, loc_name, OUTPUT_DIR) plot_forecast(forecast_data, loc_name, OUTPUT_DIR) if __name__ == "__main__": print(f"Weather API Comparison — {datetime.date.today()}") print(f"Archive: {ARCHIVE_START} → {ARCHIVE_END}") print(f"Forecast: {FORECAST_START} → {FORECAST_END}") print(f"Output: {OUTPUT_DIR.resolve()}") for loc_name, coords in LOCATIONS.items(): run_location(loc_name, coords["lat"], coords["lon"]) time.sleep(1) print(f"\nDone. Plots saved to: {OUTPUT_DIR.resolve()}")