""" weather_api_comparison.py ========================= Compare daily precipitation from multiple free weather APIs across two locations: - Arnhem, Netherlands (51.985°N, 5.899°E) — European climate - Angata, Kenya ( 1.330°S, 34.738°E) — tropical / sugarcane context ARCHIVE providers (no API key required): 1. Open-Meteo ERA5 — current SmartCane provider (0.25°, global) 2. Open-Meteo ERA5-Land — higher resolution variant (0.10°, global) 3. Open-Meteo CERRA — EU regional reanalysis (0.05°, EU only) 4. NASA POWER — completely independent source (0.50°, global) FORECAST providers (no API key required): 5. Open-Meteo Forecast — ensemble of NWP models (global) 6. YR.no LocationForecast — Norwegian Met Institute (~10 days, global) FORECAST providers (API key required — set in CONFIG below, leave "" to skip): 7. OpenWeatherMap — free tier, 1000 calls/day 8. WeatherAPI.com — free tier OUTPUT: Plots saved to: weather_comparison_plots/ Summary stats printed to console. Usage: python weather_api_comparison.py """ import os import json import time import datetime import requests import pandas as pd import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt import matplotlib.dates as mdates import numpy as np from pathlib import Path # ============================================================ # CONFIG # ============================================================ LOCATIONS = { "Arnhem_NL": {"lat": 51.985, "lon": 5.899}, "Angata_KE": {"lat": -1.330, "lon": 34.738}, } # Archive: last 12 months ARCHIVE_END = datetime.date.today() - datetime.timedelta(days=2) # ERA5 lags ~2 days ARCHIVE_START = ARCHIVE_END - datetime.timedelta(days=365) # Forecast: today + 7 days FORECAST_START = datetime.date.today() FORECAST_END = FORECAST_START + datetime.timedelta(days=7) # Optional API keys — leave "" to skip that provider OPENWEATHERMAP_KEY = "" # https://openweathermap.org/api WEATHERAPI_KEY = "" # https://www.weatherapi.com/ OUTPUT_DIR = Path("weather_comparison_plots") 
OUTPUT_DIR.mkdir(exist_ok=True)

# met.no rejects anonymous clients, so every YR.no request identifies itself.
USER_AGENT = "SmartCane-WeatherComparison/1.0 (research; contact via github)"


# ============================================================
# ARCHIVE FETCHERS
# ============================================================

def fetch_openmeteo_archive(lat, lon, start, end, model="era5"):
    """Fetch daily precipitation from the Open-Meteo archive API.

    Args:
        lat, lon: Coordinates in decimal degrees.
        start, end: Inclusive ``datetime.date`` bounds (ISO-formatted into the URL).
        model: ``"era5"`` (default), ``"era5_land"`` or ``"cerra"`` — lowercase,
            as the API expects.

    Returns:
        DataFrame with columns ``date`` (datetime64) and ``rain_mm`` (float >= 0).

    Raises:
        requests.HTTPError: On a non-2xx API response.
    """
    # ERA5 is the API's default model — sending a models param with the wrong
    # casing causes a 400, so only append it for the non-default models.
    model_suffix = "" if model == "era5" else f"&models={model}"
    url = (
        "https://archive-api.open-meteo.com/v1/archive"
        f"?latitude={lat}&longitude={lon}"
        "&daily=precipitation_sum"
        f"&start_date={start}&end_date={end}"
        f"{model_suffix}"
        "&timezone=UTC"
    )
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    body = r.json()
    df = pd.DataFrame({
        "date": pd.to_datetime(body["daily"]["time"]),
        "rain_mm": body["daily"]["precipitation_sum"],
    })
    # Coerce API nulls/strings to numeric, clamp tiny negatives, fill gaps with 0.
    df["rain_mm"] = pd.to_numeric(df["rain_mm"], errors="coerce").clip(lower=0).fillna(0)
    # ERA5-Land sometimes returns values in meters (Open-Meteo API quirk).
    # Auto-detect: an annual total < 50 mm is implausible for any non-polar
    # location, so assume metres and convert to millimetres.
    if df["rain_mm"].sum() < 50 and len(df) > 30:
        df["rain_mm"] = df["rain_mm"] * 1000
        print(" ⚠ Unit auto-converted m→mm (values were implausibly small)")
    return df


def fetch_nasa_power(lat, lon, start, end):
    """NASA POWER — daily PRECTOTCORR (precipitation corrected), 0.5° grid.

    Returns a DataFrame with ``date``/``rain_mm`` columns sorted by date;
    days flagged with the NASA fill value (-999) are dropped.
    """
    url = (
        "https://power.larc.nasa.gov/api/temporal/daily/point"
        "?parameters=PRECTOTCORR"
        "&community=AG"
        f"&longitude={lon}&latitude={lat}"
        f"&start={start.strftime('%Y%m%d')}&end={end.strftime('%Y%m%d')}"
        "&format=JSON"
    )
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    body = r.json()
    raw = body["properties"]["parameter"]["PRECTOTCORR"]
    df = pd.DataFrame([
        {"date": pd.to_datetime(k, format="%Y%m%d"), "rain_mm": max(v, 0)}
        for k, v in raw.items()
        if v != -999  # NASA POWER fill value for missing days
    ])
    return df.sort_values("date").reset_index(drop=True)


# ============================================================
# FORECAST FETCHERS
# ============================================================

def fetch_openmeteo_forecast(lat, lon, days=8):
    """Open-Meteo NWP forecast — default best_match model.

    Requests ``days + 1`` forecast days so the window covers today plus
    ``days`` full future days. Returns a ``date``/``rain_mm`` DataFrame.
    """
    url = (
        "https://api.open-meteo.com/v1/forecast"
        f"?latitude={lat}&longitude={lon}"
        "&daily=precipitation_sum"
        f"&forecast_days={days + 1}"
        "&timezone=UTC"
    )
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    body = r.json()
    df = pd.DataFrame({
        "date": pd.to_datetime(body["daily"]["time"]),
        "rain_mm": body["daily"]["precipitation_sum"],
    })
    df["rain_mm"] = pd.to_numeric(df["rain_mm"], errors="coerce").fillna(0)
    return df


def fetch_yr_forecast(lat, lon):
    """YR.no LocationForecast 2.0 — precipitation aggregated to daily totals.

    The compact timeseries provides hourly entries (with ``next_1_hours``
    accumulations) for the near term, then 6-hourly entries (with
    ``next_6_hours`` accumulations). Each entry's accumulation window tiles
    the day without overlap, so summing one accumulation per entry per
    calendar day yields the daily total.
    """
    url = f"https://api.met.no/weatherapi/locationforecast/2.0/compact?lat={lat}&lon={lon}"
    headers = {"User-Agent": USER_AGENT}  # required by met.no terms of service
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    body = r.json()
    records = []
    for entry in body["properties"]["timeseries"]:
        ts = pd.to_datetime(entry["time"])
        data = entry["data"]
        precip = 0.0
        if "next_1_hours" in data:
            precip = data["next_1_hours"]["details"].get("precipitation_amount", 0.0)
        elif "next_6_hours" in data:
            # BUG FIX: the 6-hour value is an accumulation over the whole
            # window, not an hourly rate. The previous code divided by 6 while
            # still counting it only once per 6-hourly entry, undercounting
            # daily totals ~6x beyond the hourly-resolution horizon.
            precip = data["next_6_hours"]["details"].get("precipitation_amount", 0.0)
        records.append({"datetime": ts, "precip_period": precip})
    period = pd.DataFrame(records)
    period["date"] = period["datetime"].dt.date
    daily = (
        period.groupby("date")["precip_period"]
        .sum()
        .reset_index()
        .rename(columns={"precip_period": "rain_mm"})
    )
    daily["date"] = pd.to_datetime(daily["date"])
    return daily


def fetch_openweathermap_forecast(lat, lon, api_key):
    """OpenWeatherMap One Call 3.0 — daily forecast (needs a valid key).

    ``daily[*].rain`` is already the daily total in mm; absent key means 0.
    """
    url = (
        "https://api.openweathermap.org/data/3.0/onecall"
        f"?lat={lat}&lon={lon}"
        "&exclude=current,minutely,hourly,alerts"
        f"&appid={api_key}&units=metric"
    )
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    body = r.json()
    records = []
    for day in body.get("daily", []):
        records.append({
            "date": pd.to_datetime(day["dt"], unit="s").normalize(),
            "rain_mm": day.get("rain", 0.0),
        })
    return pd.DataFrame(records)


def fetch_weatherapi_forecast(lat, lon, api_key, days=7):
    """WeatherAPI.com forecast (up to 3 days on free tier, 14 on paid)."""
    url = (
        "https://api.weatherapi.com/v1/forecast.json"
        f"?key={api_key}&q={lat},{lon}&days={days}&aqi=no&alerts=no"
    )
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    body = r.json()
    records = []
    for day in body["forecast"]["forecastday"]:
        records.append({
            "date": pd.to_datetime(day["date"]),
            "rain_mm": day["day"].get("totalprecip_mm", 0.0),
        })
    return pd.DataFrame(records)


# ============================================================
# STATS
# ============================================================

def compare_stats(df, ref_col, other_col):
    """Compute MAE, RMSE, bias, and Pearson r between two columns of *df*.

    Bias is ``other - ref`` on average (positive = *other* is wetter).
    Returns a dict with all metrics set to None when fewer than 5 paired
    observations survive ``dropna`` — too few for meaningful statistics.
    """
    valid = df[[ref_col, other_col]].dropna()
    if len(valid) < 5:
        return {"n": len(valid), "MAE": None, "RMSE": None, "Bias": None, "r": None}
    diff = valid[other_col] - valid[ref_col]
    mae = diff.abs().mean()
    rmse = (diff**2).mean()**0.5
    bias = diff.mean()
    r = valid[ref_col].corr(valid[other_col])
    return {"n": len(valid), "MAE": round(mae, 2), "RMSE": round(rmse, 2),
            "Bias": round(bias, 2), "r": round(r, 3)}


# ============================================================
# PLOTTING
# ============================================================

def plot_archive(data_dict, location_name, start, end, output_dir):
    """Line plot of all archive providers for one location.

    Top panel: raw daily values. Bottom panel: 30-day rolling mean, which
    smooths day-to-day noise so systematic wet/dry biases stand out.
    """
    fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)
    colors = {
        "ERA5 (Open-Meteo)": "#1f77b4",
        "ERA5-Land (Open-Meteo)": "#ff7f0e",
        "CERRA (Open-Meteo)": "#2ca02c",
        "NASA POWER": "#d62728",
    }

    # Top: daily raw
    ax1 = axes[0]
    for name, df in data_dict.items():
        if df is not None and len(df) > 0:
            ax1.plot(df["date"], df["rain_mm"], label=name,
                     color=colors.get(name), linewidth=0.8, alpha=0.85)
    ax1.set_ylabel("Precipitation (mm/day)")
    ax1.set_title(f"{location_name} — Daily Precipitation (archive {start} → {end})")
    ax1.legend(fontsize=8)
    ax1.grid(True, alpha=0.3)

    # Bottom: 30-day rolling mean
    ax2 = axes[1]
    for name, df in data_dict.items():
        if df is not None and len(df) > 0:
            rolled = df.set_index("date")["rain_mm"].rolling(30, min_periods=15).mean()
            ax2.plot(rolled.index, rolled.values, label=name,
                     color=colors.get(name), linewidth=1.5)
    ax2.set_ylabel("30-day rolling mean (mm/day)")
    ax2.set_xlabel("Date")
    ax2.legend(fontsize=8)
    ax2.grid(True, alpha=0.3)
    ax2.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))

    fig.autofmt_xdate()
    plt.tight_layout()
    path = output_dir / f"archive_{location_name}.png"
    plt.savefig(path, dpi=150)
    plt.close()
    print(f" Saved: {path}")


def plot_forecast(data_dict, location_name, output_dir):
    """Grouped bar chart comparing 7-day forecasts across providers.

    Providers with no data are skipped; if none have data, nothing is saved.
    Missing dates for a provider are drawn as 0 mm.
    """
    fig, ax = plt.subplots(figsize=(12, 5))
    providers = [(name, df) for name, df in data_dict.items()
                 if df is not None and len(df) > 0]
    n = len(providers)
    if n == 0:
        plt.close()
        return

    # Union of all forecast dates so every provider shares the same x axis.
    all_dates = sorted(set(
        d for _, df in providers for d in df["date"].dt.date.tolist()
    ))
    x = np.arange(len(all_dates))
    width = 0.8 / n  # total group width 0.8, split evenly between providers
    cmap = matplotlib.colormaps["tab10"].resampled(n)

    for i, (name, df) in enumerate(providers):
        vals = []
        date_map = dict(zip(df["date"].dt.date, df["rain_mm"]))
        for d in all_dates:
            vals.append(date_map.get(d, 0.0))
        ax.bar(x + i * width, vals, width, label=name, color=cmap(i), alpha=0.85)

    ax.set_xticks(x + width * (n - 1) / 2)  # center tick labels under each group
    ax.set_xticklabels([d.strftime("%d %b") for d in all_dates],
                       rotation=45, ha="right")
    ax.set_ylabel("Precipitation (mm/day)")
    ax.set_title(f"{location_name} — 7-Day Forecast Comparison")
    ax.legend(fontsize=9)
    ax.grid(True, axis="y", alpha=0.3)
    plt.tight_layout()
    path = output_dir / f"forecast_{location_name}.png"
    plt.savefig(path, dpi=150)
    plt.close()
    print(f" Saved: {path}")


def plot_vs_era5(data_dict, location_name, output_dir):
    """Each provider vs ERA5 reference: scatter + regression line.

    How to read:
    - Each panel shows one provider (y-axis) vs ERA5 (x-axis) for daily precip.
    - Points on the red diagonal = perfect agreement.
    - Points above = provider wetter than ERA5 on that day.
    - r = Pearson correlation (1 = perfect). MAE = mean absolute error in mm/day.
    - Bias = provider minus ERA5 on average (positive = provider wetter).
    """
    ref_name = "ERA5 (Open-Meteo)"
    ref_df = data_dict.get(ref_name)
    if ref_df is None:
        return
    others = [(n, df) for n, df in data_dict.items()
              if n != ref_name and df is not None and len(df) > 0]
    if not others:
        return

    n = len(others)
    fig, axes = plt.subplots(1, n, figsize=(5 * n, 5), squeeze=False)
    colors = {
        "ERA5-Land (Open-Meteo)": "#ff7f0e",
        "CERRA (Open-Meteo)": "#2ca02c",
        "NASA POWER": "#d62728",
    }

    for i, (name, df) in enumerate(others):
        ax = axes[0][i]
        merged = ref_df.merge(df, on="date", suffixes=("_ref", "_cmp"))
        valid = merged[["rain_mm_ref", "rain_mm_cmp"]].dropna()
        # BUG FIX: an empty overlap made `lim` NaN and broke the axes limits.
        if valid.empty:
            ax.set_title(f"{name}\n(no overlapping data)", fontsize=9)
            continue
        color = colors.get(name, "steelblue")
        ax.scatter(valid["rain_mm_ref"], valid["rain_mm_cmp"],
                   s=4, alpha=0.35, color=color)

        # Perfect-agreement diagonal
        lim = max(valid.max().max(), 1) * 1.05
        ax.plot([0, lim], [0, lim], "r--", linewidth=1, label="Perfect agreement")

        # Linear regression line
        if len(valid) > 5:
            coeffs = np.polyfit(valid["rain_mm_ref"], valid["rain_mm_cmp"], 1)
            x_fit = np.linspace(0, lim, 100)
            ax.plot(x_fit, np.polyval(coeffs, x_fit), "k-", linewidth=1,
                    alpha=0.6, label=f"Regression (slope={coeffs[0]:.2f})")

        stats = compare_stats(merged, "rain_mm_ref", "rain_mm_cmp")
        # BUG FIX: `:+.2f` raised TypeError when Bias is None (fewer than 5
        # paired observations).
        bias_txt = f"{stats['Bias']:+.2f}" if stats["Bias"] is not None else "n/a"
        ax.set_xlim(0, lim); ax.set_ylim(0, lim)
        ax.set_xlabel("ERA5 (Open-Meteo) mm/day", fontsize=9)
        ax.set_ylabel(f"{name} mm/day", fontsize=9)
        ax.set_title(
            f"{name}\nr={stats['r']} MAE={stats['MAE']} mm Bias={bias_txt} mm",
            fontsize=9
        )
        ax.legend(fontsize=7)
        ax.grid(True, alpha=0.3)

    fig.suptitle(
        f"{location_name} — Daily Precip vs ERA5 Reference\n"
        "Red dashed = perfect agreement. Points above line = provider wetter than ERA5.",
        fontsize=10
    )
    plt.tight_layout()
    path = output_dir / f"vs_era5_{location_name}.png"
    plt.savefig(path, dpi=150)
    plt.close()
    print(f" Saved: {path}")


def plot_cumulative(data_dict, location_name, output_dir):
    """Cumulative annual precipitation — most relevant for crop/irrigation context.

    Divergence between the curves means the sources disagree on total
    seasonal rainfall, which matters more for irrigation planning than
    day-to-day differences.
    """
    fig, ax = plt.subplots(figsize=(14, 5))
    colors = {
        "ERA5 (Open-Meteo)": "#1f77b4",
        "ERA5-Land (Open-Meteo)": "#ff7f0e",
        "CERRA (Open-Meteo)": "#2ca02c",
        "NASA POWER": "#d62728",
    }
    for name, df in data_dict.items():
        if df is None or len(df) == 0:
            continue
        s = df.set_index("date")["rain_mm"].sort_index().cumsum()
        total = s.iloc[-1]
        ax.plot(s.index, s.values, label=f"{name} (total: {total:.0f} mm)",
                color=colors.get(name), linewidth=1.8)
    ax.set_ylabel("Cumulative precipitation (mm)")
    ax.set_xlabel("Date")
    ax.set_title(
        f"{location_name} — Cumulative Annual Precipitation by Provider\n"
        "Divergence = sources disagree on total seasonal rainfall"
    )
    ax.legend(fontsize=9)
    ax.grid(True, alpha=0.3)
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
    fig.autofmt_xdate()
    plt.tight_layout()
    path = output_dir / f"cumulative_{location_name}.png"
    plt.savefig(path, dpi=150)
    plt.close()
    print(f" Saved: {path}")


# ============================================================
# MAIN
# ============================================================

def run_location(loc_name, lat, lon):
    """Fetch archive + forecast data for one location, print stats, save plots.

    Each provider is fetched inside its own try/except so one failing API
    never aborts the whole comparison; failures are recorded as None and
    skipped by the stats/plot helpers. Short sleeps between calls keep us
    polite to the free APIs.
    """
    print(f"\n{'='*60}")
    print(f" {loc_name} ({lat}°, {lon}°)")
    print(f"{'='*60}")

    # ---- ARCHIVE ----
    print("\n[Archive]")
    archive_data = {}

    print(" Fetching Open-Meteo ERA5...")
    try:
        archive_data["ERA5 (Open-Meteo)"] = fetch_openmeteo_archive(
            lat, lon, ARCHIVE_START, ARCHIVE_END, model="era5"
        )
        print(f" → {len(archive_data['ERA5 (Open-Meteo)'])} days")
    except Exception as e:
        print(f" ✗ ERA5 failed: {e}")
        archive_data["ERA5 (Open-Meteo)"] = None
    time.sleep(0.5)

    print(" Fetching Open-Meteo ERA5-Land...")
    try:
        archive_data["ERA5-Land (Open-Meteo)"] = fetch_openmeteo_archive(
            lat, lon, ARCHIVE_START, ARCHIVE_END, model="era5_land"
        )
        print(f" → {len(archive_data['ERA5-Land (Open-Meteo)'])} days")
    except Exception as e:
        print(f" ✗ ERA5-Land failed: {e}")
        archive_data["ERA5-Land (Open-Meteo)"] = None
    time.sleep(0.5)

    # CERRA only covers Europe (roughly 20°W–45°E, 30°N–80°N)
    if -20 <= lon <= 45 and 30 <= lat <= 80:
        print(" Fetching Open-Meteo CERRA (EU only)...")
        try:
            archive_data["CERRA (Open-Meteo)"] = fetch_openmeteo_archive(
                lat, lon, ARCHIVE_START, ARCHIVE_END, model="cerra"
            )
            print(f" → {len(archive_data['CERRA (Open-Meteo)'])} days")
        except Exception as e:
            print(f" ✗ CERRA failed: {e}")
            archive_data["CERRA (Open-Meteo)"] = None
    else:
        print(" Skipping CERRA (outside EU coverage)")
        archive_data["CERRA (Open-Meteo)"] = None
    time.sleep(0.5)

    print(" Fetching NASA POWER...")
    try:
        archive_data["NASA POWER"] = fetch_nasa_power(lat, lon, ARCHIVE_START, ARCHIVE_END)
        print(f" → {len(archive_data['NASA POWER'])} days")
    except Exception as e:
        print(f" ✗ NASA POWER failed: {e}")
        archive_data["NASA POWER"] = None

    # Stats vs ERA5 reference
    print("\n Stats vs ERA5 (Open-Meteo) reference:")
    ref_df = archive_data.get("ERA5 (Open-Meteo)")
    for name, df in archive_data.items():
        if name == "ERA5 (Open-Meteo)" or df is None:
            continue
        if ref_df is None:
            continue
        merged = ref_df.merge(df, on="date", suffixes=("_ref", "_cmp"))
        stats = compare_stats(merged, "rain_mm_ref", "rain_mm_cmp")
        print(f" {name:30s} n={stats['n']:4d} MAE={stats['MAE']} "
              f"RMSE={stats['RMSE']} Bias={stats['Bias']} r={stats['r']}")

    plot_archive(archive_data, loc_name, ARCHIVE_START, ARCHIVE_END, OUTPUT_DIR)
    plot_cumulative(archive_data, loc_name, OUTPUT_DIR)
    plot_vs_era5(archive_data, loc_name, OUTPUT_DIR)

    # ---- FORECAST ----
    print("\n[Forecast]")
    forecast_data = {}

    print(" Fetching Open-Meteo forecast...")
    try:
        forecast_data["Open-Meteo Forecast"] = fetch_openmeteo_forecast(lat, lon)
        print(f" → {len(forecast_data['Open-Meteo Forecast'])} days")
    except Exception as e:
        print(f" ✗ Open-Meteo forecast failed: {e}")
        forecast_data["Open-Meteo Forecast"] = None
    time.sleep(0.5)

    print(" Fetching YR.no LocationForecast...")
    try:
        forecast_data["YR.no"] = fetch_yr_forecast(lat, lon)
        print(f" → {len(forecast_data['YR.no'])} days")
    except Exception as e:
        print(f" ✗ YR.no failed: {e}")
        forecast_data["YR.no"] = None

    # Keyed providers are only attempted when a key is configured.
    if OPENWEATHERMAP_KEY:
        time.sleep(0.5)
        print(" Fetching OpenWeatherMap forecast...")
        try:
            forecast_data["OpenWeatherMap"] = fetch_openweathermap_forecast(
                lat, lon, OPENWEATHERMAP_KEY
            )
            print(f" → {len(forecast_data['OpenWeatherMap'])} days")
        except Exception as e:
            print(f" ✗ OpenWeatherMap failed: {e}")
            forecast_data["OpenWeatherMap"] = None

    if WEATHERAPI_KEY:
        time.sleep(0.5)
        print(" Fetching WeatherAPI.com forecast...")
        try:
            forecast_data["WeatherAPI.com"] = fetch_weatherapi_forecast(
                lat, lon, WEATHERAPI_KEY
            )
            print(f" → {len(forecast_data['WeatherAPI.com'])} days")
        except Exception as e:
            print(f" ✗ WeatherAPI.com failed: {e}")
            forecast_data["WeatherAPI.com"] = None

    plot_forecast(forecast_data, loc_name, OUTPUT_DIR)


if __name__ == "__main__":
    print(f"Weather API Comparison — {datetime.date.today()}")
    print(f"Archive: {ARCHIVE_START} → {ARCHIVE_END}")
    print(f"Forecast: {FORECAST_START} → {FORECAST_END}")
    print(f"Output: {OUTPUT_DIR.resolve()}")
    for loc_name, coords in LOCATIONS.items():
        run_location(loc_name, coords["lat"], coords["lon"])
        time.sleep(1)  # stay polite to the free APIs between locations
    print(f"\nDone. Plots saved to: {OUTPUT_DIR.resolve()}")