Add weather API comparison scripts for precipitation analysis
- Implemented `weather_api_comparison.py` to compare daily precipitation from multiple weather APIs for Arnhem, Netherlands and Angata, Kenya. - Integrated fetching functions for various weather data sources including Open-Meteo, NASA POWER, OpenWeatherMap, and WeatherAPI.com. - Added plotting functions to visualize archive and forecast data, including cumulative precipitation and comparison against ERA5 reference. - Created `90_rainfall_utils.R` for R to fetch rainfall data and overlay it on CI plots, supporting multiple providers with a generic fetch wrapper. - Included spatial helpers for efficient API calls based on unique geographical tiles.
This commit is contained in:
parent
003bb8255e
commit
1da5c0d0a7
567
python_app/weather_api_comparison.py
Normal file
567
python_app/weather_api_comparison.py
Normal file
|
|
@ -0,0 +1,567 @@
|
|||
"""
|
||||
weather_api_comparison.py
|
||||
=========================
|
||||
Compare daily precipitation from multiple free weather APIs across two locations:
|
||||
- Arnhem, Netherlands (51.985°N, 5.899°E) — European climate
|
||||
- Angata, Kenya ( 1.330°S, 34.738°E) — tropical / sugarcane context
|
||||
|
||||
ARCHIVE providers (no API key required):
|
||||
1. Open-Meteo ERA5 — current SmartCane provider (0.25°, global)
|
||||
2. Open-Meteo ERA5-Land — higher resolution variant (0.10°, global)
|
||||
3. Open-Meteo CERRA — EU regional reanalysis (0.05°, EU only)
|
||||
4. NASA POWER — completely independent source (0.50°, global)
|
||||
|
||||
FORECAST providers (no API key required):
|
||||
5. Open-Meteo Forecast — ensemble of NWP models (global)
|
||||
6. YR.no LocationForecast — Norwegian Met Institute (~10 days, global)
|
||||
|
||||
FORECAST providers (API key required — set in CONFIG below, leave "" to skip):
|
||||
7. OpenWeatherMap — free tier, 1000 calls/day
|
||||
8. WeatherAPI.com — free tier
|
||||
|
||||
OUTPUT:
|
||||
Plots saved to: weather_comparison_plots/
|
||||
Summary stats printed to console.
|
||||
|
||||
Usage:
|
||||
python weather_api_comparison.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import datetime
|
||||
import requests
|
||||
import pandas as pd
|
||||
import matplotlib
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.dates as mdates
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
|
||||
# ============================================================
|
||||
# CONFIG
|
||||
# ============================================================
|
||||
|
||||
LOCATIONS = {
|
||||
"Arnhem_NL": {"lat": 51.985, "lon": 5.899},
|
||||
"Angata_KE": {"lat": -1.330, "lon": 34.738},
|
||||
}
|
||||
|
||||
# Archive: last 12 months
|
||||
ARCHIVE_END = datetime.date.today() - datetime.timedelta(days=2) # ERA5 lags ~2 days
|
||||
ARCHIVE_START = ARCHIVE_END - datetime.timedelta(days=365)
|
||||
|
||||
# Forecast: today + 7 days
|
||||
FORECAST_START = datetime.date.today()
|
||||
FORECAST_END = FORECAST_START + datetime.timedelta(days=7)
|
||||
|
||||
# Optional API keys — leave "" to skip that provider
|
||||
OPENWEATHERMAP_KEY = "" # https://openweathermap.org/api
|
||||
WEATHERAPI_KEY = "" # https://www.weatherapi.com/
|
||||
|
||||
OUTPUT_DIR = Path("weather_comparison_plots")
|
||||
OUTPUT_DIR.mkdir(exist_ok=True)
|
||||
|
||||
USER_AGENT = "SmartCane-WeatherComparison/1.0 (research; contact via github)"
|
||||
|
||||
# ============================================================
|
||||
# ARCHIVE FETCHERS
|
||||
# ============================================================
|
||||
|
||||
def fetch_openmeteo_archive(lat, lon, start, end, model="era5"):
|
||||
"""Open-Meteo ERA5 / ERA5-Land / CERRA archive.
|
||||
ERA5 is the default (no models param needed). ERA5-Land and CERRA use lowercase names.
|
||||
"""
|
||||
# ERA5 is the default model — adding models param with wrong casing causes 400
|
||||
model_suffix = "" if model == "era5" else f"&models={model}"
|
||||
url = (
|
||||
f"https://archive-api.open-meteo.com/v1/archive"
|
||||
f"?latitude={lat}&longitude={lon}"
|
||||
f"&daily=precipitation_sum"
|
||||
f"&start_date={start}&end_date={end}"
|
||||
f"{model_suffix}"
|
||||
f"&timezone=UTC"
|
||||
)
|
||||
r = requests.get(url, timeout=30)
|
||||
r.raise_for_status()
|
||||
body = r.json()
|
||||
df = pd.DataFrame({
|
||||
"date": pd.to_datetime(body["daily"]["time"]),
|
||||
"rain_mm": body["daily"]["precipitation_sum"],
|
||||
})
|
||||
df["rain_mm"] = pd.to_numeric(df["rain_mm"], errors="coerce").clip(lower=0).fillna(0)
|
||||
# ERA5-Land sometimes returns values in meters (Open-Meteo API quirk).
|
||||
# Auto-detect: if annual total < 50mm for any non-polar location, assume m → convert.
|
||||
if df["rain_mm"].sum() < 50 and len(df) > 30:
|
||||
df["rain_mm"] = df["rain_mm"] * 1000
|
||||
print(f" ⚠ Unit auto-converted m→mm (values were implausibly small)")
|
||||
return df
|
||||
|
||||
|
||||
def fetch_nasa_power(lat, lon, start, end):
|
||||
"""NASA POWER — daily PRECTOTCORR (precipitation corrected), 0.5° grid."""
|
||||
url = (
|
||||
"https://power.larc.nasa.gov/api/temporal/daily/point"
|
||||
f"?parameters=PRECTOTCORR"
|
||||
f"&community=AG"
|
||||
f"&longitude={lon}&latitude={lat}"
|
||||
f"&start={start.strftime('%Y%m%d')}&end={end.strftime('%Y%m%d')}"
|
||||
f"&format=JSON"
|
||||
)
|
||||
r = requests.get(url, timeout=60)
|
||||
r.raise_for_status()
|
||||
body = r.json()
|
||||
raw = body["properties"]["parameter"]["PRECTOTCORR"]
|
||||
df = pd.DataFrame([
|
||||
{"date": pd.to_datetime(k, format="%Y%m%d"), "rain_mm": max(v, 0)}
|
||||
for k, v in raw.items()
|
||||
if v != -999 # NASA POWER fill value
|
||||
])
|
||||
return df.sort_values("date").reset_index(drop=True)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# FORECAST FETCHERS
|
||||
# ============================================================
|
||||
|
||||
def fetch_openmeteo_forecast(lat, lon, days=8):
|
||||
"""Open-Meteo NWP forecast — default best_match model."""
|
||||
end = datetime.date.today() + datetime.timedelta(days=days)
|
||||
url = (
|
||||
f"https://api.open-meteo.com/v1/forecast"
|
||||
f"?latitude={lat}&longitude={lon}"
|
||||
f"&daily=precipitation_sum"
|
||||
f"&forecast_days={days + 1}"
|
||||
f"&timezone=UTC"
|
||||
)
|
||||
r = requests.get(url, timeout=30)
|
||||
r.raise_for_status()
|
||||
body = r.json()
|
||||
df = pd.DataFrame({
|
||||
"date": pd.to_datetime(body["daily"]["time"]),
|
||||
"rain_mm": body["daily"]["precipitation_sum"],
|
||||
})
|
||||
df["rain_mm"] = pd.to_numeric(df["rain_mm"], errors="coerce").fillna(0)
|
||||
return df
|
||||
|
||||
|
||||
def fetch_yr_forecast(lat, lon):
|
||||
"""YR.no LocationForecast 2.0 — hourly precip aggregated to daily."""
|
||||
url = f"https://api.met.no/weatherapi/locationforecast/2.0/compact?lat={lat}&lon={lon}"
|
||||
headers = {"User-Agent": USER_AGENT}
|
||||
r = requests.get(url, headers=headers, timeout=30)
|
||||
r.raise_for_status()
|
||||
body = r.json()
|
||||
|
||||
records = []
|
||||
for entry in body["properties"]["timeseries"]:
|
||||
ts = pd.to_datetime(entry["time"])
|
||||
data = entry["data"]
|
||||
precip = 0.0
|
||||
if "next_1_hours" in data:
|
||||
precip = data["next_1_hours"]["details"].get("precipitation_amount", 0.0)
|
||||
elif "next_6_hours" in data:
|
||||
precip = data["next_6_hours"]["details"].get("precipitation_amount", 0.0) / 6
|
||||
records.append({"datetime": ts, "precip_hour": precip})
|
||||
|
||||
hourly = pd.DataFrame(records)
|
||||
hourly["date"] = hourly["datetime"].dt.date
|
||||
daily = (
|
||||
hourly.groupby("date")["precip_hour"]
|
||||
.sum()
|
||||
.reset_index()
|
||||
.rename(columns={"precip_hour": "rain_mm"})
|
||||
)
|
||||
daily["date"] = pd.to_datetime(daily["date"])
|
||||
return daily
|
||||
|
||||
|
||||
def fetch_openweathermap_forecast(lat, lon, api_key):
|
||||
"""OpenWeatherMap One Call 3.0 — daily forecast (needs paid/free key)."""
|
||||
url = (
|
||||
f"https://api.openweathermap.org/data/3.0/onecall"
|
||||
f"?lat={lat}&lon={lon}"
|
||||
f"&exclude=current,minutely,hourly,alerts"
|
||||
f"&appid={api_key}&units=metric"
|
||||
)
|
||||
r = requests.get(url, timeout=30)
|
||||
r.raise_for_status()
|
||||
body = r.json()
|
||||
records = []
|
||||
for day in body.get("daily", []):
|
||||
records.append({
|
||||
"date": pd.to_datetime(day["dt"], unit="s").normalize(),
|
||||
"rain_mm": day.get("rain", 0.0),
|
||||
})
|
||||
return pd.DataFrame(records)
|
||||
|
||||
|
||||
def fetch_weatherapi_forecast(lat, lon, api_key, days=7):
|
||||
"""WeatherAPI.com free forecast (up to 3 days on free tier, 14 on paid)."""
|
||||
url = (
|
||||
f"https://api.weatherapi.com/v1/forecast.json"
|
||||
f"?key={api_key}&q={lat},{lon}&days={days}&aqi=no&alerts=no"
|
||||
)
|
||||
r = requests.get(url, timeout=30)
|
||||
r.raise_for_status()
|
||||
body = r.json()
|
||||
records = []
|
||||
for day in body["forecast"]["forecastday"]:
|
||||
records.append({
|
||||
"date": pd.to_datetime(day["date"]),
|
||||
"rain_mm": day["day"].get("totalprecip_mm", 0.0),
|
||||
})
|
||||
return pd.DataFrame(records)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# STATS
|
||||
# ============================================================
|
||||
|
||||
def compare_stats(df, ref_col, other_col):
|
||||
"""Compute MAE, RMSE, bias, Pearson r between two columns."""
|
||||
valid = df[[ref_col, other_col]].dropna()
|
||||
if len(valid) < 5:
|
||||
return {"n": len(valid), "MAE": None, "RMSE": None, "Bias": None, "r": None}
|
||||
diff = valid[other_col] - valid[ref_col]
|
||||
mae = diff.abs().mean()
|
||||
rmse = (diff**2).mean()**0.5
|
||||
bias = diff.mean()
|
||||
r = valid[ref_col].corr(valid[other_col])
|
||||
return {"n": len(valid), "MAE": round(mae,2), "RMSE": round(rmse,2),
|
||||
"Bias": round(bias,2), "r": round(r,3)}
|
||||
|
||||
|
||||
# ============================================================
|
||||
# PLOTTING
|
||||
# ============================================================
|
||||
|
||||
def plot_archive(data_dict, location_name, start, end, output_dir):
|
||||
"""Line plot of all archive providers for one location."""
|
||||
fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)
|
||||
|
||||
colors = {
|
||||
"ERA5 (Open-Meteo)": "#1f77b4",
|
||||
"ERA5-Land (Open-Meteo)": "#ff7f0e",
|
||||
"CERRA (Open-Meteo)": "#2ca02c",
|
||||
"NASA POWER": "#d62728",
|
||||
}
|
||||
|
||||
# Top: daily raw
|
||||
ax1 = axes[0]
|
||||
for name, df in data_dict.items():
|
||||
if df is not None and len(df) > 0:
|
||||
ax1.plot(df["date"], df["rain_mm"], label=name,
|
||||
color=colors.get(name), linewidth=0.8, alpha=0.85)
|
||||
ax1.set_ylabel("Precipitation (mm/day)")
|
||||
ax1.set_title(f"{location_name} — Daily Precipitation (archive {start} → {end})")
|
||||
ax1.legend(fontsize=8)
|
||||
ax1.grid(True, alpha=0.3)
|
||||
|
||||
# Bottom: 30-day rolling mean
|
||||
ax2 = axes[1]
|
||||
for name, df in data_dict.items():
|
||||
if df is not None and len(df) > 0:
|
||||
rolled = df.set_index("date")["rain_mm"].rolling(30, min_periods=15).mean()
|
||||
ax2.plot(rolled.index, rolled.values, label=name,
|
||||
color=colors.get(name), linewidth=1.5)
|
||||
ax2.set_ylabel("30-day rolling mean (mm/day)")
|
||||
ax2.set_xlabel("Date")
|
||||
ax2.legend(fontsize=8)
|
||||
ax2.grid(True, alpha=0.3)
|
||||
ax2.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
|
||||
fig.autofmt_xdate()
|
||||
|
||||
plt.tight_layout()
|
||||
path = output_dir / f"archive_{location_name}.png"
|
||||
plt.savefig(path, dpi=150)
|
||||
plt.close()
|
||||
print(f" Saved: {path}")
|
||||
|
||||
|
||||
def plot_forecast(data_dict, location_name, output_dir):
|
||||
"""Bar chart comparing 7-day forecasts across providers."""
|
||||
fig, ax = plt.subplots(figsize=(12, 5))
|
||||
|
||||
providers = [(name, df) for name, df in data_dict.items() if df is not None and len(df) > 0]
|
||||
n = len(providers)
|
||||
if n == 0:
|
||||
plt.close()
|
||||
return
|
||||
|
||||
# Collect all forecast dates
|
||||
all_dates = sorted(set(
|
||||
d for _, df in providers
|
||||
for d in df["date"].dt.date.tolist()
|
||||
))
|
||||
x = np.arange(len(all_dates))
|
||||
width = 0.8 / n
|
||||
|
||||
cmap = matplotlib.colormaps["tab10"].resampled(n)
|
||||
for i, (name, df) in enumerate(providers):
|
||||
vals = []
|
||||
date_map = dict(zip(df["date"].dt.date, df["rain_mm"]))
|
||||
for d in all_dates:
|
||||
vals.append(date_map.get(d, 0.0))
|
||||
ax.bar(x + i * width, vals, width, label=name, color=cmap(i), alpha=0.85)
|
||||
|
||||
ax.set_xticks(x + width * (n - 1) / 2)
|
||||
ax.set_xticklabels([d.strftime("%d %b") for d in all_dates], rotation=45, ha="right")
|
||||
ax.set_ylabel("Precipitation (mm/day)")
|
||||
ax.set_title(f"{location_name} — 7-Day Forecast Comparison")
|
||||
ax.legend(fontsize=9)
|
||||
ax.grid(True, axis="y", alpha=0.3)
|
||||
plt.tight_layout()
|
||||
|
||||
path = output_dir / f"forecast_{location_name}.png"
|
||||
plt.savefig(path, dpi=150)
|
||||
plt.close()
|
||||
print(f" Saved: {path}")
|
||||
|
||||
|
||||
def plot_vs_era5(data_dict, location_name, output_dir):
|
||||
"""Each provider vs ERA5 reference: scatter + regression line.
|
||||
|
||||
How to read:
|
||||
- Each panel shows one provider (y-axis) vs ERA5 (x-axis) for daily precip.
|
||||
- Points on the red diagonal = perfect agreement.
|
||||
- Points above = provider wetter than ERA5 on that day.
|
||||
- r = Pearson correlation (1 = perfect). MAE = mean absolute error in mm/day.
|
||||
- Bias = provider minus ERA5 on average (positive = provider wetter).
|
||||
"""
|
||||
ref_name = "ERA5 (Open-Meteo)"
|
||||
ref_df = data_dict.get(ref_name)
|
||||
if ref_df is None:
|
||||
return
|
||||
|
||||
others = [(n, df) for n, df in data_dict.items()
|
||||
if n != ref_name and df is not None and len(df) > 0]
|
||||
if not others:
|
||||
return
|
||||
|
||||
n = len(others)
|
||||
fig, axes = plt.subplots(1, n, figsize=(5 * n, 5), squeeze=False)
|
||||
|
||||
colors = {
|
||||
"ERA5-Land (Open-Meteo)": "#ff7f0e",
|
||||
"CERRA (Open-Meteo)": "#2ca02c",
|
||||
"NASA POWER": "#d62728",
|
||||
}
|
||||
|
||||
for i, (name, df) in enumerate(others):
|
||||
ax = axes[0][i]
|
||||
merged = ref_df.merge(df, on="date", suffixes=("_ref", "_cmp"))
|
||||
valid = merged[["rain_mm_ref", "rain_mm_cmp"]].dropna()
|
||||
|
||||
color = colors.get(name, "steelblue")
|
||||
ax.scatter(valid["rain_mm_ref"], valid["rain_mm_cmp"],
|
||||
s=4, alpha=0.35, color=color)
|
||||
|
||||
# Perfect-agreement diagonal
|
||||
lim = max(valid.max().max(), 1) * 1.05
|
||||
ax.plot([0, lim], [0, lim], "r--", linewidth=1, label="Perfect agreement")
|
||||
|
||||
# Linear regression line
|
||||
if len(valid) > 5:
|
||||
coeffs = np.polyfit(valid["rain_mm_ref"], valid["rain_mm_cmp"], 1)
|
||||
x_fit = np.linspace(0, lim, 100)
|
||||
ax.plot(x_fit, np.polyval(coeffs, x_fit), "k-", linewidth=1,
|
||||
alpha=0.6, label=f"Regression (slope={coeffs[0]:.2f})")
|
||||
|
||||
stats = compare_stats(merged, "rain_mm_ref", "rain_mm_cmp")
|
||||
ax.set_xlim(0, lim); ax.set_ylim(0, lim)
|
||||
ax.set_xlabel("ERA5 (Open-Meteo) mm/day", fontsize=9)
|
||||
ax.set_ylabel(f"{name} mm/day", fontsize=9)
|
||||
ax.set_title(
|
||||
f"{name}\nr={stats['r']} MAE={stats['MAE']} mm Bias={stats['Bias']:+.2f} mm",
|
||||
fontsize=9
|
||||
)
|
||||
ax.legend(fontsize=7)
|
||||
ax.grid(True, alpha=0.3)
|
||||
|
||||
fig.suptitle(
|
||||
f"{location_name} — Daily Precip vs ERA5 Reference\n"
|
||||
"Red dashed = perfect agreement. Points above line = provider wetter than ERA5.",
|
||||
fontsize=10
|
||||
)
|
||||
plt.tight_layout()
|
||||
path = output_dir / f"vs_era5_{location_name}.png"
|
||||
plt.savefig(path, dpi=150)
|
||||
plt.close()
|
||||
print(f" Saved: {path}")
|
||||
|
||||
|
||||
def plot_cumulative(data_dict, location_name, output_dir):
|
||||
"""Cumulative annual precipitation — most relevant for crop/irrigation context."""
|
||||
fig, ax = plt.subplots(figsize=(14, 5))
|
||||
|
||||
colors = {
|
||||
"ERA5 (Open-Meteo)": "#1f77b4",
|
||||
"ERA5-Land (Open-Meteo)": "#ff7f0e",
|
||||
"CERRA (Open-Meteo)": "#2ca02c",
|
||||
"NASA POWER": "#d62728",
|
||||
}
|
||||
|
||||
for name, df in data_dict.items():
|
||||
if df is None or len(df) == 0:
|
||||
continue
|
||||
s = df.set_index("date")["rain_mm"].sort_index().cumsum()
|
||||
total = s.iloc[-1]
|
||||
ax.plot(s.index, s.values, label=f"{name} (total: {total:.0f} mm)",
|
||||
color=colors.get(name), linewidth=1.8)
|
||||
|
||||
ax.set_ylabel("Cumulative precipitation (mm)")
|
||||
ax.set_xlabel("Date")
|
||||
ax.set_title(
|
||||
f"{location_name} — Cumulative Annual Precipitation by Provider\n"
|
||||
"Divergence = sources disagree on total seasonal rainfall"
|
||||
)
|
||||
ax.legend(fontsize=9)
|
||||
ax.grid(True, alpha=0.3)
|
||||
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
|
||||
fig.autofmt_xdate()
|
||||
plt.tight_layout()
|
||||
|
||||
path = output_dir / f"cumulative_{location_name}.png"
|
||||
plt.savefig(path, dpi=150)
|
||||
plt.close()
|
||||
print(f" Saved: {path}")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# MAIN
|
||||
# ============================================================
|
||||
|
||||
def run_location(loc_name, lat, lon):
|
||||
print(f"\n{'='*60}")
|
||||
print(f" {loc_name} ({lat}°, {lon}°)")
|
||||
print(f"{'='*60}")
|
||||
|
||||
# ---- ARCHIVE ----
|
||||
print("\n[Archive]")
|
||||
archive_data = {}
|
||||
|
||||
print(" Fetching Open-Meteo ERA5...")
|
||||
try:
|
||||
archive_data["ERA5 (Open-Meteo)"] = fetch_openmeteo_archive(
|
||||
lat, lon, ARCHIVE_START, ARCHIVE_END, model="era5"
|
||||
)
|
||||
print(f" → {len(archive_data['ERA5 (Open-Meteo)'])} days")
|
||||
except Exception as e:
|
||||
print(f" ✗ ERA5 failed: {e}")
|
||||
archive_data["ERA5 (Open-Meteo)"] = None
|
||||
|
||||
time.sleep(0.5)
|
||||
print(" Fetching Open-Meteo ERA5-Land...")
|
||||
try:
|
||||
archive_data["ERA5-Land (Open-Meteo)"] = fetch_openmeteo_archive(
|
||||
lat, lon, ARCHIVE_START, ARCHIVE_END, model="era5_land"
|
||||
)
|
||||
print(f" → {len(archive_data['ERA5-Land (Open-Meteo)'])} days")
|
||||
except Exception as e:
|
||||
print(f" ✗ ERA5-Land failed: {e}")
|
||||
archive_data["ERA5-Land (Open-Meteo)"] = None
|
||||
|
||||
time.sleep(0.5)
|
||||
# CERRA only covers Europe (roughly 20°W–45°E, 30°N–80°N)
|
||||
if -20 <= lon <= 45 and 30 <= lat <= 80:
|
||||
print(" Fetching Open-Meteo CERRA (EU only)...")
|
||||
try:
|
||||
archive_data["CERRA (Open-Meteo)"] = fetch_openmeteo_archive(
|
||||
lat, lon, ARCHIVE_START, ARCHIVE_END, model="cerra"
|
||||
)
|
||||
print(f" → {len(archive_data['CERRA (Open-Meteo)'])} days")
|
||||
except Exception as e:
|
||||
print(f" ✗ CERRA failed: {e}")
|
||||
archive_data["CERRA (Open-Meteo)"] = None
|
||||
else:
|
||||
print(" Skipping CERRA (outside EU coverage)")
|
||||
archive_data["CERRA (Open-Meteo)"] = None
|
||||
|
||||
time.sleep(0.5)
|
||||
print(" Fetching NASA POWER...")
|
||||
try:
|
||||
archive_data["NASA POWER"] = fetch_nasa_power(lat, lon, ARCHIVE_START, ARCHIVE_END)
|
||||
print(f" → {len(archive_data['NASA POWER'])} days")
|
||||
except Exception as e:
|
||||
print(f" ✗ NASA POWER failed: {e}")
|
||||
archive_data["NASA POWER"] = None
|
||||
|
||||
# Stats vs ERA5 reference
|
||||
print("\n Stats vs ERA5 (Open-Meteo) reference:")
|
||||
ref_df = archive_data.get("ERA5 (Open-Meteo)")
|
||||
for name, df in archive_data.items():
|
||||
if name == "ERA5 (Open-Meteo)" or df is None:
|
||||
continue
|
||||
if ref_df is None:
|
||||
continue
|
||||
merged = ref_df.merge(df, on="date", suffixes=("_ref", "_cmp"))
|
||||
stats = compare_stats(merged, "rain_mm_ref", "rain_mm_cmp")
|
||||
print(f" {name:30s} n={stats['n']:4d} MAE={stats['MAE']} "
|
||||
f"RMSE={stats['RMSE']} Bias={stats['Bias']} r={stats['r']}")
|
||||
|
||||
plot_archive(archive_data, loc_name, ARCHIVE_START, ARCHIVE_END, OUTPUT_DIR)
|
||||
plot_cumulative(archive_data, loc_name, OUTPUT_DIR)
|
||||
plot_vs_era5(archive_data, loc_name, OUTPUT_DIR)
|
||||
|
||||
# ---- FORECAST ----
|
||||
print("\n[Forecast]")
|
||||
forecast_data = {}
|
||||
|
||||
print(" Fetching Open-Meteo forecast...")
|
||||
try:
|
||||
forecast_data["Open-Meteo Forecast"] = fetch_openmeteo_forecast(lat, lon)
|
||||
print(f" → {len(forecast_data['Open-Meteo Forecast'])} days")
|
||||
except Exception as e:
|
||||
print(f" ✗ Open-Meteo forecast failed: {e}")
|
||||
forecast_data["Open-Meteo Forecast"] = None
|
||||
|
||||
time.sleep(0.5)
|
||||
print(" Fetching YR.no LocationForecast...")
|
||||
try:
|
||||
forecast_data["YR.no"] = fetch_yr_forecast(lat, lon)
|
||||
print(f" → {len(forecast_data['YR.no'])} days")
|
||||
except Exception as e:
|
||||
print(f" ✗ YR.no failed: {e}")
|
||||
forecast_data["YR.no"] = None
|
||||
|
||||
if OPENWEATHERMAP_KEY:
|
||||
time.sleep(0.5)
|
||||
print(" Fetching OpenWeatherMap forecast...")
|
||||
try:
|
||||
forecast_data["OpenWeatherMap"] = fetch_openweathermap_forecast(
|
||||
lat, lon, OPENWEATHERMAP_KEY
|
||||
)
|
||||
print(f" → {len(forecast_data['OpenWeatherMap'])} days")
|
||||
except Exception as e:
|
||||
print(f" ✗ OpenWeatherMap failed: {e}")
|
||||
forecast_data["OpenWeatherMap"] = None
|
||||
|
||||
if WEATHERAPI_KEY:
|
||||
time.sleep(0.5)
|
||||
print(" Fetching WeatherAPI.com forecast...")
|
||||
try:
|
||||
forecast_data["WeatherAPI.com"] = fetch_weatherapi_forecast(
|
||||
lat, lon, WEATHERAPI_KEY
|
||||
)
|
||||
print(f" → {len(forecast_data['WeatherAPI.com'])} days")
|
||||
except Exception as e:
|
||||
print(f" ✗ WeatherAPI.com failed: {e}")
|
||||
forecast_data["WeatherAPI.com"] = None
|
||||
|
||||
plot_forecast(forecast_data, loc_name, OUTPUT_DIR)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print(f"Weather API Comparison — {datetime.date.today()}")
|
||||
print(f"Archive: {ARCHIVE_START} → {ARCHIVE_END}")
|
||||
print(f"Forecast: {FORECAST_START} → {FORECAST_END}")
|
||||
print(f"Output: {OUTPUT_DIR.resolve()}")
|
||||
|
||||
for loc_name, coords in LOCATIONS.items():
|
||||
run_location(loc_name, coords["lat"], coords["lon"])
|
||||
time.sleep(1)
|
||||
|
||||
print(f"\nDone. Plots saved to: {OUTPUT_DIR.resolve()}")
|
||||
|
|
@ -128,23 +128,60 @@ main <- function() {
|
|||
}
|
||||
|
||||
# Process each DATE (load merged TIFF once, extract all fields from it)
|
||||
total_success <- 0
|
||||
total_processed_dates <- 0
|
||||
total_skipped_dates <- 0
|
||||
total_already_complete_dates <- 0
|
||||
total_error <- 0
|
||||
|
||||
for (date_str in dates_filter) {
|
||||
# Load the MERGED TIFF (farm-wide) ONCE for this date
|
||||
input_tif_merged <- file.path(setup$merged_tif_folder, sprintf("%s.tif", date_str))
|
||||
output_tifs <- file.path(setup$field_tiles_ci_dir, fields, sprintf("%s.tif", date_str))
|
||||
output_rds <- file.path(setup$daily_ci_vals_dir, fields, sprintf("%s.rds", date_str))
|
||||
names(output_tifs) <- fields
|
||||
names(output_rds) <- fields
|
||||
tif_exists <- file.exists(output_tifs)
|
||||
rds_exists <- file.exists(output_rds)
|
||||
fields_need_rds_only <- fields[tif_exists & !rds_exists]
|
||||
fields_need_raster <- fields[!tif_exists]
|
||||
|
||||
if (length(fields_need_rds_only) == 0 && length(fields_need_raster) == 0) {
|
||||
total_already_complete_dates <- total_already_complete_dates + 1
|
||||
safe_log(sprintf(" %s: All field outputs already exist (skipping)", date_str))
|
||||
next
|
||||
}
|
||||
|
||||
fields_processed_this_date <- 0
|
||||
rds_only_processed <- 0
|
||||
raster_processed_this_date <- 0
|
||||
|
||||
if (length(fields_need_rds_only) > 0) {
|
||||
for (field in fields_need_rds_only) {
|
||||
tryCatch({
|
||||
extract_rds_from_ci_tiff(output_tifs[[field]], output_rds[[field]], field_boundaries_sf, field)
|
||||
fields_processed_this_date <- fields_processed_this_date + 1
|
||||
rds_only_processed <- rds_only_processed + 1
|
||||
}, error = function(e) {
|
||||
safe_log(sprintf(" Error regenerating RDS for field %s: %s", field, e$message), "WARNING")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if (length(fields_need_raster) == 0) {
|
||||
total_processed_dates <- total_processed_dates + 1
|
||||
safe_log(sprintf(" %s: Regenerated %d RDS files from existing CI TIFFs", date_str, rds_only_processed))
|
||||
next
|
||||
}
|
||||
|
||||
if (!file.exists(input_tif_merged)) {
|
||||
safe_log(sprintf(" %s: merged_tif not found (skipping)", date_str))
|
||||
total_error <<- total_error + 1
|
||||
total_skipped_dates <- total_skipped_dates + 1
|
||||
next
|
||||
}
|
||||
|
||||
tryCatch({
|
||||
# Load 4-band TIFF ONCE
|
||||
# Load the merged TIFF only when at least one field still needs a CI TIFF.
|
||||
raster_4band <- terra::rast(input_tif_merged)
|
||||
safe_log(sprintf(" %s: Loaded merged TIFF, processing %d fields...", date_str, length(fields)))
|
||||
safe_log(sprintf(" %s: Loaded merged TIFF, processing %d fields...", date_str, length(fields_need_raster)))
|
||||
|
||||
# Calculate CI from 4-band
|
||||
ci_raster <- calc_ci_from_raster(raster_4band)
|
||||
|
|
@ -154,44 +191,18 @@ main <- function() {
|
|||
five_band <- c(raster_4band, ci_raster)
|
||||
names(five_band) <- c("Red", "Green", "Blue", "NIR", "CI")
|
||||
|
||||
# Now process all fields from this single merged TIFF
|
||||
fields_processed_this_date <- 0
|
||||
# Now process only the fields that still need CI TIFF output for this date.
|
||||
for (field in fields_need_raster) {
|
||||
output_tif_path <- output_tifs[[field]]
|
||||
output_rds_path <- output_rds[[field]]
|
||||
|
||||
for (field in fields) {
|
||||
field_ci_path <- file.path(setup$field_tiles_ci_dir, field)
|
||||
field_daily_vals_path <- file.path(setup$daily_ci_vals_dir, field)
|
||||
|
||||
# Pre-create output directories
|
||||
dir.create(field_ci_path, showWarnings = FALSE, recursive = TRUE)
|
||||
dir.create(field_daily_vals_path, showWarnings = FALSE, recursive = TRUE)
|
||||
|
||||
output_tif <- file.path(field_ci_path, sprintf("%s.tif", date_str))
|
||||
output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str))
|
||||
|
||||
# MODE 3: Skip if both outputs already exist
|
||||
if (file.exists(output_tif) && file.exists(output_rds)) {
|
||||
next
|
||||
}
|
||||
|
||||
# MODE 2: Regeneration mode - RDS missing but CI TIFF exists
|
||||
if (file.exists(output_tif) && !file.exists(output_rds)) {
|
||||
tryCatch({
|
||||
extract_rds_from_ci_tiff(output_tif, output_rds, field_boundaries_sf, field)
|
||||
fields_processed_this_date <- fields_processed_this_date + 1
|
||||
}, error = function(e) {
|
||||
# Continue to next field
|
||||
})
|
||||
next
|
||||
}
|
||||
|
||||
# MODE 1: Normal mode - crop 5-band TIFF to field boundary and save
|
||||
tryCatch({
|
||||
# Crop 5-band TIFF to field boundary
|
||||
field_geom <- field_boundaries_sf %>% filter(field == !!field)
|
||||
five_band_cropped <- terra::crop(five_band, field_geom, mask = TRUE)
|
||||
|
||||
# Save 5-band field TIFF
|
||||
terra::writeRaster(five_band_cropped, output_tif, overwrite = TRUE)
|
||||
terra::writeRaster(five_band_cropped, output_tif_path, overwrite = TRUE)
|
||||
|
||||
# Extract CI statistics by sub_field (from cropped CI raster)
|
||||
ci_cropped <- five_band_cropped[[5]] # 5th band is CI
|
||||
|
|
@ -199,10 +210,11 @@ main <- function() {
|
|||
|
||||
# Save RDS
|
||||
if (!is.null(ci_stats) && nrow(ci_stats) > 0) {
|
||||
saveRDS(ci_stats, output_rds)
|
||||
saveRDS(ci_stats, output_rds_path)
|
||||
}
|
||||
|
||||
fields_processed_this_date <- fields_processed_this_date + 1
|
||||
raster_processed_this_date <- raster_processed_this_date + 1
|
||||
|
||||
}, error = function(e) {
|
||||
# Error in individual field, continue to next
|
||||
|
|
@ -210,24 +222,34 @@ main <- function() {
|
|||
})
|
||||
}
|
||||
|
||||
# Increment success counter if at least one field succeeded
|
||||
if (fields_processed_this_date > 0) {
|
||||
total_success <<- total_success + 1
|
||||
safe_log(sprintf(" %s: Processed %d fields", date_str, fields_processed_this_date))
|
||||
total_processed_dates <- total_processed_dates + 1
|
||||
safe_log(sprintf(
|
||||
" %s: Processed %d fields (%d CI TIFFs created, %d RDS-only regenerated)",
|
||||
date_str,
|
||||
fields_processed_this_date,
|
||||
raster_processed_this_date,
|
||||
rds_only_processed
|
||||
))
|
||||
} else {
|
||||
total_error <- total_error + 1
|
||||
safe_log(sprintf(" %s: No field outputs were created; see warnings above", date_str), "ERROR")
|
||||
}
|
||||
|
||||
}, error = function(e) {
|
||||
total_error <<- total_error + 1
|
||||
total_error <- total_error + 1
|
||||
safe_log(sprintf(" %s: Error loading or processing merged TIFF - %s", date_str, e$message), "ERROR")
|
||||
})
|
||||
}
|
||||
|
||||
# Summary
|
||||
safe_log(sprintf("\n=== Processing Complete ==="))
|
||||
safe_log(sprintf("Successfully processed: %d", total_success))
|
||||
safe_log(sprintf("Dates processed: %d", total_processed_dates))
|
||||
safe_log(sprintf("Dates skipped (missing merged_tif): %d", total_skipped_dates))
|
||||
safe_log(sprintf("Dates already complete: %d", total_already_complete_dates))
|
||||
safe_log(sprintf("Errors encountered: %d", total_error))
|
||||
|
||||
if (total_success > 0) {
|
||||
if (total_processed_dates > 0) {
|
||||
safe_log("Output files created in:")
|
||||
safe_log(sprintf(" TIFFs: %s", setup$field_tiles_ci_dir))
|
||||
safe_log(sprintf(" RDS: %s", setup$daily_ci_vals_dir))
|
||||
|
|
|
|||
|
|
@ -56,7 +56,6 @@ suppressPackageStartupMessages({
|
|||
# Visualization
|
||||
library(tmap) # For interactive maps (field boundary visualization)
|
||||
library(ggspatial) # For basemap tiles and spatial annotations (OSM basemap with ggplot2)
|
||||
|
||||
# Reporting
|
||||
library(knitr) # For R Markdown document generation (code execution and output)
|
||||
library(flextable) # For formatted tables in Word output (professional table styling)
|
||||
|
|
@ -84,6 +83,16 @@ tryCatch({
|
|||
})
|
||||
})
|
||||
|
||||
tryCatch({
|
||||
source("90_rainfall_utils.R")
|
||||
}, error = function(e) {
|
||||
tryCatch({
|
||||
source(here::here("r_app", "90_rainfall_utils.R"))
|
||||
}, error = function(e) {
|
||||
message("Could not load 90_rainfall_utils.R - rain overlay disabled: ", e$message)
|
||||
})
|
||||
})
|
||||
|
||||
```
|
||||
|
||||
```{r initialize_project_config, message=FALSE, warning=FALSE, include=FALSE}
|
||||
|
|
@ -1184,6 +1193,46 @@ tryCatch({
|
|||
|
||||
# load_per_field_mosaic() is defined in 90_report_utils.R (sourced above)
|
||||
|
||||
# --- Fetch rain for all fields (one API call per 0.25° tile) ---
|
||||
all_rain_data <- NULL
|
||||
if (exists("rain_fetch_for_fields") && !is.null(CI_quadrant) && nrow(CI_quadrant) > 0) {
|
||||
tryCatch({
|
||||
# Compute field centroids from AllPivots0 (already-loaded sf object)
|
||||
field_centroids <- AllPivots0 %>%
|
||||
dplyr::group_by(field) %>%
|
||||
dplyr::summarise(geometry = sf::st_union(geometry), .groups = "drop") %>%
|
||||
sf::st_centroid() %>%
|
||||
dplyr::mutate(
|
||||
longitude = sf::st_coordinates(geometry)[, 1],
|
||||
latitude = sf::st_coordinates(geometry)[, 2]
|
||||
) %>%
|
||||
sf::st_drop_geometry() %>%
|
||||
dplyr::select(field_id = field, latitude, longitude)
|
||||
|
||||
# Determine current-season date range across all fields
|
||||
rain_start <- CI_quadrant %>%
|
||||
dplyr::group_by(field) %>%
|
||||
dplyr::filter(season == max(season)) %>%
|
||||
dplyr::summarise(s = min(Date, na.rm = TRUE), .groups = "drop") %>%
|
||||
dplyr::pull(s) %>%
|
||||
min(na.rm = TRUE)
|
||||
rain_end <- as.Date(today)
|
||||
|
||||
message(paste("Fetching rain data from", rain_start, "to", rain_end,
|
||||
"for", nrow(field_centroids), "fields"))
|
||||
|
||||
all_rain_data <- rain_fetch_for_fields(
|
||||
centroids_df = field_centroids,
|
||||
start_date = rain_start,
|
||||
end_date = rain_end
|
||||
)
|
||||
message(paste("Rain data fetched:", nrow(all_rain_data), "rows"))
|
||||
}, error = function(e) {
|
||||
message("Rain fetch failed - overlay disabled: ", e$message)
|
||||
all_rain_data <<- NULL
|
||||
})
|
||||
}
|
||||
|
||||
# Iterate through fields using purrr::walk
|
||||
purrr::walk(AllPivots_merged$field, function(field_name) {
|
||||
tryCatch({
|
||||
|
|
@ -1249,6 +1298,14 @@ tryCatch({
|
|||
|
||||
# Call cum_ci_plot for trend analysis
|
||||
if (!is.null(CI_quadrant)) {
|
||||
field_rain <- if (!is.null(all_rain_data)) {
|
||||
all_rain_data %>%
|
||||
dplyr::filter(field_id == field_name) %>%
|
||||
dplyr::select(date, rain_mm)
|
||||
} else {
|
||||
NULL
|
||||
}
|
||||
|
||||
cum_ci_plot(
|
||||
pivotName = field_name,
|
||||
ci_quadrant_data = ci_quadrant_data,
|
||||
|
|
@ -1259,7 +1316,8 @@ tryCatch({
|
|||
show_benchmarks = TRUE,
|
||||
estate_name = project_dir,
|
||||
benchmark_percentiles = c(10, 50, 90),
|
||||
benchmark_data = benchmarks
|
||||
benchmark_data = benchmarks,
|
||||
rain_data = field_rain
|
||||
)
|
||||
#cat("\n")
|
||||
}
|
||||
|
|
|
|||
369
r_app/90_rainfall_utils.R
Normal file
369
r_app/90_rainfall_utils.R
Normal file
|
|
@ -0,0 +1,369 @@
|
|||
# 90_RAINFALL_UTILS.R
|
||||
# ============================================================================
|
||||
# Rainfall data fetching and ggplot overlay utilities for SmartCane CI reports.
|
||||
#
|
||||
# Provider-agnostic design: to swap weather provider, implement a new
|
||||
# rain_fetch_<provider>() function and update the dispatch in rain_fetch().
|
||||
#
|
||||
# EXPORTS:
|
||||
# - rain_snap_to_tile() Snap lat/lon to Open-Meteo native 0.25° grid
|
||||
# - rain_fetch() Generic fetch wrapper (dispatches to provider)
|
||||
# - rain_fetch_for_fields() Batch fetch with spatial tile deduplication
|
||||
# - rain_join_to_ci() Join rain to latest-season CI data by Date
|
||||
# - rain_add_to_plot() Overlay rain on single CI plot (abs or cum)
|
||||
# - rain_add_to_faceted_plot() Overlay rain on faceted "both" CI plot
|
||||
# ============================================================================
|
||||
|
||||
suppressPackageStartupMessages({
|
||||
library(dplyr)
|
||||
library(tidyr)
|
||||
library(httr2)
|
||||
library(purrr)
|
||||
})
|
||||
|
||||
# ============================================================================
|
||||
# PROVIDER LAYER — swap here to change data source
|
||||
# ============================================================================
|
||||
|
||||
#' Fetch daily precipitation from Open-Meteo ERA5 archive
|
||||
#'
|
||||
#' ERA5 covers 1940-present at 0.25° (~28 km) resolution.
|
||||
#' No API key required.
|
||||
#'
|
||||
#' @param latitude Numeric. WGS84 latitude.
|
||||
#' @param longitude Numeric. WGS84 longitude.
|
||||
#' @param start_date Date or "YYYY-MM-DD" string.
|
||||
#' @param end_date Date or "YYYY-MM-DD" string.
|
||||
#' @return data.frame with columns: date (Date), rain_mm (numeric)
|
||||
rain_fetch_open_meteo_archive <- function(latitude, longitude, start_date, end_date) {
|
||||
url <- paste0(
|
||||
"https://archive-api.open-meteo.com/v1/archive",
|
||||
"?latitude=", latitude,
|
||||
"&longitude=", longitude,
|
||||
"&daily=precipitation_sum",
|
||||
"&start_date=", format(as.Date(start_date), "%Y-%m-%d"),
|
||||
"&end_date=", format(as.Date(end_date), "%Y-%m-%d"),
|
||||
"&timezone=auto"
|
||||
)
|
||||
|
||||
resp <- tryCatch(
|
||||
httr2::request(url) %>% httr2::req_timeout(30) %>% httr2::req_perform(),
|
||||
error = function(e) stop(paste("Rain archive request failed:", e$message), call. = FALSE)
|
||||
)
|
||||
|
||||
if (httr2::resp_status(resp) != 200) {
|
||||
stop(paste("Rain archive API returned status", httr2::resp_status(resp)), call. = FALSE)
|
||||
}
|
||||
|
||||
body <- httr2::resp_body_json(resp)
|
||||
|
||||
if (is.null(body$daily) || is.null(body$daily$time)) {
|
||||
stop("Unexpected Open-Meteo archive response: missing daily/time.", call. = FALSE)
|
||||
}
|
||||
|
||||
data.frame(
|
||||
date = as.Date(unlist(body$daily$time)),
|
||||
rain_mm = as.numeric(unlist(body$daily$precipitation_sum)),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# GENERIC FETCH WRAPPER
|
||||
# ============================================================================
|
||||
|
||||
#' Fetch daily precipitation via the configured provider
|
||||
#'
|
||||
#' @param latitude Numeric.
|
||||
#' @param longitude Numeric.
|
||||
#' @param start_date Date or "YYYY-MM-DD".
|
||||
#' @param end_date Date or "YYYY-MM-DD".
|
||||
#' @param provider Character. Currently only "open_meteo_archive".
|
||||
#' @return data.frame(date, rain_mm)
|
||||
rain_fetch <- function(latitude, longitude, start_date, end_date,
|
||||
provider = "open_meteo_archive") {
|
||||
# --- ADD NEW PROVIDERS HERE ---
|
||||
switch(provider,
|
||||
open_meteo_archive = rain_fetch_open_meteo_archive(
|
||||
latitude, longitude, start_date, end_date
|
||||
),
|
||||
stop(paste("Unknown rain provider:", provider), call. = FALSE)
|
||||
)
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# SPATIAL HELPERS
|
||||
# ============================================================================
|
||||
|
||||
#' Snap a coordinate to the nearest ERA5 tile centre
|
||||
#'
|
||||
#' Open-Meteo ERA5 has 0.25° native resolution (~28 km). Snapping coordinates
|
||||
#' to this grid avoids redundant API calls for nearby fields.
|
||||
#'
|
||||
#' @param latitude Numeric.
|
||||
#' @param longitude Numeric.
|
||||
#' @param resolution Numeric. Grid resolution in degrees (default 0.25).
|
||||
#' @return Named list: tile_lat, tile_lon, tile_id
|
||||
rain_snap_to_tile <- function(latitude, longitude, resolution = 0.25) {
|
||||
tile_lat <- round(latitude / resolution) * resolution
|
||||
tile_lon <- round(longitude / resolution) * resolution
|
||||
list(
|
||||
tile_lat = tile_lat,
|
||||
tile_lon = tile_lon,
|
||||
tile_id = paste0(tile_lat, "_", tile_lon)
|
||||
)
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# BATCH FETCH WITH TILE DEDUPLICATION
|
||||
# ============================================================================
|
||||
|
||||
#' Fetch rain for multiple fields, calling the API once per unique 0.25° tile
|
||||
#'
|
||||
#' Fields that fall on the same ERA5 tile share one API call. This handles
|
||||
#' estates spread over 100+ km (e.g. Angata) without over-calling the API.
|
||||
#'
|
||||
#' @param centroids_df data.frame with columns: field_id, latitude, longitude
|
||||
#' @param start_date Date or "YYYY-MM-DD". Start of fetch window.
|
||||
#' @param end_date Date or "YYYY-MM-DD". End of fetch window.
|
||||
#' @param provider Character. Passed to rain_fetch().
|
||||
#' @return data.frame with columns: field_id, date, rain_mm
|
||||
rain_fetch_for_fields <- function(centroids_df, start_date, end_date,
|
||||
provider = "open_meteo_archive") {
|
||||
required_cols <- c("field_id", "latitude", "longitude")
|
||||
if (!all(required_cols %in% names(centroids_df))) {
|
||||
stop(paste("centroids_df must have:", paste(required_cols, collapse = ", ")), call. = FALSE)
|
||||
}
|
||||
|
||||
# Snap each field to its tile
|
||||
centroids_tiled <- centroids_df %>%
|
||||
dplyr::mutate(
|
||||
tile_info = purrr::map2(latitude, longitude, rain_snap_to_tile),
|
||||
tile_lat = purrr::map_dbl(tile_info, "tile_lat"),
|
||||
tile_lon = purrr::map_dbl(tile_info, "tile_lon"),
|
||||
tile_id = purrr::map_chr(tile_info, "tile_id")
|
||||
) %>%
|
||||
dplyr::select(-tile_info)
|
||||
|
||||
# Fetch once per unique tile
|
||||
unique_tiles <- centroids_tiled %>%
|
||||
dplyr::distinct(tile_id, tile_lat, tile_lon)
|
||||
|
||||
tile_rain <- unique_tiles %>%
|
||||
dplyr::mutate(rain_data = purrr::pmap(
|
||||
list(tile_lat, tile_lon),
|
||||
function(lat, lon) {
|
||||
tryCatch(
|
||||
rain_fetch(lat, lon, start_date, end_date, provider = provider),
|
||||
error = function(e) {
|
||||
safe_log(paste0("Rain fetch failed for tile ", lat, "_", lon, ": ", e$message), "WARNING")
|
||||
data.frame(date = as.Date(character(0)), rain_mm = numeric(0))
|
||||
}
|
||||
)
|
||||
}
|
||||
))
|
||||
|
||||
# Join tile rain back to fields and unnest
|
||||
centroids_tiled %>%
|
||||
dplyr::left_join(tile_rain %>% dplyr::select(tile_id, rain_data), by = "tile_id") %>%
|
||||
dplyr::select(field_id, rain_data) %>%
|
||||
tidyr::unnest(rain_data)
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# PROCESSING
|
||||
# ============================================================================
|
||||
|
||||
#' Join rain data to latest-season CI data by Date
|
||||
#'
|
||||
#' @param rain_df data.frame(date, rain_mm) for the field.
|
||||
#' @param ci_season_data data.frame with at minimum: Date, DAH, week columns
|
||||
#' (latest season only, one row per date).
|
||||
#' @return data.frame with Date, DAH, week, rain_mm, cum_rain_mm columns,
|
||||
#' ordered by Date. Only dates present in ci_season_data are returned.
|
||||
rain_join_to_ci <- function(rain_df, ci_season_data) {
|
||||
if (is.null(rain_df) || nrow(rain_df) == 0) return(NULL)
|
||||
|
||||
ci_dates <- ci_season_data %>%
|
||||
dplyr::distinct(Date, DAH, week) %>%
|
||||
dplyr::arrange(Date)
|
||||
|
||||
joined <- ci_dates %>%
|
||||
dplyr::left_join(
|
||||
rain_df %>% dplyr::rename(Date = date),
|
||||
by = "Date"
|
||||
) %>%
|
||||
dplyr::mutate(
|
||||
rain_mm = tidyr::replace_na(rain_mm, 0),
|
||||
cum_rain_mm = cumsum(rain_mm)
|
||||
)
|
||||
|
||||
joined
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# PLOT OVERLAY — single plot (absolute or cumulative)
|
||||
# ============================================================================
|
||||
|
||||
#' Add a rainfall overlay to a single-panel CI ggplot
|
||||
#'
|
||||
#' For absolute CI (mean_rolling_10_days): adds daily precipitation bars
|
||||
#' (steelblue, semi-transparent) scaled to the primary y-axis range, with a
|
||||
#' secondary right y-axis labelled in mm/day.
|
||||
#'
|
||||
#' For cumulative CI: adds a cumulative precipitation filled area scaled to the
|
||||
#' primary y-axis range, with a secondary right y-axis labelled in mm.
|
||||
#'
|
||||
#' The secondary axis is a linear transform of the primary axis (ggplot2
|
||||
#' requirement). The rain values are scaled so the maximum rain never exceeds
|
||||
#' y_max on screen. The right axis labels show the real mm values.
|
||||
#'
|
||||
#' @param g ggplot object.
|
||||
#' @param rain_joined Result of rain_join_to_ci(). NULL returns g unchanged.
|
||||
#' @param ci_type "mean_rolling_10_days" or "cumulative_CI".
|
||||
#' @param x_var Character: "DAH", "week", or "Date".
|
||||
#' @param y_max Numeric. Max value of the primary y-axis (used for scaling
|
||||
#' and for setting limits = c(0, y_max) on the primary axis).
|
||||
#' @return Modified ggplot object.
|
||||
rain_add_to_plot <- function(g, rain_joined, ci_type, x_var, y_max) {
|
||||
if (is.null(rain_joined) || nrow(rain_joined) == 0) {
|
||||
# Still enforce y limits even without rain
|
||||
return(g + ggplot2::scale_y_continuous(limits = c(0, y_max)))
|
||||
}
|
||||
|
||||
if (ci_type == "mean_rolling_10_days") {
|
||||
# --- Daily precipitation bars ---
|
||||
max_rain <- max(rain_joined$rain_mm, na.rm = TRUE)
|
||||
if (is.na(max_rain) || max_rain == 0) {
|
||||
return(g + ggplot2::scale_y_continuous(limits = c(0, y_max)))
|
||||
}
|
||||
|
||||
scale_factor <- y_max / max_rain
|
||||
|
||||
bar_width <- switch(x_var,
|
||||
"DAH" = 1,
|
||||
"week" = 0.5,
|
||||
1 # Date: 1 day width handled by ggplot
|
||||
)
|
||||
|
||||
g +
|
||||
ggplot2::geom_col(
|
||||
data = rain_joined,
|
||||
ggplot2::aes(x = .data[[x_var]], y = rain_mm * scale_factor),
|
||||
fill = "steelblue",
|
||||
alpha = 0.65,
|
||||
width = bar_width,
|
||||
inherit.aes = FALSE
|
||||
) +
|
||||
ggplot2::scale_y_continuous(
|
||||
limits = c(0, y_max),
|
||||
sec.axis = ggplot2::sec_axis(
|
||||
~ . / scale_factor,
|
||||
name = tr_key("lbl_rain_mm_day", "Precipitation (mm/day)")
|
||||
)
|
||||
)
|
||||
|
||||
} else if (ci_type == "cumulative_CI") {
|
||||
# --- Cumulative precipitation area ---
|
||||
max_cum_rain <- max(rain_joined$cum_rain_mm, na.rm = TRUE)
|
||||
if (is.na(max_cum_rain) || max_cum_rain == 0) {
|
||||
return(g)
|
||||
}
|
||||
|
||||
scale_factor <- (y_max * 0.30) / max_cum_rain
|
||||
|
||||
g +
|
||||
ggplot2::geom_line(
|
||||
data = rain_joined,
|
||||
ggplot2::aes(x = .data[[x_var]], y = cum_rain_mm * scale_factor),
|
||||
color = "steelblue",
|
||||
linewidth = 0.8,
|
||||
inherit.aes = FALSE
|
||||
) +
|
||||
ggplot2::scale_y_continuous(
|
||||
limits = c(0, y_max),
|
||||
sec.axis = ggplot2::sec_axis(
|
||||
~ . / scale_factor,
|
||||
name = tr_key("lbl_cum_rain_mm", "Cumulative Rain (mm)")
|
||||
)
|
||||
)
|
||||
|
||||
} else {
|
||||
g
|
||||
}
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# PLOT OVERLAY — faceted "both" plot
|
||||
# ============================================================================
|
||||
|
||||
#' Add rainfall overlays to a faceted (plot_type = "both") CI ggplot
|
||||
#'
|
||||
#' ggplot2 does not support per-facet secondary axes with free_y scales.
|
||||
#' This function adds the rain layers to the correct facets by setting
|
||||
#' ci_type_label to match the facet strip, but omits secondary axis labels
|
||||
#' to avoid misleading scale information across facets.
|
||||
#'
|
||||
#' Rolling mean facet receives: daily precipitation bars.
|
||||
#' Cumulative facet receives: cumulative precipitation filled area.
|
||||
#'
|
||||
#' @param g ggplot object with facet_wrap(~ci_type_label).
|
||||
#' @param rain_joined Result of rain_join_to_ci().
|
||||
#' @param rolling_mean_label Character. Must match the strip label for the
|
||||
#' rolling mean facet (from tr_key).
|
||||
#' @param cumulative_label Character. Must match the strip label for the
|
||||
#' cumulative CI facet (from tr_key).
|
||||
#' @param x_var Character: "DAH", "week", or "Date".
|
||||
#' @param y_max_abs Numeric. Primary y-axis max for rolling mean facet (typically 7).
|
||||
#' @param y_max_cum Numeric. Primary y-axis max for cumulative CI facet.
|
||||
#' @return Modified ggplot object.
|
||||
rain_add_to_faceted_plot <- function(g, rain_joined, rolling_mean_label, cumulative_label,
|
||||
x_var, y_max_abs, y_max_cum) {
|
||||
if (is.null(rain_joined) || nrow(rain_joined) == 0) return(g)
|
||||
|
||||
# --- Bars in rolling mean facet ---
|
||||
max_rain <- max(rain_joined$rain_mm, na.rm = TRUE)
|
||||
if (!is.na(max_rain) && max_rain > 0) {
|
||||
scale_abs <- y_max_abs / max_rain
|
||||
bar_width <- switch(x_var, "DAH" = 1, "week" = 0.5, 1)
|
||||
|
||||
rain_bars <- rain_joined %>%
|
||||
dplyr::mutate(
|
||||
ci_type_label = rolling_mean_label,
|
||||
y_scaled = rain_mm * scale_abs
|
||||
)
|
||||
|
||||
g <- g +
|
||||
ggplot2::geom_col(
|
||||
data = rain_bars,
|
||||
ggplot2::aes(x = .data[[x_var]], y = y_scaled),
|
||||
fill = "steelblue",
|
||||
alpha = 0.65,
|
||||
width = bar_width,
|
||||
inherit.aes = FALSE
|
||||
)
|
||||
}
|
||||
|
||||
# --- Area in cumulative facet ---
|
||||
max_cum_rain <- max(rain_joined$cum_rain_mm, na.rm = TRUE)
|
||||
if (!is.na(max_cum_rain) && max_cum_rain > 0) {
|
||||
scale_cum <- (y_max_cum * 0.30) / max_cum_rain
|
||||
|
||||
rain_area <- rain_joined %>%
|
||||
dplyr::mutate(
|
||||
ci_type_label = cumulative_label,
|
||||
y_scaled = cum_rain_mm * scale_cum
|
||||
)
|
||||
|
||||
g <- g +
|
||||
ggplot2::geom_line(
|
||||
data = rain_area,
|
||||
ggplot2::aes(x = .data[[x_var]], y = y_scaled),
|
||||
color = "steelblue",
|
||||
linewidth = 0.8,
|
||||
inherit.aes = FALSE
|
||||
)
|
||||
}
|
||||
|
||||
g
|
||||
}
|
||||
|
|
@ -13,8 +13,15 @@
|
|||
#'
|
||||
subchunkify <- function(g, fig_height=7, fig_width=5) {
|
||||
g_deparsed <- paste0(deparse(
|
||||
function() {g}
|
||||
), collapse = '')
|
||||
function() {
|
||||
if (inherits(g, c("gtable", "grob", "gTree"))) {
|
||||
grid::grid.newpage()
|
||||
grid::grid.draw(g)
|
||||
} else {
|
||||
print(g)
|
||||
}
|
||||
}
|
||||
), collapse = '\n')
|
||||
|
||||
sub_chunk <- paste0("
|
||||
`","``{r sub_chunk_", floor(runif(1) * 10000), ", fig.height=", fig_height, ", fig.width=", fig_width, ", dpi=300, dev='png', out.width='100%', echo=FALSE}",
|
||||
|
|
@ -394,9 +401,11 @@ ci_plot <- function(pivotName,
|
|||
#' @param show_benchmarks Whether to show historical benchmark lines (default: FALSE)
|
||||
#' @param estate_name Name of the estate for benchmark calculation (required if show_benchmarks = TRUE)
|
||||
#' @param benchmark_percentiles Vector of percentiles for benchmarks (default: c(10, 50, 90))
|
||||
#' @param rain_data data.frame(date, rain_mm) for this field's current season,
|
||||
#' as returned by rain_fetch_for_fields() / rain_join_to_ci(). NULL disables overlay.
|
||||
#' @return NULL (adds output directly to R Markdown document)
|
||||
#'
|
||||
cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "absolute", facet_on = FALSE, x_unit = "days", colorblind_friendly = FALSE, show_benchmarks = FALSE, estate_name = NULL, benchmark_percentiles = c(10, 50, 90), benchmark_data = NULL) {
|
||||
cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "absolute", facet_on = FALSE, x_unit = "days", colorblind_friendly = FALSE, show_benchmarks = FALSE, estate_name = NULL, benchmark_percentiles = c(10, 50, 90), benchmark_data = NULL, rain_data = NULL) {
|
||||
# Input validation
|
||||
if (missing(pivotName) || is.null(pivotName) || pivotName == "") {
|
||||
stop("pivotName is required")
|
||||
|
|
@ -638,9 +647,23 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
|||
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
|
||||
}
|
||||
|
||||
# Add y-axis limits for absolute CI (10-day rolling mean) to fix scale at 0-7
|
||||
if (ci_type_filter == "mean_rolling_10_days") {
|
||||
g <- g + ggplot2::ylim(0, 7)
|
||||
# Y-axis limits + optional rain overlay (single-panel plots)
|
||||
y_max <- if (ci_type_filter == "mean_rolling_10_days") {
|
||||
7
|
||||
} else {
|
||||
max(plot_data$ci_value, na.rm = TRUE) * 1.05
|
||||
}
|
||||
|
||||
if (!is.null(rain_data)) {
|
||||
latest_ci_dates <- plot_data %>%
|
||||
dplyr::filter(is_latest) %>%
|
||||
dplyr::distinct(Date, DAH, week)
|
||||
rain_joined <- rain_join_to_ci(rain_data, latest_ci_dates)
|
||||
g <- rain_add_to_plot(g, rain_joined, ci_type_filter, x_var, y_max)
|
||||
} else {
|
||||
if (ci_type_filter == "mean_rolling_10_days") {
|
||||
g <- g + ggplot2::ylim(0, y_max)
|
||||
}
|
||||
}
|
||||
|
||||
return(g)
|
||||
|
|
@ -654,174 +677,20 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
|||
g <- create_plot("cumulative_CI", cumulative_label, "")
|
||||
subchunkify(g, 2.8, 10)
|
||||
} else if (plot_type == "both") {
|
||||
# Create faceted plot with both CI types using pivot_longer approach
|
||||
plot_data_both <- data_ci3 %>%
|
||||
dplyr::filter(season %in% unique_seasons) %>%
|
||||
dplyr::mutate(
|
||||
ci_type_label = factor(case_when(
|
||||
ci_type == "mean_rolling_10_days" ~ rolling_mean_label,
|
||||
ci_type == "cumulative_CI" ~ cumulative_label,
|
||||
TRUE ~ ci_type
|
||||
), levels = c(rolling_mean_label, cumulative_label)),
|
||||
is_latest = season == latest_season # Flag for latest season
|
||||
)
|
||||
|
||||
# Determine x-axis variable based on x_unit parameter
|
||||
x_var <- if (x_unit == "days") {
|
||||
if (facet_on) "Date" else "DAH"
|
||||
} else {
|
||||
"week"
|
||||
}
|
||||
|
||||
x_label <- switch(x_unit,
|
||||
"days" = if (facet_on) tr_key("lbl_date", "Date") else tr_key("lbl_age_of_crop_days", "Age of Crop (Days)"),
|
||||
"weeks" = tr_key("lbl_week_number", "Week Number"))
|
||||
|
||||
# Choose color palette based on colorblind_friendly flag
|
||||
color_scale <- if (colorblind_friendly) {
|
||||
ggplot2::scale_color_brewer(type = "qual", palette = "Set2")
|
||||
} else {
|
||||
ggplot2::scale_color_discrete()
|
||||
}
|
||||
|
||||
# Calculate dynamic max values for breaks
|
||||
max_dah_both <- max(plot_data_both$DAH, na.rm = TRUE) + 20
|
||||
max_week_both <- max(as.numeric(plot_data_both$week), na.rm = TRUE) + ceiling(20 / 7)
|
||||
|
||||
# Pre-evaluate translated title here (not inside labs()) so {pivotName} resolves correctly
|
||||
# Build each panel independently so each gets its own secondary rain y-axis.
|
||||
# (facet_wrap + free_y does not support per-facet sec.axis in ggplot2.)
|
||||
both_plot_title <- tr_key("lbl_ci_analysis_title", "CI Analysis for Field {pivotName}")
|
||||
|
||||
# Create the faceted plot
|
||||
g_both <- ggplot2::ggplot(data = plot_data_both) +
|
||||
# Add benchmark lines first (behind season lines)
|
||||
{
|
||||
if (!is.null(benchmark_data)) {
|
||||
# Clip benchmark to max DAH of plotted seasons + 10% buffer
|
||||
max_dah_clip <- max(plot_data_both$DAH, na.rm = TRUE) * 1.1
|
||||
benchmark_subset <- benchmark_data %>%
|
||||
dplyr::filter(DAH <= max_dah_clip) %>%
|
||||
dplyr::mutate(
|
||||
benchmark_x = if (x_var == "DAH") {
|
||||
DAH
|
||||
} else if (x_var == "week") {
|
||||
DAH / 7
|
||||
} else {
|
||||
DAH
|
||||
},
|
||||
ci_type_label = factor(case_when(
|
||||
ci_type == "value" ~ rolling_mean_label,
|
||||
ci_type == "cumulative_CI" ~ cumulative_label,
|
||||
TRUE ~ ci_type
|
||||
), levels = c(rolling_mean_label, cumulative_label))
|
||||
)
|
||||
ggplot2::geom_smooth(
|
||||
data = benchmark_subset,
|
||||
ggplot2::aes(
|
||||
x = .data[["benchmark_x"]],
|
||||
y = .data[["benchmark_value"]],
|
||||
group = factor(.data[["percentile"]])
|
||||
),
|
||||
color = "gray70", linewidth = 0.5, se = FALSE, inherit.aes = FALSE, fullrange = FALSE
|
||||
)
|
||||
}
|
||||
} +
|
||||
ggplot2::facet_wrap(~ci_type_label, scales = "free_y") +
|
||||
# Plot older seasons with lighter lines
|
||||
ggplot2::geom_line(
|
||||
data = plot_data_both %>% dplyr::filter(!is_latest),
|
||||
ggplot2::aes(
|
||||
x = .data[[x_var]],
|
||||
y = .data[["ci_value"]],
|
||||
col = .data[["season"]],
|
||||
group = .data[["season"]]
|
||||
),
|
||||
linewidth = 0.7, alpha = 0.4
|
||||
) +
|
||||
# Plot latest season with thicker, more prominent line
|
||||
ggplot2::geom_line(
|
||||
data = plot_data_both %>% dplyr::filter(is_latest),
|
||||
ggplot2::aes(
|
||||
x = .data[[x_var]],
|
||||
y = .data[["ci_value"]],
|
||||
col = .data[["season"]],
|
||||
group = .data[["season"]]
|
||||
),
|
||||
linewidth = 1.5, alpha = 1
|
||||
) +
|
||||
ggplot2::labs(title = both_plot_title,
|
||||
color = tr_key("lbl_season", "Season"),
|
||||
y = tr_key("lbl_ci_value", "CI Value"),
|
||||
x = x_label) +
|
||||
color_scale +
|
||||
{
|
||||
if (x_var == "DAH") {
|
||||
# Dynamic breaks based on actual data range
|
||||
dah_breaks <- scales::pretty_breaks(n = 5)(c(0, max_dah_both))
|
||||
# Month breaks: in secondary axis scale (month numbers 0, 1, 2, ...)
|
||||
n_months <- ceiling(max_dah_both / 30.44)
|
||||
month_breaks <- seq(0, n_months, by = 1)
|
||||
ggplot2::scale_x_continuous(
|
||||
breaks = dah_breaks,
|
||||
sec.axis = ggplot2::sec_axis(
|
||||
~ . / 30.44,
|
||||
name = tr_key("lbl_age_in_months", "Age in Months"),
|
||||
breaks = month_breaks,
|
||||
labels = function(x) as.integer(x) # Show all month labels
|
||||
)
|
||||
)
|
||||
} else if (x_var == "week") {
|
||||
# Dynamic breaks based on actual data range
|
||||
week_breaks <- scales::pretty_breaks(n = 5)(c(0, max_week_both))
|
||||
# Month breaks: in secondary axis scale (month numbers 0, 1, 2, ...)
|
||||
n_months <- ceiling(max_week_both / 4.348)
|
||||
month_breaks <- seq(0, n_months, by = 1)
|
||||
ggplot2::scale_x_continuous(
|
||||
breaks = week_breaks,
|
||||
sec.axis = ggplot2::sec_axis(
|
||||
~ . / 4.348,
|
||||
name = tr_key("lbl_age_in_months", "Age in Months"),
|
||||
breaks = month_breaks,
|
||||
labels = function(x) as.integer(x) # Show all month labels
|
||||
)
|
||||
)
|
||||
} else if (x_var == "Date") {
|
||||
ggplot2::scale_x_date(breaks = "1 month", date_labels = "%b-%Y", sec.axis = ggplot2::sec_axis(~ ., name = tr_key("lbl_age_in_months", "Age in Months"), breaks = scales::breaks_pretty()))
|
||||
}
|
||||
} +
|
||||
ggplot2::theme_minimal() +
|
||||
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
|
||||
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
|
||||
axis.title.x.top = ggplot2::element_text(size = 8),
|
||||
legend.justification = c(1, 0),
|
||||
legend.position = "inside",
|
||||
legend.position.inside = c(1, 0),
|
||||
legend.title = ggplot2::element_text(size = 8),
|
||||
legend.text = ggplot2::element_text(size = 8)) +
|
||||
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
|
||||
g_abs <- create_plot("mean_rolling_10_days", rolling_mean_label, "") +
|
||||
ggplot2::labs(title = rolling_mean_label) +
|
||||
ggplot2::theme(legend.position = "none")
|
||||
|
||||
# For the rolling mean data, we want to set reasonable y-axis limits
|
||||
# Since we're using free_y scales, each facet will have its own y-axis
|
||||
# The rolling mean will automatically scale to its data range,
|
||||
# but we can ensure it shows the 0-7 context by adding invisible points
|
||||
g_cum <- create_plot("cumulative_CI", cumulative_label, "") +
|
||||
ggplot2::labs(title = cumulative_label)
|
||||
|
||||
# Add invisible points to set the y-axis range for rolling mean facet
|
||||
dummy_data <- data.frame(
|
||||
ci_type_label = rolling_mean_label,
|
||||
ci_value = c(0, 7),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
dummy_data[[x_var]] <- range(plot_data_both[[x_var]], na.rm = TRUE)
|
||||
dummy_data[["season"]] <- factor("dummy", levels = levels(plot_data_both[["season"]]))
|
||||
combined <- gridExtra::arrangeGrob(g_abs, g_cum, ncol = 2, top = both_plot_title)
|
||||
|
||||
g_both <- g_both +
|
||||
ggplot2::geom_point(
|
||||
data = dummy_data,
|
||||
ggplot2::aes(x = .data[[x_var]], y = .data[["ci_value"]]),
|
||||
alpha = 0, size = 0
|
||||
) # Invisible points to set scale
|
||||
|
||||
# Display the combined faceted plot
|
||||
subchunkify(g_both, 2.8, 10)
|
||||
subchunkify(combined, 2.8, 10)
|
||||
}
|
||||
|
||||
}, error = function(e) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue