Add weather API comparison scripts for precipitation analysis

- Implemented `weather_api_comparison.py` to compare daily precipitation from multiple weather APIs for Arnhem, Netherlands and Angata, Kenya.
- Integrated fetching functions for various weather data sources including Open-Meteo, NASA POWER, OpenWeatherMap, and WeatherAPI.com.
- Added plotting functions to visualize archive and forecast data, including cumulative precipitation and comparison against ERA5 reference.
- Created `90_rainfall_utils.R` for R to fetch rainfall data and overlay it on CI plots, supporting multiple providers with a generic fetch wrapper.
- Included spatial helpers for efficient API calls based on unique geographical tiles.
This commit is contained in:
Timon 2026-03-12 17:30:01 +01:00
parent 003bb8255e
commit 1da5c0d0a7
5 changed files with 1105 additions and 220 deletions

View file

@ -0,0 +1,567 @@
"""
weather_api_comparison.py
=========================
Compare daily precipitation from multiple free weather APIs across two locations:
- Arnhem, Netherlands (51.985°N, 5.899°E) European climate
- Angata, Kenya ( 1.330°S, 34.738°E) tropical / sugarcane context
ARCHIVE providers (no API key required):
1. Open-Meteo ERA5 current SmartCane provider (0.25°, global)
2. Open-Meteo ERA5-Land higher resolution variant (0.10°, global)
3. Open-Meteo CERRA EU regional reanalysis (0.05°, EU only)
4. NASA POWER completely independent source (0.50°, global)
FORECAST providers (no API key required):
5. Open-Meteo Forecast ensemble of NWP models (global)
6. YR.no LocationForecast Norwegian Met Institute (~10 days, global)
FORECAST providers (API key required set in CONFIG below, leave "" to skip):
7. OpenWeatherMap free tier, 1000 calls/day
8. WeatherAPI.com free tier
OUTPUT:
Plots saved to: weather_comparison_plots/
Summary stats printed to console.
Usage:
python weather_api_comparison.py
"""
import os
import json
import time
import datetime
import requests
import pandas as pd
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
from pathlib import Path
# ============================================================
# CONFIG
# ============================================================
LOCATIONS = {
"Arnhem_NL": {"lat": 51.985, "lon": 5.899},
"Angata_KE": {"lat": -1.330, "lon": 34.738},
}
# Archive: last 12 months
ARCHIVE_END = datetime.date.today() - datetime.timedelta(days=2) # ERA5 lags ~2 days
ARCHIVE_START = ARCHIVE_END - datetime.timedelta(days=365)
# Forecast: today + 7 days
FORECAST_START = datetime.date.today()
FORECAST_END = FORECAST_START + datetime.timedelta(days=7)
# Optional API keys — leave "" to skip that provider
OPENWEATHERMAP_KEY = "" # https://openweathermap.org/api
WEATHERAPI_KEY = "" # https://www.weatherapi.com/
OUTPUT_DIR = Path("weather_comparison_plots")
OUTPUT_DIR.mkdir(exist_ok=True)
USER_AGENT = "SmartCane-WeatherComparison/1.0 (research; contact via github)"
# ============================================================
# ARCHIVE FETCHERS
# ============================================================
def fetch_openmeteo_archive(lat, lon, start, end, model="era5"):
"""Open-Meteo ERA5 / ERA5-Land / CERRA archive.
ERA5 is the default (no models param needed). ERA5-Land and CERRA use lowercase names.
"""
# ERA5 is the default model — adding models param with wrong casing causes 400
model_suffix = "" if model == "era5" else f"&models={model}"
url = (
f"https://archive-api.open-meteo.com/v1/archive"
f"?latitude={lat}&longitude={lon}"
f"&daily=precipitation_sum"
f"&start_date={start}&end_date={end}"
f"{model_suffix}"
f"&timezone=UTC"
)
r = requests.get(url, timeout=30)
r.raise_for_status()
body = r.json()
df = pd.DataFrame({
"date": pd.to_datetime(body["daily"]["time"]),
"rain_mm": body["daily"]["precipitation_sum"],
})
df["rain_mm"] = pd.to_numeric(df["rain_mm"], errors="coerce").clip(lower=0).fillna(0)
# ERA5-Land sometimes returns values in meters (Open-Meteo API quirk).
# Auto-detect: if annual total < 50mm for any non-polar location, assume m → convert.
if df["rain_mm"].sum() < 50 and len(df) > 30:
df["rain_mm"] = df["rain_mm"] * 1000
print(f" ⚠ Unit auto-converted m→mm (values were implausibly small)")
return df
def fetch_nasa_power(lat, lon, start, end):
"""NASA POWER — daily PRECTOTCORR (precipitation corrected), 0.5° grid."""
url = (
"https://power.larc.nasa.gov/api/temporal/daily/point"
f"?parameters=PRECTOTCORR"
f"&community=AG"
f"&longitude={lon}&latitude={lat}"
f"&start={start.strftime('%Y%m%d')}&end={end.strftime('%Y%m%d')}"
f"&format=JSON"
)
r = requests.get(url, timeout=60)
r.raise_for_status()
body = r.json()
raw = body["properties"]["parameter"]["PRECTOTCORR"]
df = pd.DataFrame([
{"date": pd.to_datetime(k, format="%Y%m%d"), "rain_mm": max(v, 0)}
for k, v in raw.items()
if v != -999 # NASA POWER fill value
])
return df.sort_values("date").reset_index(drop=True)
# ============================================================
# FORECAST FETCHERS
# ============================================================
def fetch_openmeteo_forecast(lat, lon, days=8):
"""Open-Meteo NWP forecast — default best_match model."""
end = datetime.date.today() + datetime.timedelta(days=days)
url = (
f"https://api.open-meteo.com/v1/forecast"
f"?latitude={lat}&longitude={lon}"
f"&daily=precipitation_sum"
f"&forecast_days={days + 1}"
f"&timezone=UTC"
)
r = requests.get(url, timeout=30)
r.raise_for_status()
body = r.json()
df = pd.DataFrame({
"date": pd.to_datetime(body["daily"]["time"]),
"rain_mm": body["daily"]["precipitation_sum"],
})
df["rain_mm"] = pd.to_numeric(df["rain_mm"], errors="coerce").fillna(0)
return df
def fetch_yr_forecast(lat, lon):
"""YR.no LocationForecast 2.0 — hourly precip aggregated to daily."""
url = f"https://api.met.no/weatherapi/locationforecast/2.0/compact?lat={lat}&lon={lon}"
headers = {"User-Agent": USER_AGENT}
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()
body = r.json()
records = []
for entry in body["properties"]["timeseries"]:
ts = pd.to_datetime(entry["time"])
data = entry["data"]
precip = 0.0
if "next_1_hours" in data:
precip = data["next_1_hours"]["details"].get("precipitation_amount", 0.0)
elif "next_6_hours" in data:
precip = data["next_6_hours"]["details"].get("precipitation_amount", 0.0) / 6
records.append({"datetime": ts, "precip_hour": precip})
hourly = pd.DataFrame(records)
hourly["date"] = hourly["datetime"].dt.date
daily = (
hourly.groupby("date")["precip_hour"]
.sum()
.reset_index()
.rename(columns={"precip_hour": "rain_mm"})
)
daily["date"] = pd.to_datetime(daily["date"])
return daily
def fetch_openweathermap_forecast(lat, lon, api_key):
"""OpenWeatherMap One Call 3.0 — daily forecast (needs paid/free key)."""
url = (
f"https://api.openweathermap.org/data/3.0/onecall"
f"?lat={lat}&lon={lon}"
f"&exclude=current,minutely,hourly,alerts"
f"&appid={api_key}&units=metric"
)
r = requests.get(url, timeout=30)
r.raise_for_status()
body = r.json()
records = []
for day in body.get("daily", []):
records.append({
"date": pd.to_datetime(day["dt"], unit="s").normalize(),
"rain_mm": day.get("rain", 0.0),
})
return pd.DataFrame(records)
def fetch_weatherapi_forecast(lat, lon, api_key, days=7):
"""WeatherAPI.com free forecast (up to 3 days on free tier, 14 on paid)."""
url = (
f"https://api.weatherapi.com/v1/forecast.json"
f"?key={api_key}&q={lat},{lon}&days={days}&aqi=no&alerts=no"
)
r = requests.get(url, timeout=30)
r.raise_for_status()
body = r.json()
records = []
for day in body["forecast"]["forecastday"]:
records.append({
"date": pd.to_datetime(day["date"]),
"rain_mm": day["day"].get("totalprecip_mm", 0.0),
})
return pd.DataFrame(records)
# ============================================================
# STATS
# ============================================================
def compare_stats(df, ref_col, other_col):
"""Compute MAE, RMSE, bias, Pearson r between two columns."""
valid = df[[ref_col, other_col]].dropna()
if len(valid) < 5:
return {"n": len(valid), "MAE": None, "RMSE": None, "Bias": None, "r": None}
diff = valid[other_col] - valid[ref_col]
mae = diff.abs().mean()
rmse = (diff**2).mean()**0.5
bias = diff.mean()
r = valid[ref_col].corr(valid[other_col])
return {"n": len(valid), "MAE": round(mae,2), "RMSE": round(rmse,2),
"Bias": round(bias,2), "r": round(r,3)}
# ============================================================
# PLOTTING
# ============================================================
def plot_archive(data_dict, location_name, start, end, output_dir):
"""Line plot of all archive providers for one location."""
fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)
colors = {
"ERA5 (Open-Meteo)": "#1f77b4",
"ERA5-Land (Open-Meteo)": "#ff7f0e",
"CERRA (Open-Meteo)": "#2ca02c",
"NASA POWER": "#d62728",
}
# Top: daily raw
ax1 = axes[0]
for name, df in data_dict.items():
if df is not None and len(df) > 0:
ax1.plot(df["date"], df["rain_mm"], label=name,
color=colors.get(name), linewidth=0.8, alpha=0.85)
ax1.set_ylabel("Precipitation (mm/day)")
ax1.set_title(f"{location_name} — Daily Precipitation (archive {start}{end})")
ax1.legend(fontsize=8)
ax1.grid(True, alpha=0.3)
# Bottom: 30-day rolling mean
ax2 = axes[1]
for name, df in data_dict.items():
if df is not None and len(df) > 0:
rolled = df.set_index("date")["rain_mm"].rolling(30, min_periods=15).mean()
ax2.plot(rolled.index, rolled.values, label=name,
color=colors.get(name), linewidth=1.5)
ax2.set_ylabel("30-day rolling mean (mm/day)")
ax2.set_xlabel("Date")
ax2.legend(fontsize=8)
ax2.grid(True, alpha=0.3)
ax2.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
fig.autofmt_xdate()
plt.tight_layout()
path = output_dir / f"archive_{location_name}.png"
plt.savefig(path, dpi=150)
plt.close()
print(f" Saved: {path}")
def plot_forecast(data_dict, location_name, output_dir):
"""Bar chart comparing 7-day forecasts across providers."""
fig, ax = plt.subplots(figsize=(12, 5))
providers = [(name, df) for name, df in data_dict.items() if df is not None and len(df) > 0]
n = len(providers)
if n == 0:
plt.close()
return
# Collect all forecast dates
all_dates = sorted(set(
d for _, df in providers
for d in df["date"].dt.date.tolist()
))
x = np.arange(len(all_dates))
width = 0.8 / n
cmap = matplotlib.colormaps["tab10"].resampled(n)
for i, (name, df) in enumerate(providers):
vals = []
date_map = dict(zip(df["date"].dt.date, df["rain_mm"]))
for d in all_dates:
vals.append(date_map.get(d, 0.0))
ax.bar(x + i * width, vals, width, label=name, color=cmap(i), alpha=0.85)
ax.set_xticks(x + width * (n - 1) / 2)
ax.set_xticklabels([d.strftime("%d %b") for d in all_dates], rotation=45, ha="right")
ax.set_ylabel("Precipitation (mm/day)")
ax.set_title(f"{location_name} — 7-Day Forecast Comparison")
ax.legend(fontsize=9)
ax.grid(True, axis="y", alpha=0.3)
plt.tight_layout()
path = output_dir / f"forecast_{location_name}.png"
plt.savefig(path, dpi=150)
plt.close()
print(f" Saved: {path}")
def plot_vs_era5(data_dict, location_name, output_dir):
"""Each provider vs ERA5 reference: scatter + regression line.
How to read:
- Each panel shows one provider (y-axis) vs ERA5 (x-axis) for daily precip.
- Points on the red diagonal = perfect agreement.
- Points above = provider wetter than ERA5 on that day.
- r = Pearson correlation (1 = perfect). MAE = mean absolute error in mm/day.
- Bias = provider minus ERA5 on average (positive = provider wetter).
"""
ref_name = "ERA5 (Open-Meteo)"
ref_df = data_dict.get(ref_name)
if ref_df is None:
return
others = [(n, df) for n, df in data_dict.items()
if n != ref_name and df is not None and len(df) > 0]
if not others:
return
n = len(others)
fig, axes = plt.subplots(1, n, figsize=(5 * n, 5), squeeze=False)
colors = {
"ERA5-Land (Open-Meteo)": "#ff7f0e",
"CERRA (Open-Meteo)": "#2ca02c",
"NASA POWER": "#d62728",
}
for i, (name, df) in enumerate(others):
ax = axes[0][i]
merged = ref_df.merge(df, on="date", suffixes=("_ref", "_cmp"))
valid = merged[["rain_mm_ref", "rain_mm_cmp"]].dropna()
color = colors.get(name, "steelblue")
ax.scatter(valid["rain_mm_ref"], valid["rain_mm_cmp"],
s=4, alpha=0.35, color=color)
# Perfect-agreement diagonal
lim = max(valid.max().max(), 1) * 1.05
ax.plot([0, lim], [0, lim], "r--", linewidth=1, label="Perfect agreement")
# Linear regression line
if len(valid) > 5:
coeffs = np.polyfit(valid["rain_mm_ref"], valid["rain_mm_cmp"], 1)
x_fit = np.linspace(0, lim, 100)
ax.plot(x_fit, np.polyval(coeffs, x_fit), "k-", linewidth=1,
alpha=0.6, label=f"Regression (slope={coeffs[0]:.2f})")
stats = compare_stats(merged, "rain_mm_ref", "rain_mm_cmp")
ax.set_xlim(0, lim); ax.set_ylim(0, lim)
ax.set_xlabel("ERA5 (Open-Meteo) mm/day", fontsize=9)
ax.set_ylabel(f"{name} mm/day", fontsize=9)
ax.set_title(
f"{name}\nr={stats['r']} MAE={stats['MAE']} mm Bias={stats['Bias']:+.2f} mm",
fontsize=9
)
ax.legend(fontsize=7)
ax.grid(True, alpha=0.3)
fig.suptitle(
f"{location_name} — Daily Precip vs ERA5 Reference\n"
"Red dashed = perfect agreement. Points above line = provider wetter than ERA5.",
fontsize=10
)
plt.tight_layout()
path = output_dir / f"vs_era5_{location_name}.png"
plt.savefig(path, dpi=150)
plt.close()
print(f" Saved: {path}")
def plot_cumulative(data_dict, location_name, output_dir):
"""Cumulative annual precipitation — most relevant for crop/irrigation context."""
fig, ax = plt.subplots(figsize=(14, 5))
colors = {
"ERA5 (Open-Meteo)": "#1f77b4",
"ERA5-Land (Open-Meteo)": "#ff7f0e",
"CERRA (Open-Meteo)": "#2ca02c",
"NASA POWER": "#d62728",
}
for name, df in data_dict.items():
if df is None or len(df) == 0:
continue
s = df.set_index("date")["rain_mm"].sort_index().cumsum()
total = s.iloc[-1]
ax.plot(s.index, s.values, label=f"{name} (total: {total:.0f} mm)",
color=colors.get(name), linewidth=1.8)
ax.set_ylabel("Cumulative precipitation (mm)")
ax.set_xlabel("Date")
ax.set_title(
f"{location_name} — Cumulative Annual Precipitation by Provider\n"
"Divergence = sources disagree on total seasonal rainfall"
)
ax.legend(fontsize=9)
ax.grid(True, alpha=0.3)
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
fig.autofmt_xdate()
plt.tight_layout()
path = output_dir / f"cumulative_{location_name}.png"
plt.savefig(path, dpi=150)
plt.close()
print(f" Saved: {path}")
# ============================================================
# MAIN
# ============================================================
def run_location(loc_name, lat, lon):
print(f"\n{'='*60}")
print(f" {loc_name} ({lat}°, {lon}°)")
print(f"{'='*60}")
# ---- ARCHIVE ----
print("\n[Archive]")
archive_data = {}
print(" Fetching Open-Meteo ERA5...")
try:
archive_data["ERA5 (Open-Meteo)"] = fetch_openmeteo_archive(
lat, lon, ARCHIVE_START, ARCHIVE_END, model="era5"
)
print(f"{len(archive_data['ERA5 (Open-Meteo)'])} days")
except Exception as e:
print(f" ✗ ERA5 failed: {e}")
archive_data["ERA5 (Open-Meteo)"] = None
time.sleep(0.5)
print(" Fetching Open-Meteo ERA5-Land...")
try:
archive_data["ERA5-Land (Open-Meteo)"] = fetch_openmeteo_archive(
lat, lon, ARCHIVE_START, ARCHIVE_END, model="era5_land"
)
print(f"{len(archive_data['ERA5-Land (Open-Meteo)'])} days")
except Exception as e:
print(f" ✗ ERA5-Land failed: {e}")
archive_data["ERA5-Land (Open-Meteo)"] = None
time.sleep(0.5)
# CERRA only covers Europe (roughly 20°W45°E, 30°N80°N)
if -20 <= lon <= 45 and 30 <= lat <= 80:
print(" Fetching Open-Meteo CERRA (EU only)...")
try:
archive_data["CERRA (Open-Meteo)"] = fetch_openmeteo_archive(
lat, lon, ARCHIVE_START, ARCHIVE_END, model="cerra"
)
print(f"{len(archive_data['CERRA (Open-Meteo)'])} days")
except Exception as e:
print(f" ✗ CERRA failed: {e}")
archive_data["CERRA (Open-Meteo)"] = None
else:
print(" Skipping CERRA (outside EU coverage)")
archive_data["CERRA (Open-Meteo)"] = None
time.sleep(0.5)
print(" Fetching NASA POWER...")
try:
archive_data["NASA POWER"] = fetch_nasa_power(lat, lon, ARCHIVE_START, ARCHIVE_END)
print(f"{len(archive_data['NASA POWER'])} days")
except Exception as e:
print(f" ✗ NASA POWER failed: {e}")
archive_data["NASA POWER"] = None
# Stats vs ERA5 reference
print("\n Stats vs ERA5 (Open-Meteo) reference:")
ref_df = archive_data.get("ERA5 (Open-Meteo)")
for name, df in archive_data.items():
if name == "ERA5 (Open-Meteo)" or df is None:
continue
if ref_df is None:
continue
merged = ref_df.merge(df, on="date", suffixes=("_ref", "_cmp"))
stats = compare_stats(merged, "rain_mm_ref", "rain_mm_cmp")
print(f" {name:30s} n={stats['n']:4d} MAE={stats['MAE']} "
f"RMSE={stats['RMSE']} Bias={stats['Bias']} r={stats['r']}")
plot_archive(archive_data, loc_name, ARCHIVE_START, ARCHIVE_END, OUTPUT_DIR)
plot_cumulative(archive_data, loc_name, OUTPUT_DIR)
plot_vs_era5(archive_data, loc_name, OUTPUT_DIR)
# ---- FORECAST ----
print("\n[Forecast]")
forecast_data = {}
print(" Fetching Open-Meteo forecast...")
try:
forecast_data["Open-Meteo Forecast"] = fetch_openmeteo_forecast(lat, lon)
print(f"{len(forecast_data['Open-Meteo Forecast'])} days")
except Exception as e:
print(f" ✗ Open-Meteo forecast failed: {e}")
forecast_data["Open-Meteo Forecast"] = None
time.sleep(0.5)
print(" Fetching YR.no LocationForecast...")
try:
forecast_data["YR.no"] = fetch_yr_forecast(lat, lon)
print(f"{len(forecast_data['YR.no'])} days")
except Exception as e:
print(f" ✗ YR.no failed: {e}")
forecast_data["YR.no"] = None
if OPENWEATHERMAP_KEY:
time.sleep(0.5)
print(" Fetching OpenWeatherMap forecast...")
try:
forecast_data["OpenWeatherMap"] = fetch_openweathermap_forecast(
lat, lon, OPENWEATHERMAP_KEY
)
print(f"{len(forecast_data['OpenWeatherMap'])} days")
except Exception as e:
print(f" ✗ OpenWeatherMap failed: {e}")
forecast_data["OpenWeatherMap"] = None
if WEATHERAPI_KEY:
time.sleep(0.5)
print(" Fetching WeatherAPI.com forecast...")
try:
forecast_data["WeatherAPI.com"] = fetch_weatherapi_forecast(
lat, lon, WEATHERAPI_KEY
)
print(f"{len(forecast_data['WeatherAPI.com'])} days")
except Exception as e:
print(f" ✗ WeatherAPI.com failed: {e}")
forecast_data["WeatherAPI.com"] = None
plot_forecast(forecast_data, loc_name, OUTPUT_DIR)
if __name__ == "__main__":
print(f"Weather API Comparison — {datetime.date.today()}")
print(f"Archive: {ARCHIVE_START}{ARCHIVE_END}")
print(f"Forecast: {FORECAST_START}{FORECAST_END}")
print(f"Output: {OUTPUT_DIR.resolve()}")
for loc_name, coords in LOCATIONS.items():
run_location(loc_name, coords["lat"], coords["lon"])
time.sleep(1)
print(f"\nDone. Plots saved to: {OUTPUT_DIR.resolve()}")

View file

@ -128,23 +128,60 @@ main <- function() {
}
# Process each DATE (load merged TIFF once, extract all fields from it)
total_success <- 0
total_processed_dates <- 0
total_skipped_dates <- 0
total_already_complete_dates <- 0
total_error <- 0
for (date_str in dates_filter) {
# Load the MERGED TIFF (farm-wide) ONCE for this date
input_tif_merged <- file.path(setup$merged_tif_folder, sprintf("%s.tif", date_str))
output_tifs <- file.path(setup$field_tiles_ci_dir, fields, sprintf("%s.tif", date_str))
output_rds <- file.path(setup$daily_ci_vals_dir, fields, sprintf("%s.rds", date_str))
names(output_tifs) <- fields
names(output_rds) <- fields
tif_exists <- file.exists(output_tifs)
rds_exists <- file.exists(output_rds)
fields_need_rds_only <- fields[tif_exists & !rds_exists]
fields_need_raster <- fields[!tif_exists]
if (length(fields_need_rds_only) == 0 && length(fields_need_raster) == 0) {
total_already_complete_dates <- total_already_complete_dates + 1
safe_log(sprintf(" %s: All field outputs already exist (skipping)", date_str))
next
}
fields_processed_this_date <- 0
rds_only_processed <- 0
raster_processed_this_date <- 0
if (length(fields_need_rds_only) > 0) {
for (field in fields_need_rds_only) {
tryCatch({
extract_rds_from_ci_tiff(output_tifs[[field]], output_rds[[field]], field_boundaries_sf, field)
fields_processed_this_date <- fields_processed_this_date + 1
rds_only_processed <- rds_only_processed + 1
}, error = function(e) {
safe_log(sprintf(" Error regenerating RDS for field %s: %s", field, e$message), "WARNING")
})
}
}
if (length(fields_need_raster) == 0) {
total_processed_dates <- total_processed_dates + 1
safe_log(sprintf(" %s: Regenerated %d RDS files from existing CI TIFFs", date_str, rds_only_processed))
next
}
if (!file.exists(input_tif_merged)) {
safe_log(sprintf(" %s: merged_tif not found (skipping)", date_str))
total_error <<- total_error + 1
total_skipped_dates <- total_skipped_dates + 1
next
}
tryCatch({
# Load 4-band TIFF ONCE
# Load the merged TIFF only when at least one field still needs a CI TIFF.
raster_4band <- terra::rast(input_tif_merged)
safe_log(sprintf(" %s: Loaded merged TIFF, processing %d fields...", date_str, length(fields)))
safe_log(sprintf(" %s: Loaded merged TIFF, processing %d fields...", date_str, length(fields_need_raster)))
# Calculate CI from 4-band
ci_raster <- calc_ci_from_raster(raster_4band)
@ -154,44 +191,18 @@ main <- function() {
five_band <- c(raster_4band, ci_raster)
names(five_band) <- c("Red", "Green", "Blue", "NIR", "CI")
# Now process all fields from this single merged TIFF
fields_processed_this_date <- 0
# Now process only the fields that still need CI TIFF output for this date.
for (field in fields_need_raster) {
output_tif_path <- output_tifs[[field]]
output_rds_path <- output_rds[[field]]
for (field in fields) {
field_ci_path <- file.path(setup$field_tiles_ci_dir, field)
field_daily_vals_path <- file.path(setup$daily_ci_vals_dir, field)
# Pre-create output directories
dir.create(field_ci_path, showWarnings = FALSE, recursive = TRUE)
dir.create(field_daily_vals_path, showWarnings = FALSE, recursive = TRUE)
output_tif <- file.path(field_ci_path, sprintf("%s.tif", date_str))
output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str))
# MODE 3: Skip if both outputs already exist
if (file.exists(output_tif) && file.exists(output_rds)) {
next
}
# MODE 2: Regeneration mode - RDS missing but CI TIFF exists
if (file.exists(output_tif) && !file.exists(output_rds)) {
tryCatch({
extract_rds_from_ci_tiff(output_tif, output_rds, field_boundaries_sf, field)
fields_processed_this_date <- fields_processed_this_date + 1
}, error = function(e) {
# Continue to next field
})
next
}
# MODE 1: Normal mode - crop 5-band TIFF to field boundary and save
tryCatch({
# Crop 5-band TIFF to field boundary
field_geom <- field_boundaries_sf %>% filter(field == !!field)
five_band_cropped <- terra::crop(five_band, field_geom, mask = TRUE)
# Save 5-band field TIFF
terra::writeRaster(five_band_cropped, output_tif, overwrite = TRUE)
terra::writeRaster(five_band_cropped, output_tif_path, overwrite = TRUE)
# Extract CI statistics by sub_field (from cropped CI raster)
ci_cropped <- five_band_cropped[[5]] # 5th band is CI
@ -199,10 +210,11 @@ main <- function() {
# Save RDS
if (!is.null(ci_stats) && nrow(ci_stats) > 0) {
saveRDS(ci_stats, output_rds)
saveRDS(ci_stats, output_rds_path)
}
fields_processed_this_date <- fields_processed_this_date + 1
raster_processed_this_date <- raster_processed_this_date + 1
}, error = function(e) {
# Error in individual field, continue to next
@ -210,24 +222,34 @@ main <- function() {
})
}
# Increment success counter if at least one field succeeded
if (fields_processed_this_date > 0) {
total_success <<- total_success + 1
safe_log(sprintf(" %s: Processed %d fields", date_str, fields_processed_this_date))
total_processed_dates <- total_processed_dates + 1
safe_log(sprintf(
" %s: Processed %d fields (%d CI TIFFs created, %d RDS-only regenerated)",
date_str,
fields_processed_this_date,
raster_processed_this_date,
rds_only_processed
))
} else {
total_error <- total_error + 1
safe_log(sprintf(" %s: No field outputs were created; see warnings above", date_str), "ERROR")
}
}, error = function(e) {
total_error <<- total_error + 1
total_error <- total_error + 1
safe_log(sprintf(" %s: Error loading or processing merged TIFF - %s", date_str, e$message), "ERROR")
})
}
# Summary
safe_log(sprintf("\n=== Processing Complete ==="))
safe_log(sprintf("Successfully processed: %d", total_success))
safe_log(sprintf("Dates processed: %d", total_processed_dates))
safe_log(sprintf("Dates skipped (missing merged_tif): %d", total_skipped_dates))
safe_log(sprintf("Dates already complete: %d", total_already_complete_dates))
safe_log(sprintf("Errors encountered: %d", total_error))
if (total_success > 0) {
if (total_processed_dates > 0) {
safe_log("Output files created in:")
safe_log(sprintf(" TIFFs: %s", setup$field_tiles_ci_dir))
safe_log(sprintf(" RDS: %s", setup$daily_ci_vals_dir))

View file

@ -56,7 +56,6 @@ suppressPackageStartupMessages({
# Visualization
library(tmap) # For interactive maps (field boundary visualization)
library(ggspatial) # For basemap tiles and spatial annotations (OSM basemap with ggplot2)
# Reporting
library(knitr) # For R Markdown document generation (code execution and output)
library(flextable) # For formatted tables in Word output (professional table styling)
@ -84,6 +83,16 @@ tryCatch({
})
})
tryCatch({
source("90_rainfall_utils.R")
}, error = function(e) {
tryCatch({
source(here::here("r_app", "90_rainfall_utils.R"))
}, error = function(e) {
message("Could not load 90_rainfall_utils.R - rain overlay disabled: ", e$message)
})
})
```
```{r initialize_project_config, message=FALSE, warning=FALSE, include=FALSE}
@ -1184,6 +1193,46 @@ tryCatch({
# load_per_field_mosaic() is defined in 90_report_utils.R (sourced above)
# --- Fetch rain for all fields (one API call per 0.25° tile) ---
all_rain_data <- NULL
if (exists("rain_fetch_for_fields") && !is.null(CI_quadrant) && nrow(CI_quadrant) > 0) {
tryCatch({
# Compute field centroids from AllPivots0 (already-loaded sf object)
field_centroids <- AllPivots0 %>%
dplyr::group_by(field) %>%
dplyr::summarise(geometry = sf::st_union(geometry), .groups = "drop") %>%
sf::st_centroid() %>%
dplyr::mutate(
longitude = sf::st_coordinates(geometry)[, 1],
latitude = sf::st_coordinates(geometry)[, 2]
) %>%
sf::st_drop_geometry() %>%
dplyr::select(field_id = field, latitude, longitude)
# Determine current-season date range across all fields
rain_start <- CI_quadrant %>%
dplyr::group_by(field) %>%
dplyr::filter(season == max(season)) %>%
dplyr::summarise(s = min(Date, na.rm = TRUE), .groups = "drop") %>%
dplyr::pull(s) %>%
min(na.rm = TRUE)
rain_end <- as.Date(today)
message(paste("Fetching rain data from", rain_start, "to", rain_end,
"for", nrow(field_centroids), "fields"))
all_rain_data <- rain_fetch_for_fields(
centroids_df = field_centroids,
start_date = rain_start,
end_date = rain_end
)
message(paste("Rain data fetched:", nrow(all_rain_data), "rows"))
}, error = function(e) {
message("Rain fetch failed - overlay disabled: ", e$message)
all_rain_data <<- NULL
})
}
# Iterate through fields using purrr::walk
purrr::walk(AllPivots_merged$field, function(field_name) {
tryCatch({
@ -1249,6 +1298,14 @@ tryCatch({
# Call cum_ci_plot for trend analysis
if (!is.null(CI_quadrant)) {
field_rain <- if (!is.null(all_rain_data)) {
all_rain_data %>%
dplyr::filter(field_id == field_name) %>%
dplyr::select(date, rain_mm)
} else {
NULL
}
cum_ci_plot(
pivotName = field_name,
ci_quadrant_data = ci_quadrant_data,
@ -1259,7 +1316,8 @@ tryCatch({
show_benchmarks = TRUE,
estate_name = project_dir,
benchmark_percentiles = c(10, 50, 90),
benchmark_data = benchmarks
benchmark_data = benchmarks,
rain_data = field_rain
)
#cat("\n")
}

369
r_app/90_rainfall_utils.R Normal file
View file

@ -0,0 +1,369 @@
# 90_RAINFALL_UTILS.R
# ============================================================================
# Rainfall data fetching and ggplot overlay utilities for SmartCane CI reports.
#
# Provider-agnostic design: to swap weather provider, implement a new
# rain_fetch_<provider>() function and update the dispatch in rain_fetch().
#
# EXPORTS:
# - rain_snap_to_tile() Snap lat/lon to Open-Meteo native 0.25° grid
# - rain_fetch() Generic fetch wrapper (dispatches to provider)
# - rain_fetch_for_fields() Batch fetch with spatial tile deduplication
# - rain_join_to_ci() Join rain to latest-season CI data by Date
# - rain_add_to_plot() Overlay rain on single CI plot (abs or cum)
# - rain_add_to_faceted_plot() Overlay rain on faceted "both" CI plot
# ============================================================================
suppressPackageStartupMessages({
library(dplyr)
library(tidyr)
library(httr2)
library(purrr)
})
# ============================================================================
# PROVIDER LAYER — swap here to change data source
# ============================================================================
#' Fetch daily precipitation from Open-Meteo ERA5 archive
#'
#' ERA5 covers 1940-present at 0.25° (~28 km) resolution.
#' No API key required.
#'
#' @param latitude Numeric. WGS84 latitude.
#' @param longitude Numeric. WGS84 longitude.
#' @param start_date Date or "YYYY-MM-DD" string.
#' @param end_date Date or "YYYY-MM-DD" string.
#' @return data.frame with columns: date (Date), rain_mm (numeric)
rain_fetch_open_meteo_archive <- function(latitude, longitude, start_date, end_date) {
url <- paste0(
"https://archive-api.open-meteo.com/v1/archive",
"?latitude=", latitude,
"&longitude=", longitude,
"&daily=precipitation_sum",
"&start_date=", format(as.Date(start_date), "%Y-%m-%d"),
"&end_date=", format(as.Date(end_date), "%Y-%m-%d"),
"&timezone=auto"
)
resp <- tryCatch(
httr2::request(url) %>% httr2::req_timeout(30) %>% httr2::req_perform(),
error = function(e) stop(paste("Rain archive request failed:", e$message), call. = FALSE)
)
if (httr2::resp_status(resp) != 200) {
stop(paste("Rain archive API returned status", httr2::resp_status(resp)), call. = FALSE)
}
body <- httr2::resp_body_json(resp)
if (is.null(body$daily) || is.null(body$daily$time)) {
stop("Unexpected Open-Meteo archive response: missing daily/time.", call. = FALSE)
}
data.frame(
date = as.Date(unlist(body$daily$time)),
rain_mm = as.numeric(unlist(body$daily$precipitation_sum)),
stringsAsFactors = FALSE
)
}
# ============================================================================
# GENERIC FETCH WRAPPER
# ============================================================================
#' Fetch daily precipitation via the configured provider
#'
#' @param latitude Numeric.
#' @param longitude Numeric.
#' @param start_date Date or "YYYY-MM-DD".
#' @param end_date Date or "YYYY-MM-DD".
#' @param provider Character. Currently only "open_meteo_archive".
#' @return data.frame(date, rain_mm)
rain_fetch <- function(latitude, longitude, start_date, end_date,
provider = "open_meteo_archive") {
# --- ADD NEW PROVIDERS HERE ---
switch(provider,
open_meteo_archive = rain_fetch_open_meteo_archive(
latitude, longitude, start_date, end_date
),
stop(paste("Unknown rain provider:", provider), call. = FALSE)
)
}
# ============================================================================
# SPATIAL HELPERS
# ============================================================================
#' Snap a coordinate to the nearest ERA5 tile centre
#'
#' Open-Meteo ERA5 has 0.25° native resolution (~28 km). Snapping coordinates
#' to this grid avoids redundant API calls for nearby fields.
#'
#' @param latitude Numeric.
#' @param longitude Numeric.
#' @param resolution Numeric. Grid resolution in degrees (default 0.25).
#' @return Named list: tile_lat, tile_lon, tile_id
rain_snap_to_tile <- function(latitude, longitude, resolution = 0.25) {
tile_lat <- round(latitude / resolution) * resolution
tile_lon <- round(longitude / resolution) * resolution
list(
tile_lat = tile_lat,
tile_lon = tile_lon,
tile_id = paste0(tile_lat, "_", tile_lon)
)
}
# ============================================================================
# BATCH FETCH WITH TILE DEDUPLICATION
# ============================================================================
#' Fetch rain for multiple fields, calling the API once per unique 0.25° tile
#'
#' Fields that fall on the same ERA5 tile share one API call. This handles
#' estates spread over 100+ km (e.g. Angata) without over-calling the API.
#'
#' @param centroids_df data.frame with columns: field_id, latitude, longitude
#' @param start_date Date or "YYYY-MM-DD". Start of fetch window.
#' @param end_date Date or "YYYY-MM-DD". End of fetch window.
#' @param provider Character. Passed to rain_fetch().
#' @return data.frame with columns: field_id, date, rain_mm
rain_fetch_for_fields <- function(centroids_df, start_date, end_date,
provider = "open_meteo_archive") {
required_cols <- c("field_id", "latitude", "longitude")
if (!all(required_cols %in% names(centroids_df))) {
stop(paste("centroids_df must have:", paste(required_cols, collapse = ", ")), call. = FALSE)
}
# Snap each field to its tile
centroids_tiled <- centroids_df %>%
dplyr::mutate(
tile_info = purrr::map2(latitude, longitude, rain_snap_to_tile),
tile_lat = purrr::map_dbl(tile_info, "tile_lat"),
tile_lon = purrr::map_dbl(tile_info, "tile_lon"),
tile_id = purrr::map_chr(tile_info, "tile_id")
) %>%
dplyr::select(-tile_info)
# Fetch once per unique tile
unique_tiles <- centroids_tiled %>%
dplyr::distinct(tile_id, tile_lat, tile_lon)
tile_rain <- unique_tiles %>%
dplyr::mutate(rain_data = purrr::pmap(
list(tile_lat, tile_lon),
function(lat, lon) {
tryCatch(
rain_fetch(lat, lon, start_date, end_date, provider = provider),
error = function(e) {
safe_log(paste0("Rain fetch failed for tile ", lat, "_", lon, ": ", e$message), "WARNING")
data.frame(date = as.Date(character(0)), rain_mm = numeric(0))
}
)
}
))
# Join tile rain back to fields and unnest
centroids_tiled %>%
dplyr::left_join(tile_rain %>% dplyr::select(tile_id, rain_data), by = "tile_id") %>%
dplyr::select(field_id, rain_data) %>%
tidyr::unnest(rain_data)
}
# ============================================================================
# PROCESSING
# ============================================================================
#' Join rain data to latest-season CI data by Date
#'
#' @param rain_df data.frame(date, rain_mm) for the field.
#' @param ci_season_data data.frame with at minimum: Date, DAH, week columns
#' (latest season only, one row per date).
#' @return data.frame with Date, DAH, week, rain_mm, cum_rain_mm columns,
#' ordered by Date. Only dates present in ci_season_data are returned.
rain_join_to_ci <- function(rain_df, ci_season_data) {
if (is.null(rain_df) || nrow(rain_df) == 0) return(NULL)
ci_dates <- ci_season_data %>%
dplyr::distinct(Date, DAH, week) %>%
dplyr::arrange(Date)
joined <- ci_dates %>%
dplyr::left_join(
rain_df %>% dplyr::rename(Date = date),
by = "Date"
) %>%
dplyr::mutate(
rain_mm = tidyr::replace_na(rain_mm, 0),
cum_rain_mm = cumsum(rain_mm)
)
joined
}
# ============================================================================
# PLOT OVERLAY — single plot (absolute or cumulative)
# ============================================================================
#' Add a rainfall overlay to a single-panel CI ggplot
#'
#' For absolute CI (mean_rolling_10_days): adds daily precipitation bars
#' (steelblue, semi-transparent) scaled to the primary y-axis range, with a
#' secondary right y-axis labelled in mm/day.
#'
#' For cumulative CI: adds a cumulative precipitation filled area scaled to the
#' primary y-axis range, with a secondary right y-axis labelled in mm.
#'
#' The secondary axis is a linear transform of the primary axis (ggplot2
#' requirement). The rain values are scaled so the maximum rain never exceeds
#' y_max on screen. The right axis labels show the real mm values.
#'
#' @param g ggplot object.
#' @param rain_joined Result of rain_join_to_ci(). NULL returns g unchanged.
#' @param ci_type "mean_rolling_10_days" or "cumulative_CI".
#' @param x_var Character: "DAH", "week", or "Date".
#' @param y_max Numeric. Max value of the primary y-axis (used for scaling
#' and for setting limits = c(0, y_max) on the primary axis).
#' @return Modified ggplot object.
rain_add_to_plot <- function(g, rain_joined, ci_type, x_var, y_max) {
if (is.null(rain_joined) || nrow(rain_joined) == 0) {
# Still enforce y limits even without rain
return(g + ggplot2::scale_y_continuous(limits = c(0, y_max)))
}
if (ci_type == "mean_rolling_10_days") {
# --- Daily precipitation bars ---
max_rain <- max(rain_joined$rain_mm, na.rm = TRUE)
if (is.na(max_rain) || max_rain == 0) {
return(g + ggplot2::scale_y_continuous(limits = c(0, y_max)))
}
scale_factor <- y_max / max_rain
bar_width <- switch(x_var,
"DAH" = 1,
"week" = 0.5,
1 # Date: 1 day width handled by ggplot
)
g +
ggplot2::geom_col(
data = rain_joined,
ggplot2::aes(x = .data[[x_var]], y = rain_mm * scale_factor),
fill = "steelblue",
alpha = 0.65,
width = bar_width,
inherit.aes = FALSE
) +
ggplot2::scale_y_continuous(
limits = c(0, y_max),
sec.axis = ggplot2::sec_axis(
~ . / scale_factor,
name = tr_key("lbl_rain_mm_day", "Precipitation (mm/day)")
)
)
} else if (ci_type == "cumulative_CI") {
# --- Cumulative precipitation area ---
max_cum_rain <- max(rain_joined$cum_rain_mm, na.rm = TRUE)
if (is.na(max_cum_rain) || max_cum_rain == 0) {
return(g)
}
scale_factor <- (y_max * 0.30) / max_cum_rain
g +
ggplot2::geom_line(
data = rain_joined,
ggplot2::aes(x = .data[[x_var]], y = cum_rain_mm * scale_factor),
color = "steelblue",
linewidth = 0.8,
inherit.aes = FALSE
) +
ggplot2::scale_y_continuous(
limits = c(0, y_max),
sec.axis = ggplot2::sec_axis(
~ . / scale_factor,
name = tr_key("lbl_cum_rain_mm", "Cumulative Rain (mm)")
)
)
} else {
g
}
}
# ============================================================================
# PLOT OVERLAY — faceted "both" plot
# ============================================================================
#' Add rainfall overlays to a faceted (plot_type = "both") CI ggplot
#'
#' ggplot2 does not support per-facet secondary axes with free_y scales.
#' This function adds the rain layers to the correct facets by setting
#' ci_type_label to match the facet strip, but omits secondary axis labels
#' to avoid misleading scale information across facets.
#'
#' Rolling mean facet receives: daily precipitation bars.
#' Cumulative facet receives: cumulative precipitation filled area.
#'
#' @param g ggplot object with facet_wrap(~ci_type_label).
#' @param rain_joined Result of rain_join_to_ci().
#' @param rolling_mean_label Character. Must match the strip label for the
#' rolling mean facet (from tr_key).
#' @param cumulative_label Character. Must match the strip label for the
#' cumulative CI facet (from tr_key).
#' @param x_var Character: "DAH", "week", or "Date".
#' @param y_max_abs Numeric. Primary y-axis max for rolling mean facet (typically 7).
#' @param y_max_cum Numeric. Primary y-axis max for cumulative CI facet.
#' @return Modified ggplot object.
rain_add_to_faceted_plot <- function(g, rain_joined, rolling_mean_label, cumulative_label,
x_var, y_max_abs, y_max_cum) {
if (is.null(rain_joined) || nrow(rain_joined) == 0) return(g)
# --- Bars in rolling mean facet ---
max_rain <- max(rain_joined$rain_mm, na.rm = TRUE)
if (!is.na(max_rain) && max_rain > 0) {
scale_abs <- y_max_abs / max_rain
bar_width <- switch(x_var, "DAH" = 1, "week" = 0.5, 1)
rain_bars <- rain_joined %>%
dplyr::mutate(
ci_type_label = rolling_mean_label,
y_scaled = rain_mm * scale_abs
)
g <- g +
ggplot2::geom_col(
data = rain_bars,
ggplot2::aes(x = .data[[x_var]], y = y_scaled),
fill = "steelblue",
alpha = 0.65,
width = bar_width,
inherit.aes = FALSE
)
}
# --- Area in cumulative facet ---
max_cum_rain <- max(rain_joined$cum_rain_mm, na.rm = TRUE)
if (!is.na(max_cum_rain) && max_cum_rain > 0) {
scale_cum <- (y_max_cum * 0.30) / max_cum_rain
rain_area <- rain_joined %>%
dplyr::mutate(
ci_type_label = cumulative_label,
y_scaled = cum_rain_mm * scale_cum
)
g <- g +
ggplot2::geom_line(
data = rain_area,
ggplot2::aes(x = .data[[x_var]], y = y_scaled),
color = "steelblue",
linewidth = 0.8,
inherit.aes = FALSE
)
}
g
}

View file

@ -13,8 +13,15 @@
#'
subchunkify <- function(g, fig_height=7, fig_width=5) {
g_deparsed <- paste0(deparse(
function() {g}
), collapse = '')
function() {
if (inherits(g, c("gtable", "grob", "gTree"))) {
grid::grid.newpage()
grid::grid.draw(g)
} else {
print(g)
}
}
), collapse = '\n')
sub_chunk <- paste0("
`","``{r sub_chunk_", floor(runif(1) * 10000), ", fig.height=", fig_height, ", fig.width=", fig_width, ", dpi=300, dev='png', out.width='100%', echo=FALSE}",
@ -394,9 +401,11 @@ ci_plot <- function(pivotName,
#' @param show_benchmarks Whether to show historical benchmark lines (default: FALSE)
#' @param estate_name Name of the estate for benchmark calculation (required if show_benchmarks = TRUE)
#' @param benchmark_percentiles Vector of percentiles for benchmarks (default: c(10, 50, 90))
#' @param rain_data data.frame(date, rain_mm) for this field's current season,
#' as returned by rain_fetch_for_fields() / rain_join_to_ci(). NULL disables overlay.
#' @return NULL (adds output directly to R Markdown document)
#'
cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "absolute", facet_on = FALSE, x_unit = "days", colorblind_friendly = FALSE, show_benchmarks = FALSE, estate_name = NULL, benchmark_percentiles = c(10, 50, 90), benchmark_data = NULL) {
cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "absolute", facet_on = FALSE, x_unit = "days", colorblind_friendly = FALSE, show_benchmarks = FALSE, estate_name = NULL, benchmark_percentiles = c(10, 50, 90), benchmark_data = NULL, rain_data = NULL) {
# Input validation
if (missing(pivotName) || is.null(pivotName) || pivotName == "") {
stop("pivotName is required")
@ -638,9 +647,23 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
}
# Add y-axis limits for absolute CI (10-day rolling mean) to fix scale at 0-7
if (ci_type_filter == "mean_rolling_10_days") {
g <- g + ggplot2::ylim(0, 7)
# Y-axis limits + optional rain overlay (single-panel plots)
y_max <- if (ci_type_filter == "mean_rolling_10_days") {
7
} else {
max(plot_data$ci_value, na.rm = TRUE) * 1.05
}
if (!is.null(rain_data)) {
latest_ci_dates <- plot_data %>%
dplyr::filter(is_latest) %>%
dplyr::distinct(Date, DAH, week)
rain_joined <- rain_join_to_ci(rain_data, latest_ci_dates)
g <- rain_add_to_plot(g, rain_joined, ci_type_filter, x_var, y_max)
} else {
if (ci_type_filter == "mean_rolling_10_days") {
g <- g + ggplot2::ylim(0, y_max)
}
}
return(g)
@ -654,174 +677,20 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
g <- create_plot("cumulative_CI", cumulative_label, "")
subchunkify(g, 2.8, 10)
} else if (plot_type == "both") {
# Create faceted plot with both CI types using pivot_longer approach
plot_data_both <- data_ci3 %>%
dplyr::filter(season %in% unique_seasons) %>%
dplyr::mutate(
ci_type_label = factor(case_when(
ci_type == "mean_rolling_10_days" ~ rolling_mean_label,
ci_type == "cumulative_CI" ~ cumulative_label,
TRUE ~ ci_type
), levels = c(rolling_mean_label, cumulative_label)),
is_latest = season == latest_season # Flag for latest season
)
# Determine x-axis variable based on x_unit parameter
x_var <- if (x_unit == "days") {
if (facet_on) "Date" else "DAH"
} else {
"week"
}
x_label <- switch(x_unit,
"days" = if (facet_on) tr_key("lbl_date", "Date") else tr_key("lbl_age_of_crop_days", "Age of Crop (Days)"),
"weeks" = tr_key("lbl_week_number", "Week Number"))
# Choose color palette based on colorblind_friendly flag
color_scale <- if (colorblind_friendly) {
ggplot2::scale_color_brewer(type = "qual", palette = "Set2")
} else {
ggplot2::scale_color_discrete()
}
# Calculate dynamic max values for breaks
max_dah_both <- max(plot_data_both$DAH, na.rm = TRUE) + 20
max_week_both <- max(as.numeric(plot_data_both$week), na.rm = TRUE) + ceiling(20 / 7)
# Pre-evaluate translated title here (not inside labs()) so {pivotName} resolves correctly
# Build each panel independently so each gets its own secondary rain y-axis.
# (facet_wrap + free_y does not support per-facet sec.axis in ggplot2.)
both_plot_title <- tr_key("lbl_ci_analysis_title", "CI Analysis for Field {pivotName}")
# Create the faceted plot
g_both <- ggplot2::ggplot(data = plot_data_both) +
# Add benchmark lines first (behind season lines)
{
if (!is.null(benchmark_data)) {
# Clip benchmark to max DAH of plotted seasons + 10% buffer
max_dah_clip <- max(plot_data_both$DAH, na.rm = TRUE) * 1.1
benchmark_subset <- benchmark_data %>%
dplyr::filter(DAH <= max_dah_clip) %>%
dplyr::mutate(
benchmark_x = if (x_var == "DAH") {
DAH
} else if (x_var == "week") {
DAH / 7
} else {
DAH
},
ci_type_label = factor(case_when(
ci_type == "value" ~ rolling_mean_label,
ci_type == "cumulative_CI" ~ cumulative_label,
TRUE ~ ci_type
), levels = c(rolling_mean_label, cumulative_label))
)
ggplot2::geom_smooth(
data = benchmark_subset,
ggplot2::aes(
x = .data[["benchmark_x"]],
y = .data[["benchmark_value"]],
group = factor(.data[["percentile"]])
),
color = "gray70", linewidth = 0.5, se = FALSE, inherit.aes = FALSE, fullrange = FALSE
)
}
} +
ggplot2::facet_wrap(~ci_type_label, scales = "free_y") +
# Plot older seasons with lighter lines
ggplot2::geom_line(
data = plot_data_both %>% dplyr::filter(!is_latest),
ggplot2::aes(
x = .data[[x_var]],
y = .data[["ci_value"]],
col = .data[["season"]],
group = .data[["season"]]
),
linewidth = 0.7, alpha = 0.4
) +
# Plot latest season with thicker, more prominent line
ggplot2::geom_line(
data = plot_data_both %>% dplyr::filter(is_latest),
ggplot2::aes(
x = .data[[x_var]],
y = .data[["ci_value"]],
col = .data[["season"]],
group = .data[["season"]]
),
linewidth = 1.5, alpha = 1
) +
ggplot2::labs(title = both_plot_title,
color = tr_key("lbl_season", "Season"),
y = tr_key("lbl_ci_value", "CI Value"),
x = x_label) +
color_scale +
{
if (x_var == "DAH") {
# Dynamic breaks based on actual data range
dah_breaks <- scales::pretty_breaks(n = 5)(c(0, max_dah_both))
# Month breaks: in secondary axis scale (month numbers 0, 1, 2, ...)
n_months <- ceiling(max_dah_both / 30.44)
month_breaks <- seq(0, n_months, by = 1)
ggplot2::scale_x_continuous(
breaks = dah_breaks,
sec.axis = ggplot2::sec_axis(
~ . / 30.44,
name = tr_key("lbl_age_in_months", "Age in Months"),
breaks = month_breaks,
labels = function(x) as.integer(x) # Show all month labels
)
)
} else if (x_var == "week") {
# Dynamic breaks based on actual data range
week_breaks <- scales::pretty_breaks(n = 5)(c(0, max_week_both))
# Month breaks: in secondary axis scale (month numbers 0, 1, 2, ...)
n_months <- ceiling(max_week_both / 4.348)
month_breaks <- seq(0, n_months, by = 1)
ggplot2::scale_x_continuous(
breaks = week_breaks,
sec.axis = ggplot2::sec_axis(
~ . / 4.348,
name = tr_key("lbl_age_in_months", "Age in Months"),
breaks = month_breaks,
labels = function(x) as.integer(x) # Show all month labels
)
)
} else if (x_var == "Date") {
ggplot2::scale_x_date(breaks = "1 month", date_labels = "%b-%Y", sec.axis = ggplot2::sec_axis(~ ., name = tr_key("lbl_age_in_months", "Age in Months"), breaks = scales::breaks_pretty()))
}
} +
ggplot2::theme_minimal() +
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
axis.title.x.top = ggplot2::element_text(size = 8),
legend.justification = c(1, 0),
legend.position = "inside",
legend.position.inside = c(1, 0),
legend.title = ggplot2::element_text(size = 8),
legend.text = ggplot2::element_text(size = 8)) +
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
g_abs <- create_plot("mean_rolling_10_days", rolling_mean_label, "") +
ggplot2::labs(title = rolling_mean_label) +
ggplot2::theme(legend.position = "none")
# For the rolling mean data, we want to set reasonable y-axis limits
# Since we're using free_y scales, each facet will have its own y-axis
# The rolling mean will automatically scale to its data range,
# but we can ensure it shows the 0-7 context by adding invisible points
g_cum <- create_plot("cumulative_CI", cumulative_label, "") +
ggplot2::labs(title = cumulative_label)
# Add invisible points to set the y-axis range for rolling mean facet
dummy_data <- data.frame(
ci_type_label = rolling_mean_label,
ci_value = c(0, 7),
stringsAsFactors = FALSE
)
dummy_data[[x_var]] <- range(plot_data_both[[x_var]], na.rm = TRUE)
dummy_data[["season"]] <- factor("dummy", levels = levels(plot_data_both[["season"]]))
combined <- gridExtra::arrangeGrob(g_abs, g_cum, ncol = 2, top = both_plot_title)
g_both <- g_both +
ggplot2::geom_point(
data = dummy_data,
ggplot2::aes(x = .data[[x_var]], y = .data[["ci_value"]]),
alpha = 0, size = 0
) # Invisible points to set scale
# Display the combined faceted plot
subchunkify(g_both, 2.8, 10)
subchunkify(combined, 2.8, 10)
}
}, error = function(e) {