diff --git a/python_app/weather_api_comparison.py b/python_app/weather_api_comparison.py new file mode 100644 index 0000000..d8cb238 --- /dev/null +++ b/python_app/weather_api_comparison.py @@ -0,0 +1,567 @@ +""" +weather_api_comparison.py +========================= +Compare daily precipitation from multiple free weather APIs across two locations: + - Arnhem, Netherlands (51.985°N, 5.899°E) — European climate + - Angata, Kenya ( 1.330°S, 34.738°E) — tropical / sugarcane context + +ARCHIVE providers (no API key required): + 1. Open-Meteo ERA5 — current SmartCane provider (0.25°, global) + 2. Open-Meteo ERA5-Land — higher resolution variant (0.10°, global) + 3. Open-Meteo CERRA — EU regional reanalysis (0.05°, EU only) + 4. NASA POWER — completely independent source (0.50°, global) + +FORECAST providers (no API key required): + 5. Open-Meteo Forecast — ensemble of NWP models (global) + 6. YR.no LocationForecast — Norwegian Met Institute (~10 days, global) + +FORECAST providers (API key required — set in CONFIG below, leave "" to skip): + 7. OpenWeatherMap — free tier, 1000 calls/day + 8. WeatherAPI.com — free tier + +OUTPUT: + Plots saved to: weather_comparison_plots/ + Summary stats printed to console. 
+ +Usage: + python weather_api_comparison.py +""" + +import os +import json +import time +import datetime +import requests +import pandas as pd +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt +import matplotlib.dates as mdates +import numpy as np +from pathlib import Path + +# ============================================================ +# CONFIG +# ============================================================ + +LOCATIONS = { + "Arnhem_NL": {"lat": 51.985, "lon": 5.899}, + "Angata_KE": {"lat": -1.330, "lon": 34.738}, +} + +# Archive: last 12 months +ARCHIVE_END = datetime.date.today() - datetime.timedelta(days=2) # ERA5 lags ~2 days +ARCHIVE_START = ARCHIVE_END - datetime.timedelta(days=365) + +# Forecast: today + 7 days +FORECAST_START = datetime.date.today() +FORECAST_END = FORECAST_START + datetime.timedelta(days=7) + +# Optional API keys — leave "" to skip that provider +OPENWEATHERMAP_KEY = "" # https://openweathermap.org/api +WEATHERAPI_KEY = "" # https://www.weatherapi.com/ + +OUTPUT_DIR = Path("weather_comparison_plots") +OUTPUT_DIR.mkdir(exist_ok=True) + +USER_AGENT = "SmartCane-WeatherComparison/1.0 (research; contact via github)" + +# ============================================================ +# ARCHIVE FETCHERS +# ============================================================ + +def fetch_openmeteo_archive(lat, lon, start, end, model="era5"): + """Open-Meteo ERA5 / ERA5-Land / CERRA archive. + ERA5 is the default (no models param needed). ERA5-Land and CERRA use lowercase names. 
+ """ + # ERA5 is the default model — adding models param with wrong casing causes 400 + model_suffix = "" if model == "era5" else f"&models={model}" + url = ( + f"https://archive-api.open-meteo.com/v1/archive" + f"?latitude={lat}&longitude={lon}" + f"&daily=precipitation_sum" + f"&start_date={start}&end_date={end}" + f"{model_suffix}" + f"&timezone=UTC" + ) + r = requests.get(url, timeout=30) + r.raise_for_status() + body = r.json() + df = pd.DataFrame({ + "date": pd.to_datetime(body["daily"]["time"]), + "rain_mm": body["daily"]["precipitation_sum"], + }) + df["rain_mm"] = pd.to_numeric(df["rain_mm"], errors="coerce").clip(lower=0).fillna(0) + # ERA5-Land sometimes returns values in meters (Open-Meteo API quirk). + # Auto-detect: if annual total < 50mm for any non-polar location, assume m → convert. + if df["rain_mm"].sum() < 50 and len(df) > 30: + df["rain_mm"] = df["rain_mm"] * 1000 + print(f" ⚠ Unit auto-converted m→mm (values were implausibly small)") + return df + + +def fetch_nasa_power(lat, lon, start, end): + """NASA POWER — daily PRECTOTCORR (precipitation corrected), 0.5° grid.""" + url = ( + "https://power.larc.nasa.gov/api/temporal/daily/point" + f"?parameters=PRECTOTCORR" + f"&community=AG" + f"&longitude={lon}&latitude={lat}" + f"&start={start.strftime('%Y%m%d')}&end={end.strftime('%Y%m%d')}" + f"&format=JSON" + ) + r = requests.get(url, timeout=60) + r.raise_for_status() + body = r.json() + raw = body["properties"]["parameter"]["PRECTOTCORR"] + df = pd.DataFrame([ + {"date": pd.to_datetime(k, format="%Y%m%d"), "rain_mm": max(v, 0)} + for k, v in raw.items() + if v != -999 # NASA POWER fill value + ]) + return df.sort_values("date").reset_index(drop=True) + + +# ============================================================ +# FORECAST FETCHERS +# ============================================================ + +def fetch_openmeteo_forecast(lat, lon, days=8): + """Open-Meteo NWP forecast — default best_match model.""" + end = datetime.date.today() + 
datetime.timedelta(days=days) + url = ( + f"https://api.open-meteo.com/v1/forecast" + f"?latitude={lat}&longitude={lon}" + f"&daily=precipitation_sum" + f"&forecast_days={days + 1}" + f"&timezone=UTC" + ) + r = requests.get(url, timeout=30) + r.raise_for_status() + body = r.json() + df = pd.DataFrame({ + "date": pd.to_datetime(body["daily"]["time"]), + "rain_mm": body["daily"]["precipitation_sum"], + }) + df["rain_mm"] = pd.to_numeric(df["rain_mm"], errors="coerce").fillna(0) + return df + + +def fetch_yr_forecast(lat, lon): + """YR.no LocationForecast 2.0 — hourly precip aggregated to daily.""" + url = f"https://api.met.no/weatherapi/locationforecast/2.0/compact?lat={lat}&lon={lon}" + headers = {"User-Agent": USER_AGENT} + r = requests.get(url, headers=headers, timeout=30) + r.raise_for_status() + body = r.json() + + records = [] + for entry in body["properties"]["timeseries"]: + ts = pd.to_datetime(entry["time"]) + data = entry["data"] + precip = 0.0 + if "next_1_hours" in data: + precip = data["next_1_hours"]["details"].get("precipitation_amount", 0.0) + elif "next_6_hours" in data: + precip = data["next_6_hours"]["details"].get("precipitation_amount", 0.0) / 6 + records.append({"datetime": ts, "precip_hour": precip}) + + hourly = pd.DataFrame(records) + hourly["date"] = hourly["datetime"].dt.date + daily = ( + hourly.groupby("date")["precip_hour"] + .sum() + .reset_index() + .rename(columns={"precip_hour": "rain_mm"}) + ) + daily["date"] = pd.to_datetime(daily["date"]) + return daily + + +def fetch_openweathermap_forecast(lat, lon, api_key): + """OpenWeatherMap One Call 3.0 — daily forecast (needs paid/free key).""" + url = ( + f"https://api.openweathermap.org/data/3.0/onecall" + f"?lat={lat}&lon={lon}" + f"&exclude=current,minutely,hourly,alerts" + f"&appid={api_key}&units=metric" + ) + r = requests.get(url, timeout=30) + r.raise_for_status() + body = r.json() + records = [] + for day in body.get("daily", []): + records.append({ + "date": 
pd.to_datetime(day["dt"], unit="s").normalize(), + "rain_mm": day.get("rain", 0.0), + }) + return pd.DataFrame(records) + + +def fetch_weatherapi_forecast(lat, lon, api_key, days=7): + """WeatherAPI.com free forecast (up to 3 days on free tier, 14 on paid).""" + url = ( + f"https://api.weatherapi.com/v1/forecast.json" + f"?key={api_key}&q={lat},{lon}&days={days}&aqi=no&alerts=no" + ) + r = requests.get(url, timeout=30) + r.raise_for_status() + body = r.json() + records = [] + for day in body["forecast"]["forecastday"]: + records.append({ + "date": pd.to_datetime(day["date"]), + "rain_mm": day["day"].get("totalprecip_mm", 0.0), + }) + return pd.DataFrame(records) + + +# ============================================================ +# STATS +# ============================================================ + +def compare_stats(df, ref_col, other_col): + """Compute MAE, RMSE, bias, Pearson r between two columns.""" + valid = df[[ref_col, other_col]].dropna() + if len(valid) < 5: + return {"n": len(valid), "MAE": None, "RMSE": None, "Bias": None, "r": None} + diff = valid[other_col] - valid[ref_col] + mae = diff.abs().mean() + rmse = (diff**2).mean()**0.5 + bias = diff.mean() + r = valid[ref_col].corr(valid[other_col]) + return {"n": len(valid), "MAE": round(mae,2), "RMSE": round(rmse,2), + "Bias": round(bias,2), "r": round(r,3)} + + +# ============================================================ +# PLOTTING +# ============================================================ + +def plot_archive(data_dict, location_name, start, end, output_dir): + """Line plot of all archive providers for one location.""" + fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True) + + colors = { + "ERA5 (Open-Meteo)": "#1f77b4", + "ERA5-Land (Open-Meteo)": "#ff7f0e", + "CERRA (Open-Meteo)": "#2ca02c", + "NASA POWER": "#d62728", + } + + # Top: daily raw + ax1 = axes[0] + for name, df in data_dict.items(): + if df is not None and len(df) > 0: + ax1.plot(df["date"], df["rain_mm"], label=name, + 
color=colors.get(name), linewidth=0.8, alpha=0.85) + ax1.set_ylabel("Precipitation (mm/day)") + ax1.set_title(f"{location_name} — Daily Precipitation (archive {start} → {end})") + ax1.legend(fontsize=8) + ax1.grid(True, alpha=0.3) + + # Bottom: 30-day rolling mean + ax2 = axes[1] + for name, df in data_dict.items(): + if df is not None and len(df) > 0: + rolled = df.set_index("date")["rain_mm"].rolling(30, min_periods=15).mean() + ax2.plot(rolled.index, rolled.values, label=name, + color=colors.get(name), linewidth=1.5) + ax2.set_ylabel("30-day rolling mean (mm/day)") + ax2.set_xlabel("Date") + ax2.legend(fontsize=8) + ax2.grid(True, alpha=0.3) + ax2.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y")) + fig.autofmt_xdate() + + plt.tight_layout() + path = output_dir / f"archive_{location_name}.png" + plt.savefig(path, dpi=150) + plt.close() + print(f" Saved: {path}") + + +def plot_forecast(data_dict, location_name, output_dir): + """Bar chart comparing 7-day forecasts across providers.""" + fig, ax = plt.subplots(figsize=(12, 5)) + + providers = [(name, df) for name, df in data_dict.items() if df is not None and len(df) > 0] + n = len(providers) + if n == 0: + plt.close() + return + + # Collect all forecast dates + all_dates = sorted(set( + d for _, df in providers + for d in df["date"].dt.date.tolist() + )) + x = np.arange(len(all_dates)) + width = 0.8 / n + + cmap = matplotlib.colormaps["tab10"].resampled(n) + for i, (name, df) in enumerate(providers): + vals = [] + date_map = dict(zip(df["date"].dt.date, df["rain_mm"])) + for d in all_dates: + vals.append(date_map.get(d, 0.0)) + ax.bar(x + i * width, vals, width, label=name, color=cmap(i), alpha=0.85) + + ax.set_xticks(x + width * (n - 1) / 2) + ax.set_xticklabels([d.strftime("%d %b") for d in all_dates], rotation=45, ha="right") + ax.set_ylabel("Precipitation (mm/day)") + ax.set_title(f"{location_name} — 7-Day Forecast Comparison") + ax.legend(fontsize=9) + ax.grid(True, axis="y", alpha=0.3) + 
plt.tight_layout() + + path = output_dir / f"forecast_{location_name}.png" + plt.savefig(path, dpi=150) + plt.close() + print(f" Saved: {path}") + + +def plot_vs_era5(data_dict, location_name, output_dir): + """Each provider vs ERA5 reference: scatter + regression line. + + How to read: + - Each panel shows one provider (y-axis) vs ERA5 (x-axis) for daily precip. + - Points on the red diagonal = perfect agreement. + - Points above = provider wetter than ERA5 on that day. + - r = Pearson correlation (1 = perfect). MAE = mean absolute error in mm/day. + - Bias = provider minus ERA5 on average (positive = provider wetter). + """ + ref_name = "ERA5 (Open-Meteo)" + ref_df = data_dict.get(ref_name) + if ref_df is None: + return + + others = [(n, df) for n, df in data_dict.items() + if n != ref_name and df is not None and len(df) > 0] + if not others: + return + + n = len(others) + fig, axes = plt.subplots(1, n, figsize=(5 * n, 5), squeeze=False) + + colors = { + "ERA5-Land (Open-Meteo)": "#ff7f0e", + "CERRA (Open-Meteo)": "#2ca02c", + "NASA POWER": "#d62728", + } + + for i, (name, df) in enumerate(others): + ax = axes[0][i] + merged = ref_df.merge(df, on="date", suffixes=("_ref", "_cmp")) + valid = merged[["rain_mm_ref", "rain_mm_cmp"]].dropna() + + color = colors.get(name, "steelblue") + ax.scatter(valid["rain_mm_ref"], valid["rain_mm_cmp"], + s=4, alpha=0.35, color=color) + + # Perfect-agreement diagonal + lim = max(valid.max().max(), 1) * 1.05 + ax.plot([0, lim], [0, lim], "r--", linewidth=1, label="Perfect agreement") + + # Linear regression line + if len(valid) > 5: + coeffs = np.polyfit(valid["rain_mm_ref"], valid["rain_mm_cmp"], 1) + x_fit = np.linspace(0, lim, 100) + ax.plot(x_fit, np.polyval(coeffs, x_fit), "k-", linewidth=1, + alpha=0.6, label=f"Regression (slope={coeffs[0]:.2f})") + + stats = compare_stats(merged, "rain_mm_ref", "rain_mm_cmp") + ax.set_xlim(0, lim); ax.set_ylim(0, lim) + ax.set_xlabel("ERA5 (Open-Meteo) mm/day", fontsize=9) + 
ax.set_ylabel(f"{name} mm/day", fontsize=9) + ax.set_title( + f"{name}\nr={stats['r']} MAE={stats['MAE']} mm Bias={stats['Bias']:+.2f} mm", + fontsize=9 + ) + ax.legend(fontsize=7) + ax.grid(True, alpha=0.3) + + fig.suptitle( + f"{location_name} — Daily Precip vs ERA5 Reference\n" + "Red dashed = perfect agreement. Points above line = provider wetter than ERA5.", + fontsize=10 + ) + plt.tight_layout() + path = output_dir / f"vs_era5_{location_name}.png" + plt.savefig(path, dpi=150) + plt.close() + print(f" Saved: {path}") + + +def plot_cumulative(data_dict, location_name, output_dir): + """Cumulative annual precipitation — most relevant for crop/irrigation context.""" + fig, ax = plt.subplots(figsize=(14, 5)) + + colors = { + "ERA5 (Open-Meteo)": "#1f77b4", + "ERA5-Land (Open-Meteo)": "#ff7f0e", + "CERRA (Open-Meteo)": "#2ca02c", + "NASA POWER": "#d62728", + } + + for name, df in data_dict.items(): + if df is None or len(df) == 0: + continue + s = df.set_index("date")["rain_mm"].sort_index().cumsum() + total = s.iloc[-1] + ax.plot(s.index, s.values, label=f"{name} (total: {total:.0f} mm)", + color=colors.get(name), linewidth=1.8) + + ax.set_ylabel("Cumulative precipitation (mm)") + ax.set_xlabel("Date") + ax.set_title( + f"{location_name} — Cumulative Annual Precipitation by Provider\n" + "Divergence = sources disagree on total seasonal rainfall" + ) + ax.legend(fontsize=9) + ax.grid(True, alpha=0.3) + ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y")) + fig.autofmt_xdate() + plt.tight_layout() + + path = output_dir / f"cumulative_{location_name}.png" + plt.savefig(path, dpi=150) + plt.close() + print(f" Saved: {path}") + + +# ============================================================ +# MAIN +# ============================================================ + +def run_location(loc_name, lat, lon): + print(f"\n{'='*60}") + print(f" {loc_name} ({lat}°, {lon}°)") + print(f"{'='*60}") + + # ---- ARCHIVE ---- + print("\n[Archive]") + archive_data = {} + + 
print(" Fetching Open-Meteo ERA5...") + try: + archive_data["ERA5 (Open-Meteo)"] = fetch_openmeteo_archive( + lat, lon, ARCHIVE_START, ARCHIVE_END, model="era5" + ) + print(f" → {len(archive_data['ERA5 (Open-Meteo)'])} days") + except Exception as e: + print(f" ✗ ERA5 failed: {e}") + archive_data["ERA5 (Open-Meteo)"] = None + + time.sleep(0.5) + print(" Fetching Open-Meteo ERA5-Land...") + try: + archive_data["ERA5-Land (Open-Meteo)"] = fetch_openmeteo_archive( + lat, lon, ARCHIVE_START, ARCHIVE_END, model="era5_land" + ) + print(f" → {len(archive_data['ERA5-Land (Open-Meteo)'])} days") + except Exception as e: + print(f" ✗ ERA5-Land failed: {e}") + archive_data["ERA5-Land (Open-Meteo)"] = None + + time.sleep(0.5) + # CERRA only covers Europe (roughly 20°W–45°E, 30°N–80°N) + if -20 <= lon <= 45 and 30 <= lat <= 80: + print(" Fetching Open-Meteo CERRA (EU only)...") + try: + archive_data["CERRA (Open-Meteo)"] = fetch_openmeteo_archive( + lat, lon, ARCHIVE_START, ARCHIVE_END, model="cerra" + ) + print(f" → {len(archive_data['CERRA (Open-Meteo)'])} days") + except Exception as e: + print(f" ✗ CERRA failed: {e}") + archive_data["CERRA (Open-Meteo)"] = None + else: + print(" Skipping CERRA (outside EU coverage)") + archive_data["CERRA (Open-Meteo)"] = None + + time.sleep(0.5) + print(" Fetching NASA POWER...") + try: + archive_data["NASA POWER"] = fetch_nasa_power(lat, lon, ARCHIVE_START, ARCHIVE_END) + print(f" → {len(archive_data['NASA POWER'])} days") + except Exception as e: + print(f" ✗ NASA POWER failed: {e}") + archive_data["NASA POWER"] = None + + # Stats vs ERA5 reference + print("\n Stats vs ERA5 (Open-Meteo) reference:") + ref_df = archive_data.get("ERA5 (Open-Meteo)") + for name, df in archive_data.items(): + if name == "ERA5 (Open-Meteo)" or df is None: + continue + if ref_df is None: + continue + merged = ref_df.merge(df, on="date", suffixes=("_ref", "_cmp")) + stats = compare_stats(merged, "rain_mm_ref", "rain_mm_cmp") + print(f" {name:30s} 
n={stats['n']:4d} MAE={stats['MAE']} " + f"RMSE={stats['RMSE']} Bias={stats['Bias']} r={stats['r']}") + + plot_archive(archive_data, loc_name, ARCHIVE_START, ARCHIVE_END, OUTPUT_DIR) + plot_cumulative(archive_data, loc_name, OUTPUT_DIR) + plot_vs_era5(archive_data, loc_name, OUTPUT_DIR) + + # ---- FORECAST ---- + print("\n[Forecast]") + forecast_data = {} + + print(" Fetching Open-Meteo forecast...") + try: + forecast_data["Open-Meteo Forecast"] = fetch_openmeteo_forecast(lat, lon) + print(f" → {len(forecast_data['Open-Meteo Forecast'])} days") + except Exception as e: + print(f" ✗ Open-Meteo forecast failed: {e}") + forecast_data["Open-Meteo Forecast"] = None + + time.sleep(0.5) + print(" Fetching YR.no LocationForecast...") + try: + forecast_data["YR.no"] = fetch_yr_forecast(lat, lon) + print(f" → {len(forecast_data['YR.no'])} days") + except Exception as e: + print(f" ✗ YR.no failed: {e}") + forecast_data["YR.no"] = None + + if OPENWEATHERMAP_KEY: + time.sleep(0.5) + print(" Fetching OpenWeatherMap forecast...") + try: + forecast_data["OpenWeatherMap"] = fetch_openweathermap_forecast( + lat, lon, OPENWEATHERMAP_KEY + ) + print(f" → {len(forecast_data['OpenWeatherMap'])} days") + except Exception as e: + print(f" ✗ OpenWeatherMap failed: {e}") + forecast_data["OpenWeatherMap"] = None + + if WEATHERAPI_KEY: + time.sleep(0.5) + print(" Fetching WeatherAPI.com forecast...") + try: + forecast_data["WeatherAPI.com"] = fetch_weatherapi_forecast( + lat, lon, WEATHERAPI_KEY + ) + print(f" → {len(forecast_data['WeatherAPI.com'])} days") + except Exception as e: + print(f" ✗ WeatherAPI.com failed: {e}") + forecast_data["WeatherAPI.com"] = None + + plot_forecast(forecast_data, loc_name, OUTPUT_DIR) + + +if __name__ == "__main__": + print(f"Weather API Comparison — {datetime.date.today()}") + print(f"Archive: {ARCHIVE_START} → {ARCHIVE_END}") + print(f"Forecast: {FORECAST_START} → {FORECAST_END}") + print(f"Output: {OUTPUT_DIR.resolve()}") + + for loc_name, coords in 
LOCATIONS.items(): + run_location(loc_name, coords["lat"], coords["lon"]) + time.sleep(1) + + print(f"\nDone. Plots saved to: {OUTPUT_DIR.resolve()}") diff --git a/r_app/20_ci_extraction_per_field.R b/r_app/20_ci_extraction_per_field.R index 1a85ad0..d0289ee 100644 --- a/r_app/20_ci_extraction_per_field.R +++ b/r_app/20_ci_extraction_per_field.R @@ -128,23 +128,60 @@ main <- function() { } # Process each DATE (load merged TIFF once, extract all fields from it) - total_success <- 0 + total_processed_dates <- 0 + total_skipped_dates <- 0 + total_already_complete_dates <- 0 total_error <- 0 for (date_str in dates_filter) { - # Load the MERGED TIFF (farm-wide) ONCE for this date input_tif_merged <- file.path(setup$merged_tif_folder, sprintf("%s.tif", date_str)) + output_tifs <- file.path(setup$field_tiles_ci_dir, fields, sprintf("%s.tif", date_str)) + output_rds <- file.path(setup$daily_ci_vals_dir, fields, sprintf("%s.rds", date_str)) + names(output_tifs) <- fields + names(output_rds) <- fields + tif_exists <- file.exists(output_tifs) + rds_exists <- file.exists(output_rds) + fields_need_rds_only <- fields[tif_exists & !rds_exists] + fields_need_raster <- fields[!tif_exists] + + if (length(fields_need_rds_only) == 0 && length(fields_need_raster) == 0) { + total_already_complete_dates <- total_already_complete_dates + 1 + safe_log(sprintf(" %s: All field outputs already exist (skipping)", date_str)) + next + } + + fields_processed_this_date <- 0 + rds_only_processed <- 0 + raster_processed_this_date <- 0 + + if (length(fields_need_rds_only) > 0) { + for (field in fields_need_rds_only) { + tryCatch({ + extract_rds_from_ci_tiff(output_tifs[[field]], output_rds[[field]], field_boundaries_sf, field) + fields_processed_this_date <- fields_processed_this_date + 1 + rds_only_processed <- rds_only_processed + 1 + }, error = function(e) { + safe_log(sprintf(" Error regenerating RDS for field %s: %s", field, e$message), "WARNING") + }) + } + } + + if (length(fields_need_raster) 
== 0) { + total_processed_dates <- total_processed_dates + 1 + safe_log(sprintf(" %s: Regenerated %d RDS files from existing CI TIFFs", date_str, rds_only_processed)) + next + } if (!file.exists(input_tif_merged)) { safe_log(sprintf(" %s: merged_tif not found (skipping)", date_str)) - total_error <<- total_error + 1 + total_skipped_dates <- total_skipped_dates + 1 next } tryCatch({ - # Load 4-band TIFF ONCE + # Load the merged TIFF only when at least one field still needs a CI TIFF. raster_4band <- terra::rast(input_tif_merged) - safe_log(sprintf(" %s: Loaded merged TIFF, processing %d fields...", date_str, length(fields))) + safe_log(sprintf(" %s: Loaded merged TIFF, processing %d fields...", date_str, length(fields_need_raster))) # Calculate CI from 4-band ci_raster <- calc_ci_from_raster(raster_4band) @@ -154,44 +191,18 @@ main <- function() { five_band <- c(raster_4band, ci_raster) names(five_band) <- c("Red", "Green", "Blue", "NIR", "CI") - # Now process all fields from this single merged TIFF - fields_processed_this_date <- 0 - - for (field in fields) { - field_ci_path <- file.path(setup$field_tiles_ci_dir, field) - field_daily_vals_path <- file.path(setup$daily_ci_vals_dir, field) + # Now process only the fields that still need CI TIFF output for this date. 
+ for (field in fields_need_raster) { + output_tif_path <- output_tifs[[field]] + output_rds_path <- output_rds[[field]] - # Pre-create output directories - dir.create(field_ci_path, showWarnings = FALSE, recursive = TRUE) - dir.create(field_daily_vals_path, showWarnings = FALSE, recursive = TRUE) - - output_tif <- file.path(field_ci_path, sprintf("%s.tif", date_str)) - output_rds <- file.path(field_daily_vals_path, sprintf("%s.rds", date_str)) - - # MODE 3: Skip if both outputs already exist - if (file.exists(output_tif) && file.exists(output_rds)) { - next - } - - # MODE 2: Regeneration mode - RDS missing but CI TIFF exists - if (file.exists(output_tif) && !file.exists(output_rds)) { - tryCatch({ - extract_rds_from_ci_tiff(output_tif, output_rds, field_boundaries_sf, field) - fields_processed_this_date <- fields_processed_this_date + 1 - }, error = function(e) { - # Continue to next field - }) - next - } - - # MODE 1: Normal mode - crop 5-band TIFF to field boundary and save tryCatch({ # Crop 5-band TIFF to field boundary field_geom <- field_boundaries_sf %>% filter(field == !!field) five_band_cropped <- terra::crop(five_band, field_geom, mask = TRUE) # Save 5-band field TIFF - terra::writeRaster(five_band_cropped, output_tif, overwrite = TRUE) + terra::writeRaster(five_band_cropped, output_tif_path, overwrite = TRUE) # Extract CI statistics by sub_field (from cropped CI raster) ci_cropped <- five_band_cropped[[5]] # 5th band is CI @@ -199,10 +210,11 @@ main <- function() { # Save RDS if (!is.null(ci_stats) && nrow(ci_stats) > 0) { - saveRDS(ci_stats, output_rds) + saveRDS(ci_stats, output_rds_path) } fields_processed_this_date <- fields_processed_this_date + 1 + raster_processed_this_date <- raster_processed_this_date + 1 }, error = function(e) { # Error in individual field, continue to next @@ -210,24 +222,34 @@ main <- function() { }) } - # Increment success counter if at least one field succeeded if (fields_processed_this_date > 0) { - total_success <<- 
total_success + 1 - safe_log(sprintf(" %s: Processed %d fields", date_str, fields_processed_this_date)) + total_processed_dates <- total_processed_dates + 1 + safe_log(sprintf( + " %s: Processed %d fields (%d CI TIFFs created, %d RDS-only regenerated)", + date_str, + fields_processed_this_date, + raster_processed_this_date, + rds_only_processed + )) + } else { + total_error <- total_error + 1 + safe_log(sprintf(" %s: No field outputs were created; see warnings above", date_str), "ERROR") } }, error = function(e) { - total_error <<- total_error + 1 + total_error <- total_error + 1 safe_log(sprintf(" %s: Error loading or processing merged TIFF - %s", date_str, e$message), "ERROR") }) } # Summary safe_log(sprintf("\n=== Processing Complete ===")) - safe_log(sprintf("Successfully processed: %d", total_success)) + safe_log(sprintf("Dates processed: %d", total_processed_dates)) + safe_log(sprintf("Dates skipped (missing merged_tif): %d", total_skipped_dates)) + safe_log(sprintf("Dates already complete: %d", total_already_complete_dates)) safe_log(sprintf("Errors encountered: %d", total_error)) - if (total_success > 0) { + if (total_processed_dates > 0) { safe_log("Output files created in:") safe_log(sprintf(" TIFFs: %s", setup$field_tiles_ci_dir)) safe_log(sprintf(" RDS: %s", setup$daily_ci_vals_dir)) diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 6fe4f9b..9d512e6 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -56,7 +56,6 @@ suppressPackageStartupMessages({ # Visualization library(tmap) # For interactive maps (field boundary visualization) library(ggspatial) # For basemap tiles and spatial annotations (OSM basemap with ggplot2) - # Reporting library(knitr) # For R Markdown document generation (code execution and output) library(flextable) # For formatted tables in Word output (professional table styling) @@ -84,6 +83,16 
@@ tryCatch({ }) }) +tryCatch({ + source("90_rainfall_utils.R") +}, error = function(e) { + tryCatch({ + source(here::here("r_app", "90_rainfall_utils.R")) + }, error = function(e) { + message("Could not load 90_rainfall_utils.R - rain overlay disabled: ", e$message) + }) +}) + ``` ```{r initialize_project_config, message=FALSE, warning=FALSE, include=FALSE} @@ -1179,11 +1188,51 @@ tryCatch({ minus_2_ww <- get_week_year(as.Date(today) - lubridate::weeks(2)) minus_3_ww <- get_week_year(as.Date(today) - lubridate::weeks(3)) - message(paste("Processing", nrow(AllPivots_merged), "fields for weeks:", + message(paste("Processing", nrow(AllPivots_merged), "fields for weeks:", current_ww$week, minus_1_ww$week, minus_2_ww$week, minus_3_ww$week)) - + # load_per_field_mosaic() is defined in 90_report_utils.R (sourced above) + # --- Fetch rain for all fields (one API call per 0.25° tile) --- + all_rain_data <- NULL + if (exists("rain_fetch_for_fields") && !is.null(CI_quadrant) && nrow(CI_quadrant) > 0) { + tryCatch({ + # Compute field centroids from AllPivots0 (already-loaded sf object) + field_centroids <- AllPivots0 %>% + dplyr::group_by(field) %>% + dplyr::summarise(geometry = sf::st_union(geometry), .groups = "drop") %>% + sf::st_centroid() %>% + dplyr::mutate( + longitude = sf::st_coordinates(geometry)[, 1], + latitude = sf::st_coordinates(geometry)[, 2] + ) %>% + sf::st_drop_geometry() %>% + dplyr::select(field_id = field, latitude, longitude) + + # Determine current-season date range across all fields + rain_start <- CI_quadrant %>% + dplyr::group_by(field) %>% + dplyr::filter(season == max(season)) %>% + dplyr::summarise(s = min(Date, na.rm = TRUE), .groups = "drop") %>% + dplyr::pull(s) %>% + min(na.rm = TRUE) + rain_end <- as.Date(today) + + message(paste("Fetching rain data from", rain_start, "to", rain_end, + "for", nrow(field_centroids), "fields")) + + all_rain_data <- rain_fetch_for_fields( + centroids_df = field_centroids, + start_date = rain_start, + end_date = 
rain_end + ) + message(paste("Rain data fetched:", nrow(all_rain_data), "rows")) + }, error = function(e) { + message("Rain fetch failed - overlay disabled: ", e$message) + all_rain_data <<- NULL + }) + } + # Iterate through fields using purrr::walk purrr::walk(AllPivots_merged$field, function(field_name) { tryCatch({ @@ -1249,6 +1298,14 @@ tryCatch({ # Call cum_ci_plot for trend analysis if (!is.null(CI_quadrant)) { + field_rain <- if (!is.null(all_rain_data)) { + all_rain_data %>% + dplyr::filter(field_id == field_name) %>% + dplyr::select(date, rain_mm) + } else { + NULL + } + cum_ci_plot( pivotName = field_name, ci_quadrant_data = ci_quadrant_data, @@ -1259,7 +1316,8 @@ tryCatch({ show_benchmarks = TRUE, estate_name = project_dir, benchmark_percentiles = c(10, 50, 90), - benchmark_data = benchmarks + benchmark_data = benchmarks, + rain_data = field_rain ) #cat("\n") } diff --git a/r_app/90_rainfall_utils.R b/r_app/90_rainfall_utils.R new file mode 100644 index 0000000..ed14724 --- /dev/null +++ b/r_app/90_rainfall_utils.R @@ -0,0 +1,369 @@ +# 90_RAINFALL_UTILS.R +# ============================================================================ +# Rainfall data fetching and ggplot overlay utilities for SmartCane CI reports. +# +# Provider-agnostic design: to swap weather provider, implement a new +# rain_fetch_() function and update the dispatch in rain_fetch(). 
+# +# EXPORTS: +# - rain_snap_to_tile() Snap lat/lon to Open-Meteo native 0.25° grid +# - rain_fetch() Generic fetch wrapper (dispatches to provider) +# - rain_fetch_for_fields() Batch fetch with spatial tile deduplication +# - rain_join_to_ci() Join rain to latest-season CI data by Date +# - rain_add_to_plot() Overlay rain on single CI plot (abs or cum) +# - rain_add_to_faceted_plot() Overlay rain on faceted "both" CI plot +# ============================================================================ + +suppressPackageStartupMessages({ + library(dplyr) + library(tidyr) + library(httr2) + library(purrr) +}) + +# ============================================================================ +# PROVIDER LAYER — swap here to change data source +# ============================================================================ + +#' Fetch daily precipitation from Open-Meteo ERA5 archive +#' +#' ERA5 covers 1940-present at 0.25° (~28 km) resolution. +#' No API key required. +#' +#' @param latitude Numeric. WGS84 latitude. +#' @param longitude Numeric. WGS84 longitude. +#' @param start_date Date or "YYYY-MM-DD" string. +#' @param end_date Date or "YYYY-MM-DD" string. +#' @return data.frame with columns: date (Date), rain_mm (numeric) +rain_fetch_open_meteo_archive <- function(latitude, longitude, start_date, end_date) { + url <- paste0( + "https://archive-api.open-meteo.com/v1/archive", + "?latitude=", latitude, + "&longitude=", longitude, + "&daily=precipitation_sum", + "&start_date=", format(as.Date(start_date), "%Y-%m-%d"), + "&end_date=", format(as.Date(end_date), "%Y-%m-%d"), + "&timezone=auto" + ) + + resp <- tryCatch( + httr2::request(url) %>% httr2::req_timeout(30) %>% httr2::req_perform(), + error = function(e) stop(paste("Rain archive request failed:", e$message), call. = FALSE) + ) + + if (httr2::resp_status(resp) != 200) { + stop(paste("Rain archive API returned status", httr2::resp_status(resp)), call. 
= FALSE) + } + + body <- httr2::resp_body_json(resp) + + if (is.null(body$daily) || is.null(body$daily$time)) { + stop("Unexpected Open-Meteo archive response: missing daily/time.", call. = FALSE) + } + + data.frame( + date = as.Date(unlist(body$daily$time)), + rain_mm = as.numeric(unlist(body$daily$precipitation_sum)), + stringsAsFactors = FALSE + ) +} + +# ============================================================================ +# GENERIC FETCH WRAPPER +# ============================================================================ + +#' Fetch daily precipitation via the configured provider +#' +#' @param latitude Numeric. +#' @param longitude Numeric. +#' @param start_date Date or "YYYY-MM-DD". +#' @param end_date Date or "YYYY-MM-DD". +#' @param provider Character. Currently only "open_meteo_archive". +#' @return data.frame(date, rain_mm) +rain_fetch <- function(latitude, longitude, start_date, end_date, + provider = "open_meteo_archive") { + # --- ADD NEW PROVIDERS HERE --- + switch(provider, + open_meteo_archive = rain_fetch_open_meteo_archive( + latitude, longitude, start_date, end_date + ), + stop(paste("Unknown rain provider:", provider), call. = FALSE) + ) +} + +# ============================================================================ +# SPATIAL HELPERS +# ============================================================================ + +#' Snap a coordinate to the nearest ERA5 tile centre +#' +#' Open-Meteo ERA5 has 0.25° native resolution (~28 km). Snapping coordinates +#' to this grid avoids redundant API calls for nearby fields. +#' +#' @param latitude Numeric. +#' @param longitude Numeric. +#' @param resolution Numeric. Grid resolution in degrees (default 0.25). 
#' @return Named list: tile_lat, tile_lon, tile_id
rain_snap_to_tile <- function(latitude, longitude, resolution = 0.25) {
  # Round both coordinates to the nearest multiple of `resolution` in one
  # vectorised step.
  snapped <- round(c(latitude, longitude) / resolution) * resolution
  list(
    tile_lat = snapped[[1]],
    tile_lon = snapped[[2]],
    tile_id  = paste(snapped[[1]], snapped[[2]], sep = "_")
  )
}

# ============================================================================
# BATCH FETCH WITH TILE DEDUPLICATION
# ============================================================================

#' Fetch rain for multiple fields, calling the API once per unique 0.25° tile
#'
#' Fields that fall on the same ERA5 tile share one API call. This handles
#' estates spread over 100+ km (e.g. Angata) without over-calling the API.
#'
#' @param centroids_df data.frame with columns: field_id, latitude, longitude
#' @param start_date Date or "YYYY-MM-DD". Start of fetch window.
#' @param end_date Date or "YYYY-MM-DD". End of fetch window.
#' @param provider Character. Passed to rain_fetch().
#' @return data.frame with columns: field_id, date, rain_mm
rain_fetch_for_fields <- function(centroids_df, start_date, end_date,
                                  provider = "open_meteo_archive") {
  required_cols <- c("field_id", "latitude", "longitude")
  missing_cols <- setdiff(required_cols, names(centroids_df))
  if (length(missing_cols) > 0) {
    stop(paste("centroids_df must have:", paste(required_cols, collapse = ", ")), call. = FALSE)
  }

  # Assign every field to its ERA5 tile.
  with_tiles <- centroids_df %>%
    dplyr::mutate(
      tile_info = purrr::map2(latitude, longitude, rain_snap_to_tile),
      tile_lat  = purrr::map_dbl(tile_info, "tile_lat"),
      tile_lon  = purrr::map_dbl(tile_info, "tile_lon"),
      tile_id   = purrr::map_chr(tile_info, "tile_id")
    ) %>%
    dplyr::select(-tile_info)

  # One API call per distinct tile. A failed tile degrades to an empty frame
  # (logged as WARNING) so a single bad tile does not abort the whole batch.
  fetch_one_tile <- function(lat, lon) {
    tryCatch(
      rain_fetch(lat, lon, start_date, end_date, provider = provider),
      error = function(e) {
        safe_log(paste0("Rain fetch failed for tile ", lat, "_", lon, ": ", e$message), "WARNING")
        data.frame(date = as.Date(character(0)), rain_mm = numeric(0))
      }
    )
  }

  tile_rain <- with_tiles %>%
    dplyr::distinct(tile_id, tile_lat, tile_lon) %>%
    dplyr::mutate(rain_data = purrr::map2(tile_lat, tile_lon, fetch_one_tile))

  # Fan the tile-level rain back out to every field sharing that tile.
  with_tiles %>%
    dplyr::left_join(dplyr::select(tile_rain, tile_id, rain_data), by = "tile_id") %>%
    dplyr::select(field_id, rain_data) %>%
    tidyr::unnest(rain_data)
}

# ============================================================================
# PROCESSING
# ============================================================================

#' Join rain data to latest-season CI data by Date
#'
#' @param rain_df data.frame(date, rain_mm) for the field.
#' @param ci_season_data data.frame with at minimum: Date, DAH, week columns
#'   (latest season only, one row per date).
#' @return data.frame with Date, DAH, week, rain_mm, cum_rain_mm columns,
#'   ordered by Date. Only dates present in ci_season_data are returned.
rain_join_to_ci <- function(rain_df, ci_season_data) {
  # No rain available at all -> signal "no overlay" to the plot helpers.
  if (is.null(rain_df) || nrow(rain_df) == 0) {
    return(NULL)
  }

  # One row per observation date of the latest season, in chronological order.
  season_dates <- ci_season_data %>%
    dplyr::distinct(Date, DAH, week) %>%
    dplyr::arrange(Date)

  rain_by_date <- dplyr::rename(rain_df, Date = date)

  season_dates %>%
    dplyr::left_join(rain_by_date, by = "Date") %>%
    dplyr::mutate(
      # Dates without an archive value count as dry days, so the cumulative
      # sum below stays defined over the whole season.
      rain_mm = tidyr::replace_na(rain_mm, 0),
      cum_rain_mm = cumsum(rain_mm)
    )
}

# ============================================================================
# PLOT OVERLAY — single plot (absolute or cumulative)
# ============================================================================

#' Add a rainfall overlay to a single-panel CI ggplot
#'
#' For absolute CI (mean_rolling_10_days): adds daily precipitation bars
#' (steelblue, semi-transparent) scaled to the primary y-axis range, with a
#' secondary right y-axis labelled in mm/day.
#'
#' For cumulative CI: adds a cumulative precipitation line scaled to the
#' primary y-axis range, with a secondary right y-axis labelled in mm.
#'
#' The secondary axis is a linear transform of the primary axis (ggplot2
#' requirement). The rain values are scaled so the maximum rain never exceeds
#' y_max on screen. The right axis labels show the real mm values.
#'
#' @param g ggplot object.
#' @param rain_joined Result of rain_join_to_ci(). NULL adds no rain layer;
#'   only the primary y-axis limits are applied.
#' @param ci_type "mean_rolling_10_days" or "cumulative_CI".
#' @param x_var Character: "DAH", "week", or "Date".
#' @param y_max Numeric. Max value of the primary y-axis (used for scaling
#'   and for setting limits = c(0, y_max) on the primary axis).
#' @return Modified ggplot object.
rain_add_to_plot <- function(g, rain_joined, ci_type, x_var, y_max) {
  # Pin the primary axis to [0, y_max]; used by every exit path so the CI
  # panel keeps one predictable scale whether or not rain is drawn.
  fix_y <- function(p) p + ggplot2::scale_y_continuous(limits = c(0, y_max))

  if (is.null(rain_joined) || nrow(rain_joined) == 0) {
    return(fix_y(g))
  }

  if (ci_type == "mean_rolling_10_days") {
    # --- Daily precipitation bars ---
    max_rain <- max(rain_joined$rain_mm, na.rm = TRUE)
    # is.finite() also rejects the -Inf that max(..., na.rm = TRUE) yields
    # when every rain_mm is NA; an is.na() check alone misses that case.
    if (!is.finite(max_rain) || max_rain == 0) {
      return(fix_y(g))
    }

    # Scale so the tallest bar spans the full panel height.
    scale_factor <- y_max / max_rain

    bar_width <- switch(x_var,
      "DAH" = 1,
      "week" = 0.5,
      1 # Date axis: one-day bars
    )

    g +
      ggplot2::geom_col(
        data = rain_joined,
        ggplot2::aes(x = .data[[x_var]], y = rain_mm * scale_factor),
        fill = "steelblue",
        alpha = 0.65,
        width = bar_width,
        inherit.aes = FALSE
      ) +
      ggplot2::scale_y_continuous(
        limits = c(0, y_max),
        # Secondary axis undoes the scaling so its labels read in real mm/day.
        sec.axis = ggplot2::sec_axis(
          ~ . / scale_factor,
          name = tr_key("lbl_rain_mm_day", "Precipitation (mm/day)")
        )
      )

  } else if (ci_type == "cumulative_CI") {
    # --- Cumulative precipitation line ---
    max_cum_rain <- max(rain_joined$cum_rain_mm, na.rm = TRUE)
    if (!is.finite(max_cum_rain) || max_cum_rain == 0) {
      # FIX: this path previously returned g without the y-limit, so a dry
      # season got a different primary scale than every other path.
      return(fix_y(g))
    }

    # Confine the rain curve to the lower 30% of the panel so it does not
    # visually compete with the CI curves.
    scale_factor <- (y_max * 0.30) / max_cum_rain

    g +
      ggplot2::geom_line(
        data = rain_joined,
        ggplot2::aes(x = .data[[x_var]], y = cum_rain_mm * scale_factor),
        color = "steelblue",
        linewidth = 0.8,
        inherit.aes = FALSE
      ) +
      ggplot2::scale_y_continuous(
        limits = c(0, y_max),
        sec.axis = ggplot2::sec_axis(
          ~ . / scale_factor,
          name = tr_key("lbl_cum_rain_mm", "Cumulative Rain (mm)")
        )
      )

  } else {
    # Unknown ci_type: leave the plot untouched.
    g
  }
}

# ============================================================================
# PLOT OVERLAY — faceted "both" plot
# ============================================================================

#' Add rainfall overlays to a faceted (plot_type = "both") CI ggplot
#'
#' ggplot2 does not support per-facet secondary axes with free_y scales.
#' This function adds the rain layers to the correct facets by setting
#' ci_type_label to match the facet strip, but omits secondary axis labels
#' to avoid misleading scale information across facets.
#'
#' Rolling mean facet receives: daily precipitation bars.
#' Cumulative facet receives: a cumulative precipitation line.
#'
#' @param g ggplot object with facet_wrap(~ci_type_label).
#' @param rain_joined Result of rain_join_to_ci().
#' @param rolling_mean_label Character. Must match the strip label for the
#'   rolling mean facet (from tr_key).
#' @param cumulative_label Character. Must match the strip label for the
#'   cumulative CI facet (from tr_key).
#' @param x_var Character: "DAH", "week", or "Date".
#' @param y_max_abs Numeric. Primary y-axis max for rolling mean facet (typically 7).
#' @param y_max_cum Numeric. Primary y-axis max for cumulative CI facet.
#' @return Modified ggplot object.
rain_add_to_faceted_plot <- function(g, rain_joined, rolling_mean_label, cumulative_label,
                                     x_var, y_max_abs, y_max_cum) {
  # Nothing to overlay: return the plot unchanged.
  if (is.null(rain_joined) || nrow(rain_joined) == 0) return(g)

  # --- Bars in rolling mean facet ---
  max_rain <- max(rain_joined$rain_mm, na.rm = TRUE)
  if (!is.na(max_rain) && max_rain > 0) {
    # Scale so the tallest bar fills the rolling-mean facet height.
    scale_abs <- y_max_abs / max_rain
    bar_width <- switch(x_var, "DAH" = 1, "week" = 0.5, 1)

    # ci_type_label is set on the layer data so ggplot routes these bars
    # into the facet whose strip equals rolling_mean_label.
    rain_bars <- rain_joined %>%
      dplyr::mutate(
        ci_type_label = rolling_mean_label,
        y_scaled = rain_mm * scale_abs
      )

    g <- g +
      ggplot2::geom_col(
        data = rain_bars,
        ggplot2::aes(x = .data[[x_var]], y = y_scaled),
        fill = "steelblue",
        alpha = 0.65,
        width = bar_width,
        inherit.aes = FALSE
      )
  }

  # --- Area in cumulative facet ---
  max_cum_rain <- max(rain_joined$cum_rain_mm, na.rm = TRUE)
  if (!is.na(max_cum_rain) && max_cum_rain > 0) {
    # Keep the cumulative rain curve within 30% of the facet height.
    scale_cum <- (y_max_cum * 0.30) / max_cum_rain

    # Same routing trick: ci_type_label targets the cumulative facet.
    rain_area <- rain_joined %>%
      dplyr::mutate(
        ci_type_label = cumulative_label,
        y_scaled = cum_rain_mm * scale_cum
      )

    g <- g +
      ggplot2::geom_line(
        data = rain_area,
        ggplot2::aes(x = .data[[x_var]], y = y_scaled),
        color = "steelblue",
        linewidth = 0.8,
        inherit.aes = FALSE
      )
  }

  g
}
diff --git a/r_app/90_report_utils.R b/r_app/90_report_utils.R
index 1ce61c2..25d7d20 100644
--- a/r_app/90_report_utils.R
+++ b/r_app/90_report_utils.R
@@ -13,8 +13,15 @@
 #'
 subchunkify <- function(g, fig_height=7, fig_width=5) {
   g_deparsed <- paste0(deparse(
-    function() {g}
-  ), collapse = '')
+    function() {
+      if (inherits(g, c("gtable", "grob", "gTree"))) {
+        grid::grid.newpage()
+        grid::grid.draw(g)
+      } else {
+        print(g)
+      }
+    }
+  ), collapse = '\n')
   sub_chunk <- paste0("
 `","``{r sub_chunk_", floor(runif(1) * 10000), ", fig.height=", fig_height, ", fig.width=", fig_width, ", dpi=300, dev='png', out.width='100%', echo=FALSE}",
benchmark lines (default: FALSE) #' @param estate_name Name of the estate for benchmark calculation (required if show_benchmarks = TRUE) #' @param benchmark_percentiles Vector of percentiles for benchmarks (default: c(10, 50, 90)) +#' @param rain_data data.frame(date, rain_mm) for this field's current season, +#' as returned by rain_fetch_for_fields() / rain_join_to_ci(). NULL disables overlay. #' @return NULL (adds output directly to R Markdown document) #' -cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "absolute", facet_on = FALSE, x_unit = "days", colorblind_friendly = FALSE, show_benchmarks = FALSE, estate_name = NULL, benchmark_percentiles = c(10, 50, 90), benchmark_data = NULL) { +cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "absolute", facet_on = FALSE, x_unit = "days", colorblind_friendly = FALSE, show_benchmarks = FALSE, estate_name = NULL, benchmark_percentiles = c(10, 50, 90), benchmark_data = NULL, rain_data = NULL) { # Input validation if (missing(pivotName) || is.null(pivotName) || pivotName == "") { stop("pivotName is required") @@ -638,11 +647,25 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE)) } - # Add y-axis limits for absolute CI (10-day rolling mean) to fix scale at 0-7 - if (ci_type_filter == "mean_rolling_10_days") { - g <- g + ggplot2::ylim(0, 7) + # Y-axis limits + optional rain overlay (single-panel plots) + y_max <- if (ci_type_filter == "mean_rolling_10_days") { + 7 + } else { + max(plot_data$ci_value, na.rm = TRUE) * 1.05 } - + + if (!is.null(rain_data)) { + latest_ci_dates <- plot_data %>% + dplyr::filter(is_latest) %>% + dplyr::distinct(Date, DAH, week) + rain_joined <- rain_join_to_ci(rain_data, latest_ci_dates) + g <- rain_add_to_plot(g, rain_joined, ci_type_filter, x_var, y_max) + } else { + if (ci_type_filter == "mean_rolling_10_days") { + g <- g + 
ggplot2::ylim(0, y_max) + } + } + return(g) } @@ -654,174 +677,20 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " g <- create_plot("cumulative_CI", cumulative_label, "") subchunkify(g, 2.8, 10) } else if (plot_type == "both") { - # Create faceted plot with both CI types using pivot_longer approach - plot_data_both <- data_ci3 %>% - dplyr::filter(season %in% unique_seasons) %>% - dplyr::mutate( - ci_type_label = factor(case_when( - ci_type == "mean_rolling_10_days" ~ rolling_mean_label, - ci_type == "cumulative_CI" ~ cumulative_label, - TRUE ~ ci_type - ), levels = c(rolling_mean_label, cumulative_label)), - is_latest = season == latest_season # Flag for latest season - ) - - # Determine x-axis variable based on x_unit parameter - x_var <- if (x_unit == "days") { - if (facet_on) "Date" else "DAH" - } else { - "week" - } - - x_label <- switch(x_unit, - "days" = if (facet_on) tr_key("lbl_date", "Date") else tr_key("lbl_age_of_crop_days", "Age of Crop (Days)"), - "weeks" = tr_key("lbl_week_number", "Week Number")) - - # Choose color palette based on colorblind_friendly flag - color_scale <- if (colorblind_friendly) { - ggplot2::scale_color_brewer(type = "qual", palette = "Set2") - } else { - ggplot2::scale_color_discrete() - } - - # Calculate dynamic max values for breaks - max_dah_both <- max(plot_data_both$DAH, na.rm = TRUE) + 20 - max_week_both <- max(as.numeric(plot_data_both$week), na.rm = TRUE) + ceiling(20 / 7) - - # Pre-evaluate translated title here (not inside labs()) so {pivotName} resolves correctly + # Build each panel independently so each gets its own secondary rain y-axis. + # (facet_wrap + free_y does not support per-facet sec.axis in ggplot2.) 
both_plot_title <- tr_key("lbl_ci_analysis_title", "CI Analysis for Field {pivotName}") - # Create the faceted plot - g_both <- ggplot2::ggplot(data = plot_data_both) + - # Add benchmark lines first (behind season lines) - { - if (!is.null(benchmark_data)) { - # Clip benchmark to max DAH of plotted seasons + 10% buffer - max_dah_clip <- max(plot_data_both$DAH, na.rm = TRUE) * 1.1 - benchmark_subset <- benchmark_data %>% - dplyr::filter(DAH <= max_dah_clip) %>% - dplyr::mutate( - benchmark_x = if (x_var == "DAH") { - DAH - } else if (x_var == "week") { - DAH / 7 - } else { - DAH - }, - ci_type_label = factor(case_when( - ci_type == "value" ~ rolling_mean_label, - ci_type == "cumulative_CI" ~ cumulative_label, - TRUE ~ ci_type - ), levels = c(rolling_mean_label, cumulative_label)) - ) - ggplot2::geom_smooth( - data = benchmark_subset, - ggplot2::aes( - x = .data[["benchmark_x"]], - y = .data[["benchmark_value"]], - group = factor(.data[["percentile"]]) - ), - color = "gray70", linewidth = 0.5, se = FALSE, inherit.aes = FALSE, fullrange = FALSE - ) - } - } + - ggplot2::facet_wrap(~ci_type_label, scales = "free_y") + - # Plot older seasons with lighter lines - ggplot2::geom_line( - data = plot_data_both %>% dplyr::filter(!is_latest), - ggplot2::aes( - x = .data[[x_var]], - y = .data[["ci_value"]], - col = .data[["season"]], - group = .data[["season"]] - ), - linewidth = 0.7, alpha = 0.4 - ) + - # Plot latest season with thicker, more prominent line - ggplot2::geom_line( - data = plot_data_both %>% dplyr::filter(is_latest), - ggplot2::aes( - x = .data[[x_var]], - y = .data[["ci_value"]], - col = .data[["season"]], - group = .data[["season"]] - ), - linewidth = 1.5, alpha = 1 - ) + - ggplot2::labs(title = both_plot_title, - color = tr_key("lbl_season", "Season"), - y = tr_key("lbl_ci_value", "CI Value"), - x = x_label) + - color_scale + - { - if (x_var == "DAH") { - # Dynamic breaks based on actual data range - dah_breaks <- scales::pretty_breaks(n = 5)(c(0, 
max_dah_both)) - # Month breaks: in secondary axis scale (month numbers 0, 1, 2, ...) - n_months <- ceiling(max_dah_both / 30.44) - month_breaks <- seq(0, n_months, by = 1) - ggplot2::scale_x_continuous( - breaks = dah_breaks, - sec.axis = ggplot2::sec_axis( - ~ . / 30.44, - name = tr_key("lbl_age_in_months", "Age in Months"), - breaks = month_breaks, - labels = function(x) as.integer(x) # Show all month labels - ) - ) - } else if (x_var == "week") { - # Dynamic breaks based on actual data range - week_breaks <- scales::pretty_breaks(n = 5)(c(0, max_week_both)) - # Month breaks: in secondary axis scale (month numbers 0, 1, 2, ...) - n_months <- ceiling(max_week_both / 4.348) - month_breaks <- seq(0, n_months, by = 1) - ggplot2::scale_x_continuous( - breaks = week_breaks, - sec.axis = ggplot2::sec_axis( - ~ . / 4.348, - name = tr_key("lbl_age_in_months", "Age in Months"), - breaks = month_breaks, - labels = function(x) as.integer(x) # Show all month labels - ) - ) - } else if (x_var == "Date") { - ggplot2::scale_x_date(breaks = "1 month", date_labels = "%b-%Y", sec.axis = ggplot2::sec_axis(~ ., name = tr_key("lbl_age_in_months", "Age in Months"), breaks = scales::breaks_pretty())) - } - } + - ggplot2::theme_minimal() + - ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5), - axis.text.x.top = ggplot2::element_text(hjust = 0.5), - axis.title.x.top = ggplot2::element_text(size = 8), - legend.justification = c(1, 0), - legend.position = "inside", - legend.position.inside = c(1, 0), - legend.title = ggplot2::element_text(size = 8), - legend.text = ggplot2::element_text(size = 8)) + - ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE)) - - # For the rolling mean data, we want to set reasonable y-axis limits - # Since we're using free_y scales, each facet will have its own y-axis - # The rolling mean will automatically scale to its data range, - # but we can ensure it shows the 0-7 context by adding invisible points - - # Add invisible 
points to set the y-axis range for rolling mean facet - dummy_data <- data.frame( - ci_type_label = rolling_mean_label, - ci_value = c(0, 7), - stringsAsFactors = FALSE - ) - dummy_data[[x_var]] <- range(plot_data_both[[x_var]], na.rm = TRUE) - dummy_data[["season"]] <- factor("dummy", levels = levels(plot_data_both[["season"]])) - - g_both <- g_both + - ggplot2::geom_point( - data = dummy_data, - ggplot2::aes(x = .data[[x_var]], y = .data[["ci_value"]]), - alpha = 0, size = 0 - ) # Invisible points to set scale - - # Display the combined faceted plot - subchunkify(g_both, 2.8, 10) + g_abs <- create_plot("mean_rolling_10_days", rolling_mean_label, "") + + ggplot2::labs(title = rolling_mean_label) + + ggplot2::theme(legend.position = "none") + + g_cum <- create_plot("cumulative_CI", cumulative_label, "") + + ggplot2::labs(title = cumulative_label) + + combined <- gridExtra::arrangeGrob(g_abs, g_cum, ncol = 2, top = both_plot_title) + + subchunkify(combined, 2.8, 10) } }, error = function(e) {