- Implemented `weather_api_comparison.py` to compare daily precipitation from multiple weather APIs for Arnhem, Netherlands and Angata, Kenya. - Integrated fetching functions for various weather data sources including Open-Meteo, NASA POWER, OpenWeatherMap, and WeatherAPI.com. - Added plotting functions to visualize archive and forecast data, including cumulative precipitation and comparison against ERA5 reference. - Created `90_rainfall_utils.R` for R to fetch rainfall data and overlay it on CI plots, supporting multiple providers with a generic fetch wrapper. - Included spatial helpers for efficient API calls based on unique geographical tiles.
370 lines
14 KiB
R
370 lines
14 KiB
R
# 90_RAINFALL_UTILS.R
|
|
# ============================================================================
|
|
# Rainfall data fetching and ggplot overlay utilities for SmartCane CI reports.
|
|
#
|
|
# Provider-agnostic design: to swap weather provider, implement a new
|
|
# rain_fetch_<provider>() function and update the dispatch in rain_fetch().
|
|
#
|
|
# EXPORTS:
|
|
# - rain_snap_to_tile() Snap lat/lon to Open-Meteo native 0.25° grid
|
|
# - rain_fetch() Generic fetch wrapper (dispatches to provider)
|
|
# - rain_fetch_for_fields() Batch fetch with spatial tile deduplication
|
|
# - rain_join_to_ci() Join rain to latest-season CI data by Date
|
|
# - rain_add_to_plot() Overlay rain on single CI plot (abs or cum)
|
|
# - rain_add_to_faceted_plot() Overlay rain on faceted "both" CI plot
|
|
# ============================================================================
|
|
|
|
suppressPackageStartupMessages({
|
|
library(dplyr)
|
|
library(tidyr)
|
|
library(httr2)
|
|
library(purrr)
|
|
})
|
|
|
|
# ============================================================================
|
|
# PROVIDER LAYER — swap here to change data source
|
|
# ============================================================================
|
|
|
|
#' Fetch daily precipitation from Open-Meteo ERA5 archive
|
|
#'
|
|
#' ERA5 covers 1940-present at 0.25° (~28 km) resolution.
|
|
#' No API key required.
|
|
#'
|
|
#' @param latitude Numeric. WGS84 latitude.
|
|
#' @param longitude Numeric. WGS84 longitude.
|
|
#' @param start_date Date or "YYYY-MM-DD" string.
|
|
#' @param end_date Date or "YYYY-MM-DD" string.
|
|
#' @return data.frame with columns: date (Date), rain_mm (numeric)
|
|
rain_fetch_open_meteo_archive <- function(latitude, longitude, start_date, end_date) {
|
|
url <- paste0(
|
|
"https://archive-api.open-meteo.com/v1/archive",
|
|
"?latitude=", latitude,
|
|
"&longitude=", longitude,
|
|
"&daily=precipitation_sum",
|
|
"&start_date=", format(as.Date(start_date), "%Y-%m-%d"),
|
|
"&end_date=", format(as.Date(end_date), "%Y-%m-%d"),
|
|
"&timezone=auto"
|
|
)
|
|
|
|
resp <- tryCatch(
|
|
httr2::request(url) %>% httr2::req_timeout(30) %>% httr2::req_perform(),
|
|
error = function(e) stop(paste("Rain archive request failed:", e$message), call. = FALSE)
|
|
)
|
|
|
|
if (httr2::resp_status(resp) != 200) {
|
|
stop(paste("Rain archive API returned status", httr2::resp_status(resp)), call. = FALSE)
|
|
}
|
|
|
|
body <- httr2::resp_body_json(resp)
|
|
|
|
if (is.null(body$daily) || is.null(body$daily$time)) {
|
|
stop("Unexpected Open-Meteo archive response: missing daily/time.", call. = FALSE)
|
|
}
|
|
|
|
data.frame(
|
|
date = as.Date(unlist(body$daily$time)),
|
|
rain_mm = as.numeric(unlist(body$daily$precipitation_sum)),
|
|
stringsAsFactors = FALSE
|
|
)
|
|
}
|
|
|
|
# ============================================================================
|
|
# GENERIC FETCH WRAPPER
|
|
# ============================================================================
|
|
|
|
#' Fetch daily precipitation via the configured provider
|
|
#'
|
|
#' @param latitude Numeric.
|
|
#' @param longitude Numeric.
|
|
#' @param start_date Date or "YYYY-MM-DD".
|
|
#' @param end_date Date or "YYYY-MM-DD".
|
|
#' @param provider Character. Currently only "open_meteo_archive".
|
|
#' @return data.frame(date, rain_mm)
|
|
rain_fetch <- function(latitude, longitude, start_date, end_date,
|
|
provider = "open_meteo_archive") {
|
|
# --- ADD NEW PROVIDERS HERE ---
|
|
switch(provider,
|
|
open_meteo_archive = rain_fetch_open_meteo_archive(
|
|
latitude, longitude, start_date, end_date
|
|
),
|
|
stop(paste("Unknown rain provider:", provider), call. = FALSE)
|
|
)
|
|
}
|
|
|
|
# ============================================================================
|
|
# SPATIAL HELPERS
|
|
# ============================================================================
|
|
|
|
#' Snap a coordinate to the nearest ERA5 tile centre
|
|
#'
|
|
#' Open-Meteo ERA5 has 0.25° native resolution (~28 km). Snapping coordinates
|
|
#' to this grid avoids redundant API calls for nearby fields.
|
|
#'
|
|
#' @param latitude Numeric.
|
|
#' @param longitude Numeric.
|
|
#' @param resolution Numeric. Grid resolution in degrees (default 0.25).
|
|
#' @return Named list: tile_lat, tile_lon, tile_id
|
|
rain_snap_to_tile <- function(latitude, longitude, resolution = 0.25) {
|
|
tile_lat <- round(latitude / resolution) * resolution
|
|
tile_lon <- round(longitude / resolution) * resolution
|
|
list(
|
|
tile_lat = tile_lat,
|
|
tile_lon = tile_lon,
|
|
tile_id = paste0(tile_lat, "_", tile_lon)
|
|
)
|
|
}
|
|
|
|
# ============================================================================
|
|
# BATCH FETCH WITH TILE DEDUPLICATION
|
|
# ============================================================================
|
|
|
|
#' Fetch rain for multiple fields, calling the API once per unique 0.25° tile
|
|
#'
|
|
#' Fields that fall on the same ERA5 tile share one API call. This handles
|
|
#' estates spread over 100+ km (e.g. Angata) without over-calling the API.
|
|
#'
|
|
#' @param centroids_df data.frame with columns: field_id, latitude, longitude
|
|
#' @param start_date Date or "YYYY-MM-DD". Start of fetch window.
|
|
#' @param end_date Date or "YYYY-MM-DD". End of fetch window.
|
|
#' @param provider Character. Passed to rain_fetch().
|
|
#' @return data.frame with columns: field_id, date, rain_mm
|
|
rain_fetch_for_fields <- function(centroids_df, start_date, end_date,
|
|
provider = "open_meteo_archive") {
|
|
required_cols <- c("field_id", "latitude", "longitude")
|
|
if (!all(required_cols %in% names(centroids_df))) {
|
|
stop(paste("centroids_df must have:", paste(required_cols, collapse = ", ")), call. = FALSE)
|
|
}
|
|
|
|
# Snap each field to its tile
|
|
centroids_tiled <- centroids_df %>%
|
|
dplyr::mutate(
|
|
tile_info = purrr::map2(latitude, longitude, rain_snap_to_tile),
|
|
tile_lat = purrr::map_dbl(tile_info, "tile_lat"),
|
|
tile_lon = purrr::map_dbl(tile_info, "tile_lon"),
|
|
tile_id = purrr::map_chr(tile_info, "tile_id")
|
|
) %>%
|
|
dplyr::select(-tile_info)
|
|
|
|
# Fetch once per unique tile
|
|
unique_tiles <- centroids_tiled %>%
|
|
dplyr::distinct(tile_id, tile_lat, tile_lon)
|
|
|
|
tile_rain <- unique_tiles %>%
|
|
dplyr::mutate(rain_data = purrr::pmap(
|
|
list(tile_lat, tile_lon),
|
|
function(lat, lon) {
|
|
tryCatch(
|
|
rain_fetch(lat, lon, start_date, end_date, provider = provider),
|
|
error = function(e) {
|
|
safe_log(paste0("Rain fetch failed for tile ", lat, "_", lon, ": ", e$message), "WARNING")
|
|
data.frame(date = as.Date(character(0)), rain_mm = numeric(0))
|
|
}
|
|
)
|
|
}
|
|
))
|
|
|
|
# Join tile rain back to fields and unnest
|
|
centroids_tiled %>%
|
|
dplyr::left_join(tile_rain %>% dplyr::select(tile_id, rain_data), by = "tile_id") %>%
|
|
dplyr::select(field_id, rain_data) %>%
|
|
tidyr::unnest(rain_data)
|
|
}
|
|
|
|
# ============================================================================
|
|
# PROCESSING
|
|
# ============================================================================
|
|
|
|
#' Join rain data to latest-season CI data by Date
|
|
#'
|
|
#' @param rain_df data.frame(date, rain_mm) for the field.
|
|
#' @param ci_season_data data.frame with at minimum: Date, DAH, week columns
|
|
#' (latest season only, one row per date).
|
|
#' @return data.frame with Date, DAH, week, rain_mm, cum_rain_mm columns,
|
|
#' ordered by Date. Only dates present in ci_season_data are returned.
|
|
rain_join_to_ci <- function(rain_df, ci_season_data) {
|
|
if (is.null(rain_df) || nrow(rain_df) == 0) return(NULL)
|
|
|
|
ci_dates <- ci_season_data %>%
|
|
dplyr::distinct(Date, DAH, week) %>%
|
|
dplyr::arrange(Date)
|
|
|
|
joined <- ci_dates %>%
|
|
dplyr::left_join(
|
|
rain_df %>% dplyr::rename(Date = date),
|
|
by = "Date"
|
|
) %>%
|
|
dplyr::mutate(
|
|
rain_mm = tidyr::replace_na(rain_mm, 0),
|
|
cum_rain_mm = cumsum(rain_mm)
|
|
)
|
|
|
|
joined
|
|
}
|
|
|
|
# ============================================================================
|
|
# PLOT OVERLAY — single plot (absolute or cumulative)
|
|
# ============================================================================
|
|
|
|
#' Add a rainfall overlay to a single-panel CI ggplot
|
|
#'
|
|
#' For absolute CI (mean_rolling_10_days): adds daily precipitation bars
|
|
#' (steelblue, semi-transparent) scaled to the primary y-axis range, with a
|
|
#' secondary right y-axis labelled in mm/day.
|
|
#'
|
|
#' For cumulative CI: adds a cumulative precipitation filled area scaled to the
|
|
#' primary y-axis range, with a secondary right y-axis labelled in mm.
|
|
#'
|
|
#' The secondary axis is a linear transform of the primary axis (ggplot2
|
|
#' requirement). The rain values are scaled so the maximum rain never exceeds
|
|
#' y_max on screen. The right axis labels show the real mm values.
|
|
#'
|
|
#' @param g ggplot object.
|
|
#' @param rain_joined Result of rain_join_to_ci(). NULL returns g unchanged.
|
|
#' @param ci_type "mean_rolling_10_days" or "cumulative_CI".
|
|
#' @param x_var Character: "DAH", "week", or "Date".
|
|
#' @param y_max Numeric. Max value of the primary y-axis (used for scaling
|
|
#' and for setting limits = c(0, y_max) on the primary axis).
|
|
#' @return Modified ggplot object.
|
|
rain_add_to_plot <- function(g, rain_joined, ci_type, x_var, y_max) {
|
|
if (is.null(rain_joined) || nrow(rain_joined) == 0) {
|
|
# Still enforce y limits even without rain
|
|
return(g + ggplot2::scale_y_continuous(limits = c(0, y_max)))
|
|
}
|
|
|
|
if (ci_type == "mean_rolling_10_days") {
|
|
# --- Daily precipitation bars ---
|
|
max_rain <- max(rain_joined$rain_mm, na.rm = TRUE)
|
|
if (is.na(max_rain) || max_rain == 0) {
|
|
return(g + ggplot2::scale_y_continuous(limits = c(0, y_max)))
|
|
}
|
|
|
|
scale_factor <- y_max / max_rain
|
|
|
|
bar_width <- switch(x_var,
|
|
"DAH" = 1,
|
|
"week" = 0.5,
|
|
1 # Date: 1 day width handled by ggplot
|
|
)
|
|
|
|
g +
|
|
ggplot2::geom_col(
|
|
data = rain_joined,
|
|
ggplot2::aes(x = .data[[x_var]], y = rain_mm * scale_factor),
|
|
fill = "steelblue",
|
|
alpha = 0.65,
|
|
width = bar_width,
|
|
inherit.aes = FALSE
|
|
) +
|
|
ggplot2::scale_y_continuous(
|
|
limits = c(0, y_max),
|
|
sec.axis = ggplot2::sec_axis(
|
|
~ . / scale_factor,
|
|
name = tr_key("lbl_rain_mm_day", "Precipitation (mm/day)")
|
|
)
|
|
)
|
|
|
|
} else if (ci_type == "cumulative_CI") {
|
|
# --- Cumulative precipitation area ---
|
|
max_cum_rain <- max(rain_joined$cum_rain_mm, na.rm = TRUE)
|
|
if (is.na(max_cum_rain) || max_cum_rain == 0) {
|
|
return(g)
|
|
}
|
|
|
|
scale_factor <- (y_max * 0.30) / max_cum_rain
|
|
|
|
g +
|
|
ggplot2::geom_line(
|
|
data = rain_joined,
|
|
ggplot2::aes(x = .data[[x_var]], y = cum_rain_mm * scale_factor),
|
|
color = "steelblue",
|
|
linewidth = 0.8,
|
|
inherit.aes = FALSE
|
|
) +
|
|
ggplot2::scale_y_continuous(
|
|
limits = c(0, y_max),
|
|
sec.axis = ggplot2::sec_axis(
|
|
~ . / scale_factor,
|
|
name = tr_key("lbl_cum_rain_mm", "Cumulative Rain (mm)")
|
|
)
|
|
)
|
|
|
|
} else {
|
|
g
|
|
}
|
|
}
|
|
|
|
# ============================================================================
|
|
# PLOT OVERLAY — faceted "both" plot
|
|
# ============================================================================
|
|
|
|
#' Add rainfall overlays to a faceted (plot_type = "both") CI ggplot
|
|
#'
|
|
#' ggplot2 does not support per-facet secondary axes with free_y scales.
|
|
#' This function adds the rain layers to the correct facets by setting
|
|
#' ci_type_label to match the facet strip, but omits secondary axis labels
|
|
#' to avoid misleading scale information across facets.
|
|
#'
|
|
#' Rolling mean facet receives: daily precipitation bars.
|
|
#' Cumulative facet receives: cumulative precipitation filled area.
|
|
#'
|
|
#' @param g ggplot object with facet_wrap(~ci_type_label).
|
|
#' @param rain_joined Result of rain_join_to_ci().
|
|
#' @param rolling_mean_label Character. Must match the strip label for the
|
|
#' rolling mean facet (from tr_key).
|
|
#' @param cumulative_label Character. Must match the strip label for the
|
|
#' cumulative CI facet (from tr_key).
|
|
#' @param x_var Character: "DAH", "week", or "Date".
|
|
#' @param y_max_abs Numeric. Primary y-axis max for rolling mean facet (typically 7).
|
|
#' @param y_max_cum Numeric. Primary y-axis max for cumulative CI facet.
|
|
#' @return Modified ggplot object.
|
|
rain_add_to_faceted_plot <- function(g, rain_joined, rolling_mean_label, cumulative_label,
|
|
x_var, y_max_abs, y_max_cum) {
|
|
if (is.null(rain_joined) || nrow(rain_joined) == 0) return(g)
|
|
|
|
# --- Bars in rolling mean facet ---
|
|
max_rain <- max(rain_joined$rain_mm, na.rm = TRUE)
|
|
if (!is.na(max_rain) && max_rain > 0) {
|
|
scale_abs <- y_max_abs / max_rain
|
|
bar_width <- switch(x_var, "DAH" = 1, "week" = 0.5, 1)
|
|
|
|
rain_bars <- rain_joined %>%
|
|
dplyr::mutate(
|
|
ci_type_label = rolling_mean_label,
|
|
y_scaled = rain_mm * scale_abs
|
|
)
|
|
|
|
g <- g +
|
|
ggplot2::geom_col(
|
|
data = rain_bars,
|
|
ggplot2::aes(x = .data[[x_var]], y = y_scaled),
|
|
fill = "steelblue",
|
|
alpha = 0.65,
|
|
width = bar_width,
|
|
inherit.aes = FALSE
|
|
)
|
|
}
|
|
|
|
# --- Area in cumulative facet ---
|
|
max_cum_rain <- max(rain_joined$cum_rain_mm, na.rm = TRUE)
|
|
if (!is.na(max_cum_rain) && max_cum_rain > 0) {
|
|
scale_cum <- (y_max_cum * 0.30) / max_cum_rain
|
|
|
|
rain_area <- rain_joined %>%
|
|
dplyr::mutate(
|
|
ci_type_label = cumulative_label,
|
|
y_scaled = cum_rain_mm * scale_cum
|
|
)
|
|
|
|
g <- g +
|
|
ggplot2::geom_line(
|
|
data = rain_area,
|
|
ggplot2::aes(x = .data[[x_var]], y = y_scaled),
|
|
color = "steelblue",
|
|
linewidth = 0.8,
|
|
inherit.aes = FALSE
|
|
)
|
|
}
|
|
|
|
g
|
|
}
|