SmartCane/r_app/90_rainfall_utils.R

377 lines
14 KiB
R

# 90_RAINFALL_UTILS.R
# ============================================================================
# Rainfall data fetching and ggplot overlay utilities for SmartCane CI reports.
#
# Provider-agnostic design: to swap weather provider, implement a new
# rain_fetch_<provider>() function and update the dispatch in rain_fetch().
#
# EXPORTS:
# - rain_snap_to_tile() Snap lat/lon to Open-Meteo native 0.25° grid
# - rain_fetch() Generic fetch wrapper (dispatches to provider)
# - rain_fetch_for_fields() Batch fetch with spatial tile deduplication
# - rain_join_to_ci() Join rain to latest-season CI data by Date
# - rain_add_to_plot() Overlay rain on single CI plot (abs or cum)
# - rain_add_to_faceted_plot() Overlay rain on faceted "both" CI plot
# ============================================================================
suppressPackageStartupMessages({
library(dplyr)
library(tidyr)
library(httr2)
library(purrr)
})
# ============================================================================
# PROVIDER LAYER — swap here to change data source
# ============================================================================
#' Fetch daily precipitation from Open-Meteo ERA5 archive
#'
#' ERA5 covers 1940-present at 0.25° (~28 km) resolution.
#' No API key required.
#'
#' @param latitude Numeric. WGS84 latitude.
#' @param longitude Numeric. WGS84 longitude.
#' @param start_date Date or "YYYY-MM-DD" string.
#' @param end_date Date or "YYYY-MM-DD" string.
#' @return data.frame with columns: date (Date), rain_mm (numeric)
rain_fetch_open_meteo_archive <- function(latitude, longitude, start_date, end_date) {
url <- paste0(
"https://archive-api.open-meteo.com/v1/archive",
"?latitude=", latitude,
"&longitude=", longitude,
"&daily=precipitation_sum",
"&start_date=", format(as.Date(start_date), "%Y-%m-%d"),
"&end_date=", format(as.Date(end_date), "%Y-%m-%d"),
"&timezone=auto"
)
resp <- tryCatch(
httr2::request(url) %>% httr2::req_timeout(30) %>% httr2::req_perform(),
error = function(e) stop(paste("Rain archive request failed:", e$message), call. = FALSE)
)
if (httr2::resp_status(resp) != 200) {
stop(paste("Rain archive API returned status", httr2::resp_status(resp)), call. = FALSE)
}
body <- httr2::resp_body_json(resp)
if (is.null(body$daily) || is.null(body$daily$time)) {
stop("Unexpected Open-Meteo archive response: missing daily/time.", call. = FALSE)
}
data.frame(
date = as.Date(unlist(body$daily$time)),
rain_mm = as.numeric(unlist(body$daily$precipitation_sum)),
stringsAsFactors = FALSE
)
}
# ============================================================================
# GENERIC FETCH WRAPPER
# ============================================================================
#' Fetch daily precipitation via the configured provider
#'
#' @param latitude Numeric.
#' @param longitude Numeric.
#' @param start_date Date or "YYYY-MM-DD".
#' @param end_date Date or "YYYY-MM-DD".
#' @param provider Character. Currently only "open_meteo_archive".
#' @return data.frame(date, rain_mm)
rain_fetch <- function(latitude, longitude, start_date, end_date,
provider = "open_meteo_archive") {
# --- ADD NEW PROVIDERS HERE ---
switch(provider,
open_meteo_archive = rain_fetch_open_meteo_archive(
latitude, longitude, start_date, end_date
),
stop(paste("Unknown rain provider:", provider), call. = FALSE)
)
}
# ============================================================================
# SPATIAL HELPERS
# ============================================================================
#' Snap a coordinate to the nearest ERA5 tile centre
#'
#' Open-Meteo ERA5 has 0.25° native resolution (~28 km). Snapping coordinates
#' to this grid avoids redundant API calls for nearby fields.
#'
#' @param latitude Numeric.
#' @param longitude Numeric.
#' @param resolution Numeric. Grid resolution in degrees (default 0.25).
#' @return Named list: tile_lat, tile_lon, tile_id
rain_snap_to_tile <- function(latitude, longitude, resolution = 0.25) {
tile_lat <- round(latitude / resolution) * resolution
tile_lon <- round(longitude / resolution) * resolution
list(
tile_lat = tile_lat,
tile_lon = tile_lon,
tile_id = paste0(tile_lat, "_", tile_lon)
)
}
# ============================================================================
# BATCH FETCH WITH TILE DEDUPLICATION
# ============================================================================
#' Fetch rain for multiple fields, calling the API once per unique 0.25° tile
#'
#' Fields that fall on the same ERA5 tile share one API call. This handles
#' estates spread over 100+ km (e.g. Angata) without over-calling the API.
#'
#' @param centroids_df data.frame with columns: field_id, latitude, longitude
#' @param start_date Date or "YYYY-MM-DD". Start of fetch window.
#' @param end_date Date or "YYYY-MM-DD". End of fetch window.
#' @param provider Character. Passed to rain_fetch().
#' @return data.frame with columns: field_id, date, rain_mm
rain_fetch_for_fields <- function(centroids_df, start_date, end_date,
provider = "open_meteo_archive") {
required_cols <- c("field_id", "latitude", "longitude")
if (!all(required_cols %in% names(centroids_df))) {
stop(paste("centroids_df must have:", paste(required_cols, collapse = ", ")), call. = FALSE)
}
# Snap each field to its tile
centroids_tiled <- centroids_df %>%
dplyr::mutate(
tile_info = purrr::map2(latitude, longitude, rain_snap_to_tile),
tile_lat = purrr::map_dbl(tile_info, "tile_lat"),
tile_lon = purrr::map_dbl(tile_info, "tile_lon"),
tile_id = purrr::map_chr(tile_info, "tile_id")
) %>%
dplyr::select(-tile_info)
# Fetch once per unique tile
unique_tiles <- centroids_tiled %>%
dplyr::distinct(tile_id, tile_lat, tile_lon)
tile_rain <- unique_tiles %>%
dplyr::mutate(rain_data = purrr::pmap(
list(tile_lat, tile_lon),
function(lat, lon) {
tryCatch(
rain_fetch(lat, lon, start_date, end_date, provider = provider),
error = function(e) {
safe_log(paste0("Rain fetch failed for tile ", lat, "_", lon, ": ", e$message), "WARNING")
data.frame(date = as.Date(character(0)), rain_mm = numeric(0))
}
)
}
))
# Join tile rain back to fields and unnest
centroids_tiled %>%
dplyr::left_join(tile_rain %>% dplyr::select(tile_id, rain_data), by = "tile_id") %>%
dplyr::select(field_id, rain_data) %>%
tidyr::unnest(rain_data)
}
# ============================================================================
# PROCESSING
# ============================================================================
#' Join rain data to latest-season CI data by Date
#'
#' @param rain_df data.frame(date, rain_mm) for the field.
#' @param ci_season_data data.frame with at minimum: Date, DAH, week columns
#' (latest season only, one row per date).
#' @return data.frame with Date, DAH, week, rain_mm, cum_rain_mm columns,
#' ordered by Date. Only dates present in ci_season_data are returned.
rain_join_to_ci <- function(rain_df, ci_season_data) {
if (is.null(rain_df) || nrow(rain_df) == 0) return(NULL)
ci_dates <- ci_season_data %>%
dplyr::distinct(Date, DAH, week) %>%
dplyr::arrange(Date)
joined <- ci_dates %>%
dplyr::left_join(
rain_df %>% dplyr::rename(Date = date),
by = "Date"
) %>%
dplyr::mutate(
rain_mm = tidyr::replace_na(rain_mm, 0),
cum_rain_mm = cumsum(rain_mm)
)
joined
}
# ============================================================================
# PLOT OVERLAY — single plot (absolute or cumulative)
# ============================================================================
#' Add a rainfall overlay to a single-panel CI ggplot
#'
#' For absolute CI (mean_rolling_10_days): adds daily precipitation bars
#' (steelblue, semi-transparent) scaled to the primary y-axis range, with a
#' secondary right y-axis labelled in mm/day.
#'
#' For cumulative CI: adds a cumulative precipitation filled area scaled to the
#' primary y-axis range, with a secondary right y-axis labelled in mm.
#'
#' The secondary axis is a linear transform of the primary axis (ggplot2
#' requirement). The rain values are scaled so the maximum rain never exceeds
#' y_max on screen. The right axis labels show the real mm values.
#'
#' @param g ggplot object.
#' @param rain_joined Result of rain_join_to_ci(). NULL returns g unchanged.
#' @param ci_type "mean_rolling_10_days" or "cumulative_CI".
#' @param x_var Character: "DAH", "week", or "Date".
#' @param y_max Numeric. Max value of the primary y-axis (used for scaling
#' and for setting limits = c(0, y_max) on the primary axis).
#' @return Modified ggplot object.
rain_add_to_plot <- function(g, rain_joined, ci_type, x_var, y_max) {
if (is.null(rain_joined) || nrow(rain_joined) == 0) {
# Still enforce y limits even without rain
return(g + ggplot2::scale_y_continuous(limits = c(0, y_max)))
}
if (ci_type == "mean_rolling_10_days") {
# --- Daily precipitation bars ---
max_rain <- max(rain_joined$rain_mm, na.rm = TRUE)
if (is.na(max_rain) || max_rain == 0) {
return(g + ggplot2::scale_y_continuous(limits = c(0, y_max)))
}
scale_factor <- y_max / max_rain
bar_width <- switch(x_var,
"DAH" = 1,
"week" = 0.5,
1 # Date: 1 day width handled by ggplot
)
g +
ggplot2::geom_col(
data = rain_joined,
ggplot2::aes(x = .data[[x_var]], y = rain_mm * scale_factor),
fill = "steelblue",
alpha = 0.65,
width = bar_width,
inherit.aes = FALSE
) +
ggplot2::scale_y_continuous(
limits = c(0, y_max),
sec.axis = ggplot2::sec_axis(
~ . / scale_factor,
name = tr_key("lbl_rain_mm_day", "Precipitation (mm/day)")
)
)
} else if (ci_type == "cumulative_CI") {
# --- Cumulative precipitation area ---
max_cum_rain <- max(rain_joined$cum_rain_mm, na.rm = TRUE)
if (is.na(max_cum_rain) || max_cum_rain == 0) {
return(g)
}
scale_factor <- (y_max * 0.30) / max_cum_rain
g +
ggplot2::geom_area(
data = rain_joined,
ggplot2::aes(x = .data[[x_var]], y = cum_rain_mm * scale_factor),
fill = "steelblue",
alpha = 0.15,
inherit.aes = FALSE
) +
ggplot2::geom_line(
data = rain_joined,
ggplot2::aes(x = .data[[x_var]], y = cum_rain_mm * scale_factor),
color = "steelblue",
linewidth = 0.8,
inherit.aes = FALSE
) +
ggplot2::scale_y_continuous(
limits = c(0, y_max),
sec.axis = ggplot2::sec_axis(
~ . / scale_factor,
name = tr_key("lbl_cum_rain_mm", "Cumulative Rain (mm)")
)
)
} else {
g
}
}
# ============================================================================
# PLOT OVERLAY — faceted "both" plot
# ============================================================================
#' Add rainfall overlays to a faceted (plot_type = "both") CI ggplot
#'
#' ggplot2 does not support per-facet secondary axes with free_y scales.
#' This function adds the rain layers to the correct facets by setting
#' ci_type_label to match the facet strip, but omits secondary axis labels
#' to avoid misleading scale information across facets.
#'
#' Rolling mean facet receives: daily precipitation bars.
#' Cumulative facet receives: cumulative precipitation filled area.
#'
#' @param g ggplot object with facet_wrap(~ci_type_label).
#' @param rain_joined Result of rain_join_to_ci().
#' @param rolling_mean_label Character. Must match the strip label for the
#' rolling mean facet (from tr_key).
#' @param cumulative_label Character. Must match the strip label for the
#' cumulative CI facet (from tr_key).
#' @param x_var Character: "DAH", "week", or "Date".
#' @param y_max_abs Numeric. Primary y-axis max for rolling mean facet (typically 7).
#' @param y_max_cum Numeric. Primary y-axis max for cumulative CI facet.
#' @return Modified ggplot object.
rain_add_to_faceted_plot <- function(g, rain_joined, rolling_mean_label, cumulative_label,
x_var, y_max_abs, y_max_cum) {
if (is.null(rain_joined) || nrow(rain_joined) == 0) return(g)
# --- Bars in rolling mean facet ---
max_rain <- max(rain_joined$rain_mm, na.rm = TRUE)
if (!is.na(max_rain) && max_rain > 0) {
scale_abs <- y_max_abs / max_rain
bar_width <- switch(x_var, "DAH" = 1, "week" = 0.5, 1)
rain_bars <- rain_joined %>%
dplyr::mutate(
ci_type_label = rolling_mean_label,
y_scaled = rain_mm * scale_abs
)
g <- g +
ggplot2::geom_col(
data = rain_bars,
ggplot2::aes(x = .data[[x_var]], y = y_scaled),
fill = "steelblue",
alpha = 0.65,
width = bar_width,
inherit.aes = FALSE
)
}
# --- Area in cumulative facet ---
max_cum_rain <- max(rain_joined$cum_rain_mm, na.rm = TRUE)
if (!is.na(max_cum_rain) && max_cum_rain > 0) {
scale_cum <- (y_max_cum * 0.30) / max_cum_rain
rain_area <- rain_joined %>%
dplyr::mutate(
ci_type_label = cumulative_label,
y_scaled = cum_rain_mm * scale_cum
)
g <- g +
ggplot2::geom_line(
data = rain_area,
ggplot2::aes(x = .data[[x_var]], y = y_scaled),
color = "steelblue",
linewidth = 0.8,
inherit.aes = FALSE
)
}
g
}