Refactor parallel processing in growth model utilities and streamline field alert generation in report utilities

This commit is contained in:
Timon 2026-03-10 14:06:45 +01:00
parent 74aa881ec3
commit 8924fd1273
4 changed files with 410 additions and 395 deletions

View file

@ -71,54 +71,43 @@ load_combined_ci_data <- function(daily_vals_dir, harvesting_data = NULL) {
safe_log(sprintf("Filtered to %d files within harvest season date range", length(all_daily_files)))
}
# Set up parallel future plan (Windows PSOCK multisession; Mac/Linux can use forking)
# Automatically detect available cores and limit to reasonable number
n_cores <- min(parallel::detectCores() - 1, 8) # Use max 8 cores (diminishing returns after)
future::plan(strategy = future::multisession, workers = n_cores)
safe_log(sprintf("Using %d parallel workers for file I/O", n_cores))
# Adaptive core count: scale with file count to avoid parallel overhead on small projects
n_files <- length(all_daily_files)
n_cores_io <- if (n_files < 200) {
1
} else if (n_files < 600) {
2
} else if (n_files < 1500) {
min(parallel::detectCores() - 1, 4)
} else {
min(parallel::detectCores() - 1, 8)
}
safe_log(sprintf("Using %d parallel workers for file I/O (%d files)", n_cores_io, n_files))
# Parallel file reading: future_map_dfr processes each file in parallel
# Returns combined dataframe directly (no need to rbind)
combined_long <- furrr::future_map_dfr(
all_daily_files,
.progress = TRUE,
.options = furrr::furrr_options(seed = TRUE),
function(file) {
# Extract date from filename: {YYYY-MM-DD}.rds
filename <- basename(file)
date_str <- tools::file_path_sans_ext(filename)
# Read one daily RDS file named {YYYY-MM-DD}.rds and return its contents with
# a Date column attached. Any unparseable filename, failed read, or empty
# payload yields an empty data.frame so callers can row-bind results safely.
read_one_file <- function(file) {
  stem <- tools::file_path_sans_ext(basename(file))
  # Filename stem must be exactly an ISO date, e.g. "2024-01-15"
  if (nchar(stem) != 10 || !grepl("^\\d{4}-\\d{2}-\\d{2}$", stem)) {
    return(data.frame())
  }
  file_date <- as.Date(stem, format = "%Y-%m-%d")
  if (is.na(file_date)) {
    return(data.frame())
  }
  tryCatch({
    payload <- readRDS(file)
    if (is.null(payload) || nrow(payload) == 0) {
      return(data.frame())
    }
    dplyr::mutate(payload, Date = file_date)
  }, error = function(e) data.frame())
}
# Parse date
if (nchar(date_str) == 10 && grepl("^\\d{4}-\\d{2}-\\d{2}$", date_str)) {
parsed_date <- as.Date(date_str, format = "%Y-%m-%d")
} else {
return(data.frame()) # Return empty dataframe if parse fails
}
if (is.na(parsed_date)) {
return(data.frame())
}
# Read RDS file
tryCatch({
rds_data <- readRDS(file)
if (is.null(rds_data) || nrow(rds_data) == 0) {
return(data.frame())
}
# Add date column to the data
rds_data %>%
dplyr::mutate(Date = parsed_date)
}, error = function(e) {
return(data.frame()) # Return empty dataframe on error
})
}
)
# Return to sequential processing to avoid nested parallelism
future::plan(future::sequential)
if (n_cores_io > 1) {
future::plan(strategy = future::multisession, workers = n_cores_io)
combined_long <- furrr::future_map_dfr(
all_daily_files, read_one_file,
.progress = TRUE,
.options = furrr::furrr_options(seed = TRUE)
)
future::plan(future::sequential)
} else {
combined_long <- purrr::map_dfr(all_daily_files, read_one_file)
}
if (nrow(combined_long) == 0) {
safe_log("Warning: No valid CI data loaded from daily files", "WARNING")
@ -245,6 +234,33 @@ generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
total_fields <- 0
successful_fields <- 0
# Pre-compute total valid fields across all years to decide core count once
total_valid_fields <- sum(sapply(years, function(yr) {
sfs <- harvesting_data %>%
dplyr::filter(year == yr, !is.na(season_start)) %>%
dplyr::pull(sub_field)
sum(sfs %in% unique(ci_data$sub_field))
}))
# Adaptive core count: scale with field count, avoid parallel overhead for small projects
n_cores_interp <- if (total_valid_fields <= 1) {
1
} else if (total_valid_fields <= 10) {
2
} else if (total_valid_fields <= 50) {
min(parallel::detectCores() - 1, 4)
} else {
min(parallel::detectCores() - 1, 8)
}
safe_log(sprintf("Interpolating %d fields across %d year(s) using %d worker(s)",
total_valid_fields, length(years), n_cores_interp))
# Set up parallel plan once before the year loop (avoid per-year startup cost)
if (n_cores_interp > 1) {
future::plan(strategy = future::multisession, workers = n_cores_interp)
}
# Process each year
result <- purrr::map_df(years, function(yr) {
# Get the fields harvested in this year with valid season start dates
@ -252,43 +268,40 @@ generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
dplyr::filter(year == yr, !is.na(season_start)) %>%
dplyr::pull(sub_field)
if (length(sub_fields) == 0) {
return(data.frame())
}
if (length(sub_fields) == 0) return(data.frame())
# Filter sub_fields to only include those with value data in ci_data
valid_sub_fields <- sub_fields %>%
purrr::keep(~ any(ci_data$sub_field == .x))
if (length(valid_sub_fields) == 0) {
return(data.frame())
}
if (length(valid_sub_fields) == 0) return(data.frame())
total_fields <<- total_fields + length(valid_sub_fields)
safe_log(sprintf("Year %d: Processing %d fields in parallel", yr, length(valid_sub_fields)))
safe_log(sprintf("Year %d: Processing %d fields", yr, length(valid_sub_fields)))
# Set up parallel future plan for field interpolation
# Allocate 1 core per ~100 fields (with minimum 2 cores)
n_cores <- max(2, min(parallel::detectCores() - 1, ceiling(length(valid_sub_fields) / 100)))
future::plan(strategy = future::multisession, workers = n_cores)
# PARALLELIZE: Process all fields in parallel (each extracts & interpolates independently)
result_list <- furrr::future_map(
valid_sub_fields,
.progress = TRUE,
.options = furrr::furrr_options(seed = TRUE),
function(field) {
# Call with verbose=FALSE to suppress warnings during parallel iteration
# Process fields — parallel if workers > 1, otherwise plain map (no overhead)
if (n_cores_interp > 1) {
result_list <- furrr::future_map(
valid_sub_fields,
.progress = TRUE,
.options = furrr::furrr_options(seed = TRUE),
function(field) {
extract_CI_data(field,
harvesting_data = harvesting_data,
field_CI_data = ci_data,
season = yr,
verbose = FALSE)
}
)
} else {
result_list <- purrr::map(valid_sub_fields, function(field) {
extract_CI_data(field,
harvesting_data = harvesting_data,
field_CI_data = ci_data,
season = yr,
verbose = FALSE)
}
)
# Return to sequential processing
future::plan(future::sequential)
verbose = TRUE)
})
}
# Process results and tracking
for (i in seq_along(result_list)) {
@ -307,14 +320,15 @@ generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
}
# Combine all results for this year
result_list <- result_list[sapply(result_list, nrow) > 0] # Keep only non-empty
if (length(result_list) > 0) {
purrr::list_rbind(result_list)
} else {
data.frame()
}
result_list <- result_list[sapply(result_list, nrow) > 0]
if (length(result_list) > 0) purrr::list_rbind(result_list) else data.frame()
})
# Tear down parallel plan once after all years are processed
if (n_cores_interp > 1) {
future::plan(future::sequential)
}
# Print summary
safe_log(sprintf("\n=== Interpolation Summary ==="))
safe_log(sprintf("Successfully interpolated: %d/%d fields", successful_fields, total_fields))

View file

@ -498,59 +498,7 @@ tryCatch({
localisation <<- NULL
})
# tr_key() is defined in 90_report_utils.R (sourced above)
# ============================================================================
# SHARED TREND MAPPING HELPER
# ============================================================================
# Canonical function for converting trend text to arrows/formatted text
# Normalizes all legacy and current trend category names to standardized output
# Used by: combined_kpi_table, field_details_table, and compact_field_display chunks
# Convert trend category strings to arrow symbols, optionally with a
# translated text label ("Label (arrow)"). Vectorised over text_vec.
# Returns NA for missing/blank inputs; "—" (or the raw lowercased text when
# include_text = TRUE) for unrecognised categories.
map_trend_to_arrow <- function(text_vec, include_text = FALSE) {
# Normalize: convert to character and lowercase
text_lower <- tolower(as.character(text_vec))
# Apply mapping to each element
# NOTE(review): sapply() returns list() for zero-length input; vapply(...,
# character(1)) would pin the return type — confirm callers never pass empty.
sapply(text_lower, function(text) {
# Handle NA and empty values
if (is.na(text) || text == "" || nchar(trimws(text)) == 0) {
return(NA_character_)
}
# Determine category and build output with translated labels
# Using word-boundary anchored patterns (perl=TRUE) to avoid substring mis-matches
# Branch order matters: "strong growth" is tested before the generic growth
# pattern, and the lookbehind (?<!no\s) keeps "no growth" out of the growth
# branch so it falls through to the "stable" category below.
if (grepl("\\bstrong growth\\b", text, perl = TRUE)) {
arrow <- "↑↑"
trans_key <- "Strong growth"
} else if (grepl("\\b(?:slight|weak) growth\\b|(?<!no\\s)\\bgrowth\\b|\\bincreasing\\b", text, perl = TRUE)) {
arrow <- "↑"
trans_key <- "Slight growth"
} else if (grepl("\\bstable\\b|\\bno growth\\b", text, perl = TRUE)) {
arrow <- "→"
trans_key <- "Stable"
} else if (grepl("\\b(?:weak|slight|moderate) decline\\b", text, perl = TRUE)) {
arrow <- "↓"
trans_key <- "Slight decline"
} else if (grepl("\\bstrong decline\\b|\\bsevere\\b", text, perl = TRUE)) {
arrow <- "↓↓"
trans_key <- "Strong decline"
} else {
# Fallback: return "—" (em-dash) for arrow-only mode, raw text for text mode
# This signals an unmatched trend value that should be logged
return(if (include_text) as.character(text) else "—")
}
# Get translated label using tr_key()
label <- tr_key(trans_key)
# Return formatted output based on include_text flag
if (include_text) {
paste0(label, " (", arrow, ")")
} else {
arrow
}
}, USE.NAMES = FALSE)
}
# tr_key() and map_trend_to_arrow() are defined in 90_report_utils.R (sourced above)
```
<!-- Dynamic cover page -->
@ -768,144 +716,12 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table
`r tr_key("field_alerts")`
```{r field_alerts_table, echo=FALSE, results='asis'}
# Generate alerts for all fields
# Build a Field/Alert data.frame from the legacy (space-named) field-details
# schema. One row per triggered alert per field; fields with no alerts are
# omitted. Returns NULL when input is empty or required columns are missing,
# and a 0-row data.frame when no field triggers any alert.
generate_field_alerts <- function(field_details_table) {
if (is.null(field_details_table) || nrow(field_details_table) == 0) {
return(NULL) # Return NULL to signal no data
}
# Check for required columns
required_cols <- c("Field", "Growth Uniformity", "Yield Forecast (t/ha)",
"Gap Score", "Decline Risk", "Patchiness Risk", "Mean CI", "CV Value", "Moran's I")
missing_cols <- setdiff(required_cols, colnames(field_details_table))
if (length(missing_cols) > 0) {
message("Field details missing required columns: ", paste(missing_cols, collapse = ", "))
return(NULL) # Return NULL if required columns are missing
}
alerts_list <- list()
# Get unique fields
unique_fields <- unique(field_details_table$Field)
for (field_name in unique_fields) {
field_data <- field_details_table %>% filter(Field == field_name)
# Aggregate data for the field (one summary row per field)
field_summary <- field_data %>%
summarise(
uniformity_levels = paste(unique(`Growth Uniformity`), collapse = "/"),
avg_yield_forecast = mean(`Yield Forecast (t/ha)`, na.rm = TRUE),
# NOTE(review): max() with na.rm over an all-NA column warns and yields
# -Inf, which then silently fails the "> 20" gap check — confirm intended.
max_gap_score = max(`Gap Score`, na.rm = TRUE),
# NOTE(review): any(x == "...") returns NA (not FALSE) when x contains
# NAs and no match; case_when then skips the branch, so NA-laden risk
# columns can fall through to "Unknown". Consider any(..., na.rm = TRUE).
highest_decline_risk = case_when(
any(`Decline Risk` == "Very-high") ~ "Very-high",
any(`Decline Risk` == "High") ~ "High",
any(`Decline Risk` == "Moderate") ~ "Moderate",
any(`Decline Risk` == "Low") ~ "Low",
TRUE ~ "Unknown"
),
highest_patchiness_risk = case_when(
any(`Patchiness Risk` == "High") ~ "High",
any(`Patchiness Risk` == "Medium") ~ "Medium",
any(`Patchiness Risk` == "Low") ~ "Low",
any(`Patchiness Risk` == "Minimal") ~ "Minimal",
TRUE ~ "Unknown"
),
avg_mean_ci = mean(`Mean CI`, na.rm = TRUE),
avg_cv = mean(`CV Value`, na.rm = TRUE),
.groups = 'drop'
)
# Generate alerts for this field based on simplified CV-Moran's I priority system (3 levels)
field_alerts <- c()
# Get CV and Moran's I values
avg_cv <- field_summary$avg_cv
morans_i <- mean(field_data[["Moran's I"]], na.rm = TRUE)
# Determine priority level (1=Urgent, 2=Monitor, 3=No stress)
priority_level <- get_field_priority_level(avg_cv, morans_i)
# Generate alerts based on priority level
if (priority_level == 1) {
field_alerts <- c(field_alerts, tr_key("priority"))
} else if (priority_level == 2) {
field_alerts <- c(field_alerts, tr_key("monitor"))
}
# Priority 3: No alert (no stress)
# Keep other alerts for decline risk, patchiness risk, gap score
if (field_summary$highest_decline_risk %in% c("High", "Very-high")) {
field_alerts <- c(field_alerts, tr_key("growth_decline"))
}
if (field_summary$highest_patchiness_risk == "High") {
field_alerts <- c(field_alerts, tr_key("high_patchiness"))
}
if (field_summary$max_gap_score > 20) {
field_alerts <- c(field_alerts, tr_key("gaps_present"))
}
# Only add alerts if there are any (skip fields with no alerts)
if (length(field_alerts) > 0) {
# Add to alerts list (one single-row data.frame per alert)
for (alert in field_alerts) {
alerts_list[[length(alerts_list) + 1]] <- data.frame(
Field = field_name,
Alert = alert
)
}
}
}
# Combine all alerts
if (length(alerts_list) > 0) {
alerts_df <- do.call(rbind, alerts_list)
return(alerts_df)
} else {
return(data.frame(Field = character(), Alert = character()))
}
}
# generate_field_alerts() is defined in 90_report_utils.R (sourced above).
# field_details_table has already been normalised by normalize_field_details_columns().
# Generate and display alerts table
if (exists("field_details_table") && !is.null(field_details_table) && nrow(field_details_table) > 0) {
# Adapter: Map normalized column names back to legacy names for generate_field_alerts()
# (generates from the normalized schema created by normalize_field_details_columns + column_mappings)
field_details_for_alerts <- field_details_table
# Rename normalized columns back to legacy names (only if they exist)
if ("Field_id" %in% names(field_details_for_alerts)) {
field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(Field = Field_id)
}
if ("Mean_CI" %in% names(field_details_for_alerts)) {
field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Mean CI` = Mean_CI)
}
if ("CV" %in% names(field_details_for_alerts) && !("CV Value" %in% names(field_details_for_alerts))) {
field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`CV Value` = CV)
}
if ("TCH_Forecasted" %in% names(field_details_for_alerts)) {
field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Yield Forecast (t/ha)` = TCH_Forecasted)
}
if ("Gap_Score" %in% names(field_details_for_alerts)) {
field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Gap Score` = Gap_Score)
}
if ("Uniformity_Category" %in% names(field_details_for_alerts)) {
field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Growth Uniformity` = Uniformity_Category)
}
if ("Decline_Risk" %in% names(field_details_for_alerts)) {
field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Decline Risk` = Decline_Risk)
}
if ("Decline_Severity" %in% names(field_details_for_alerts) && !("Decline Risk" %in% names(field_details_for_alerts))) {
field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Decline Risk` = Decline_Severity)
}
if ("Patchiness_Risk" %in% names(field_details_for_alerts)) {
field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Patchiness Risk` = Patchiness_Risk)
}
if ("Morans_I" %in% names(field_details_for_alerts)) {
field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Moran's I` = Morans_I)
}
alerts_data <- generate_field_alerts(field_details_for_alerts)
alerts_data <- generate_field_alerts(field_details_table)
if (!is.null(alerts_data) && nrow(alerts_data) > 0) {
ft <- flextable(alerts_data) %>%
# set_caption("Field Alerts Summary") %>%
@ -1014,36 +830,23 @@ if (!exists("field_details_table") || is.null(field_details_table)) {
tryCatch({
safe_log("Starting farm-level raster aggregation for overview maps")
# Helper function to safely aggregate mosaics for a specific week
# Safely merge every per-field mosaic for one week into a single farm-level
# raster. Logs progress via safe_log() and returns NULL — never raises — on
# failure, so the calling chunk can render whichever weeks succeeded.
# Reads `weekly_CI_mosaic` from the enclosing environment.
aggregate_mosaics_safe <- function(week_num, year_num, label) {
  log_and_null <- function(e) {
    safe_log(paste("Error aggregating mosaics for", label, ":", e$message), "WARNING")
    NULL
  }
  tryCatch({
    safe_log(paste("Aggregating mosaics for", label, "(week", week_num, ",", year_num, ")"))
    # aggregate_per_field_mosaics_to_farm_level() is defined in 90_report_utils.R
    merged <- aggregate_per_field_mosaics_to_farm_level(
      weekly_mosaic_dir = weekly_CI_mosaic,
      target_week = week_num,
      target_year = year_num
    )
    if (is.null(merged)) {
      safe_log(paste("Warning: Farm mosaic is NULL for", label), "WARNING")
      NULL
    } else {
      safe_log(paste("✓ Successfully aggregated farm mosaic for", label, ""))
      merged
    }
  }, error = log_and_null)
}
# Aggregate mosaics for three weeks: current, week-1, week-3
farm_mosaic_current <- aggregate_mosaics_safe(current_week, current_iso_year, "current week")
farm_mosaic_minus_1 <- aggregate_mosaics_safe(as.numeric(week_minus_1), week_minus_1_year, "week-1")
farm_mosaic_minus_3 <- aggregate_mosaics_safe(as.numeric(week_minus_3), week_minus_3_year, "week-3")
# Aggregate per-field mosaics into farm-level rasters for current, week-1, week-3
# aggregate_per_field_mosaics_to_farm_level() is defined in 90_report_utils.R (sourced above)
farm_mosaic_current <- aggregate_per_field_mosaics_to_farm_level(
weekly_mosaic_dir = weekly_CI_mosaic,
target_week = current_week,
target_year = current_iso_year
)
farm_mosaic_minus_1 <- aggregate_per_field_mosaics_to_farm_level(
weekly_mosaic_dir = weekly_CI_mosaic,
target_week = as.numeric(week_minus_1),
target_year = week_minus_1_year
)
farm_mosaic_minus_3 <- aggregate_per_field_mosaics_to_farm_level(
weekly_mosaic_dir = weekly_CI_mosaic,
target_week = as.numeric(week_minus_3),
target_year = week_minus_3_year
)
# Extract CI band (5th band, or named "CI") from each aggregated mosaic
farm_ci_current <- NULL
@ -1098,18 +901,7 @@ tryCatch({
AllPivots0_ll <- AllPivots0
target_crs <- "EPSG:4326"
# Downsample a terra SpatRaster by integer aggregation when its cell count
# exceeds max_cells; smaller rasters (and NULL) pass through untouched.
# The factor is the smallest integer whose square brings the count under the
# limit (aggregation shrinks both dimensions by `fact`).
downsample_raster <- function(r, max_cells = 2000000) {
if (is.null(r)) {
return(NULL)
}
n_cells <- terra::ncell(r)
if (!is.na(n_cells) && n_cells > max_cells) {
fact <- ceiling(sqrt(n_cells / max_cells))
safe_log(paste("Downsampling raster by factor", fact), "INFO")
# na.rm keeps partially-NA neighbourhoods from collapsing to NA
return(terra::aggregate(r, fact = fact, fun = mean, na.rm = TRUE))
}
r
}
# downsample_raster() is defined in 90_report_utils.R (sourced above)
if (!is.null(farm_ci_current) && !terra::is.lonlat(farm_ci_current)) {
farm_ci_current_ll <- terra::project(farm_ci_current, target_crs, method = "bilinear")
@ -1383,13 +1175,7 @@ tryCatch({
dplyr::group_by(field) %>%
dplyr::summarise(.groups = 'drop')
# Helper to get week/year from a date
# Return the ISO-8601 week number and ISO week-based year for a date.
#
# Fix: coerce the input with as.Date() first. Previously a character date
# ("2025-01-01") was passed straight to format(), which returns the string
# itself, and as.numeric() then silently produced NA. The ISO year can differ
# from the calendar year near year boundaries (2024-12-30 is week 1 of 2025).
#
# @param date A Date, or anything as.Date() can coerce (e.g. "YYYY-MM-DD").
# @return list(week = <numeric ISO week 1-53>, year = <numeric ISO year>)
get_week_year <- function(date) {
  date <- as.Date(date)
  list(
    week = as.numeric(format(date, "%V")),
    year = as.numeric(format(date, "%G"))
  )
}
# get_week_year() is defined in 90_report_utils.R (sourced above)
# Calculate week/year for current and historical weeks
current_ww <- get_week_year(as.Date(today))
@ -1400,25 +1186,7 @@ tryCatch({
message(paste("Processing", nrow(AllPivots_merged), "fields for weeks:",
current_ww$week, minus_1_ww$week, minus_2_ww$week, minus_3_ww$week))
# Helper function to safely load per-field mosaic if it exists
# Load the CI band of a per-field weekly mosaic, or NULL if absent/unreadable.
#
# Fix: qualify nlyr as terra::nlyr — the bare call only works when terra is
# attached with library(), and this file otherwise uses terra:: throughout.
# Also makes the zero-layer fall-through an explicit NULL.
#
# @param base_dir   Root weekly-mosaic directory (one sub-directory per field).
# @param field_name Field sub-directory name.
# @param week,year  ISO week number and year identifying week_WW_YYYY.tif.
# @return Single-layer SpatRaster (the "CI" layer if named, else the first
#   layer), or NULL when the file is missing, empty, or fails to load.
load_per_field_mosaic <- function(base_dir, field_name, week, year) {
  path <- file.path(base_dir, field_name, paste0("week_", sprintf("%02d", week), "_", year, ".tif"))
  if (!file.exists(path)) {
    return(NULL)
  }
  tryCatch({
    rast_obj <- terra::rast(path)
    if ("CI" %in% names(rast_obj)) {
      rast_obj[["CI"]]
    } else if (terra::nlyr(rast_obj) > 0) {
      rast_obj[[1]]
    } else {
      NULL
    }
  }, error = function(e) {
    message(paste("Warning: Could not load", path, ":", e$message))
    NULL
  })
}
# load_per_field_mosaic() is defined in 90_report_utils.R (sourced above)
# Iterate through fields using purrr::walk
purrr::walk(AllPivots_merged$field, function(field_name) {
@ -1558,38 +1326,7 @@ tryCatch({
})
```
```{r generate_subarea_visualizations, eval=FALSE, echo=FALSE, fig.height=3.8, fig.width=6.5, message=FALSE, warning=FALSE, dpi=150, results='asis'}
# Alternative visualization grouped by sub-area (disabled by default)
# Emits a markdown section per sub-area via cat() (results='asis' chunk), with
# ci_plot()/cum_ci_plot() for each field and a LaTeX page break per group.
tryCatch({
# Group pivots by sub-area
pivots_grouped <- AllPivots0
# Iterate over each subgroup
for (subgroup in unique(pivots_grouped$sub_area)) {
# Add subgroup heading
cat("\n")
cat("## Subgroup: ", subgroup, "\n")
# Filter data for current subgroup
subset_data <- dplyr::filter(pivots_grouped, sub_area == subgroup)
# Generate visualizations for each field in the subgroup
# ci_plot()/cum_ci_plot() print their own output; the surrounding cat("\n")
# calls keep the asis markdown well-formed between figures.
purrr::walk(subset_data$field, function(field_name) {
cat("\n")
ci_plot(field_name)
cat("\n")
cum_ci_plot(field_name)
cat("\n")
})
# Add page break after each subgroup
cat("\\newpage\n")
}
}, error = function(e) {
# Degrade gracefully: log and leave a visible note in the report
safe_log(paste("Error in subarea visualization section:", e$message), "ERROR")
cat("Error generating subarea plots. See log for details.\n")
})
```
cat("\\newpage\n\n")
`r tr_key("detailed_field")`

View file

@ -1214,26 +1214,53 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
#' Normalize field_details_table column structure
#'
#' Standardizes column names and ensures all expected KPI columns exist.
#' Handles Field → Field_id rename and injects missing columns as NA.
#' Standardizes column names from various legacy and pipeline-generated schemas
#' into a single canonical set, then ensures all expected KPI columns exist
#' (adding \code{NA} columns for any that are absent).
#'
#' @param field_details_table data.frame to normalize
#' @return data.frame with standardized column structure
#' Rename rules applied in order:
#' \itemize{
#' \item \code{Field} → \code{Field_id}
#' \item \code{Mean CI} → \code{Mean_CI}
#' \item \code{CV Value} → \code{CV}
#' \item \code{TCH_Forecasted} / \code{Yield Forecast (t/ha)} → \code{TCH_Forecasted}
#' \item \code{Gap Score} → \code{Gap_Score}
#' \item \code{Growth Uniformity} / \code{Uniformity_Category} → \code{Uniformity_Interpretation}
#' \item \code{Decline_Risk} → \code{Decline_Severity}
#' \item \code{Moran's I} / \code{Morans_I} → \code{Morans_I}
#' }
#'
#' @param field_details_table A data.frame to normalize.
#' @return A data.frame with standardized column names and all expected KPI
#' columns present (missing ones filled with \code{NA}).
#' Normalize field_details_table column structure
#'
#' Standardizes column names from various legacy and pipeline-generated
#' schemas into a single canonical set, then ensures all expected KPI columns
#' exist (adding NA columns for any that are absent). A rename is applied only
#' when the source column exists and the canonical name does not, so
#' already-normalized tables pass through unchanged.
#'
#' Rename rules (applied in order): Field -> Field_id; Mean CI -> Mean_CI;
#' CV Value -> CV; Yield Forecast (t/ha) -> TCH_Forecasted;
#' Gap Score -> Gap_Score; Growth Uniformity / Uniformity_Category ->
#' Uniformity_Interpretation; Decline_Risk -> Decline_Severity;
#' Moran's I -> Morans_I.
#'
#' @param field_details_table A data.frame to normalize.
#' @return A data.frame with standardized column names and all expected KPI
#'   columns present (missing ones filled with NA). NULL / 0-row input is
#'   returned unchanged.
normalize_field_details_columns <- function(field_details_table) {
  if (is.null(field_details_table) || nrow(field_details_table) == 0) {
    return(field_details_table)
  }
  # from-name -> canonical-name pairs; base-R renaming avoids the rlang
  # dependency of the !!sym() approach while keeping identical behavior
  rename_rules <- c(
    "Field" = "Field_id",
    "Mean CI" = "Mean_CI",
    "CV Value" = "CV",
    "Yield Forecast (t/ha)" = "TCH_Forecasted",
    "Gap Score" = "Gap_Score",
    "Growth Uniformity" = "Uniformity_Interpretation",
    "Uniformity_Category" = "Uniformity_Interpretation",
    "Decline_Risk" = "Decline_Severity",
    "Moran's I" = "Morans_I"
  )
  for (from in names(rename_rules)) {
    to <- rename_rules[[from]]
    if (from %in% names(field_details_table) && !to %in% names(field_details_table)) {
      names(field_details_table)[names(field_details_table) == from] <- to
    }
  }
  # Ensure all expected KPI columns exist; add as NA if missing
  expected_cols <- c(
    "Field_id", "Mean_CI", "CV", "Morans_I", "TCH_Forecasted", "Gap_Score",
    "Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation",
    "Decline_Severity", "Patchiness_Risk"
  )
  for (col in expected_cols) {
    if (!col %in% names(field_details_table)) {
      field_details_table[[col]] <- NA
    }
  }
  return(field_details_table)
}
# ==============================================================================
# TREND / ARROW HELPERS
# ==============================================================================
#' Map trend text to arrow symbols or formatted labels
#'
#' Converts trend category strings (e.g. "strong growth", "slight decline")
#' to Unicode arrow symbols, optionally combined with translated text labels.
#' Normalises legacy and current trend category names to a canonical output.
#' Vectorised over \code{text_vec}.
#'
#' @param text_vec Character vector of trend category strings.
#' @param include_text Logical. If \code{TRUE}, returns "Label (arrow)"; if
#'   \code{FALSE} (default), returns the arrow symbol only.
#' @return Character vector the same length as \code{text_vec}. \code{NA} is
#'   returned for missing / empty inputs; an em-dash ("—") for unrecognised
#'   values when \code{include_text = FALSE}.
#' @seealso \code{\link{tr_key}}
#'
map_trend_to_arrow <- function(text_vec, include_text = FALSE) {
  text_lower <- tolower(as.character(text_vec))
  # Fix: vapply (not sapply) guarantees a character vector even for
  # zero-length input — sapply() returns list() there, breaking downstream
  # string operations on the result.
  vapply(text_lower, function(text) {
    if (is.na(text) || nchar(trimws(text)) == 0) return(NA_character_)
    # Branch order matters: "strong growth" before generic growth; the
    # lookbehind (?<!no\s) keeps "no growth" for the "stable" branch.
    if (grepl("\\bstrong growth\\b", text, perl = TRUE)) {
      arrow <- "↑↑"; trans_key <- "Strong growth"
    } else if (grepl("\\b(?:slight|weak) growth\\b|(?<!no\\s)\\bgrowth\\b|\\bincreasing\\b", text, perl = TRUE)) {
      arrow <- "↑"; trans_key <- "Slight growth"
    } else if (grepl("\\bstable\\b|\\bno growth\\b", text, perl = TRUE)) {
      arrow <- "→"; trans_key <- "Stable"
    } else if (grepl("\\b(?:weak|slight|moderate) decline\\b", text, perl = TRUE)) {
      arrow <- "↓"; trans_key <- "Slight decline"
    } else if (grepl("\\bstrong decline\\b|\\bsevere\\b", text, perl = TRUE)) {
      arrow <- "↓↓"; trans_key <- "Strong decline"
    } else {
      # Unmatched category: raw (lowercased) text in text mode, em-dash otherwise
      return(if (include_text) as.character(text) else "—")
    }
    label <- tr_key(trans_key)
    if (include_text) paste0(label, " (", arrow, ")") else arrow
  }, character(1), USE.NAMES = FALSE)
}
# ==============================================================================
# DATE / WEEK HELPERS
# ==============================================================================
#' Extract ISO week and year from a date
#'
#' Returns the ISO 8601 week number (1–53) and the corresponding ISO
#' week-based year. The ISO year may differ from the calendar year near
#' year-end boundaries (e.g. 2024-12-30 is ISO week 1 of 2025).
#'
#' @param date A \code{Date} object or a string coercible to \code{Date}.
#' @return A named list with integer elements \code{week} and \code{year}.
#'
get_week_year <- function(date) {
  d <- as.Date(date)
  week_num <- as.integer(format(d, "%V"))  # ISO week number
  iso_year <- as.integer(format(d, "%G"))  # ISO week-based year
  list(week = week_num, year = iso_year)
}
# ==============================================================================
# RASTER HELPERS
# ==============================================================================
#' Downsample a SpatRaster to a maximum cell count
#'
#' When the raster holds more than \code{max_cells} cells, shrinks it by
#' integer aggregation using the smallest factor that brings the cell count
#' at or below the limit; smaller rasters (and \code{NULL}) pass through.
#'
#' @param r A \code{SpatRaster} object, or \code{NULL}.
#' @param max_cells Maximum number of cells to retain (default 2,000,000).
#' @param fun Aggregation function passed to \code{terra::aggregate()}
#'   (default \code{"mean"}).
#' @return The (possibly downsampled) \code{SpatRaster}, or \code{NULL} if
#'   \code{r} is \code{NULL}.
#'
downsample_raster <- function(r, max_cells = 2000000, fun = "mean") {
  if (is.null(r)) return(NULL)
  total <- terra::ncell(r)
  if (is.na(total) || total <= max_cells) {
    return(r)  # already within budget; leave untouched
  }
  shrink_factor <- ceiling(sqrt(total / max_cells))
  safe_log(paste("Downsampling raster by factor", shrink_factor), "INFO")
  terra::aggregate(r, fact = shrink_factor, fun = fun, na.rm = TRUE)
}
#' Load the CI band from a per-field weekly mosaic
#'
#' Resolves the weekly mosaic TIF for the given field/week via
#' \code{\link{get_per_field_mosaic_path}}, loads it with
#' \code{terra::rast()}, and returns the CI band (the layer named
#' \code{"CI"}, or the first layer as a fallback).
#'
#' @param base_dir Path to the \code{weekly_mosaic} directory.
#' @param field_name Name of the field sub-directory.
#' @param week ISO week number.
#' @param year ISO year.
#' @return A single-layer \code{SpatRaster} (CI band), or \code{NULL} if the
#'   file does not exist or cannot be loaded.
#' @seealso \code{\link{get_per_field_mosaic_path}}
#'
load_per_field_mosaic <- function(base_dir, field_name, week, year) {
  path <- get_per_field_mosaic_path(base_dir, field_name, week, year)
  if (is.null(path)) return(NULL)
  # Read and pick the CI layer; any terra failure is logged and mapped to NULL
  read_ci_band <- function() {
    layers <- terra::rast(path)
    if ("CI" %in% names(layers)) return(layers[["CI"]])
    if (terra::nlyr(layers) > 0) return(layers[[1]])
    NULL
  }
  tryCatch(
    read_ci_band(),
    error = function(e) {
      safe_log(paste("Could not load mosaic:", path, "-", e$message), "WARNING")
      NULL
    }
  )
}
# ==============================================================================
# FIELD ALERT GENERATION
# ==============================================================================
#' Generate field-level alert flags from normalised KPI data
#'
#' Evaluates each field's CV, Moran's I, decline severity, patchiness risk,
#' and gap score against threshold rules, returning a tidy data frame of
#' translated alert messages. Only fields that trigger at least one alert are
#' included in the output.
#'
#' Expects a table that has been passed through
#' \code{\link{normalize_field_details_columns}}, which guarantees the columns
#' \code{Field_id}, \code{CV}, \code{Morans_I}, \code{Decline_Severity},
#' \code{Patchiness_Risk}, and \code{Gap_Score} are present.
#'
#' Alert rules:
#' \itemize{
#'   \item Priority 1 (Urgent) or 2 (Monitor) from
#'     \code{\link{get_field_priority_level}} based on CV / Moran's I.
#'   \item Decline risk High or Very-high.
#'   \item Patchiness risk High.
#'   \item Gap score \eqn{> 20}.
#' }
#'
#' @param field_details_table A data frame normalised by
#'   \code{\link{normalize_field_details_columns}}.
#' @return A data frame with columns \code{Field} and \code{Alert}, one row
#'   per alert per field. Returns an empty 0-row data frame when no alerts
#'   are triggered, or \code{NULL} if the input is empty / missing required
#'   columns.
#' @seealso \code{\link{get_field_priority_level}}, \code{\link{normalize_field_details_columns}}
#'
generate_field_alerts <- function(field_details_table) {
if (is.null(field_details_table) || nrow(field_details_table) == 0) {
return(NULL)
}
required_cols <- c("Field_id", "CV", "Morans_I", "Decline_Severity",
"Patchiness_Risk", "Gap_Score")
missing_cols <- setdiff(required_cols, names(field_details_table))
if (length(missing_cols) > 0) {
safe_log(paste("generate_field_alerts: missing columns:",
paste(missing_cols, collapse = ", ")), "WARNING")
return(NULL)
}
# One summary row per field: averaged metrics plus worst-case risk labels
summaries <- field_details_table %>%
dplyr::group_by(Field_id) %>%
dplyr::summarise(
avg_cv = mean(CV, na.rm = TRUE),
avg_morans_i = mean(Morans_I, na.rm = TRUE),
# suppressWarnings: max() over an all-NA group warns and yields -Inf,
# which is converted to NA below before the gap threshold is applied
max_gap = suppressWarnings(max(Gap_Score, na.rm = TRUE)),
highest_decline = dplyr::case_when(
any(Decline_Severity == "Very-high", na.rm = TRUE) ~ "Very-high",
any(Decline_Severity == "High", na.rm = TRUE) ~ "High",
any(Decline_Severity == "Moderate", na.rm = TRUE) ~ "Moderate",
any(Decline_Severity == "Low", na.rm = TRUE) ~ "Low",
TRUE ~ "Unknown"
),
highest_patchiness = dplyr::case_when(
any(Patchiness_Risk == "High", na.rm = TRUE) ~ "High",
any(Patchiness_Risk == "Medium", na.rm = TRUE) ~ "Medium",
any(Patchiness_Risk == "Low", na.rm = TRUE) ~ "Low",
any(Patchiness_Risk == "Minimal", na.rm = TRUE) ~ "Minimal",
TRUE ~ "Unknown"
),
.groups = "drop"
) %>%
dplyr::mutate(
# NOTE(review): map2_int requires get_field_priority_level() to return a
# value coercible to integer without loss — confirm its return type.
priority = purrr::map2_int(avg_cv, avg_morans_i, get_field_priority_level),
max_gap = dplyr::if_else(is.infinite(max_gap), NA_real_, max_gap)
)
# Build one column per alert rule (NA = rule not triggered), then melt to
# long form and drop the NAs, leaving one row per triggered alert.
alerts <- summaries %>%
dplyr::mutate(
a_priority = dplyr::case_when(
priority == 1 ~ tr_key("priority"),
priority == 2 ~ tr_key("monitor"),
TRUE ~ NA_character_
),
a_decline = dplyr::if_else(
highest_decline %in% c("High", "Very-high"), tr_key("growth_decline"), NA_character_
),
a_patch = dplyr::if_else(
highest_patchiness == "High", tr_key("high_patchiness"), NA_character_
),
a_gap = dplyr::if_else(
!is.na(max_gap) & max_gap > 20, tr_key("gaps_present"), NA_character_
)
) %>%
# NOTE(review): names_to = NULL drops the rule-name column entirely —
# requires a tidyr version supporting NULL here; verify the pinned version.
tidyr::pivot_longer(
cols = c(a_priority, a_decline, a_patch, a_gap),
names_to = NULL,
values_to = "Alert"
) %>%
dplyr::filter(!is.na(Alert)) %>%
dplyr::select(Field = Field_id, Alert)
if (nrow(alerts) == 0) {
return(data.frame(Field = character(), Alert = character()))
}
alerts
}

Binary file not shown.