SmartCane/r_app/report_utils.R
Timon 458b8247be Cleanup: Fix CI formula, reorganize shell scripts and test files
- Fixed CI calculation: changed from NDVI (NIR-Red)/(NIR+Red) to correct NIR/Green-1 formula in:
  * process_single_tile() function
  * create_ci_band() utility function
  * Updated create_mask_and_crop() documentation

- Renamed numbered shell scripts for clarity (matching R script numbering):
  * 01_run_planet_download -> 10_planet_download.sh
  * 02_run_ci_extraction -> 20_ci_extraction.sh
  * 03_run_growth_model -> 30_growth_model.sh
  * 04_run_mosaic_creation -> 40_mosaic_creation.sh
  * 09_run_calculate_kpis -> 80_calculate_kpis.sh
  * 10_run_kpi_report -> 90_kpi_report.sh

- Archived obsolete shell scripts to old_sh/:
  * build_mosaic.sh, build_report.sh, interpolate_growth_model.sh
  * 05_run_dashboard_report.sh, 06_run_crop_messaging.sh
  * 11_run_yield_prediction.sh/ps1
  * runcane.sh, runpython.sh, smartcane.sh, update_RDS.sh

- Deleted test/debug files and temporary outputs:
  * analyze_*.R, benchmark_gpu_vs_cpu.py, convert_angata_harvest.py
  * debug_mosaic.R, examine_kpi_results.R, generate_sar_report.R
  * inspect_8band_structure.R, inspect_tif_bands.R
  * old_working_utils.R, predict_harvest_operational.R
  * run_kpi_calculation.R, run_report.R, simple_sar_test.R
  * data_validation_tool/, harvest_ci_pattern_analysis.png, kpi_debug.out

- Enhanced harvest prediction: Added threshold tuning (0.40-0.45) and field type handling

- Enhanced mosaic creation: Improved tile detection and routing logic
2026-01-14 16:58:51 +01:00

825 lines
36 KiB
R

# REPORT_UTILS.R
# =============
# Utility functions for generating SmartCane reports with visualizations.
# These functions support the creation of maps, charts and report elements
# for the CI_report_dashboard_planet.Rmd document.
#' Safe logging function that works whether log_message exists or not
#'
#' @param message The message to log
#' @param level The log level (default: "INFO")
#' @return NULL (used for side effects)
#'
safe_log <- function(message, level = "INFO") {
if (exists("log_message")) {
log_message(message, level)
} else {
if (level %in% c("ERROR", "WARNING")) {
warning(message)
} else {
message(message)
}
}
}
#' Creates a sub-chunk for use within RMarkdown documents
#'
#' @param g A ggplot object to render in the sub-chunk
#' @param fig_height Height of the figure in inches
#' @param fig_width Width of the figure in inches
#' @return NULL (writes chunk directly to output)
#'
subchunkify <- function(g, fig_height=7, fig_width=5) {
g_deparsed <- paste0(deparse(
function() {g}
), collapse = '')
sub_chunk <- paste0("
`","``{r sub_chunk_", floor(runif(1) * 10000), ", fig.height=", fig_height, ", fig.width=", fig_width, ", dpi=300, dev='png', out.width='100%', echo=FALSE}",
"\n(",
g_deparsed
, ")()",
"\n`","``
")
cat(knitr::knit(text = knitr::knit_expand(text = sub_chunk), quiet = TRUE))
}
#' Creates a Chlorophyll Index map for a pivot
#'
#' @param pivot_raster The raster data containing CI values
#' @param pivot_shape The shape of the pivot field
#' @param pivot_spans Additional boundary data for the field
#' @param show_legend Whether to show the legend (default: FALSE)
#' @param legend_is_portrait Whether to show the legend in portrait orientation (default: FALSE)
#' @param week Week number to display in the title
#' @param age Age of the crop in weeks
#' @param borders Whether to display field borders (default: FALSE)
#' @return A tmap object with the CI map
#'
create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week, age, borders = FALSE, colorblind = FALSE){
# Input validation
if (missing(pivot_raster) || is.null(pivot_raster)) {
stop("pivot_raster is required")
}
if (missing(pivot_shape) || is.null(pivot_shape)) {
stop("pivot_shape is required")
}
if (missing(pivot_spans) || is.null(pivot_spans)) {
stop("pivot_spans is required")
}
if (missing(week) || is.null(week)) {
stop("week parameter is required")
}
if (missing(age) || is.null(age)) {
stop("age parameter is required")
}
# Choose palette based on colorblind flag
palette <- if (colorblind) "viridis" else "brewer.rd_yl_gn"
# Create the base map
map <- tm_shape(pivot_raster, unit = "m")
# Add raster with continuous spectrum (fixed scale 8-1 for consistent comparison, reversed)
map <- map + tm_raster(col.scale = tm_scale_continuous(values = palette,
limits = c(1,8)),
col.legend = tm_legend(title = "CI",
orientation = if(legend_is_portrait) "portrait" else "landscape",
show = show_legend,
position = if(show_legend) tm_pos_out("left", "center") else c("left", "bottom"),
reverse = TRUE
))
# Add layout elements
map <- map + tm_layout(main.title = paste0("Max CI week ", week,"\n", age, " weeks (", age * 7, " days) old"),
main.title.size = 0.7)
# Add borders if requested
if (borders) {
map <- map +
tm_shape(pivot_shape) +
tm_borders(lwd = 3) +
tm_text("sub_field", size = 1/2) +
tm_shape(pivot_spans) +
tm_borders(lwd = 0.5, alpha = 0.5)
}
return(map)
}
#' Creates a Chlorophyll Index difference map between two weeks
#'
#' @param pivot_raster The raster data containing CI difference values
#' @param pivot_shape The shape of the pivot field
#' @param pivot_spans Additional boundary data for the field
#' @param show_legend Whether to show the legend (default: FALSE)
#' @param legend_is_portrait Whether to show the legend in portrait orientation (default: FALSE)
#' @param week_1 First week number for comparison
#' @param week_2 Second week number for comparison
#' @param age Age of the crop in weeks
#' @param borders Whether to display field borders (default: TRUE)
#' @return A tmap object with the CI difference map
#'
create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week_1, week_2, age, borders = TRUE, colorblind = FALSE){
# Input validation
if (missing(pivot_raster) || is.null(pivot_raster)) {
stop("pivot_raster is required")
}
if (missing(pivot_shape) || is.null(pivot_shape)) {
stop("pivot_shape is required")
}
if (missing(pivot_spans) || is.null(pivot_spans)) {
stop("pivot_spans is required")
}
if (missing(week_1) || is.null(week_1) || missing(week_2) || is.null(week_2)) {
stop("week_1 and week_2 parameters are required")
}
if (missing(age) || is.null(age)) {
stop("age parameter is required")
}
# Choose palette based on colorblind flag
palette <- if (colorblind) "plasma" else "brewer.rd_yl_gn"
# Create the base map
map <- tm_shape(pivot_raster, unit = "m")
# Add raster with continuous spectrum (centered at 0 for difference maps, fixed scale, reversed)
map <- map + tm_raster(col.scale = tm_scale_continuous(values = palette,
midpoint = 0,
limits = c(-3, 3)),
col.legend = tm_legend(title = "CI diff.",
orientation = if(legend_is_portrait) "portrait" else "landscape",
show = show_legend,
position = if(show_legend) tm_pos_out("right", "center") else c("left", "bottom"),
reverse = TRUE
))
# Add layout elements
map <- map + tm_layout(main.title = paste0("CI change week ", week_1, " - week ", week_2, "\n", age, " weeks (", age * 7, " days) old"),
main.title.size = 0.7)
# Add borders if requested
if (borders) {
map <- map +
tm_shape(pivot_shape) +
tm_borders(lwd = 3) +
tm_text("sub_field", size = 1/2) +
tm_shape(pivot_spans) +
tm_borders(lwd = 0.5, alpha = 0.5)
}
return(map)
}
#' Creates a visualization of CI data for a specific pivot field
#'
#' @param pivotName The name or ID of the pivot field to visualize
#' @param field_boundaries Field boundaries spatial data (sf object)
#' @param current_ci Current week's Chlorophyll Index raster
#' @param ci_minus_1 Previous week's Chlorophyll Index raster
#' @param ci_minus_2 Two weeks ago Chlorophyll Index raster
#' @param last_week_diff Difference raster between current and last week
#' @param three_week_diff Difference raster between current and three weeks ago
#' @param harvesting_data Data frame containing field harvesting/planting information
#' @param week Current week number
#' @param week_minus_1 Previous week number
#' @param week_minus_2 Two weeks ago week number
#' @param week_minus_3 Three weeks ago week number
#' @param borders Whether to display field borders (default: TRUE)
#' @param colorblind_friendly Whether to use colorblind-friendly color schemes (default: FALSE)
#' @return NULL (adds output directly to R Markdown document)
#'
ci_plot <- function(pivotName,
field_boundaries = AllPivots0,
current_ci = CI,
ci_minus_1 = CI_m1,
ci_minus_2 = CI_m2,
last_week_diff = last_week_dif_raster_abs,
three_week_diff = three_week_dif_raster_abs,
harvesting_data = harvesting_data,
week = week,
week_minus_1 = week_minus_1,
week_minus_2 = week_minus_2,
week_minus_3 = week_minus_3,
borders = TRUE,
colorblind_friendly = FALSE){
# Input validation
if (missing(pivotName) || is.null(pivotName) || pivotName == "") {
stop("pivotName is required")
}
if (missing(field_boundaries) || is.null(field_boundaries)) {
stop("field_boundaries is required")
}
if (missing(current_ci) || is.null(current_ci)) {
stop("current_ci is required")
}
if (missing(ci_minus_1) || is.null(ci_minus_1)) {
stop("ci_minus_1 is required")
}
if (missing(ci_minus_2) || is.null(ci_minus_2)) {
stop("ci_minus_2 is required")
}
if (missing(last_week_diff) || is.null(last_week_diff)) {
stop("last_week_diff is required")
}
if (missing(three_week_diff) || is.null(three_week_diff)) {
stop("three_week_diff is required")
}
if (missing(harvesting_data) || is.null(harvesting_data)) {
stop("harvesting_data is required")
}
# Extract pivot shape and age data
tryCatch({
pivotShape <- field_boundaries %>% terra::subset(field %in% pivotName) %>% sf::st_transform(terra::crs(current_ci))
age <- harvesting_data %>%
dplyr::filter(field %in% pivotName) %>%
sort("year") %>%
tail(., 1) %>%
dplyr::select(age) %>%
unique() %>%
pull() %>%
round()
# Filter for the specific pivot
AllPivots2 <- field_boundaries %>% dplyr::filter(field %in% pivotName)
# Create crop masks for different timepoints using terra functions
singlePivot <- terra::crop(current_ci, pivotShape) %>% terra::mask(., pivotShape)
singlePivot_m1 <- terra::crop(ci_minus_1, pivotShape) %>% terra::mask(., pivotShape)
singlePivot_m2 <- terra::crop(ci_minus_2, pivotShape) %>% terra::mask(., pivotShape)
# Create difference maps
abs_CI_last_week <- terra::crop(last_week_diff, pivotShape) %>% terra::mask(., pivotShape)
abs_CI_three_week <- terra::crop(three_week_diff, pivotShape) %>% terra::mask(., pivotShape)
# Get planting date
planting_date <- harvesting_data %>%
dplyr::filter(field %in% pivotName) %>%
ungroup() %>%
dplyr::select(season_start) %>%
unique()
# Create spans for borders
joined_spans2 <- field_boundaries %>%
sf::st_transform(sf::st_crs(pivotShape)) %>% dplyr::filter(field %in% pivotName)
# Create the maps for different timepoints
CImap_m2 <- create_CI_map(singlePivot_m2, AllPivots2, joined_spans2,
show_legend = TRUE, legend_is_portrait = TRUE,
week = week_minus_2, age = age - 2, borders = borders, colorblind = colorblind_friendly)
CImap_m1 <- create_CI_map(singlePivot_m1, AllPivots2, joined_spans2,
show_legend = FALSE, legend_is_portrait = FALSE,
week = week_minus_1, age = age - 1, borders = borders, colorblind = colorblind_friendly)
CImap <- create_CI_map(singlePivot, AllPivots2, joined_spans2,
show_legend = FALSE, legend_is_portrait = FALSE,
week = week, age = age, borders = borders, colorblind = colorblind_friendly)
# Create difference maps - only show legend on the second one to avoid redundancy
CI_max_abs_last_week <- create_CI_diff_map(abs_CI_last_week, AllPivots2, joined_spans2,
show_legend = FALSE, legend_is_portrait = FALSE,
week_1 = week, week_2 = week_minus_1, age = age, borders = borders, colorblind = colorblind_friendly)
CI_max_abs_three_week <- create_CI_diff_map(abs_CI_three_week, AllPivots2, joined_spans2,
show_legend = TRUE, legend_is_portrait = TRUE,
week_1 = week, week_2 = week_minus_3, age = age, borders = borders, colorblind = colorblind_friendly)
# Arrange the maps with equal widths
tst <- tmap_arrange(CImap_m2, CImap_m1, CImap, CI_max_abs_last_week, CI_max_abs_three_week,
nrow = 1, widths = c(0.23, 0.18, 0.18, 0.18, 0.23))
# Output heading and map to R Markdown
age_months <- round(age / 4.348, 1)
cat(paste("## Field", pivotName, "-", age, "weeks/", age_months, "months after planting/harvest", "\n\n"))
print(tst)
}, error = function(e) {
safe_log(paste("Error creating CI plot for pivot", pivotName, ":", e$message), "ERROR")
cat(paste("# Field", pivotName, "- Error creating visualization", "\n\n"))
cat(paste("Error:", e$message, "\n\n"))
})
}
#' Creates a plot showing Chlorophyll Index data over time for a pivot field
#'
#' @param pivotName The name or ID of the pivot field to visualize
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DOY, cumulative_CI, value and season columns
#' @param plot_type Type of plot to generate: "absolute", "cumulative", or "both"
#' @param facet_on Whether to facet the plot by season (TRUE) or overlay all seasons (FALSE)
#' @param x_unit Unit for x-axis: "days" for DOY or "weeks" for week number (default: "days")
#' @param colorblind_friendly Whether to use colorblind-friendly color schemes (default: FALSE)
#' @param show_benchmarks Whether to show historical benchmark lines (default: FALSE)
#' @param estate_name Name of the estate for benchmark calculation (required if show_benchmarks = TRUE)
#' @param benchmark_percentiles Vector of percentiles for benchmarks (default: c(10, 50, 90))
#' @return NULL (adds output directly to R Markdown document)
#'
cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "absolute", facet_on = FALSE, x_unit = "days", colorblind_friendly = FALSE, show_benchmarks = FALSE, estate_name = NULL, benchmark_percentiles = c(10, 50, 90), benchmark_data = NULL) {
# Input validation
if (missing(pivotName) || is.null(pivotName) || pivotName == "") {
stop("pivotName is required")
}
if (missing(ci_quadrant_data) || is.null(ci_quadrant_data)) {
stop("ci_quadrant_data is required")
}
if (!plot_type %in% c("absolute", "cumulative", "both")) {
stop("plot_type must be one of: 'absolute', 'cumulative', or 'both'")
}
if (!x_unit %in% c("days", "weeks")) {
stop("x_unit must be either 'days' or 'weeks'")
}
# Filter data for the specified pivot
tryCatch({
data_ci <- ci_quadrant_data %>% dplyr::filter(field == pivotName)
if (nrow(data_ci) == 0) {
safe_log(paste("No CI data found for field", pivotName), "WARNING")
return(cum_ci_plot2(pivotName)) # Use fallback function when no data is available
}
# Process data
data_ci2 <- data_ci %>%
dplyr::mutate(CI_rate = cumulative_CI / DOY,
week = lubridate::week(Date)) %>%
dplyr::group_by(field) %>%
dplyr::mutate(mean_CIrate_rolling_10_days = zoo::rollapplyr(CI_rate, width = 10, FUN = mean, partial = TRUE),
mean_rolling_10_days = zoo::rollapplyr(value, width = 10, FUN = mean, partial = TRUE))
data_ci2 <- data_ci2 %>% dplyr::mutate(season = as.factor(season))
# Compute benchmarks if requested and not provided
if (show_benchmarks && is.null(benchmark_data)) {
benchmark_data <- compute_ci_benchmarks(ci_quadrant_data, estate_name, benchmark_percentiles)
}
# Prepare benchmark data for plotting if available
if (!is.null(benchmark_data)) {
benchmark_data <- benchmark_data %>%
dplyr::mutate(
ci_type_label = case_when(
ci_type == "value" ~ "10-Day Rolling Mean CI",
ci_type == "cumulative_CI" ~ "Cumulative CI",
TRUE ~ ci_type
),
benchmark_label = paste0(percentile, "th Percentile")
)
safe_log("Benchmark data prepared for plotting", "INFO")
} else if (show_benchmarks) {
safe_log("No benchmark data available", "WARNING")
}
data_ci3 <- tidyr::pivot_longer(
data_ci2,
cols = c("mean_rolling_10_days", "cumulative_CI"),
names_to = "ci_type", # This column will say "mean_rolling_10_days" or "cumulative_CI"
values_to = "ci_value" # This column will have the numeric values
)
# Prepare date information by season
date_preparation_perfect_pivot <- data_ci2 %>%
dplyr::group_by(season) %>%
dplyr::summarise(min_date = min(Date),
max_date = max(Date),
days = max_date - min_date)
# Get the 3 most recent seasons
unique_seasons <- sort(unique(date_preparation_perfect_pivot$season), decreasing = TRUE)[1:3]
latest_season <- unique_seasons[1] # Identify the latest season
# Create plotting function that uses data_ci3 and filters by ci_type
create_plot <- function(ci_type_filter, y_label, title_suffix) {
# Filter data based on ci_type
plot_data <- data_ci3 %>%
dplyr::filter(season %in% unique_seasons, ci_type == ci_type_filter) %>%
dplyr::mutate(is_latest = season == latest_season) # Flag for latest season
# Determine x-axis variable based on x_unit parameter
x_var <- if (x_unit == "days") {
if (facet_on) "Date" else "DOY"
} else {
"week"
}
x_label <- switch(x_unit,
"days" = if (facet_on) "Date" else "Age of Crop (Days)",
"weeks" = "Week Number")
# Create plot with either facets by season or overlay by DOY/week
if (facet_on) {
g <- ggplot2::ggplot(data = plot_data) +
ggplot2::facet_wrap(~season, scales = "free_x") +
ggplot2::geom_line(ggplot2::aes_string(x = x_var, y = "ci_value", col = "sub_field", group = "sub_field")) +
ggplot2::labs(title = paste("Plot of", y_label),
color = "Field Name",
y = y_label,
x = x_label) +
ggplot2::scale_x_date(date_breaks = "1 month", date_labels = "%m-%Y",
sec.axis = ggplot2::sec_axis(~ ., name = "Age in Months",
breaks = scales::breaks_pretty(),
labels = function(x) round(as.numeric(x - min(x)) / 30.44, 1))) +
ggplot2::theme_minimal() +
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
axis.title.x.top = ggplot2::element_text(size = 8),
legend.justification = c(1, 0), legend.position = c(1, 0),
legend.title = ggplot2::element_text(size = 8),
legend.text = ggplot2::element_text(size = 8)) +
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
} else {
# Choose color palette based on colorblind_friendly flag
color_scale <- if (colorblind_friendly) {
ggplot2::scale_color_brewer(type = "qual", palette = "Set2")
} else {
ggplot2::scale_color_discrete()
}
g <- ggplot2::ggplot(data = plot_data) +
# Add benchmark lines first (behind season lines)
{
if (!is.null(benchmark_data) && ci_type_filter %in% benchmark_data$ci_type) {
benchmark_subset <- benchmark_data %>%
dplyr::filter(ci_type == ci_type_filter) %>%
dplyr::mutate(
benchmark_x = if (x_var == "DOY") {
DOY
} else if (x_var == "week") {
DOY / 7 # Approximate conversion
} else {
DOY # For Date, use DOY as is (may not align perfectly)
}
)
ggplot2::geom_smooth(
data = benchmark_subset,
ggplot2::aes_string(x = "benchmark_x", y = "benchmark_value", group = "factor(percentile)"),
color = "gray70", size = 0.5, se = FALSE, inherit.aes = FALSE
)
}
} +
# Plot older seasons with lighter lines
ggplot2::geom_line(
data = plot_data %>% dplyr::filter(!is_latest),
ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"),
size = 0.7, alpha = 0.4
) +
# Plot latest season with thicker, more prominent line
ggplot2::geom_line(
data = plot_data %>% dplyr::filter(is_latest),
ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"),
size = 1.5, alpha = 1
) +
ggplot2::labs(title = paste("Plot of", y_label, "for Field", pivotName, title_suffix),
color = "Season",
y = y_label,
x = x_label) +
color_scale +
{
if (x_var == "DOY") {
ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
} else if (x_var == "week") {
ggplot2::scale_x_continuous(breaks = seq(0, 64, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
}
} +
ggplot2::theme_minimal() +
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
axis.title.x.top = ggplot2::element_text(size = 8),
legend.justification = c(1, 0), legend.position = c(1, 0),
legend.title = ggplot2::element_text(size = 8),
legend.text = ggplot2::element_text(size = 8)) +
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
}
# Add y-axis limits for absolute CI (10-day rolling mean) to fix scale at 0-7
if (ci_type_filter == "mean_rolling_10_days") {
g <- g + ggplot2::ylim(0, 7)
}
return(g)
}
# Generate plots based on plot_type
if (plot_type == "absolute") {
g <- create_plot("mean_rolling_10_days", "10-Day Rolling Mean CI", "")
subchunkify(g, 2.8, 10)
} else if (plot_type == "cumulative") {
g <- create_plot("cumulative_CI", "Cumulative CI", "")
subchunkify(g, 2.8, 10)
} else if (plot_type == "both") {
# Create faceted plot with both CI types using pivot_longer approach
plot_data_both <- data_ci3 %>%
dplyr::filter(season %in% unique_seasons) %>%
dplyr::mutate(
ci_type_label = case_when(
ci_type == "mean_rolling_10_days" ~ "10-Day Rolling Mean CI",
ci_type == "cumulative_CI" ~ "Cumulative CI",
TRUE ~ ci_type
),
is_latest = season == latest_season # Flag for latest season
)
# Determine x-axis variable based on x_unit parameter
x_var <- if (x_unit == "days") {
if (facet_on) "Date" else "DOY"
} else {
"week"
}
x_label <- switch(x_unit,
"days" = if (facet_on) "Date" else "Age of Crop (Days)",
"weeks" = "Week Number")
# Choose color palette based on colorblind_friendly flag
color_scale <- if (colorblind_friendly) {
ggplot2::scale_color_brewer(type = "qual", palette = "Set2")
} else {
ggplot2::scale_color_discrete()
}
# Create the faceted plot
g_both <- ggplot2::ggplot(data = plot_data_both) +
# Add benchmark lines first (behind season lines)
{
if (!is.null(benchmark_data)) {
benchmark_subset <- benchmark_data %>%
dplyr::mutate(
benchmark_x = if (x_var == "DOY") {
DOY
} else if (x_var == "week") {
DOY / 7
} else {
DOY
},
ci_type_label = case_when(
ci_type == "value" ~ "10-Day Rolling Mean CI",
ci_type == "cumulative_CI" ~ "Cumulative CI",
TRUE ~ ci_type
)
)
ggplot2::geom_smooth(
data = benchmark_subset,
ggplot2::aes_string(x = "benchmark_x", y = "benchmark_value", group = "factor(percentile)"),
color = "gray70", size = 0.5, se = FALSE, inherit.aes = FALSE
)
}
} +
ggplot2::facet_wrap(~ci_type_label, scales = "free_y") +
# Plot older seasons with lighter lines
ggplot2::geom_line(
data = plot_data_both %>% dplyr::filter(!is_latest),
ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"),
size = 0.7, alpha = 0.4
) +
# Plot latest season with thicker, more prominent line
ggplot2::geom_line(
data = plot_data_both %>% dplyr::filter(is_latest),
ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"),
size = 1.5, alpha = 1
) +
ggplot2::labs(title = paste("CI Analysis for Field", pivotName),
color = "Season",
y = "CI Value",
x = x_label) +
color_scale +
{
if (x_var == "DOY") {
ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
} else if (x_var == "week") {
ggplot2::scale_x_continuous(breaks = seq(0, 64, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
} else if (x_var == "Date") {
ggplot2::scale_x_date(breaks = "1 month", date_labels = "%b-%Y", sec.axis = ggplot2::sec_axis(~ ., name = "Age in Months", breaks = scales::breaks_pretty()))
}
} +
ggplot2::theme_minimal() +
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
axis.title.x.top = ggplot2::element_text(size = 8),
legend.justification = c(1, 0), legend.position = c(1, 0),
legend.title = ggplot2::element_text(size = 8),
legend.text = ggplot2::element_text(size = 8)) +
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
# For the rolling mean data, we want to set reasonable y-axis limits
# Since we're using free_y scales, each facet will have its own y-axis
# The rolling mean will automatically scale to its data range,
# but we can ensure it shows the 0-7 context by adding invisible points
# Add invisible points to set the y-axis range for rolling mean facet
dummy_data <- data.frame(
ci_type_label = "10-Day Rolling Mean CI",
ci_value = c(0, 7),
stringsAsFactors = FALSE
)
dummy_data[[x_var]] <- range(plot_data_both[[x_var]], na.rm = TRUE)
dummy_data[["season"]] <- factor("dummy", levels = levels(plot_data_both[["season"]]))
g_both <- g_both +
ggplot2::geom_point(data = dummy_data,
ggplot2::aes_string(x = x_var, y = "ci_value"),
alpha = 0, size = 0) # Invisible points to set scale
# Display the combined faceted plot
subchunkify(g_both, 2.8, 10)
}
}, error = function(e) {
safe_log(paste("Error creating CI trend plot for pivot", pivotName, ":", e$message), "ERROR")
cum_ci_plot2(pivotName) # Use fallback function in case of error
})
}
#' Fallback function for creating CI visualization when data is missing
#'
#' @param pivotName The name or ID of the pivot field to visualize
#' @return NULL (adds output directly to R Markdown document)
#'
cum_ci_plot2 <- function(pivotName){
# Input validation
if (missing(pivotName) || is.null(pivotName) || pivotName == "") {
stop("pivotName is required")
}
# Create a simple plot showing "No data available"
tryCatch({
end_date <- Sys.Date()
start_date <- end_date %m-% months(11) # 11 months ago from end_date
date_seq <- seq.Date(from = start_date, to = end_date, by = "month")
midpoint_date <- start_date + (end_date - start_date) / 2
g <- ggplot() +
scale_x_date(limits = c(start_date, end_date), date_breaks = "1 month", date_labels = "%m-%Y") +
scale_y_continuous(limits = c(0, 4)) +
labs(title = paste("14 day rolling MEAN CI rate - Field ", pivotName),
x = "Date", y = "CI Rate") +
theme_minimal() +
theme(axis.text.x = element_text(hjust = 0.5),
legend.justification = c(1, 0), legend.position = c(1, 0),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
annotate("text", x = midpoint_date, y = 2, label = "No data available", size = 6, hjust = 0.5)
subchunkify(g, 3.2, 10)
}, error = function(e) {
safe_log(paste("Error creating fallback CI plot for pivot", pivotName, ":", e$message), "ERROR")
cat(paste("No data available for field", pivotName, "\n"))
})
}
#' Gets the file path for a specific week's mosaic
#'
#' @param mosaic_path Base directory containing mosaic files
#' @param input_date Reference date to calculate from
#' @param week_offset Number of weeks to offset from input date (positive or negative)
#' @return File path to the requested week's mosaic TIF file
#'
get_week_path <- function(mosaic_path, input_date, week_offset) {
# Input validation
if (missing(mosaic_path) || is.null(mosaic_path) || mosaic_path == "") {
stop("mosaic_path is required")
}
if (missing(input_date)) {
stop("input_date is required")
}
tryCatch({
# Convert input_date to Date object (in case it's a string)
input_date <- as.Date(input_date)
if (is.na(input_date)) {
stop("Invalid input_date. Expected a Date object or a string convertible to Date.")
}
# Validate week_offset
week_offset <- as.integer(week_offset)
if (is.na(week_offset)) {
stop("Invalid week_offset. Expected an integer value.")
}
# Get the start of the week for the input date (adjust to Monday as the start of the week)
start_of_week <- lubridate::floor_date(input_date, unit = "week", week_start = 1)
# Calculate the new date after applying the week offset
target_date <- start_of_week + lubridate::weeks(week_offset)
# Get the week number and year of the target date
target_week <- sprintf("%02d", lubridate::isoweek(target_date)) # Left-pad week number with a zero if needed
target_year <- lubridate::isoyear(target_date)
# Primary approach: Try single-file mosaic path first
path_to_week <- here::here(mosaic_path, paste0("week_", target_week, "_", target_year, ".tif"))
# Smart fallback: If single-file doesn't exist AND path contains "weekly_mosaic", check for tiles
if (!file.exists(path_to_week) && grepl("weekly_mosaic", mosaic_path)) {
# Try to locate tile-based mosaics in weekly_tile_max instead
tile_mosaic_path <- sub("weekly_mosaic", "weekly_tile_max", mosaic_path)
# Look for any tile files matching the week pattern (e.g., week_XX_YYYY_00.tif, week_XX_YYYY_01.tif, etc.)
if (dir.exists(tile_mosaic_path)) {
tile_files <- list.files(tile_mosaic_path,
pattern = paste0("^week_", target_week, "_", target_year, "_(\\d{2})\\.tif$"),
full.names = TRUE)
if (length(tile_files) > 0) {
# Found tiles - return the first tile as primary, note that multiple tiles exist
safe_log(paste("Single-file mosaic not found for week", target_week, target_year,
"but found", length(tile_files), "tile files in weekly_tile_max. Using tile approach."), "INFO")
# Return first tile - caller should aggregate if needed
path_to_week <- tile_files[1] # Return first tile; downstream can handle multiple tiles
}
}
}
# Log the path calculation
safe_log(paste("Calculated path for week", target_week, "of year", target_year, ":", path_to_week), "INFO")
# Return the path
return(path_to_week)
}, error = function(e) {
safe_log(paste("Error calculating week path:", e$message), "ERROR")
stop(e$message)
})
}
#' Computes historical percentile benchmarks for CI data per estate
#'
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DOY, cumulative_CI, value, season columns
#' @param estate_name Name of the estate/client to filter data for
#' @param percentiles Vector of percentiles to compute (e.g., c(10, 50, 90))
#' @param min_seasons Minimum number of seasons required for reliable benchmarks (default: 3)
#' @return Data frame with DOY, percentile, ci_type, benchmark_value, or NULL if insufficient data
#'
compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c(10, 50, 90), min_seasons = 3) {
# Input validation
if (missing(ci_quadrant_data) || is.null(ci_quadrant_data)) {
stop("ci_quadrant_data is required")
}
if (missing(estate_name) || is.null(estate_name) || estate_name == "") {
stop("estate_name is required")
}
if (!all(percentiles >= 0 & percentiles <= 100)) {
stop("percentiles must be between 0 and 100")
}
tryCatch({
# Filter data for the specified estate (assuming estate is not directly in data, but we can infer from context)
# Since the data is per field, and fields are unique to estates, we'll use all data but could add estate filtering if available
data_filtered <- ci_quadrant_data
# Check if we have enough seasons
unique_seasons <- unique(data_filtered$season)
if (length(unique_seasons) < min_seasons) {
safe_log(paste("Insufficient historical seasons for estate", estate_name, ":", length(unique_seasons), "seasons found, need at least", min_seasons), "WARNING")
return(NULL)
}
# Prepare data for both CI types
data_prepared <- data_filtered %>%
dplyr::ungroup() %>% # Ensure no existing groupings
dplyr::select(DOY, value, cumulative_CI, season) %>%
tidyr::pivot_longer(
cols = c("value", "cumulative_CI"),
names_to = "ci_type",
values_to = "ci_value"
) %>%
dplyr::filter(!is.na(ci_value)) # Remove NA values
# Compute percentiles for each DOY and ci_type
benchmarks <- data_prepared %>%
dplyr::group_by(DOY, ci_type) %>%
dplyr::summarise(
p10 = tryCatch(quantile(ci_value, 0.1, na.rm = TRUE), error = function(e) NA_real_),
p50 = tryCatch(quantile(ci_value, 0.5, na.rm = TRUE), error = function(e) NA_real_),
p90 = tryCatch(quantile(ci_value, 0.9, na.rm = TRUE), error = function(e) NA_real_),
n_observations = n(),
.groups = 'drop'
) %>%
dplyr::filter(n_observations >= min_seasons) %>% # Only include DOYs with sufficient data
tidyr::pivot_longer(
cols = c(p10, p50, p90),
names_to = "percentile",
values_to = "benchmark_value"
) %>%
dplyr::mutate(
percentile = case_when(
percentile == "p10" ~ 10,
percentile == "p50" ~ 50,
percentile == "p90" ~ 90
)
) %>%
dplyr::filter(!is.na(benchmark_value)) # Remove any NA benchmarks
# Rename columns for clarity
benchmarks <- benchmarks %>%
dplyr::select(DOY, ci_type, percentile, benchmark_value)
safe_log(paste("Computed CI benchmarks for estate", estate_name, "with", length(unique_seasons), "seasons and", nrow(benchmarks), "benchmark points"), "INFO")
return(benchmarks)
}, error = function(e) {
safe_log(paste("Error computing CI benchmarks for estate", estate_name, ":", e$message), "ERROR")
print(paste("DEBUG: Error details:", e$message, "Call:", deparse(e$call)))
return(NULL)
})
}