Update .gitignore to include cache files, enhance R Markdown report paths, and improve KPI calculation script for clarity and error handling

This commit is contained in:
Timon 2026-02-10 17:50:12 +01:00
parent f2da320fb6
commit 3d3df151d3
6 changed files with 164 additions and 192 deletions

4
.gitignore vendored
View file

@ -45,6 +45,10 @@ output/
reports/
*.docx
# Cache Files
rosm.cache/
*.cache
# Logs
*.log
package_manager.log

View file

@ -369,7 +369,8 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
for (i in seq_len(nrow(field_boundaries))) {
field_name <- if ("field" %in% names(field_boundaries)) field_boundaries$field[i] else NA_character_
sub_field_name <- if ("sub_field" %in% names(field_boundaries)) field_boundaries$sub_field[i] else NA_character_ field_vect <- field_boundaries_vect[i]
sub_field_name <- if ("sub_field" %in% names(field_boundaries)) field_boundaries$sub_field[i] else NA_character_
field_vect <- field_boundaries_vect[i]
# Extract CI values using helper function
ci_values <- extract_ci_values(ci_raster, field_vect)

View file

@ -12,7 +12,7 @@ params:
x_axis_unit: "days" # x-axis unit for trend plots: "days" or "weeks"
output:
word_document:
reference_docx: !expr file.path("word-styles-reference-var1.docx")
reference_docx: !expr file.path(here::here("r_app/word-styles-reference-var1.docx"))
toc: no
editor_options:
chunk_output_type: console
@ -53,6 +53,8 @@ suppressPackageStartupMessages({
# Visualization
library(tmap) # For interactive maps (field boundary visualization)
library(ggspatial) # For basemap tiles (annotation_map_tile for hexbin overlay)
library(hexbin) # For hexbin plots in ggplot2
# Reporting
library(knitr) # For R Markdown document generation (code execution and output)
@ -95,6 +97,20 @@ tryCatch({
# Load centralized paths
paths <- setup_project_directories(project_dir)
# Make paths globally available for use in other chunks (especially hexbin)
assign("paths", paths, envir = globalenv())
# Load field boundaries for hexbin map and spatial operations
tryCatch({
boundaries_list <- load_field_boundaries(paths$data_dir)
field_boundaries_sf <- boundaries_list$field_boundaries_sf
assign("field_boundaries_sf", field_boundaries_sf, envir = globalenv())
safe_log("✓ Field boundaries loaded successfully")
}, error = function(e) {
safe_log(paste("⚠ Could not load field boundaries:", e$message), "WARNING")
safe_log("Hexbin map will not be available", "WARNING")
})
# Log initial configuration
safe_log("Starting the R Markdown script with KPIs")
safe_log(paste("mail_day params:", params$mail_day))
@ -102,7 +118,7 @@ safe_log(paste("report_date params:", params$report_date))
safe_log(paste("mail_day variable:", mail_day))
```
```{r load_kpi_data, message=FALSE, warning=FALSE}
```{r load_kpi_data, message=FALSE, warning=FALSE, include=FALSE}
## SIMPLE KPI LOADING - robust lookup with fallbacks
# First, show working directory for debugging
@ -194,6 +210,9 @@ if (!is.null(summary_file) && file.exists(summary_file)) {
cat(paste(" File size:", file.size(summary_file), "bytes\n"))
summary_data <- tryCatch(readRDS(summary_file), error = function(e) { cat(paste("ERROR reading RDS:", e$message, "\n")); NULL })
# Explicitly assign to global environment for downstream chunks
assign("summary_data", summary_data, envir = globalenv())
if (!is.null(summary_data)) {
cat(paste(" ✓ Loaded successfully. Class:", class(summary_data), "\n"))
if (is.list(summary_data)) {
@ -243,7 +262,9 @@ if (!is.null(field_details_file) && file.exists(field_details_file)) {
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
field_details_table <- summary_data$field_analysis %>%
rename(`Mean CI` = Mean_CI, `CV Value` = CV, Field = Field_id)
safe_log("Extracted field details from field_analysis data")}
safe_log("Extracted field details from field_analysis data")
}
}
if (kpi_files_exist) {
safe_log("✓ KPI summary tables loaded successfully")
@ -446,39 +467,37 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
**Section 1:** Cane supply zone analyses, summaries and Key Performance Indicators (KPIs)
**Section 2:** Explanation of the report, definitions, methodology, and CSV export structure
\newpage
# Section 1: Farm-wide Analyses and KPIs
## 1.1 Overview of cane supply area, showing zones with number of acres being harvest ready
```{r overview_map, fig.width=9, fig.height=7, fig.align="center", echo=FALSE}
```{r overview_map, fig.width=9, fig.height=7, fig.align="center", echo=FALSE, message=FALSE, warning=FALSE}
# Create a hexbin overview map with ggplot
tryCatch({
# Try to load in the field analysis from this week
tryCatch({
# Ensure week is zero-padded (e.g., 04) to match filenames like *_week04_2026.xlsx
week_padded <- sprintf("%02d", as.numeric(current_week))
analysis_path <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_analysis", paste0(params$data_dir, "_field_analysis_week", week_padded, "_", year, ".xlsx"))
analysis_data <- read_excel(analysis_path)
}, error = function(e) {
warning("Error loading field analysis data:", e$message)
})
# Fallback: if analysis_data failed to load, create an empty tibble with required columns
if (!exists("analysis_data") || is.null(analysis_data)) {
# Use per-field field_analysis data from RDS (already loaded in load_kpi_data chunk)
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
analysis_data <- summary_data$field_analysis %>%
select(Field_id, Status_Alert) %>%
rename(Status_trigger = Status_Alert) # Rename to Status_trigger for compatibility with hexbin logic
} else {
analysis_data <- tibble(Field_id = character(), Status_trigger = character())
}
# Load field boundaries if not already available (should be from initialize_project_config)
if (!exists("field_boundaries_sf")) {
if (exists("paths") && !is.null(paths$data_dir)) {
boundaries_list <- load_field_boundaries(paths$data_dir)
field_boundaries_sf <- boundaries_list$field_boundaries_sf
} else {
stop("Cannot locate field boundaries: paths$data_dir not available")
}
}
# Define constants
ACRE_CONV <- 4046.856
TARGET_CRS <- 32736
TARGET_CRS <- 4326 # WGS84 for web basemap compatibility (was 32736 UTM)
# Process polygons into points
points_processed <- field_boundaries_sf %>%
@ -500,22 +519,31 @@ tryCatch({
st_centroid() %>%
bind_cols(st_coordinates(.))
# Subsetting
# Validate coordinates - check for NaN, Inf, or missing values
if (any(!is.finite(points_processed$X)) || any(!is.finite(points_processed$Y))) {
points_processed <- points_processed %>%
filter(is.finite(X) & is.finite(Y))
}
# Final validation: ensure we have valid coordinates before proceeding
valid_x <- is.finite(points_processed$X) & is.finite(points_processed$Y)
if (!any(valid_x)) {
stop("No valid coordinates found in field boundaries. Cannot create hexbin map.")
}
# Subsetting by harvest status
points_ready <- points_processed %>%
filter(Status_trigger == "harvest_ready")
points_not_ready <- points_processed %>%
filter(Status_trigger != "harvest_ready" | is.na(Status_trigger))
# Generate breaks for the plotting
# Generate breaks for color gradients
breaks_vec <- c(0, 5, 10, 15, 20, 30, 35)
labels_vec <- as.character(breaks_vec)
labels_vec[length(labels_vec)] <- ">30"
labels_vec[1] <- "0.1"
# Set CRS
map_crs <- st_crs(points_processed)
# Create dummy point to anchor hexbin grids
# Create dummy point to anchor hexbin grids for consistency
dummy_point <- data.frame(
field = NA,
sub_field = NA,
@ -526,92 +554,96 @@ tryCatch({
geometry = NA
)
# Add dummy point
# Convert dummy point to sf and add xy coordinates
dummy_point <- st_as_sf(dummy_point, coords = c("X", "Y"), crs = st_crs(points_ready))
dummy_point <- cbind(dummy_point, st_coordinates(dummy_point))
# Add dummy point to ensure consistent hexbin grid anchoring
points_ready <- rbind(points_ready, dummy_point)
points_not_ready <- rbind(points_not_ready, dummy_point)
# Create the plot
hexbin <- ggplot() +
# Add OSM basemap
annotation_map_tile(type = "osm", zoom = 11, progress = "none", alpha = 0.5) +
# Add the hexbins for not ready points
# Calculate data bounds for coordinate limits (prevents basemap scale conflicts)
x_limits <- c(
floor(min(points_processed$X, na.rm = TRUE) * 20) / 20, # Round down to avoid edge clipping
ceiling(max(points_processed$X, na.rm = TRUE) * 20) / 20 # Round up for padding
)
y_limits <- c(
floor(min(points_processed$Y, na.rm = TRUE) * 20) / 20,
ceiling(max(points_processed$Y, na.rm = TRUE) * 20) / 20
)
# Create hexbin map with enhanced aesthetics, basemap, and proper legend
ggplot() +
# OpenStreetMap basemap (zoom=11 appropriate for agricultural fields)
ggspatial::annotation_map_tile(type = "osm", zoom = 11, progress = "none", alpha = 0.5) +
# Hexbin for NOT ready fields (light background)
geom_hex(
data = points_not_ready,
aes(x = X, y = Y, weight = area_ac, alpha = "Not harvest ready within 1 month."),
binwidth = c(1500, 1500),
fill = "#ffffff",
colour = "#0000009a",
linewidth = 0.1
data = points_not_ready,
aes(x = X, y = Y, weight = area_ac, alpha = "Not harvest ready"),
binwidth = c(0.012, 0.012),
fill = "#ffffff",
colour = "#0000009a",
linewidth = 0.1
) +
# Add the hexbins for ready points
# Hexbin for READY fields (colored gradient)
geom_hex(
data = points_ready,
aes(x = X, y = Y, weight = area_ac),
binwidth = c(1500, 1500),
alpha = 0.9,
colour = "#0000009a",
linewidth = 0.1
data = points_ready,
aes(x = X, y = Y, weight = area_ac),
binwidth = c(0.012, 0.012),
alpha = 0.9,
colour = "#0000009a",
linewidth = 0.1
) +
# Create colour bins
# Color gradient scale for acreage
scale_fill_viridis_b(
option = "viridis",
direction = -1,
breaks = breaks_vec, # Use our 0-50 sequence
labels = labels_vec, # Use our custom ">50" labels
limits = c(0, 35), # Limit the scale
oob = scales::squish, # Squish higher values into the top bin
name = "Total Acres"
option = "viridis",
direction = -1,
breaks = breaks_vec,
labels = labels_vec,
limits = c(0, 35),
oob = scales::squish,
name = "Total Acres"
) +
# Titles
labs(
subtitle = "Acres of fields 'harvest ready within a month'"
) +
# Set the CRS
coord_sf(crs = map_crs) +
theme_minimal() +
# Legend trick to add the gray colours
# Alpha scale for "not ready" status indication
scale_alpha_manual(
name = NULL, # No title needed for this specific legend item
values = 0.8 # This sets the actual transparency for the map
name = NULL,
values = 0.8
) +
# Legend customisation
# Titles and subtitle
labs(subtitle = "Acres of fields 'harvest ready within a month'") +
# Spatial coordinate system with explicit bounds (prevents basemap calc_limits conflicts)
coord_sf(crs = 4326, xlim = x_limits, ylim = y_limits, expand = FALSE) +
# Theme customization
theme_minimal() +
theme(
legend.position = "right",
legend.box = "vertical",
legend.title.align = 0.5, # Center the legend title
plot.title = element_text(face = "bold", size = 16),
plot.subtitle = element_text(size = 11)
legend.position = "right",
legend.box = "vertical",
legend.title.align = 0.5,
plot.subtitle = element_text(size = 11, face = "bold")
) +
# Customise the look of the horizontal bar
# Custom legend guides with enhanced styling
guides(
# The colour bar
fill = guide_coloursteps(
barwidth = 1,
barheight = 20,
title.position = "top",
order = 1
fill = guide_coloursteps(
barwidth = 1,
barheight = 20,
title.position = "top",
order = 1
),
alpha = guide_legend(
override.aes = list(
fill = "#ffffff",
colour = "#0000009a",
shape = 22
),
# The not ready box
alpha = guide_legend(
override.aes = list(
fill = "#ffffff",
colour = "#0000009a",
shape = 22
),
order = 2
)
order = 2
)
)
hexbin
}, error = function(e) {
warning("Error creating hexbin map:", e$message)
})
```
\newpage
## 1.2 Key Performance Indicators
@ -780,80 +812,35 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
## Cloud Coverage Summary
```{r cloud_coverage_summary, echo=FALSE}
# Display per-field cloud coverage summary
if (cloud_coverage_available && !is.null(per_field_cloud_coverage)) {
# Prepare cloud coverage table for display
# Handle both old and new column naming conventions
cloud_display <- per_field_cloud_coverage %>%
mutate(
Field = if_else("field" %in% names(per_field_cloud_coverage), field_id,
if_else("Field" %in% names(per_field_cloud_coverage), Field, field_id)),
Clear_Percent = pct_clear,
Cloud_Acreage = if_else("Cloud_Acreage" %in% names(per_field_cloud_coverage), Cloud_Acreage,
as.numeric(NA)),
Total_Acreage = if_else("Total_Acreage" %in% names(per_field_cloud_coverage), Total_Acreage,
as.numeric(NA))
) %>%
select(Field, Cloud_category, Clear_Percent, missing_pixels, clear_pixels, total_pixels) %>%
rename(
"Field" = Field,
"Cloud Status" = Cloud_category,
"Clear %" = Clear_Percent,
"Cloud Pixels" = missing_pixels,
"Clear Pixels" = clear_pixels,
"Total Pixels" = total_pixels
) %>%
arrange(Field)
# Create flextable
ft <- flextable(cloud_display) %>%
autofit()
ft
} else if (exists("cloud_coverage_available") && !cloud_coverage_available) {
cat("Cloud coverage data not available for this week.\n")
} else {
cat("Cloud coverage data not loaded.\n")
}
```
## Field Alerts
```{r field_alerts_table, echo=FALSE}
# Generate alerts table from field analysis status triggers
# Display cloud coverage summary aggregated by category
# Cloud coverage data is included in the field_analysis RDS from Script 80
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
field_analysis_table <- summary_data$field_analysis
field_analysis_df <- summary_data$field_analysis
# Extract fields with status alerts (non-null) - use Status_Alert column (not Status_trigger)
alerts_data <- field_analysis_table %>%
filter(!is.na(Status_Alert), Status_Alert != "") %>%
select(Field_id, Status_Alert) %>%
rename(Field = Field_id, Alert = Status_Alert)
# Aggregate cloud coverage by category
cloud_summary <- field_analysis_df %>%
filter(!is.na(Cloud_category)) %>%
group_by(Cloud_category) %>%
summarise(
"Number of Fields" = n(),
"Total Acreage" = round(sum(Acreage, na.rm = TRUE), 1),
.groups = "drop"
) %>%
rename("Cloud Category" = Cloud_category) %>%
arrange("Cloud Category")
if (nrow(alerts_data) > 0) {
# Format alert messages for display
alerts_data <- alerts_data %>%
mutate(
Alert = case_when(
Alert == "germination_started" ~ "Germination started - crop emerging",
Alert == "germination_complete" ~ "Germination complete - established",
Alert == "stress_detected_whole_field" ~ "Stress detected - check irrigation/disease",
Alert == "strong_recovery" ~ "Strong recovery - growth accelerating",
Alert == "growth_on_track" ~ "Growth on track - normal progression",
Alert == "maturation_progressing" ~ "Maturation progressing - ripening phase",
Alert == "harvest_ready" ~ "Harvest ready - 45+ weeks old",
TRUE ~ Alert
)
)
if (nrow(cloud_summary) > 0) {
# Create flextable
ft <- flextable(cloud_summary) %>%
autofit() %>%
theme_vanilla()
ft <- flextable(alerts_data) %>%
autofit()
ft
} else {
cat("No active status triggers this week.\n")
cat("Cloud coverage data not available for summary.\n")
}
} else {
cat("Field analysis data not available for alerts.\n")
cat("Field analysis data not available for cloud coverage summary.\n")
}
```

View file

@ -1,20 +0,0 @@
# Fix indentation for lines 408-1022 in 80_calculate_kpis.R
# These lines should be inside the else-if block at the CANE_SUPPLY_WORKFLOW level
file_path <- "r_app/80_calculate_kpis.R"
lines <- readLines(file_path)
# Lines 408-1021 (0-indexed: 407-1020) need 2 more spaces of indentation
for (i in 408:1021) {
if (i <= length(lines)) {
line <- lines[i]
# Skip empty or whitespace-only lines
if (nchar(trimws(line)) > 0) {
# Add 2 spaces
lines[i] <- paste0(" ", line)
}
}
}
writeLines(lines, file_path)
cat("Fixed indentation for lines 408-1022\n")

View file

@ -436,21 +436,23 @@
# From R console or R script:
#
# rmarkdown::render(
# "r_app/90_CI_report_with_kpis_simple.Rmd",
# params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
# output_file = "SmartCane_Report_agronomic_angata_2026-02-04.docx",
# output_dir = "laravel_app/storage/app/angata/reports"
# )
rmarkdown::render(
"r_app/90_CI_report_with_kpis_cane_supply.Rmd",
params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
output_file = "SmartCane_Report_cane_supply_angata_2026-02-10_FIXED.docx",
output_dir = "laravel_app/storage/app/angata/reports"
)
#
# COMMAND #2 - CANE SUPPLY REPORT (ANGATA, CHEMBA, XINAVANE, ESA):
# From R console or R script:
#
# rmarkdown::render(
# "r_app/91_CI_report_with_kpis_Angata.Rmd",
# params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
# output_file = "SmartCane_Report_cane_supply_angata_2026-02-04.docx",
# output_dir = "laravel_app/storage/app/angata/reports"
# )
rmarkdown::render(
"r_app/91_CI_report_with_kpis_cane_supply.Rmd",
params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
output_file = "SmartCane_Report_basemap_test.docx",
output_dir = "laravel_app/storage/app/angata/reports"
)
#
# EXPECTED OUTPUT:
# File: SmartCane_Report_*_{PROJECT}_{DATE}.docx

View file

@ -190,7 +190,6 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") {
# TIER 8: CONFIG & METADATA PATHS
field_boundaries_path <- here(data_dir, "pivot.geojson")
tiling_config_path <- here(laravel_storage_dir, "tiling_config.json")
# Return comprehensive list
return(list(
@ -231,8 +230,7 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") {
log_dir = log_dir,
# TIER 8: Metadata
field_boundaries_path = field_boundaries_path,
tiling_config_path = tiling_config_path
field_boundaries_path = field_boundaries_path
))
}