Refactor KPI report generation to improve data handling and summary table creation; update field details mapping and enhance logging for better traceability.

This commit is contained in:
DimitraVeropoulou 2026-02-16 17:25:03 +01:00
parent e966d778f4
commit 2e683d0c6d
3 changed files with 176 additions and 216 deletions

1
r_app/.gitignore vendored
View file

@ -8,6 +8,7 @@ renv
*.tmp
*.swp
*.save
*.png
# Ignore files related to Rproj
.Rproj.user/

View file

@ -166,153 +166,6 @@ calculate_status_alert <- function(imminent_prob, age_week, weekly_ci_change, me
NA_character_
}
# #' Calculate Gap Filling Score KPI (2σ method)
# #' @param ci_raster Current week CI raster
# #' @param field_boundaries Field boundaries
# #' @return Data frame with field-level gap filling scores
# calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
# safe_log("Calculating Gap Filling Score KPI (placeholder)")
# # Handle both sf and SpatVector inputs
# if (!inherits(field_boundaries, "SpatVector")) {
# field_boundaries_vect <- terra::vect(field_boundaries)
# } else {
# field_boundaries_vect <- field_boundaries
# }
# # Ensure field_boundaries_vect is valid and matches field_boundaries dimensions
# n_fields_vect <- length(field_boundaries_vect)
# n_fields_sf <- nrow(field_boundaries)
# if (n_fields_sf != n_fields_vect) {
# warning(paste("Field boundary mismatch: nrow(field_boundaries)=", n_fields_sf, "vs length(field_boundaries_vect)=", n_fields_vect, ". Using actual SpatVector length."))
# }
# field_results <- data.frame()
# for (i in seq_len(nrow(field_boundaries))) {
# field_name <- field_boundaries$field[i]
# sub_field_name <- field_boundaries$sub_field[i]
# field_vect <- field_boundaries_vect[i]
# # Extract CI values using helper function
# ci_values <- extract_ci_values(ci_raster, field_vect)
# valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)]
# if (length(valid_values) > 1) {
# # Gap score using 2σ below median to detect outliers
# median_ci <- median(valid_values)
# sd_ci <- sd(valid_values)
# outlier_threshold <- median_ci - (2 * sd_ci)
# low_ci_pixels <- sum(valid_values < outlier_threshold)
# total_pixels <- length(valid_values)
# gap_score <- round((low_ci_pixels / total_pixels) * 100, 2)
# # Classify gap severity
# gap_level <- dplyr::case_when(
# gap_score < 10 ~ "Minimal",
# gap_score < 25 ~ "Moderate",
# TRUE ~ "Significant"
# )
# field_results <- rbind(field_results, data.frame(
# field = field_name,
# sub_field = sub_field_name,
# gap_level = gap_level,
# gap_score = gap_score,
# mean_ci = mean(valid_values),
# outlier_threshold = outlier_threshold
# ))
# } else {
# # Not enough valid data, fill with NA row
# field_results <- rbind(field_results, data.frame(
# field = field_name,
# sub_field = sub_field_name,
# gap_level = NA_character_,
# gap_score = NA_real_,
# mean_ci = NA_real_,
# outlier_threshold = NA_real_
# ))
# }
# }
# return(list(field_results = field_results))
# }
# #' Calculate gap filling scores for all per-field mosaics
# #' This is a wrapper function that processes multiple per-field mosaic files
# #' and calculates gap scores for each field.
# #' @param per_field_files Character vector of paths to per-field mosaic TIFFs
# #' @param field_boundaries_sf sf object with field geometries
# #' @return data.frame with Field_id and gap_score columns
# calculate_gap_scores <- function(per_field_files, field_boundaries_sf) {
# message("\nCalculating gap filling scores (2σ method)...")
# message(paste(" Using per-field mosaics for", length(per_field_files), "fields"))
# field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field)
# process_gap_for_field <- function(field_file) {
# field_id <- basename(dirname(field_file))
# field_bounds <- field_boundaries_by_id[[field_id]]
# if (is.null(field_bounds) || nrow(field_bounds) == 0) {
# return(data.frame(Field_id = field_id, gap_score = NA_real_))
# }
# tryCatch({
# field_raster <- terra::rast(field_file)
# ci_band_name <- "CI"
# if (!(ci_band_name %in% names(field_raster))) {
# return(data.frame(Field_id = field_id, gap_score = NA_real_))
# }
# field_ci_band <- field_raster[[ci_band_name]]
# names(field_ci_band) <- "CI"
# gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds)
# if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) {
# return(data.frame(Field_id = field_id, gap_score = NA_real_))
# }
# gap_scores <- gap_result$field_results
# gap_scores$Field_id <- gap_scores$field
# gap_scores <- gap_scores[, c("Field_id", "gap_score")]
# stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE))
# }, error = function(e) {
# message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message))
# data.frame(Field_id = field_id, gap_score = NA_real_)
# })
# }
# # Process fields sequentially with progress bar
# message(" Processing gap scores for ", length(per_field_files), " fields...")
# pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50)
# results_list <- lapply(seq_along(per_field_files), function(idx) {
# result <- process_gap_for_field(per_field_files[[idx]])
# utils::setTxtProgressBar(pb, idx)
# result
# })
# close(pb)
# gap_scores_df <- dplyr::bind_rows(results_list)
# if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
# gap_scores_df <- gap_scores_df %>%
# dplyr::group_by(Field_id) %>%
# dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop")
# message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields"))
# message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-",
# round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%"))
# } else {
# message(" WARNING: No gap scores calculated from per-field mosaics")
# gap_scores_df <- NULL
# }
# return(gap_scores_df)
# }
#' Build complete per-field KPI dataframe with all 22 columns
#' @param current_stats data.frame with current week statistics from load_or_calculate_weekly_stats

View file

@ -107,15 +107,15 @@ safe_log(paste("weekly_CI_mosaic path:", weekly_CI_mosaic))
# NO workspace-wide fallback that might load wrong project
# Build expected KPI file path strictly from project_dir
kpi_data_dir <- paths$kpi_reports_dir # Should be: laravel_app/storage/app/{project}/reports/kpis/field_level
kpi_data_dir <- file.path(paths$reports_dir, "kpis") # Should be: laravel_app/storage/app/{project}/reports/kpis
# Calculate week from report_date
current_week <- as.numeric(format(as.Date(report_date), "%V"))
current_year <- as.numeric(format(as.Date(report_date), "%G"))
# The ACTUAL filename format from 80_calculate_kpis.R output (after fix)
# Format: {project_dir}_kpi_summary_tables_week{WW}_{YYYY}.rds
kpi_rds_filename <- paste0(project_dir, "_kpi_summary_tables_week",
# Format: {project_dir}_field_analysis_week{WW}_{YYYY}.rds
kpi_rds_filename <- paste0(project_dir, "_field_analysis_week",
sprintf("%02d_%d", current_week, current_year), ".rds")
kpi_rds_path <- file.path(kpi_data_dir, kpi_rds_filename)
@ -139,30 +139,114 @@ if (dir.exists(kpi_data_dir)) {
}
)
# Handle new RDS structure (list with $summary_tables, $all_kpis, $field_details)
# Handle RDS structure from 80_utils_agronomic_support.R
# Expected: list(field_analysis = dataframe, kpis = list, summary_tables = list, ...)
# OR just a dataframe (for backward compatibility)
if (!is.null(loaded_data)) {
if (is.list(loaded_data) && "summary_tables" %in% names(loaded_data)) {
# New structure: extract summary_tables from the list
summary_tables <- loaded_data$summary_tables
if (!is.null(loaded_data$field_details)) {
field_details_table <- loaded_data$field_details
# Try to extract field_analysis from different possible structures
if (is.data.frame(loaded_data)) {
# Direct dataframe (simplest case)
field_details_table <- loaded_data
safe_log("✓ Loaded field_analysis dataframe directly")
} else if (is.list(loaded_data)) {
# List structure - try different key names
if ("field_analysis_df" %in% names(loaded_data)) {
field_details_table <- loaded_data$field_analysis_df
safe_log("✓ Loaded field_analysis_df from list")
} else if ("field_analysis" %in% names(loaded_data)) {
field_details_table <- loaded_data$field_analysis
safe_log("✓ Loaded field_analysis from list")
} else if ("kpis" %in% names(loaded_data)) {
# Might be the full output from orchestrator - create combined table
safe_log("✓ Found kpis list in loaded data")
# For now, skip - we need the combined field table
}
# Also check if summary_tables already exists in the RDS
if ("summary_tables" %in% names(loaded_data)) {
summary_tables <- loaded_data$summary_tables
safe_log("✓ Loaded pre-computed summary_tables from RDS")
}
safe_log("✓ Loaded KPI data (new structure with summary_tables)")
kpi_files_exist <- TRUE
} else if (is.list(loaded_data) && length(loaded_data) > 0) {
# Legacy structure: directly use as summary_tables
summary_tables <- loaded_data
safe_log("✓ Loaded KPI tables (legacy structure)")
kpi_files_exist <- TRUE
}
if (kpi_files_exist) {
safe_log(paste("✓ Available KPI tables:", paste(names(summary_tables), collapse=", ")))
# If we successfully loaded field_details_table, transform it into summary_tables
if (!is.null(field_details_table) && nrow(field_details_table) > 0) {
safe_log(paste("✓ Loaded field_details_table with", nrow(field_details_table), "fields"))
safe_log(paste(" Columns:", paste(names(field_details_table), collapse=", ")))
# Only create summary_tables if not already loaded from RDS
if (is.null(summary_tables)) {
summary_tables <- list()
# 1. Uniformity summary - GROUP BY Uniformity_Interpretation and COUNT
if ("Uniformity_Interpretation" %in% names(field_details_table)) {
summary_tables$uniformity <- field_details_table %>%
group_by(interpretation = Uniformity_Interpretation) %>%
summarise(field_count = n(), .groups = 'drop')
safe_log(" ✓ Created uniformity summary")
}
# 2. Area change summary - GROUP BY Area_Change_Interpretation and COUNT
if ("Area_Change_Interpretation" %in% names(field_details_table)) {
summary_tables$area_change <- field_details_table %>%
group_by(interpretation = Area_Change_Interpretation) %>%
summarise(field_count = n(), .groups = 'drop')
safe_log(" ✓ Created area_change summary")
}
# 3. Growth decline summary - GROUP BY Trend_Interpretation and COUNT
if ("Trend_Interpretation" %in% names(field_details_table)) {
summary_tables$growth_decline <- field_details_table %>%
group_by(trend_interpretation = Trend_Interpretation) %>%
summarise(field_count = n(), .groups = 'drop')
safe_log(" ✓ Created growth_decline summary")
}
# 4. Weed pressure summary - GROUP BY Weed_Pressure_Risk and COUNT
if ("Weed_Pressure_Risk" %in% names(field_details_table)) {
summary_tables$weed_pressure <- field_details_table %>%
group_by(weed_pressure_risk = Weed_Pressure_Risk) %>%
summarise(field_count = n(), .groups = 'drop')
safe_log(" ✓ Created weed_pressure summary")
}
# 5. TCH forecast summary - bin into categories and COUNT
if ("TCH_Forecasted" %in% names(field_details_table)) {
summary_tables$tch_forecast <- field_details_table %>%
filter(!is.na(TCH_Forecasted)) %>%
mutate(
tch_category = case_when(
TCH_Forecasted >= quantile(TCH_Forecasted, 0.75, na.rm = TRUE) ~ "Top 25%",
TCH_Forecasted >= quantile(TCH_Forecasted, 0.25, na.rm = TRUE) ~ "Average",
TRUE ~ "Lowest 25%"
)
) %>%
group_by(tch_category) %>%
summarise(field_count = n(), .groups = 'drop')
safe_log(" ✓ Created tch_forecast summary")
}
# 6. Gap filling summary - GROUP BY Gap_Level and COUNT
if ("Gap_Level" %in% names(field_details_table)) {
summary_tables$gap_filling <- field_details_table %>%
group_by(gap_level = Gap_Level) %>%
summarise(field_count = n(), .groups = 'drop')
safe_log(" ✓ Created gap_filling summary")
}
safe_log(paste("✓ Created", length(summary_tables), "summary tables from field_details"))
}
kpi_files_exist <- TRUE
} else {
safe_log("ERROR: Could not extract field_analysis dataframe from RDS", "ERROR")
}
}
} else {
safe_log(paste("KPI file not found in:", kpi_rds_path), "WARNING")
safe_log(paste("Expected file:", kpi_rds_filename), "WARNING")
safe_log(paste("KPI file not found:", kpi_rds_path), "WARNING")
safe_log(paste("Files in directory:", paste(list.files(kpi_data_dir, pattern="\\.rds$"), collapse=", ")), "WARNING")
}
} else {
@ -172,6 +256,20 @@ if (dir.exists(kpi_data_dir)) {
if (!kpi_files_exist) {
safe_log(paste("Skipping KPI sections - no data for", project_dir, "on", report_date), "WARNING")
summary_tables <- NULL
field_details_table <- NULL
}
# DEBUG: Log what was actually loaded
if (exists("summary_tables") && !is.null(summary_tables)) {
safe_log(paste("✓ summary_tables available with", length(summary_tables), "KPIs"))
for (kpi_name in names(summary_tables)) {
kpi_df <- summary_tables[[kpi_name]]
if (!is.null(kpi_df) && is.data.frame(kpi_df)) {
safe_log(paste(" -", kpi_name, ":", nrow(kpi_df), "rows"))
}
}
} else {
safe_log("WARNING: summary_tables is NULL or does not exist", "WARNING")
}
```
@ -306,7 +404,7 @@ if (!is.null(CI_quadrant) && nrow(CI_quadrant) > 0) {
## Key Insights
```{r key_insights, echo=FALSE, results='asis'}
```{r key_insights, echo=TRUE, message=TRUE, warning=TRUE, results='asis'}
# Calculate key insights from KPI data
if (exists("summary_tables") && !is.null(summary_tables) && length(summary_tables) > 0) {
@ -319,7 +417,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table
group_by(interpretation) %>%
summarise(count = n(), .groups = 'drop')
for (i in 1:nrow(uniformity_counts)) {
for (i in seq_len(nrow(uniformity_counts))) {
status <- uniformity_counts$interpretation[i]
count <- uniformity_counts$count[i]
if (!is.na(status) && !is.na(count) && count > 0) {
@ -335,7 +433,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table
group_by(interpretation) %>%
summarise(count = n(), .groups = 'drop')
for (i in 1:nrow(area_counts)) {
for (i in seq_len(nrow(area_counts))) {
status <- area_counts$interpretation[i]
count <- area_counts$count[i]
if (!is.na(status) && !is.na(count) && count > 0) {
@ -367,7 +465,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table
group_by(weed_pressure_risk) %>%
summarise(count = n(), .groups = 'drop')
for (i in 1:nrow(weed_counts)) {
for (i in seq_len(nrow(weed_counts))) {
risk <- weed_counts$weed_pressure_risk[i]
count <- weed_counts$count[i]
if (!is.na(risk) && !is.na(count) && count > 0) {
@ -398,12 +496,12 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table
tryCatch({
# KPI metadata for display
kpi_display_order <- list(
uniformity = list(display = "Field Uniformity", level_col = "Status", count_col = "Field Count"),
area_change = list(display = "Area Change", level_col = "Status", count_col = "Field Count"),
tch_forecast = list(display = "TCH Forecasted", level_col = NULL, count_col = "Fields"),
growth_decline = list(display = "Growth Decline", level_col = "Trend", count_col = "Field Count"),
weed_pressure = list(display = "Weed Presence", level_col = "Risk Level", count_col = "Field Count"),
gap_filling = list(display = "Gap Filling", level_col = NULL, count_col = NULL)
uniformity = list(display = "Field Uniformity", level_col = "interpretation", count_col = "field_count"),
area_change = list(display = "Area Change", level_col = "interpretation", count_col = "field_count"),
tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", count_col = "field_count"),
growth_decline = list(display = "Growth Decline", level_col = "trend_interpretation", count_col = "field_count"),
weed_pressure = list(display = "Weed Presence", level_col = "weed_pressure_risk", count_col = "field_count"),
gap_filling = list(display = "Gap Filling", level_col = "gap_level", count_col = "field_count")
)
standardize_kpi <- function(df, level_col, count_col) {
@ -479,7 +577,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table
}
}
print(ft)
ft
} else {
cat("No valid KPI summary tables found for display.\n")
}
@ -494,6 +592,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table
}
```
\newpage
## Field Alerts
```{r field_alerts_table, echo=FALSE, results='asis'}
@ -839,6 +938,7 @@ tryCatch({
})
```
\newpage
### Chlorophyll Index (CI) Overview Map - Current Week
```{r render_farm_ci_map, echo=FALSE, message=FALSE, warning=FALSE, fig.height=5.5, fig.width=6.5, dpi=150, dev='png', message=FALSE, warning=FALSE}
@ -947,6 +1047,7 @@ tryCatch({
})
```
\newpage
### Weekly Chlorophyll Index Difference Map
```{r render_farm_ci_diff_map, echo=FALSE, fig.height=5.5, fig.width=6.5, dpi=150, dev='png', message=FALSE, warning=FALSE}
@ -1064,10 +1165,10 @@ tryCatch({
This section provides detailed, field-specific analyses including chlorophyll index maps, trend graphs, and performance metrics. Each field is analyzed individually to support targeted interventions.
**Key Elements per Field:**
- Current and historical CI maps
- Week-over-week change visualizations
- Cumulative growth trends
- Field-specific KPI summaries
- Current and historical CI maps
- Week-over-week change visualizations
- Cumulative growth trends
- Field-specific KPI summaries
*Navigate to the following pages for individual field reports.*
@ -1294,6 +1395,7 @@ tryCatch({
})
```
\newpage
## KPI Summary by Field
## Detailed Field Performance Summary
@ -1337,46 +1439,50 @@ if (!is.null(CI_quadrant) && nrow(CI_quadrant) > 0) {
safe_log("CI quadrant data unavailable - field ages will not be included in detailed table", "WARNING")
}
# Clean up the field details table - remove sub field column and round numeric values
# Check if field_details_table was loaded successfully
if (!exists("field_details_table") || is.null(field_details_table)) {
# Initialize empty tibble with expected columns
field_details_clean <- tibble(
Field = character(),
`Field Size (ha)` = numeric(),
`Growth Uniformity` = character(),
`Yield Forecast (t/ha)` = numeric(),
`Gap Score` = numeric(),
`Decline Risk` = character(),
`Weed Risk` = character(),
`Mean CI` = numeric(),
`CV Value` = numeric()
)
# Transform field_details_table to display format with proper column names
if (!exists("field_details_table") || is.null(field_details_table) || nrow(field_details_table) == 0) {
safe_log("No field details available for table", "WARNING")
cat("No field-level KPI data available for this report period.\n")
} else {
# Map raw KPI columns to display names
field_details_clean <- field_details_table %>%
mutate(
Field = Field_id,
`Field Size (ha)` = NA_real_, # Not available in KPI output, would need to come from boundaries
`Growth Uniformity` = Uniformity_Interpretation,
`Yield Forecast (t/ha)` = TCH_Forecasted,
`Gap Score` = Gap_Score,
`Decline Risk` = Decline_Severity,
`Weed Risk` = Weed_Pressure_Risk,
`Mean CI` = Mean_CI,
`CV Value` = CV
) %>%
left_join(field_ages, by = "Field") %>%
mutate(
`Yield Forecast (t/ha)` = ifelse(is.na(Age_days) | Age_days < 240, NA_real_, `Yield Forecast (t/ha)`)
# Only show yield forecast for fields >= 240 days old
`Yield Forecast (t/ha)` = if_else(is.na(Age_days) | Age_days < 240,
NA_real_,
`Yield Forecast (t/ha)`),
# Round numeric columns
`Mean CI` = round(`Mean CI`, 2),
`CV Value` = round(`CV Value`, 2),
`Gap Score` = round(`Gap Score`, 0),
`Yield Forecast (t/ha)` = round(`Yield Forecast (t/ha)`, 1)
) %>%
select(Field, `Field Size (ha)`, `Growth Uniformity`, `Yield Forecast (t/ha)`, `Gap Score`, `Decline Risk`, `Weed Risk`, `Mean CI`, `CV Value`) %>% # Reorder columns as requested
mutate(
`Mean CI` = round(`Mean CI`, 2), # Round to 2 decimal places
`CV Value` = round(`CV Value`, 2), # Round to 2 decimal places
`Gap Score` = round(`Gap Score`, 0) # Round to nearest integer
)
select(Field, `Field Size (ha)`, `Growth Uniformity`, `Yield Forecast (t/ha)`,
`Gap Score`, `Decline Risk`, `Weed Risk`, `Mean CI`, `CV Value`)
# Display the cleaned field table with flextable
col_widths <- c(0.97, 0.73, 0.80, 0.80, 0.65, 0.73, 0.65, 0.56, 0.48)
ft <- flextable(field_details_clean) %>%
set_caption("Detailed Field Performance Summary") %>%
width(width = col_widths) %>%
theme_booktabs()
knit_print(ft)
}
# Display the cleaned field table with flextable
# Set column widths to fit page (approximately 6.5 inches for 1-inch margins)
# Scale widths proportionally: original total = 8.0 inches, scale to 6.2 inches
col_widths <- c(0.97, 0.73, 0.80, 0.80, 0.65, 0.73, 0.65, 0.56, 0.48) # inches for each column
ft <- flextable(field_details_clean) %>%
set_caption("Detailed Field Performance Summary") %>%
width(width = col_widths)
ft
```
\newpage