SmartCane/r_app/80_report_building_utils.R

251 lines
9.6 KiB
R

# 80_REPORT_BUILDING_UTILS.R
# ============================================================================
# UTILITY FUNCTIONS FOR REPORT GENERATION AND EXCEL/CSV EXPORT
#
# This file contains reusable functions for:
# - Field analysis summary generation
# - Excel/CSV/RDS export functionality
# - Farm-level KPI aggregation and summary
# - Tile-based KPI extraction (alternative calculation method)
#
# Used by: 80_calculate_kpis.R, run_full_pipeline.R, other reporting scripts
# ============================================================================
# ============================================================================
# SUMMARY GENERATION
# ============================================================================
generate_field_analysis_summary <- function(field_df) {
message("Generating summary statistics...")
total_acreage <- sum(field_df$Acreage, na.rm = TRUE)
germination_acreage <- sum(field_df$Acreage[field_df$Phase == "Germination"], na.rm = TRUE)
tillering_acreage <- sum(field_df$Acreage[field_df$Phase == "Tillering"], na.rm = TRUE)
grand_growth_acreage <- sum(field_df$Acreage[field_df$Phase == "Grand Growth"], na.rm = TRUE)
maturation_acreage <- sum(field_df$Acreage[field_df$Phase == "Maturation"], na.rm = TRUE)
unknown_phase_acreage <- sum(field_df$Acreage[field_df$Phase == "Unknown"], na.rm = TRUE)
harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE)
stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE)
recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE)
growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE)
germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE)
germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE)
no_trigger_acreage <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE)
clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE)
partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE)
no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE)
total_fields <- nrow(field_df)
clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE)
partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE)
no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE)
summary_df <- data.frame(
Category = c(
"--- PHASE DISTRIBUTION ---",
"Germination",
"Tillering",
"Grand Growth",
"Maturation",
"Unknown phase",
"--- STATUS TRIGGERS ---",
"Harvest ready",
"Stress detected",
"Strong recovery",
"Growth on track",
"Germination complete",
"Germination started",
"No trigger",
"--- CLOUD COVERAGE (FIELDS) ---",
"Clear view",
"Partial coverage",
"No image available",
"--- CLOUD COVERAGE (ACREAGE) ---",
"Clear view",
"Partial coverage",
"No image available",
"--- TOTAL ---",
"Total Acreage"
),
Acreage = c(
NA,
round(germination_acreage, 2),
round(tillering_acreage, 2),
round(grand_growth_acreage, 2),
round(maturation_acreage, 2),
round(unknown_phase_acreage, 2),
NA,
round(harvest_ready_acreage, 2),
round(stress_acreage, 2),
round(recovery_acreage, 2),
round(growth_on_track_acreage, 2),
round(germination_complete_acreage, 2),
round(germination_started_acreage, 2),
round(no_trigger_acreage, 2),
NA,
paste0(clear_fields, " fields"),
paste0(partial_fields, " fields"),
paste0(no_image_fields, " fields"),
NA,
round(clear_acreage, 2),
round(partial_acreage, 2),
round(no_image_acreage, 2),
NA,
round(total_acreage, 2)
),
stringsAsFactors = FALSE
)
return(summary_df)
}
# ============================================================================
# EXPORT FUNCTIONS
# ============================================================================
export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, year, reports_dir) {
message("Exporting per-field analysis to Excel, CSV, and RDS...")
field_df_rounded <- field_df %>%
mutate(across(where(is.numeric), ~ round(., 2)))
summary_df_rounded <- summary_df %>%
mutate(across(where(is.numeric), ~ round(., 2)))
output_subdir <- file.path(reports_dir, "kpis", "field_analysis")
if (!dir.exists(output_subdir)) {
dir.create(output_subdir, recursive = TRUE)
}
excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".xlsx")
excel_path <- file.path(output_subdir, excel_filename)
excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
sheets <- list(
"Field Data" = field_df_rounded,
"Summary" = summary_df_rounded
)
write_xlsx(sheets, excel_path)
message(paste("✓ Field analysis Excel exported to:", excel_path))
kpi_data <- list(
field_analysis = field_df_rounded,
field_analysis_summary = summary_df_rounded,
metadata = list(
current_week = current_week,
year = year,
project = project_dir,
created_at = Sys.time()
)
)
rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds")
rds_path <- file.path(reports_dir, "kpis", rds_filename)
saveRDS(kpi_data, rds_path)
message(paste("✓ Field analysis RDS exported to:", rds_path))
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".csv")
csv_path <- file.path(output_subdir, csv_filename)
write_csv(field_df_rounded, csv_path)
message(paste("✓ Field analysis CSV exported to:", csv_path))
return(list(excel = excel_path, rds = rds_path, csv = csv_path))
}
# ============================================================================
# TILE-BASED KPI EXTRACTION (Alternative calculation method)
# ============================================================================
# [COMMENTED OUT / UNUSED - kept for reference]
# These functions provide tile-based extraction as an alternative to field_statistics approach
# Currently replaced by calculate_field_statistics() in 80_weekly_stats_utils.R
# Uncomment if parallel processing of tiles is needed in future
# calculate_field_kpis_from_tiles <- function(tile_dir, week_num, year, field_boundaries_sf, tile_grid) {
# message("Calculating field-level KPI statistics from tiles...")
#
# tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
# tile_files <- list.files(tile_dir, pattern = tile_pattern, full.names = TRUE)
#
# if (length(tile_files) == 0) {
# message("No tiles found for week", week_num, year)
# return(NULL)
# }
#
# message(paste("Processing", length(tile_files), "tiles in parallel..."))
#
# field_kpi_list <- furrr::future_map(
# tile_files,
# ~ process_single_kpi_tile(
# tile_file = .,
# field_boundaries_sf = field_boundaries_sf,
# tile_grid = tile_grid
# ),
# .progress = TRUE,
# .options = furrr::furrr_options(seed = TRUE)
# )
#
# field_kpi_stats <- dplyr::bind_rows(field_kpi_list)
#
# if (nrow(field_kpi_stats) == 0) {
# message(" No KPI data extracted from tiles")
# return(NULL)
# }
#
# message(paste(" Extracted KPI stats for", length(unique(field_kpi_stats$field)), "unique fields"))
# return(field_kpi_stats)
# }
# process_single_kpi_tile <- function(tile_file, field_boundaries_sf, tile_grid) {
# # Helper function for calculate_field_kpis_from_tiles
# tryCatch({
# tile_basename <- basename(tile_file)
# tile_raster <- terra::rast(tile_file)
# ci_band <- tile_raster[[1]]
#
# field_bbox <- sf::st_bbox(field_boundaries_sf)
# ci_cropped <- terra::crop(ci_band, terra::ext(field_bbox), snap = "out")
#
# extracted_vals <- terra::extract(ci_cropped, field_boundaries_sf, fun = "mean", na.rm = TRUE)
#
# tile_results <- data.frame()
# tile_id_match <- as.numeric(sub(".*_(\\d{2})\\.tif$", "\\1", tile_basename))
#
# for (field_idx in seq_len(nrow(field_boundaries_sf))) {
# field_id <- field_boundaries_sf$field[field_idx]
# mean_ci <- extracted_vals[field_idx, 2]
#
# if (is.na(mean_ci)) {
# next
# }
#
# tile_results <- rbind(tile_results, data.frame(
# field = field_id,
# tile_id = tile_id_match,
# tile_file = tile_basename,
# mean_ci = round(mean_ci, 4),
# stringsAsFactors = FALSE
# ))
# }
#
# return(tile_results)
#
# }, error = function(e) {
# message(paste(" Warning: Error processing tile", basename(tile_file), ":", e$message))
# return(data.frame())
# })
# }
# calculate_and_export_farm_kpis <- function(report_date, project_dir, field_boundaries_sf,
# harvesting_data, cumulative_CI_vals_dir,
# weekly_CI_mosaic, reports_dir, current_week, year,
# tile_grid, use_tile_mosaic = FALSE, tile_grid_size = "5x5") {
# # Farm-level KPI calculation using tile-based extraction (alternative approach)
# # [Implementation kept as reference for alternative calculation method]
# }