SmartCane/r_app/experiments/experiment_generate_report_with_phases.R

478 lines
17 KiB
R

# 14_GENERATE_REPORT_WITH_PHASES.R
# ==================================
# First-draft Word report generation from field analysis CSV
#
# Purpose: Take the existing field_analysis_weekly.csv (which already has phases
# calculated from 09_field_analysis_weekly.R) and generate a professional Word
# report showing:
# - Field-level phase assignment (age-based)
# - Weekly CI change
# - Current status triggers (as-is, no modifications)
# - Summary statistics by phase
#
# This is a FIRST DRAFT to test the pipeline. Once working, we can iterate on
# what gets included in the report.
#
# Usage: Rscript 14_generate_report_with_phases.R [project_dir] [report_date]
# - project_dir: Project directory name (e.g., "esa", "aura")
# - report_date: Date for report (YYYY-MM-DD), default: today
suppressPackageStartupMessages({
library(here)
library(dplyr)
library(tidyr)
library(readr)
library(lubridate)
library(officer) # For Word document generation
library(flextable) # For beautiful tables in Word
})
# ============================================================================
# CONFIGURATION
# ============================================================================
# Color scheme for status triggers
TRIGGER_COLORS <- list(
germination_started = "E8F4F8", # Light blue
germination_complete = "C6E0B4", # Light green
growth_on_track = "A9D08E", # Green
stress_detected_whole_field = "F4B084", # Orange
strong_recovery = "92D050", # Bright green
maturation_progressing = "4472C4", # Dark blue
harvest_ready = "70AD47", # Dark green
none = "D9D9D9" # Gray
)
PHASE_COLORS <- list(
Germination = "E8F4F8", # Light blue
"Early Growth" = "BDD7EE", # Blue
Tillering = "70AD47", # Green
"Grand Growth" = "92D050", # Bright green
Maturation = "FFC7CE", # Light red
"Pre-Harvest" = "F4B084", # Orange
Unknown = "D9D9D9" # Gray
)
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
#' Load field analysis CSV from reports directory
#' @param project_dir Project name
#' @param report_date Date for the report (used to find current week)
#' @param reports_dir Reports directory path
#' @return Data frame with field analysis, or NULL if not found
load_field_analysis_csv <- function(project_dir, report_date, reports_dir) {
current_week <- as.numeric(format(report_date, "%V"))
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".csv")
csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename)
message(paste("Looking for CSV at:", csv_path))
if (!file.exists(csv_path)) {
message(paste("CSV not found. Available files:"))
field_analysis_dir <- file.path(reports_dir, "kpis", "field_analysis")
if (dir.exists(field_analysis_dir)) {
files <- list.files(field_analysis_dir, pattern = project_dir)
if (length(files) > 0) {
message(paste(" -", files))
# Try to load the most recent available
most_recent <- tail(files, 1)
csv_path <- file.path(field_analysis_dir, most_recent)
message(paste("Using most recent:", most_recent))
}
}
}
if (!file.exists(csv_path)) {
warning(paste("Cannot find field analysis CSV for project:", project_dir))
return(NULL)
}
tryCatch({
data <- read_csv(csv_path, show_col_types = FALSE)
message(paste("Loaded field analysis with", nrow(data), "rows"))
return(data)
}, error = function(e) {
warning(paste("Error reading CSV:", e$message))
return(NULL)
})
}
#' Extract field-level data (exclude summary rows)
#' @param field_df Data frame from field analysis CSV
#' @return Filtered data frame with only individual field rows
extract_field_rows <- function(field_df) {
# Summary rows start with special prefixes or markers
summary_patterns <- c(
"^===",
"^ACREAGE_",
"^TRIGGER_",
"^NO_TRIGGER",
"^TOTAL_"
)
field_df <- field_df %>%
filter(!grepl(paste(summary_patterns, collapse = "|"), Field_id, ignore.case = TRUE))
return(field_df)
}
#' Extract summary statistics from field analysis CSV
#' @param field_df Data frame from field analysis CSV
#' @return List with summary statistics
extract_summary_statistics <- function(field_df) {
summary_rows <- field_df %>%
filter(grepl("^ACREAGE_|^TRIGGER_|^NO_TRIGGER|^TOTAL_", Field_id, ignore.case = TRUE))
summary_list <- list()
# Phase acreage
summary_list$germination_ha <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Germination"], na.rm = TRUE)
summary_list$tillering_ha <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Tillering"], na.rm = TRUE)
summary_list$grand_growth_ha <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Grand Growth"], na.rm = TRUE)
summary_list$maturation_ha <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Maturation"], na.rm = TRUE)
# Trigger acreage
summary_list$harvest_ready_ha <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE)
summary_list$stress_ha <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE)
summary_list$recovery_ha <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE)
summary_list$growth_on_track_ha <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE)
summary_list$germination_complete_ha <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE)
summary_list$germination_started_ha <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE)
summary_list$no_trigger_ha <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE)
summary_list$total_ha <- sum(field_df$Acreage, na.rm = TRUE)
return(summary_list)
}
#' Create a flextable from field analysis data
#' @param field_df Data frame with field data
#' @param include_cols Columns to include in table
#' @return flextable object
create_field_table <- function(field_df, include_cols = NULL) {
if (is.null(include_cols)) {
include_cols <- c("Field_id", "Acreage", "Age_week", "Phase (age based)",
"Weekly_ci_change", "Status_trigger", "CV")
}
# Filter to available columns
include_cols <- include_cols[include_cols %in% names(field_df)]
table_data <- field_df %>%
select(all_of(include_cols)) %>%
mutate(
Acreage = round(Acreage, 2),
CV = round(CV, 3),
Weekly_ci_change = as.character(Weekly_ci_change)
)
# Create flextable
ft <- flextable(table_data)
# Format header
ft <- ft %>%
bold(part = "header") %>%
bg(part = "header", bg = "#4472C4") %>%
color(part = "header", color = "white") %>%
autofit()
# Add phase color highlighting if phase column exists
if ("Phase (age based)" %in% include_cols) {
for (i in 1:nrow(table_data)) {
phase <- table_data[[i, "Phase (age based)"]]
color_val <- PHASE_COLORS[[phase]]
if (!is.null(color_val)) {
ft <- ft %>%
bg(i = i + 1, j = "Phase (age based)", bg = color_val)
}
}
}
# Add status trigger color highlighting if trigger column exists
if ("Status_trigger" %in% include_cols) {
for (i in 1:nrow(table_data)) {
trigger <- table_data[[i, "Status_trigger"]]
if (is.na(trigger)) trigger <- "none"
color_val <- TRIGGER_COLORS[[trigger]]
if (!is.null(color_val)) {
ft <- ft %>%
bg(i = i + 1, j = "Status_trigger", bg = color_val)
}
}
}
return(ft)
}
#' Create summary statistics table
#' @param summary_list List from extract_summary_statistics()
#' @return flextable object
create_summary_table <- function(summary_list) {
summary_df <- data.frame(
Category = c(
"PHASE DISTRIBUTION",
" Germination",
" Tillering",
" Grand Growth",
" Maturation",
"",
"STATUS TRIGGERS",
" Harvest Ready",
" Stress Detected",
" Strong Recovery",
" Growth On Track",
" Germination Complete",
" Germination Started",
" No Trigger",
"",
"TOTAL ACREAGE"
),
Hectares = c(
NA,
summary_list$germination_ha,
summary_list$tillering_ha,
summary_list$grand_growth_ha,
summary_list$maturation_ha,
NA,
NA,
summary_list$harvest_ready_ha,
summary_list$stress_ha,
summary_list$recovery_ha,
summary_list$growth_on_track_ha,
summary_list$germination_complete_ha,
summary_list$germination_started_ha,
summary_list$no_trigger_ha,
NA,
summary_list$total_ha
),
stringsAsFactors = FALSE
)
summary_df$Hectares <- round(summary_df$Hectares, 2)
ft <- flextable(summary_df)
ft <- ft %>%
bold(part = "header") %>%
bg(part = "header", bg = "#4472C4") %>%
color(part = "header", color = "white") %>%
autofit()
return(ft)
}
# ============================================================================
# MAIN REPORT GENERATION
# ============================================================================
generate_word_report <- function(project_dir, report_date, reports_dir, output_path) {
message("=== GENERATING WORD REPORT WITH PHASES ===\n")
# Load field analysis CSV
field_df_all <- load_field_analysis_csv(project_dir, report_date, reports_dir)
if (is.null(field_df_all)) {
stop("Cannot generate report without field analysis CSV")
}
# Extract field rows and summary statistics
field_df <- extract_field_rows(field_df_all)
summary_stats <- extract_summary_statistics(field_df)
message(paste("Processing", nrow(field_df), "fields\n"))
# Create Word document
doc <- read_docx()
# -----------------------------------------------------------------------
# TITLE AND METADATA
# -----------------------------------------------------------------------
doc <- doc %>%
add_heading("Field Analysis Report with Phase Detection", level = 1) %>%
add_paragraph(paste("Project:", project_dir)) %>%
add_paragraph(paste("Report Date:", format(report_date, "%B %d, %Y"))) %>%
add_paragraph(paste("Week:", as.numeric(format(report_date, "%V")))) %>%
add_paragraph(paste("Total Fields Analyzed:", nrow(field_df))) %>%
add_paragraph(paste("Total Acreage:", round(summary_stats$total_ha, 2))) %>%
add_paragraph("")
# -----------------------------------------------------------------------
# PHASE DISTRIBUTION SUMMARY
# -----------------------------------------------------------------------
doc <- doc %>%
add_heading("Phase Distribution Summary", level = 2) %>%
add_paragraph("Fields are assigned to growth phases based on their age (weeks since planting).")
phase_summary_df <- data.frame(
Phase = c("Germination (0-6 wks)", "Tillering (9-17 wks)", "Grand Growth (17-35 wks)", "Maturation (35+ wks)"),
Hectares = c(
round(summary_stats$germination_ha, 2),
round(summary_stats$tillering_ha, 2),
round(summary_stats$grand_growth_ha, 2),
round(summary_stats$maturation_ha, 2)
),
stringsAsFactors = FALSE
)
ft_phases <- flextable(phase_summary_df) %>%
bold(part = "header") %>%
bg(part = "header", bg = "#70AD47") %>%
color(part = "header", color = "white") %>%
autofit()
doc <- doc %>% body_add_flextable(ft_phases) %>% add_paragraph("")
# -----------------------------------------------------------------------
# STATUS TRIGGERS SUMMARY
# -----------------------------------------------------------------------
doc <- doc %>%
add_heading("Status Triggers This Week", level = 2) %>%
add_paragraph("Fields with active status triggers indicating specific management actions.")
trigger_summary_df <- data.frame(
Trigger = c(
"Harvest Ready",
"Stress Detected",
"Strong Recovery",
"Growth On Track",
"Germination Complete",
"Germination Started",
"No Active Trigger"
),
Hectares = c(
round(summary_stats$harvest_ready_ha, 2),
round(summary_stats$stress_ha, 2),
round(summary_stats$recovery_ha, 2),
round(summary_stats$growth_on_track_ha, 2),
round(summary_stats$germination_complete_ha, 2),
round(summary_stats$germination_started_ha, 2),
round(summary_stats$no_trigger_ha, 2)
),
stringsAsFactors = FALSE
)
ft_triggers <- flextable(trigger_summary_df) %>%
bold(part = "header") %>%
bg(part = "header", bg = "#4472C4") %>%
color(part = "header", color = "white") %>%
autofit()
doc <- doc %>% body_add_flextable(ft_triggers) %>% add_paragraph("")
# -----------------------------------------------------------------------
# DETAILED FIELD-LEVEL ANALYSIS
# -----------------------------------------------------------------------
doc <- doc %>%
add_heading("Field-Level Analysis", level = 2) %>%
add_paragraph("Detailed view of each field with current phase, CI metrics, and active triggers.")
# Create detailed field table
ft_fields <- create_field_table(field_df)
doc <- doc %>% body_add_flextable(ft_fields) %>% add_paragraph("")
# -----------------------------------------------------------------------
# LEGEND AND INTERPRETATION
# -----------------------------------------------------------------------
doc <- doc %>%
add_heading("Legend & Interpretation", level = 2)
doc <- doc %>%
add_heading("Phases", level = 3) %>%
add_paragraph("Germination (0-6 weeks): Early growth after planting, variable emergence") %>%
add_paragraph("Tillering (9-17 weeks): Shoot development, lateral growth") %>%
add_paragraph("Grand Growth (17-35 weeks): Peak growth period, maximum biomass accumulation") %>%
add_paragraph("Maturation (35+ weeks): Harvest preparation, sugar accumulation")
doc <- doc %>%
add_heading("Status Triggers", level = 3) %>%
add_paragraph("Germination Started: 10% of field CI > 2.0") %>%
add_paragraph("Germination Complete: 70% of field CI >= 2.0") %>%
add_paragraph("Growth On Track: CI increasing > 0.2 per week") %>%
add_paragraph("Stress Detected: CI declining > -1.5 with low uniformity") %>%
add_paragraph("Strong Recovery: CI increasing > 1.5 per week") %>%
add_paragraph("Maturation Progressing: Age 35-45 weeks with high CI (> 3.5)") %>%
add_paragraph("Harvest Ready: Age 45+ weeks")
doc <- doc %>%
add_heading("Metrics", level = 3) %>%
add_paragraph("Weekly CI Change: Change in mean CI value from previous week ± standard deviation") %>%
add_paragraph("CV (Coefficient of Variation): Field uniformity (lower = more uniform)") %>%
add_paragraph("CI Range: Minimum-Maximum CI values in field")
# -----------------------------------------------------------------------
# SAVE DOCUMENT
# -----------------------------------------------------------------------
print(doc, target = output_path)
message(paste("✓ Report saved to:", output_path))
return(output_path)
}
# ============================================================================
# MAIN
# ============================================================================
main <- function() {
args <- commandArgs(trailingOnly = TRUE)
project_dir <- if (length(args) >= 1 && !is.na(args[1])) {
as.character(args[1])
} else {
"esa"
}
report_date <- if (length(args) >= 2 && !is.na(args[2])) {
as.Date(args[2])
} else {
Sys.Date()
}
# Set project globally for parameters_project.R
assign("project_dir", project_dir, envir = .GlobalEnv)
# Load project configuration
tryCatch({
source(here("r_app", "parameters_project.R"))
}, error = function(e) {
stop("Error loading parameters_project.R: ", e$message)
})
# Check that reports_dir is defined
if (!exists("reports_dir")) {
stop("reports_dir must be defined in parameters_project.R")
}
# Set output path
output_dir <- file.path(reports_dir, "kpis", "word_reports")
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
current_week <- as.numeric(format(report_date, "%V"))
output_filename <- paste0(project_dir, "_field_analysis_week",
sprintf("%02d", current_week), ".docx")
output_path <- file.path(output_dir, output_filename)
message(paste("Output:", output_path))
message(paste("Reports dir:", reports_dir))
# Generate report
generate_word_report(project_dir, report_date, reports_dir, output_path)
message("\n=== REPORT GENERATION COMPLETE ===\n")
cat("Word report:", output_path, "\n")
}
if (sys.nframe() == 0) {
main()
}