SmartCane/r_app/experiments/experiment_generate_report_with_phases.R

# 14_GENERATE_REPORT_WITH_PHASES.R
# ==================================
# First-draft Word report generation from field analysis CSV
#
# Purpose: Take the existing field_analysis_weekly.csv (which already has phases
# calculated from 09_field_analysis_weekly.R) and generate a professional Word
# report showing:
#   - Field-level phase assignment (age-based)
#   - Weekly CI change
#   - Current status triggers (as-is, no modifications)
#   - Summary statistics by phase
#
# This is a FIRST DRAFT to test the pipeline. Once working, we can iterate on
# what gets included in the report.
#
# Usage: Rscript 14_generate_report_with_phases.R [project_dir] [report_date]
#   - project_dir: Project directory name (e.g., "esa", "aura")
#   - report_date: Date for report (YYYY-MM-DD), default: today

suppressPackageStartupMessages({
  library(here)
  library(dplyr)
  library(tidyr)
  library(readr)
  library(lubridate)
  library(officer)  # For Word document generation
  library(flextable)  # For beautiful tables in Word
})

# ============================================================================
# CONFIGURATION
# ============================================================================

# Color scheme for status triggers
TRIGGER_COLORS <- list(
  germination_started = "E8F4F8",           # Light blue
  germination_complete = "C6E0B4",          # Light green
  growth_on_track = "A9D08E",               # Green
  stress_detected_whole_field = "F4B084",   # Orange
  strong_recovery = "92D050",               # Bright green
  maturation_progressing = "4472C4",        # Dark blue
  harvest_ready = "70AD47",                 # Dark green
  none = "D9D9D9"                           # Gray
)

PHASE_COLORS <- list(
  Germination = "E8F4F8",                   # Light blue
  "Early Growth" = "BDD7EE",                # Blue
  Tillering = "70AD47",                     # Green
  "Grand Growth" = "92D050",                # Bright green
  Maturation = "FFC7CE",                    # Light red
  "Pre-Harvest" = "F4B084",                 # Orange
  Unknown = "D9D9D9"                        # Gray
)

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

#' Load field analysis CSV from reports directory
#' @param project_dir Project name
#' @param report_date Date for the report (used to find current week)
#' @param reports_dir Reports directory path
#' @return Data frame with field analysis, or NULL if not found
load_field_analysis_csv <- function(project_dir, report_date, reports_dir) {
  current_week <- as.numeric(format(report_date, "%V"))

  csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".csv")
  csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename)

  message(paste("Looking for CSV at:", csv_path))

  if (!file.exists(csv_path)) {
    message(paste("CSV not found. Available files:"))
    field_analysis_dir <- file.path(reports_dir, "kpis", "field_analysis")
    if (dir.exists(field_analysis_dir)) {
      files <- list.files(field_analysis_dir, pattern = project_dir)
      if (length(files) > 0) {
        message(paste("  -", files))
        # Try to load the most recent available
        most_recent <- tail(files, 1)
        csv_path <- file.path(field_analysis_dir, most_recent)
        message(paste("Using most recent:", most_recent))
      }
    }
  }

  if (!file.exists(csv_path)) {
    warning(paste("Cannot find field analysis CSV for project:", project_dir))
    return(NULL)
  }

  tryCatch({
    data <- read_csv(csv_path, show_col_types = FALSE)
    message(paste("Loaded field analysis with", nrow(data), "rows"))
    return(data)
  }, error = function(e) {
    warning(paste("Error reading CSV:", e$message))
    return(NULL)
  })
}

#' Extract field-level data (exclude summary rows)
#' @param field_df Data frame from field analysis CSV
#' @return Filtered data frame with only individual field rows
extract_field_rows <- function(field_df) {
  # Summary rows start with special prefixes or markers
  summary_patterns <- c(
    "^===",
    "^ACREAGE_",
    "^TRIGGER_",
    "^NO_TRIGGER",
    "^TOTAL_"
  )

  field_df <- field_df %>%
    filter(!grepl(paste(summary_patterns, collapse = "|"), Field_id, ignore.case = TRUE))

  return(field_df)
}

#' Extract summary statistics from field analysis CSV
#' @param field_df Data frame from field analysis CSV
#' @return List with summary statistics
extract_summary_statistics <- function(field_df) {
  summary_rows <- field_df %>%
    filter(grepl("^ACREAGE_|^TRIGGER_|^NO_TRIGGER|^TOTAL_", Field_id, ignore.case = TRUE))

  summary_list <- list()

  # Phase acreage
  summary_list$germination_ha <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Germination"], na.rm = TRUE)
  summary_list$tillering_ha <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Tillering"], na.rm = TRUE)
  summary_list$grand_growth_ha <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Grand Growth"], na.rm = TRUE)
  summary_list$maturation_ha <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Maturation"], na.rm = TRUE)

  # Trigger acreage
  summary_list$harvest_ready_ha <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE)
  summary_list$stress_ha <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE)
  summary_list$recovery_ha <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE)
  summary_list$growth_on_track_ha <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE)
  summary_list$germination_complete_ha <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE)
  summary_list$germination_started_ha <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE)
  summary_list$no_trigger_ha <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE)

  summary_list$total_ha <- sum(field_df$Acreage, na.rm = TRUE)

  return(summary_list)
}

#' Create a flextable from field analysis data
#' @param field_df Data frame with field data
#' @param include_cols Columns to include in table
#' @return flextable object
create_field_table <- function(field_df, include_cols = NULL) {
  if (is.null(include_cols)) {
    include_cols <- c("Field_id", "Acreage", "Age_week", "Phase (age based)",
                      "Weekly_ci_change", "Status_trigger", "CV")
  }

  # Filter to available columns
  include_cols <- include_cols[include_cols %in% names(field_df)]

  table_data <- field_df %>%
    select(all_of(include_cols)) %>%
    mutate(
      Acreage = round(Acreage, 2),
      CV = round(CV, 3),
      Weekly_ci_change = as.character(Weekly_ci_change)
    )

  # Create flextable
  ft <- flextable(table_data)

  # Format header
  ft <- ft %>%
    bold(part = "header") %>%
    bg(part = "header", bg = "#4472C4") %>%
    color(part = "header", color = "white") %>%
    autofit()

  # Add phase color highlighting if phase column exists
  if ("Phase (age based)" %in% include_cols) {
    for (i in 1:nrow(table_data)) {
      phase <- table_data[[i, "Phase (age based)"]]
      color_val <- PHASE_COLORS[[phase]]
      if (!is.null(color_val)) {
        ft <- ft %>%
          bg(i = i + 1, j = "Phase (age based)", bg = color_val)
      }
    }
  }

  # Add status trigger color highlighting if trigger column exists
  if ("Status_trigger" %in% include_cols) {
    for (i in 1:nrow(table_data)) {
      trigger <- table_data[[i, "Status_trigger"]]
      if (is.na(trigger)) trigger <- "none"
      color_val <- TRIGGER_COLORS[[trigger]]
      if (!is.null(color_val)) {
        ft <- ft %>%
          bg(i = i + 1, j = "Status_trigger", bg = color_val)
      }
    }
  }

  return(ft)
}

#' Create summary statistics table
#' @param summary_list List from extract_summary_statistics()
#' @return flextable object
create_summary_table <- function(summary_list) {
  summary_df <- data.frame(
    Category = c(
      "PHASE DISTRIBUTION",
      "  Germination",
      "  Tillering",
      "  Grand Growth",
      "  Maturation",
      "",
      "STATUS TRIGGERS",
      "  Harvest Ready",
      "  Stress Detected",
      "  Strong Recovery",
      "  Growth On Track",
      "  Germination Complete",
      "  Germination Started",
      "  No Trigger",
      "",
      "TOTAL ACREAGE"
    ),
    Hectares = c(
      NA,
      summary_list$germination_ha,
      summary_list$tillering_ha,
      summary_list$grand_growth_ha,
      summary_list$maturation_ha,
      NA,
      NA,
      summary_list$harvest_ready_ha,
      summary_list$stress_ha,
      summary_list$recovery_ha,
      summary_list$growth_on_track_ha,
      summary_list$germination_complete_ha,
      summary_list$germination_started_ha,
      summary_list$no_trigger_ha,
      NA,
      summary_list$total_ha
    ),
    stringsAsFactors = FALSE
  )

  summary_df$Hectares <- round(summary_df$Hectares, 2)

  ft <- flextable(summary_df)
  ft <- ft %>%
    bold(part = "header") %>%
    bg(part = "header", bg = "#4472C4") %>%
    color(part = "header", color = "white") %>%
    autofit()

  return(ft)
}

# ============================================================================
# MAIN REPORT GENERATION
# ============================================================================

generate_word_report <- function(project_dir, report_date, reports_dir, output_path) {
  message("=== GENERATING WORD REPORT WITH PHASES ===\n")

  # Load field analysis CSV
  field_df_all <- load_field_analysis_csv(project_dir, report_date, reports_dir)

  if (is.null(field_df_all)) {
    stop("Cannot generate report without field analysis CSV")
  }

  # Extract field rows and summary statistics
  field_df <- extract_field_rows(field_df_all)
  summary_stats <- extract_summary_statistics(field_df)

  message(paste("Processing", nrow(field_df), "fields\n"))

  # Create Word document
  doc <- read_docx()

  # -----------------------------------------------------------------------
  # TITLE AND METADATA
  # -----------------------------------------------------------------------

  doc <- doc %>%
    add_heading("Field Analysis Report with Phase Detection", level = 1) %>%
    add_paragraph(paste("Project:", project_dir)) %>%
    add_paragraph(paste("Report Date:", format(report_date, "%B %d, %Y"))) %>%
    add_paragraph(paste("Week:", as.numeric(format(report_date, "%V")))) %>%
    add_paragraph(paste("Total Fields Analyzed:", nrow(field_df))) %>%
    add_paragraph(paste("Total Acreage:", round(summary_stats$total_ha, 2))) %>%
    add_paragraph("")

  # -----------------------------------------------------------------------
  # PHASE DISTRIBUTION SUMMARY
  # -----------------------------------------------------------------------

  doc <- doc %>%
    add_heading("Phase Distribution Summary", level = 2) %>%
    add_paragraph("Fields are assigned to growth phases based on their age (weeks since planting).")

  phase_summary_df <- data.frame(
    Phase = c("Germination (0-6 wks)", "Tillering (9-17 wks)", "Grand Growth (17-35 wks)", "Maturation (35+ wks)"),
    Hectares = c(
      round(summary_stats$germination_ha, 2),
      round(summary_stats$tillering_ha, 2),
      round(summary_stats$grand_growth_ha, 2),
      round(summary_stats$maturation_ha, 2)
    ),
    stringsAsFactors = FALSE
  )

  ft_phases <- flextable(phase_summary_df) %>%
    bold(part = "header") %>%
    bg(part = "header", bg = "#70AD47") %>%
    color(part = "header", color = "white") %>%
    autofit()

  doc <- doc %>% body_add_flextable(ft_phases) %>% add_paragraph("")

  # -----------------------------------------------------------------------
  # STATUS TRIGGERS SUMMARY
  # -----------------------------------------------------------------------

  doc <- doc %>%
    add_heading("Status Triggers This Week", level = 2) %>%
    add_paragraph("Fields with active status triggers indicating specific management actions.")

  trigger_summary_df <- data.frame(
    Trigger = c(
      "Harvest Ready",
      "Stress Detected",
      "Strong Recovery",
      "Growth On Track",
      "Germination Complete",
      "Germination Started",
      "No Active Trigger"
    ),
    Hectares = c(
      round(summary_stats$harvest_ready_ha, 2),
      round(summary_stats$stress_ha, 2),
      round(summary_stats$recovery_ha, 2),
      round(summary_stats$growth_on_track_ha, 2),
      round(summary_stats$germination_complete_ha, 2),
      round(summary_stats$germination_started_ha, 2),
      round(summary_stats$no_trigger_ha, 2)
    ),
    stringsAsFactors = FALSE
  )

  ft_triggers <- flextable(trigger_summary_df) %>%
    bold(part = "header") %>%
    bg(part = "header", bg = "#4472C4") %>%
    color(part = "header", color = "white") %>%
    autofit()

  doc <- doc %>% body_add_flextable(ft_triggers) %>% add_paragraph("")

  # -----------------------------------------------------------------------
  # DETAILED FIELD-LEVEL ANALYSIS
  # -----------------------------------------------------------------------

  doc <- doc %>%
    add_heading("Field-Level Analysis", level = 2) %>%
    add_paragraph("Detailed view of each field with current phase, CI metrics, and active triggers.")

  # Create detailed field table
  ft_fields <- create_field_table(field_df)

  doc <- doc %>% body_add_flextable(ft_fields) %>% add_paragraph("")

  # -----------------------------------------------------------------------
  # LEGEND AND INTERPRETATION
  # -----------------------------------------------------------------------

  doc <- doc %>%
    add_heading("Legend & Interpretation", level = 2)

  doc <- doc %>%
    add_heading("Phases", level = 3) %>%
    add_paragraph("Germination (0-6 weeks): Early growth after planting, variable emergence") %>%
    add_paragraph("Tillering (9-17 weeks): Shoot development, lateral growth") %>%
    add_paragraph("Grand Growth (17-35 weeks): Peak growth period, maximum biomass accumulation") %>%
    add_paragraph("Maturation (35+ weeks): Harvest preparation, sugar accumulation")

  doc <- doc %>%
    add_heading("Status Triggers", level = 3) %>%
    add_paragraph("Germination Started: 10% of field CI > 2.0") %>%
    add_paragraph("Germination Complete: 70% of field CI >= 2.0") %>%
    add_paragraph("Growth On Track: CI increasing > 0.2 per week") %>%
    add_paragraph("Stress Detected: CI declining > -1.5 with low uniformity") %>%
    add_paragraph("Strong Recovery: CI increasing > 1.5 per week") %>%
    add_paragraph("Maturation Progressing: Age 35-45 weeks with high CI (> 3.5)") %>%
    add_paragraph("Harvest Ready: Age 45+ weeks")

  doc <- doc %>%
    add_heading("Metrics", level = 3) %>%
    add_paragraph("Weekly CI Change: Change in mean CI value from previous week ± standard deviation") %>%
    add_paragraph("CV (Coefficient of Variation): Field uniformity (lower = more uniform)") %>%
    add_paragraph("CI Range: Minimum-Maximum CI values in field")

  # -----------------------------------------------------------------------
  # SAVE DOCUMENT
  # -----------------------------------------------------------------------

  print(doc, target = output_path)
  message(paste("✓ Report saved to:", output_path))

  return(output_path)
}

# ============================================================================
# MAIN
# ============================================================================

main <- function() {
  args <- commandArgs(trailingOnly = TRUE)

  project_dir <- if (length(args) >= 1 && !is.na(args[1])) {
    as.character(args[1])
  } else {
    "esa"
  }

  report_date <- if (length(args) >= 2 && !is.na(args[2])) {
    as.Date(args[2])
  } else {
    Sys.Date()
  }

  # Set project globally for parameters_project.R
  assign("project_dir", project_dir, envir = .GlobalEnv)

  # Load project configuration
  tryCatch({
    source(here("r_app", "parameters_project.R"))
  }, error = function(e) {
    stop("Error loading parameters_project.R: ", e$message)
  })

  # Check that reports_dir is defined
  if (!exists("reports_dir")) {
    stop("reports_dir must be defined in parameters_project.R")
  }

  # Set output path
  output_dir <- file.path(reports_dir, "kpis", "word_reports")
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }

  current_week <- as.numeric(format(report_date, "%V"))
  output_filename <- paste0(project_dir, "_field_analysis_week",
                           sprintf("%02d", current_week), ".docx")
  output_path <- file.path(output_dir, output_filename)

  message(paste("Output:", output_path))
  message(paste("Reports dir:", reports_dir))

  # Generate report
  generate_word_report(project_dir, report_date, reports_dir, output_path)

  message("\n=== REPORT GENERATION COMPLETE ===\n")
  cat("Word report:", output_path, "\n")
}

if (sys.nframe() == 0) {
  main()
}