SmartCane/r_app/80_utils_agronomic_support.R

# 80_UTILS_AGRONOMIC_SUPPORT.R
# ============================================================================
# AURA-SPECIFIC KPI UTILITIES (SCRIPT 80 - CLIENT TYPE: agronomic_support)
#
# Contains all 6 AURA KPI calculation functions and helpers:
#   - Field uniformity KPI (CV-based, spatial autocorrelation)
#   - Area change KPI (week-over-week CI changes)
#   - TCH forecasted KPI (tonnage projections from harvest data)
#   - Growth decline KPI (trend analysis)
#   - Weed presence KPI (field fragmentation detection)
#   - Gap filling KPI (interpolation quality)
#   - KPI reporting (summary tables, field details, text interpretation)
#   - KPI export (Excel, RDS, data export)
#
# Orchestrator: calculate_all_kpis()
# Dependencies: 00_common_utils.R (safe_log), sourced from common
# Used by: 80_calculate_kpis.R (when client_type == "agronomic_support")
# ============================================================================

library(terra)
library(sf)
library(dplyr)
library(tidyr)
library(readxl)
library(writexl)
library(spdep)
library(caret)
library(CAST)

# ============================================================================
# SHARED HELPER FUNCTIONS (NOW IN 80_UTILS_COMMON.R)
# ============================================================================
# The following helper functions have been moved to 80_utils_common.R:
#   - calculate_cv()
#   - calculate_change_percentages()
#   - calculate_spatial_autocorrelation()
#   - extract_ci_values()
#   - calculate_week_numbers()
#   - load_field_ci_raster()
#   - load_weekly_ci_mosaic()
#   - prepare_predictions()
#
# These are now sourced from common utils and shared by all client types.
# ============================================================================

#' Prepare harvest predictions and ensure proper alignment with field data
prepare_predictions <- function(harvest_model, field_data, scenario = "optimistic") {
  if (is.null(harvest_model) || is.null(field_data)) {
    return(NULL)
  }

  tryCatch({
    scenario_factor <- switch(scenario,
                              "pessimistic" = 0.85,
                              "realistic" = 1.0,
                              "optimistic" = 1.15,
                              1.0)

    predictions <- field_data %>%
      mutate(tch_forecasted = field_data$mean_ci * scenario_factor)

    return(predictions)
  }, error = function(e) {
    message(paste("Error preparing predictions:", e$message))
    return(NULL)
  })
}

# ============================================================================
# AURA KPI CALCULATION FUNCTIONS (6 KPIS)
# ============================================================================

#' KPI 1: Calculate field uniformity based on CV and spatial autocorrelation
#'
#' Measures how uniform crop development is across the field.
#' Low CV + high positive Moran's I = excellent uniformity
#'
#' @param ci_pixels_by_field List of CI pixel arrays for each field
#' @param field_boundaries_sf SF object with field geometries
#' @param ci_band Raster band with CI values
#'
#' @return Data frame with field_idx, cv_value, morans_i, uniformity_score, interpretation
calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_sf, ci_band = NULL) {
  result <- data.frame(
    field_idx = integer(),
    cv_value = numeric(),
    morans_i = numeric(),
    uniformity_score = numeric(),
    interpretation = character(),
    stringsAsFactors = FALSE
  )

  for (field_idx in seq_len(nrow(field_boundaries_sf))) {
    ci_pixels <- ci_pixels_by_field[[field_idx]]

    if (is.null(ci_pixels) || length(ci_pixels) == 0) {
      result <- rbind(result, data.frame(
        field_idx = field_idx,
        cv_value = NA_real_,
        morans_i = NA_real_,
        uniformity_score = NA_real_,
        interpretation = "No data",
        stringsAsFactors = FALSE
      ))
      next
    }

    cv_val <- calculate_cv(ci_pixels)

    morans_i <- NA_real_
    if (!is.null(ci_band)) {
      morans_result <- calculate_spatial_autocorrelation(ci_pixels, field_boundaries_sf[field_idx, ])
      if (is.list(morans_result)) {
        morans_i <- morans_result$morans_i
      } else {
        morans_i <- morans_result
      }
    }

    # Normalize CV (0-1 scale, invert so lower CV = higher score)
    cv_normalized <- min(cv_val / 0.3, 1)  # 0.3 = threshold for CV
    cv_score <- 1 - cv_normalized

    # Normalize Moran's I (-1 to 1 scale, shift to 0-1)
    morans_normalized <- if (!is.na(morans_i)) {
      (morans_i + 1) / 2
    } else {
      0.5
    }

    uniformity_score <- 0.7 * cv_score + 0.3 * morans_normalized

    # Interpretation
    if (is.na(cv_val)) {
      interpretation <- "No data"
    } else if (cv_val < 0.08) {
      interpretation <- "Excellent uniformity"
    } else if (cv_val < 0.15) {
      interpretation <- "Good uniformity"
    } else if (cv_val < 0.25) {
      interpretation <- "Acceptable uniformity"
    } else if (cv_val < 0.4) {
      interpretation <- "Poor uniformity"
    } else {
      interpretation <- "Very poor uniformity"
    }

    result <- rbind(result, data.frame(
      field_idx = field_idx,
      cv_value = cv_val,
      morans_i = morans_i,
      uniformity_score = round(uniformity_score, 3),
      interpretation = interpretation,
      stringsAsFactors = FALSE
    ))
  }

  return(result)
}

#' KPI 2: Calculate area change metric (week-over-week CI changes)
#'
#' Tracks the percentage change in CI between current and previous week
#'
#' @param current_stats Current week field statistics (from extract_field_statistics_from_ci)
#' @param previous_stats Previous week field statistics
#'
#' @return Data frame with field-level CI changes
calculate_area_change_kpi <- function(current_stats, previous_stats) {
  result <- calculate_change_percentages(current_stats, previous_stats)

  # Add interpretation
  result$interpretation <- NA_character_

  for (i in seq_len(nrow(result))) {
    change <- result$mean_ci_pct_change[i]

    if (is.na(change)) {
      result$interpretation[i] <- "No previous data"
    } else if (change > 15) {
      result$interpretation[i] <- "Rapid growth"
    } else if (change > 5) {
      result$interpretation[i] <- "Positive growth"
    } else if (change > -5) {
      result$interpretation[i] <- "Stable"
    } else if (change > -15) {
      result$interpretation[i] <- "Declining"
    } else {
      result$interpretation[i] <- "Rapid decline"
    }
  }

  return(result)
}

#' KPI 3: Calculate TCH forecasted (tonnes of cane per hectare)
#'
#' Projects final harvest tonnage based on CI growth trajectory
#'
#' @param field_statistics Current field statistics
#' @param harvesting_data Historical harvest data (with yield observations)
#' @param field_boundaries_sf Field geometries
#'
#' @return Data frame with field-level TCH forecasts
calculate_tch_forecasted_kpi <- function(field_statistics, harvesting_data = NULL, field_boundaries_sf = NULL) {
  result <- data.frame(
    field_idx = field_statistics$field_idx,
    mean_ci = field_statistics$mean_ci,
    tch_forecasted = NA_real_,
    tch_lower_bound = NA_real_,
    tch_upper_bound = NA_real_,
    confidence = NA_character_,
    stringsAsFactors = FALSE
  )

  # Base TCH model: TCH = 50 + (CI * 10)
  # This is a simplified model; production use should include more variables

  for (i in seq_len(nrow(result))) {
    if (is.na(result$mean_ci[i])) {
      result$confidence[i] <- "No data"
      next
    }

    ci_val <- result$mean_ci[i]

    # Simple linear model
    tch_est <- 50 + (ci_val * 10)

    # Confidence interval based on CI range
    tch_lower <- tch_est * 0.85
    tch_upper <- tch_est * 1.15

    result$tch_forecasted[i] <- round(tch_est, 2)
    result$tch_lower_bound[i] <- round(tch_lower, 2)
    result$tch_upper_bound[i] <- round(tch_upper, 2)
    result$confidence[i] <- "Medium"
  }

  return(result)
}

#' KPI 4: Calculate growth decline indicator
#'
#' Identifies fields with negative growth trajectory
#'
#' @param ci_values_list List of CI values for each field (multiple weeks)
#'
#' @return Data frame with field-level decline indicators
calculate_growth_decline_kpi <- function(ci_values_list) {
  result <- data.frame(
    field_idx = seq_len(length(ci_values_list)),
    four_week_trend = NA_real_,
    trend_interpretation = NA_character_,
    decline_severity = NA_character_,
    stringsAsFactors = FALSE
  )

  for (field_idx in seq_len(length(ci_values_list))) {
    ci_vals <- ci_values_list[[field_idx]]

    if (is.null(ci_vals) || length(ci_vals) < 2) {
      result$trend_interpretation[field_idx] <- "Insufficient data"
      next
    }

    ci_vals <- ci_vals[!is.na(ci_vals)]
    if (length(ci_vals) < 2) {
      result$trend_interpretation[field_idx] <- "Insufficient data"
      next
    }

    # Calculate linear trend
    weeks <- seq_along(ci_vals)
    lm_fit <- lm(ci_vals ~ weeks)
    slope <- coef(lm_fit)["weeks"]

    result$four_week_trend[field_idx] <- round(as.numeric(slope), 3)

    if (slope > 0.1) {
      result$trend_interpretation[field_idx] <- "Strong growth"
      result$decline_severity[field_idx] <- "None"
    } else if (slope > 0) {
      result$trend_interpretation[field_idx] <- "Weak growth"
      result$decline_severity[field_idx] <- "None"
    } else if (slope > -0.1) {
      result$trend_interpretation[field_idx] <- "Slight decline"
      result$decline_severity[field_idx] <- "Low"
    } else if (slope > -0.3) {
      result$trend_interpretation[field_idx] <- "Moderate decline"
      result$decline_severity[field_idx] <- "Medium"
    } else {
      result$trend_interpretation[field_idx] <- "Strong decline"
      result$decline_severity[field_idx] <- "High"
    }
  }

  return(result)
}

#' KPI 5: Calculate weed presence indicator
#'
#' Detects field fragmentation/patchiness (potential weed/pest pressure)
#'
#' @param ci_pixels_by_field List of CI pixel arrays for each field
#'
#' @return Data frame with fragmentation indicators
calculate_weed_presence_kpi <- function(ci_pixels_by_field) {
  result <- data.frame(
    field_idx = seq_len(length(ci_pixels_by_field)),
    cv_value = NA_real_,
    low_ci_percent = NA_real_,
    fragmentation_index = NA_real_,
    weed_pressure_risk = NA_character_,
    stringsAsFactors = FALSE
  )

  for (field_idx in seq_len(length(ci_pixels_by_field))) {
    ci_pixels <- ci_pixels_by_field[[field_idx]]

    if (is.null(ci_pixels) || length(ci_pixels) == 0) {
      result$weed_pressure_risk[field_idx] <- "No data"
      next
    }

    ci_pixels <- ci_pixels[!is.na(ci_pixels)]
    if (length(ci_pixels) == 0) {
      result$weed_pressure_risk[field_idx] <- "No data"
      next
    }

    cv_val <- calculate_cv(ci_pixels)
    low_ci_pct <- sum(ci_pixels < 1.5) / length(ci_pixels) * 100
    fragmentation <- cv_val * low_ci_pct / 100

    result$cv_value[field_idx] <- cv_val
    result$low_ci_percent[field_idx] <- round(low_ci_pct, 2)
    result$fragmentation_index[field_idx] <- round(fragmentation, 3)

    if (is.na(fragmentation)) {
      result$weed_pressure_risk[field_idx] <- "No data"
    } else if (fragmentation > 0.15) {
      result$weed_pressure_risk[field_idx] <- "High"
    } else if (fragmentation > 0.08) {
      result$weed_pressure_risk[field_idx] <- "Medium"
    } else if (fragmentation > 0.04) {
      result$weed_pressure_risk[field_idx] <- "Low"
    } else {
      result$weed_pressure_risk[field_idx] <- "Minimal"
    }
  }

  return(result)
}

#' KPI 6: Calculate gap filling quality (data interpolation success)
#'
#' Measures how well cloud/missing data was interpolated during growth model
#'
#' @param ci_rds_path Path to combined CI RDS file (before/after interpolation)
#'
#' @return Data frame with gap-filling quality metrics
calculate_gap_filling_kpi <- function(ci_rds_path) {
  # If ci_rds_path is NULL or not a valid path, return placeholder
  if (is.null(ci_rds_path) || !is.character(ci_rds_path) || length(ci_rds_path) == 0) {
    return(NULL)
  }

  # If ci_rds_path is a directory, find the cumulative CI file
  if (dir.exists(ci_rds_path)) {
    ci_files <- list.files(ci_rds_path, pattern = "^All_pivots.*\\.rds$", full.names = TRUE)
    if (length(ci_files) == 0) {
      return(NULL)
    }
    ci_rds_path <- ci_files[1]
  }

  if (!file.exists(ci_rds_path)) {
    return(NULL)
  }

  tryCatch({
    ci_data <- readRDS(ci_rds_path)

    # ci_data should be a wide matrix: fields × weeks
    # NA values = missing data before interpolation
    # (Gap filling is done during growth model stage)

    result <- data.frame(
      field_idx = seq_len(nrow(ci_data)),
      na_percent_pre_interpolation = NA_real_,
      na_percent_post_interpolation = NA_real_,
      gap_filling_success = NA_character_,
      stringsAsFactors = FALSE
    )

    for (field_idx in seq_len(nrow(ci_data))) {
      na_count <- sum(is.na(ci_data[field_idx, ]))
      na_pct <- na_count / ncol(ci_data) * 100

      if (na_pct == 0) {
        result$gap_filling_success[field_idx] <- "No gaps (100% data)"
      } else if (na_pct < 10) {
        result$gap_filling_success[field_idx] <- "Excellent"
      } else if (na_pct < 25) {
        result$gap_filling_success[field_idx] <- "Good"
      } else if (na_pct < 40) {
        result$gap_filling_success[field_idx] <- "Fair"
      } else {
        result$gap_filling_success[field_idx] <- "Poor"
      }

      result$na_percent_pre_interpolation[field_idx] <- round(na_pct, 2)
    }

    return(result)
  }, error = function(e) {
    message(paste("Error calculating gap filling KPI:", e$message))
    return(NULL)
  })
}

# ============================================================================
# KPI ORCHESTRATOR AND REPORTING
# ============================================================================

#' Create summary tables for all 6 KPIs
#'
#' @param all_kpis List containing results from all 6 KPI functions
#'
#' @return List of summary data frames ready for reporting
create_summary_tables <- function(all_kpis) {
  kpi_summary <- list(
    uniformity = all_kpis$uniformity %>%
      select(field_idx, cv_value, morans_i, uniformity_score, interpretation),

    area_change = all_kpis$area_change %>%
      select(field_idx, mean_ci_pct_change, interpretation),

    tch_forecast = all_kpis$tch_forecasted %>%
      select(field_idx, mean_ci, tch_forecasted, tch_lower_bound, tch_upper_bound, confidence),

    growth_decline = all_kpis$growth_decline %>%
      select(field_idx, four_week_trend, trend_interpretation, decline_severity),

    weed_pressure = all_kpis$weed_presence %>%
      select(field_idx, fragmentation_index, weed_pressure_risk),

    gap_filling = if (!is.null(all_kpis$gap_filling)) {
      all_kpis$gap_filling %>%
        select(field_idx, na_percent_pre_interpolation, gap_filling_success)
    } else {
      NULL
    }
  )

  return(kpi_summary)
}

#' Create detailed field-by-field KPI report
#'
#' @param field_df Data frame with field identifiers and acreage
#' @param all_kpis List with all KPI results
#' @param field_boundaries_sf SF object with field boundaries
#'
#' @return Data frame with one row per field, all KPI columns
create_field_detail_table <- function(field_df, all_kpis, field_boundaries_sf) {
  result <- field_df %>%
    left_join(
      all_kpis$uniformity %>% select(field_idx, cv_value, uniformity_interpretation = interpretation),
      by = c("field_idx")
    ) %>%
    left_join(
      all_kpis$area_change %>% select(field_idx, mean_ci_pct_change),
      by = c("field_idx")
    ) %>%
    left_join(
      all_kpis$tch_forecasted %>% select(field_idx, tch_forecasted),
      by = c("field_idx")
    ) %>%
    left_join(
      all_kpis$growth_decline %>% select(field_idx, decline_severity),
      by = c("field_idx")
    ) %>%
    left_join(
      all_kpis$weed_presence %>% select(field_idx, weed_pressure_risk),
      by = c("field_idx")
    )

  return(result)
}

#' Generate KPI text interpretation for inclusion in Word report
#'
#' @param all_kpis List with all KPI results
#'
#' @return Character string with formatted KPI summary text
create_field_kpi_text <- function(all_kpis) {
  text_parts <- c(
    "## AURA KPI ANALYSIS SUMMARY\n",
    "### Field Uniformity\n",
    paste(all_kpis$uniformity$interpretation, collapse = "; "), "\n",
    "### Growth Trends\n",
    paste(all_kpis$growth_decline$trend_interpretation, collapse = "; "), "\n",
    "### Weed/Pest Pressure\n",
    paste(all_kpis$weed_presence$weed_pressure_risk, collapse = "; "), "\n"
  )

  return(paste(text_parts, collapse = ""))
}

#' Export detailed KPI data to Excel/RDS
#'
#' @param all_kpis List with all KPI results
#' @param kpi_summary List with summary tables
#' @param output_dir Directory for output files
#' @param week Week number
#' @param year Year
#'
#' @return List of output file paths
export_kpi_data <- function(all_kpis, kpi_summary, output_dir, week, year) {
  # Ensure output directory exists
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }

  # Export all KPI tables to a single Excel file
  excel_file <- paste0(output_dir, "/AURA_KPI_week_", sprintf("%02d_%d", week, year), ".xlsx")

  sheets <- list(
    "Uniformity" = as.data.frame(kpi_summary$uniformity),
    "Area_Change" = as.data.frame(kpi_summary$area_change),
    "TCH_Forecast" = as.data.frame(kpi_summary$tch_forecast),
    "Growth_Decline" = as.data.frame(kpi_summary$growth_decline),
    "Weed_Pressure" = as.data.frame(kpi_summary$weed_pressure),
    "Gap_Filling" = as.data.frame(kpi_summary$gap_filling)
  )

  write_xlsx(sheets, excel_file)
  message(paste("✓ AURA KPI data exported to:", excel_file))

  # Also export to RDS for programmatic access
  rds_file <- paste0(output_dir, "/AURA_KPI_week_", sprintf("%02d_%d", week, year), ".rds")
  saveRDS(all_kpis, rds_file)
  message(paste("✓ AURA KPI RDS exported to:", rds_file))

  return(list(excel = excel_file, rds = rds_file))
}

# ============================================================================
# ORCHESTRATOR FUNCTION
# ============================================================================

#' Calculate all 6 AURA KPIs
#'
#' Main entry point for AURA KPI calculation.
#' This function orchestrates the 6 KPI calculations and returns all results.
#'
#' @param field_boundaries_sf SF object with field geometries
#' @param current_week ISO week number (1-53)
#' @param current_year ISO week year
#' @param current_mosaic_dir Directory containing current week's mosaic
#' @param previous_mosaic_dir Directory containing previous week's mosaic (optional)
#' @param ci_rds_path Path to combined CI RDS file
#' @param harvesting_data Data frame with harvest data (optional)
#' @param output_dir Directory for KPI exports
#'
#' @return List with results from all 6 KPI functions
#'
#' @details
#' This function:
#' 1. Loads current week mosaic and extracts field statistics
#' 2. (Optionally) loads previous week mosaic for comparison metrics
#' 3. Calculates all 6 AURA KPIs
#' 4. Creates summary tables
#' 5. Exports results to Excel/RDS
#'
calculate_all_kpis <- function(
    field_boundaries_sf,
    current_week,
    current_year,
    current_mosaic_dir,
    previous_mosaic_dir = NULL,
    ci_rds_path = NULL,
    harvesting_data = NULL,
    output_dir = NULL
) {

  message("\n============ AURA KPI CALCULATION (6 KPIs) ============")

  # Load current week mosaic
  message("Loading current week mosaic...")
  current_mosaic <- load_weekly_ci_mosaic(current_week, current_year, current_mosaic_dir)

  if (is.null(current_mosaic)) {
    stop("Could not load current week mosaic")
  }

  # Extract field statistics
  message("Extracting field statistics from current mosaic...")
  current_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf)
  ci_pixels_by_field <- extract_ci_values(current_mosaic, field_boundaries_sf)

  # Load previous week mosaic (if available)
  previous_stats <- NULL
  if (!is.null(previous_mosaic_dir)) {
    target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1)
    message(paste("Loading previous week mosaic (week", target_prev$week, target_prev$year, ")..."))
    previous_mosaic <- load_weekly_ci_mosaic(target_prev$week, target_prev$year, previous_mosaic_dir)

    if (!is.null(previous_mosaic)) {
      previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf)
    } else {
      message("Previous week mosaic not available - skipping area change KPI")
    }
  }

  # Calculate 6 KPIs
  message("\nCalculating KPI 1: Field Uniformity...")
  uniformity_kpi <- calculate_field_uniformity_kpi(ci_pixels_by_field, field_boundaries_sf, current_mosaic)

  message("Calculating KPI 2: Area Change...")
  if (!is.null(previous_stats)) {
    area_change_kpi <- calculate_area_change_kpi(current_stats, previous_stats)
  } else {
    area_change_kpi <- data.frame(
      field_idx = seq_len(nrow(field_boundaries_sf)),
      mean_ci_pct_change = NA_real_,
      interpretation = rep("No previous data", nrow(field_boundaries_sf))
    )
  }

  message("Calculating KPI 3: TCH Forecasted...")
  tch_kpi <- calculate_tch_forecasted_kpi(current_stats, harvesting_data, field_boundaries_sf)

  message("Calculating KPI 4: Growth Decline...")
  growth_decline_kpi <- calculate_growth_decline_kpi(
    list(ci_pixels_by_field)  # Would need historical data for real trend
  )

  message("Calculating KPI 5: Weed Presence...")
  weed_kpi <- calculate_weed_presence_kpi(ci_pixels_by_field)

  message("Calculating KPI 6: Gap Filling...")
  gap_filling_kpi <- calculate_gap_filling_kpi(ci_rds_path)

  # Compile results
  all_kpis <- list(
    uniformity = uniformity_kpi,
    area_change = area_change_kpi,
    tch_forecasted = tch_kpi,
    growth_decline = growth_decline_kpi,
    weed_presence = weed_kpi,
    gap_filling = gap_filling_kpi
  )

  # Create summary tables
  kpi_summary <- create_summary_tables(all_kpis)

  # Export
  export_paths <- export_kpi_data(all_kpis, kpi_summary, output_dir, current_week, current_year)

  message(paste("\n✓ AURA KPI calculation complete. Week", current_week, current_year, "\n"))

  return(all_kpis)
}