SmartCane/r_app/80_utils_cane_supply.R

# 80_UTILS_CANE_SUPPLY.R
# ============================================================================
# CANE SUPPLY CLIENT-SPECIFIC UTILITIES (SCRIPT 80 - CLIENT TYPE: cane_supply)
#
# Contains ANGATA and other cane supply-specific KPI and reporting functions.
#
# Currently, CANE_SUPPLY clients use the common utilities from 80_utils_common.R:
#   - Weekly statistics (calculate_field_statistics, calculate_kpi_trends)
#   - Field analysis reporting (generate_field_analysis_summary)
#   - Excel export (export_field_analysis_excel)
#
# This file is structured to accommodate future ANGATA-specific functionality such as:
#   - Custom yield models
#   - Harvest readiness criteria
#   - Supply chain integration hooks
#   - ANGATA-specific alerting and messaging
#
# Orchestrator: (Placeholder - uses common functions)
# Dependencies: 00_common_utils.R, 80_utils_common.R
# Used by: 80_calculate_kpis.R (when client_type == "cane_supply")
# ============================================================================

library(terra)
library(sf)
library(dplyr)
library(tidyr)
library(readxl)
library(writexl)

# ============================================================================
# ANGATA-SPECIFIC HELPER FUNCTIONS (Placeholder Section)
# ============================================================================

#' Calculate acreage for each field from geometry
#' @param field_boundaries_sf sf object with field geometries
#' @return data.frame with field and acreage columns
calculate_field_acreages <- function(field_boundaries_sf) {
  tryCatch({
    lookup_df <- field_boundaries_sf %>%
      sf::st_drop_geometry() %>%
      as.data.frame() %>%
      mutate(
        geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) {
          tryCatch({
            sf::st_is_valid(field_boundaries_sf[idx, ])
          }, error = function(e) FALSE)
        }),
        area_ha = 0
      )

    # Calculate area for valid geometries
    valid_indices <- which(lookup_df$geometry_valid)
    areas_ha <- vapply(valid_indices, function(idx) {
      tryCatch({
        area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ]))
        area_m2 / 10000
      }, error = function(e) NA_real_)
    }, numeric(1))
    lookup_df$area_ha[valid_indices] <- areas_ha

    # Convert hectares to acres
    lookup_df %>%
      mutate(acreage = area_ha / 0.404686) %>%
      select(field, acreage)
  }, error = function(e) {
    message(paste("Warning: Could not calculate acreages from geometries -", e$message))
    data.frame(field = character(0), acreage = numeric(0))
  })
}

#' Calculate age in weeks from planting date
#'
#' @param planting_date Date of planting
#' @param reference_date Date to calculate age relative to (typically end_date)
#' @return Numeric age in weeks (rounded to nearest week)
calculate_age_week <- function(planting_date, reference_date) {
  if (is.na(planting_date)) {
    return(NA_real_)
  }
  round(as.numeric(difftime(reference_date, planting_date, units = "weeks")), 0)
}

#' Assign crop phase based on age in weeks
#'
#' @param age_week Numeric age in weeks
#' @return Character phase name
calculate_phase <- function(age_week) {
  if (is.na(age_week)) return(NA_character_)
  if (age_week >= 0 & age_week < 4) return("Germination")
  if (age_week >= 4 & age_week < 17) return("Tillering")
  if (age_week >= 17 & age_week < 39) return("Grand Growth")
  if (age_week >= 39) return("Maturation")
  NA_character_
}

#' Bin percentage into 10% intervals with special handling for 90-100%
#'
#' @param pct Numeric percentage value (0-100)
#' @return Character bin label
bin_percentage <- function(pct) {
  if (is.na(pct)) return(NA_character_)
  if (pct >= 95) return("95-100%")
  else if (pct >= 90) return("90-95%")
  else if (pct >= 80) return("80-90%")
  else if (pct >= 70) return("70-80%")
  else if (pct >= 60) return("60-70%")
  else if (pct >= 50) return("50-60%")
  else if (pct >= 40) return("40-50%")
  else if (pct >= 30) return("30-40%")
  else if (pct >= 20) return("20-30%")
  else if (pct >= 10) return("10-20%")
  else return("0-10%")
}

#' Calculate germination progress from CI threshold percentage
#'
#' @param pct_pixels_ci_gte_2 Percentage of pixels with CI >= 2
#' @return Character bin label
calculate_germination_progress <- function(pct_pixels_ci_gte_2) {
  bin_percentage(pct_pixels_ci_gte_2)
}

#' Categorize CV trend (long-term slope) into qualitative labels
#'
#' @param cv_slope Numeric slope from CV trend analysis
#' @return Character category: "More uniform", "Stable uniformity", or "Less uniform"
categorize_cv_trend_long_term <- function(cv_slope) {
  if (is.na(cv_slope)) {
    return(NA_character_)
  } else if (cv_slope < -0.01) {
    return("More uniform")
  } else if (cv_slope > 0.01) {
    return("Less uniform")
  } else {
    return("Stable uniformity")
  }
}

#' Determine status alert based on harvest probability and crop health
#' Priority order:
#' 1. Ready for harvest-check (imminent + mature ≥12 months)
#' 2. Strong decline in crop health (drop ≥2 points but still >1.5)
#' 3. Harvested/bare (Mean CI < 1.5)
#' @param imminent_prob Numeric harvest probability
#' @param age_week Numeric age in weeks
#' @param weekly_ci_change Numeric weekly CI change
#' @param mean_ci Numeric mean CI value
#' @return Character status alert or NA
calculate_status_alert <- function(imminent_prob, age_week, weekly_ci_change, mean_ci) {
  # Priority 1: Ready for harvest-check
  if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_week) && age_week >= 52) {
    return("Ready for harvest-check")
  }

  # Priority 2: Strong decline
  if (!is.na(weekly_ci_change) && weekly_ci_change <= -2.0 && !is.na(mean_ci) && mean_ci > 1.5) {
    return("Strong decline in crop health")
  }

  # Priority 3: Harvested/bare
  if (!is.na(mean_ci) && mean_ci < 1.5) {
    return("Harvested/bare")
  }

  # Fallback: no alert
  NA_character_
}


#' Build complete per-field KPI dataframe with all 22 columns
#' @param current_stats data.frame with current week statistics from load_or_calculate_weekly_stats
#' @param planting_dates data.frame with field_id and planting_date columns
#' @param imminent_prob_data data.frame with Field_id and Imminent_prob_actual columns (or NULL)
#' @param gap_scores_df data.frame with Field_id and gap_score columns (or NULL)
#' @param field_boundaries_sf sf object with field geometries
#' @param end_date Date object for current report date
#' @return data.frame with all 22 KPI columns
calculate_all_field_kpis <- function(current_stats,
                                      planting_dates,
                                      imminent_prob_data,
                                      gap_scores_df,
                                      field_boundaries_sf,
                                      end_date) {

  message("\nBuilding final field analysis output...")

  # Pre-calculate acreages
  acreage_lookup <- calculate_field_acreages(field_boundaries_sf)

  field_analysis_df <- current_stats %>%
    mutate(
      # Column 2: Farm_Section (user fills manually)
      Farm_Section = NA_character_,

      # Column 3: Field_name (from GeoJSON)
      Field_name = Field_id,

      # Column 4: Acreage (from geometry)
      Acreage = {
        acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)]
        if_else(is.na(acreages_vec), 0, acreages_vec)
      },

      # Column 8: Last_harvest_or_planting_date (from harvest.xlsx)
      Last_harvest_or_planting_date = {
        planting_dates$planting_date[match(Field_id, planting_dates$field_id)]
      },

      # Column 9: Age_week (calculated)
      Age_week = {
        sapply(seq_len(nrow(current_stats)), function(idx) {
          calculate_age_week(Last_harvest_or_planting_date[idx], end_date)
        })
      },

      # Column 10: Phase (based on Age_week)
      Phase = sapply(Age_week, calculate_phase),

      # Column 12: Germination_progress (binned Pct_pixels_CI_gte_2)
      Germination_progress = sapply(Pct_pixels_CI_gte_2, calculate_germination_progress),

      # Column 13: Imminent_prob (from script 31 or NA)
      Imminent_prob = {
        if (!is.null(imminent_prob_data)) {
          imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)]
        } else {
          rep(NA_real_, nrow(current_stats))
        }
      },

      # Column 14: Status_Alert (multi-priority logic)
      Status_Alert = {
        sapply(seq_len(nrow(current_stats)), function(idx) {
          calculate_status_alert(
            Imminent_prob[idx],
            Age_week[idx],
            Weekly_ci_change[idx],
            Mean_CI[idx]
          )
        })
      },

      # Column 19b: CV_Trend_Long_Term_Category (categorical slope)
      CV_Trend_Long_Term_Category = sapply(current_stats$CV_Trend_Long_Term, categorize_cv_trend_long_term),

      # Column 21: Cloud_pct_clear (binned into intervals)
      Cloud_pct_clear = sapply(Cloud_pct_clear, bin_percentage),

      # Column 22: Gap_score (2σ method)
      Gap_score = {
        if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
          gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)]
        } else {
          rep(NA_real_, nrow(current_stats))
        }
      }
    ) %>%
    select(
      all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert",
               "Last_harvest_or_planting_date", "Age_week", "Phase",
               "Germination_progress",
               "Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles",
               "CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
               "Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score"))
    )

  message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns"))

  return(field_analysis_df)
}

#' Aggregate per-field data into farm-level KPI summary
#'
#' @param field_analysis_df data.frame with per-field KPI data
#' @param current_week Numeric current week number
#' @param current_year Numeric current year
#' @param end_date Date object for current report date
#' @return List with phase_distribution, status_distribution, cloud_distribution, overall_stats
calculate_farm_level_kpis <- function(field_analysis_df, current_week, current_year, end_date) {

  cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n")

  # Filter to only fields with actual data
  field_data <- field_analysis_df %>%
    filter(!is.na(Mean_CI) & !is.na(Acreage)) %>%
    filter(Acreage > 0)

  if (nrow(field_data) == 0) {
    message("No valid field data for farm-level aggregation")
    return(NULL)
  }

  farm_summary <- list()

  # 1. PHASE DISTRIBUTION
  phase_dist <- field_data %>%
    group_by(Phase) %>%
    summarise(
      num_fields = n(),
      acreage = sum(Acreage, na.rm = TRUE),
      .groups = 'drop'
    ) %>%
    rename(Category = Phase)

  farm_summary$phase_distribution <- phase_dist

  # 2. STATUS ALERT DISTRIBUTION
  status_dist <- field_data %>%
    group_by(Status_Alert) %>%
    summarise(
      num_fields = n(),
      acreage = sum(Acreage, na.rm = TRUE),
      .groups = 'drop'
    ) %>%
    rename(Category = Status_Alert)

  farm_summary$status_distribution <- status_dist

  # 3. CLOUD COVERAGE DISTRIBUTION
  cloud_dist <- field_data %>%
    group_by(Cloud_category) %>%
    summarise(
      num_fields = n(),
      acreage = sum(Acreage, na.rm = TRUE),
      .groups = 'drop'
    ) %>%
    rename(Category = Cloud_category)

  farm_summary$cloud_distribution <- cloud_dist

  # 4. OVERALL STATISTICS
  farm_summary$overall_stats <- data.frame(
    total_fields = nrow(field_data),
    total_acreage = sum(field_data$Acreage, na.rm = TRUE),
    mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2),
    median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2),
    mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4),
    week = current_week,
    year = current_year,
    date = as.character(end_date)
  )

  # Print summaries
  cat("\n--- PHASE DISTRIBUTION ---\n")
  print(phase_dist)

  cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n")
  print(status_dist)

  cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n")
  print(cloud_dist)

  cat("\n--- OVERALL FARM STATISTICS ---\n")
  print(farm_summary$overall_stats)

  return(farm_summary)
}


# ============================================================================
# ORCHESTRATOR FOR CANE_SUPPLY WORKFLOWS
# ============================================================================

#' Main orchestrator for CANE_SUPPLY per-field KPI workflow
#'
#' This function coordinates all KPI calculations for the per-field analysis workflow.
#' It loads historical data, calculates current/previous week statistics, computes
#' all 22 KPI columns, and aggregates farm-level summaries.
#'
#' @param setup List with directory paths (kpi_reports_dir, data_dir, etc.)
#' @param client_config List with workflow configuration (script_91_compatible, outputs)
#' @param end_date Date object for current report date
#' @param project_dir Character project identifier
#' @param weekly_mosaic Character path to weekly mosaic directory
#' @param daily_vals_dir Character path to daily values directory
#' @param field_boundaries_sf sf object with field geometries
#' @param data_dir Character path to data directory
#' @return List with field_analysis_df, farm_kpi_results, export_paths
calculate_field_analysis_cane_supply <- function(setup,
                                                 client_config,
                                                 end_date,
                                                 project_dir,
                                                 weekly_mosaic,
                                                 daily_vals_dir,
                                                 field_boundaries_sf,
                                                 data_dir) {

  message("\n", strrep("=", 70))
  message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
  message(strrep("=", 70))

  reports_dir <- file.path(setup$reports_dir, "kpis")

  # ========== PHASE 1: WEEKLY ANALYSIS SETUP ==========
  message("\n", strrep("-", 70))
  message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ")
  message(strrep("-", 70))

  weeks <- calculate_week_numbers(end_date)
  current_week <- weeks$current_week
  current_year <- weeks$current_year
  previous_week <- weeks$previous_week
  previous_year <- weeks$previous_year

  message(paste("Week:", current_week, "/ Year (ISO 8601):", current_year))

  # Find per-field weekly mosaics
  message("Finding per-field weekly mosaics...")

  if (!dir.exists(weekly_mosaic)) {
    stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic,
               "\nScript 40 (mosaic creation) must be run first."))
  }

  field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE)
  field_dirs <- field_dirs[field_dirs != ""]

  if (length(field_dirs) == 0) {
    stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic,
               "\nScript 40 must create weekly_mosaic/{FIELD}/ structure."))
  }

  # Verify we have mosaics for this week
  single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year)
  per_field_files <- c()
  for (field in field_dirs) {
    field_mosaic_dir <- file.path(weekly_mosaic, field)
    files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
    if (length(files) > 0) {
      per_field_files <- c(per_field_files, files)
    }
  }

  if (length(per_field_files) == 0) {
    stop(paste("ERROR: No mosaics found for week", current_week, "year", current_year,
               "\nExpected pattern:", single_file_pattern,
               "\nChecked:", weekly_mosaic))
  }

  message(paste("  ✓ Found", length(per_field_files), "per-field weekly mosaics"))

  # ========== PHASE 2: LOAD HISTORICAL DATA ==========
  message("\nLoading historical field data for trend calculations...")
  num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG)
  message(paste("  Attempting to load up to", num_weeks_to_load, "weeks of historical data..."))

  allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv)

  historical_data <- load_historical_field_data(
    project_dir, current_week, current_year, reports_dir,
    num_weeks = num_weeks_to_load,
    auto_generate = allow_auto_gen,
    field_boundaries_sf = field_boundaries_sf,
    daily_vals_dir = daily_vals_dir
  )

  # ========== PHASE 3: LOAD PLANTING DATES ==========
  message("\nLoading harvest data from harvest.xlsx for planting dates...")
  harvest_file_path <- file.path(data_dir, "harvest.xlsx")

  harvesting_data <- tryCatch({
    if (file.exists(harvest_file_path)) {
      harvest_raw <- readxl::read_excel(harvest_file_path)
      harvest_raw$season_start <- as.Date(harvest_raw$season_start)
      harvest_raw$season_end <- as.Date(harvest_raw$season_end)
      message(paste("  ✓ Loaded harvest data:", nrow(harvest_raw), "rows"))
      harvest_raw
    } else {
      message(paste("  WARNING: harvest.xlsx not found at", harvest_file_path))
      NULL
    }
  }, error = function(e) {
    message(paste("  ERROR loading harvest.xlsx:", e$message))
    NULL
  })

  planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)

  if (is.null(planting_dates) || nrow(planting_dates) == 0) {
    message("WARNING: No planting dates available. Using NA for all fields.")
    planting_dates <- data.frame(
      field_id = field_boundaries_sf$field,
      planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)),
      stringsAsFactors = FALSE
    )
  }

  # ========== PHASE 4: CALCULATE WEEKLY STATISTICS ==========
  message("\nUsing modular RDS-based approach for weekly statistics...")

  # Current week
  message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...")
  current_stats <- load_or_calculate_weekly_stats(
    week_num = current_week,
    year = current_year,
    project_dir = project_dir,
    field_boundaries_sf = field_boundaries_sf,
    mosaic_dir = weekly_mosaic,
    reports_dir = reports_dir,
    report_date = end_date
  )
  message(paste("  ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week"))

  # Previous week
  message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...")
  prev_report_date <- end_date - 7

  prev_stats <- load_or_calculate_weekly_stats(
    week_num = previous_week,
    year = previous_year,
    project_dir = project_dir,
    field_boundaries_sf = field_boundaries_sf,
    mosaic_dir = weekly_mosaic,
    reports_dir = reports_dir,
    report_date = prev_report_date
  )
  message(paste("  ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week"))

  # ========== PHASE 5: CALCULATE TRENDS ==========
  message("\n3. Calculating trend columns...")
  current_stats <- calculate_kpi_trends(
    current_stats, prev_stats,
    project_dir = project_dir,
    reports_dir = reports_dir,
    current_week = current_week,
    year = current_year
  )
  message(paste("  ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))

  # ========== PHASE 6: LOAD HARVEST PROBABILITIES ==========
  message("\n4. Loading harvest probabilities from script 31...")
  harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats")
  harvest_prob_file <- file.path(harvest_prob_dir,
                                  sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year))
  message(paste("  Looking for:", harvest_prob_file))

  imminent_prob_data <- tryCatch({
    if (file.exists(harvest_prob_file)) {
      prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE,
                                 col_types = readr::cols(.default = readr::col_character()))
      message(paste("  ✓ Loaded harvest probabilities for", nrow(prob_df), "fields"))
      prob_df %>%
        select(field, imminent_prob, detected_prob) %>%
        rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob)
    } else {
      message(paste("  INFO: Harvest probabilities not available (script 31 not run)"))
      NULL
    }
  }, error = function(e) {
    message(paste("  WARNING: Could not load harvest probabilities:", e$message))
    NULL
  })

  # ========== PHASE 7: CALCULATE GAP SCORES ==========
  gap_scores_df <- calculate_gap_scores(per_field_files, field_boundaries_sf)

  # ========== PHASE 8: BUILD FINAL PER-FIELD DATAFRAME ==========
  field_analysis_df <- calculate_all_field_kpis(
    current_stats = current_stats,
    planting_dates = planting_dates,
    imminent_prob_data = imminent_prob_data,
    gap_scores_df = gap_scores_df,
    field_boundaries_sf = field_boundaries_sf,
    end_date = end_date
  )

  # ========== PHASE 9: EXPORT PER-FIELD RESULTS ==========
  export_paths <- export_field_analysis_excel(
    field_analysis_df,
    NULL,
    project_dir,
    current_week,
    current_year,
    reports_dir
  )

  # cat("\n--- Per-field Results (first 10) ---\n")
  # available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category")
  # available_cols <- available_cols[available_cols %in% names(field_analysis_df)]
  # if (length(available_cols) > 0) {
  #   print(head(field_analysis_df[, available_cols], 10))
  # }

  # # ========== PHASE 10: CALCULATE FARM-LEVEL KPIS ==========
  # farm_kpi_results <- calculate_farm_level_kpis(
  #   field_analysis_df,
  #   current_week,
  #   current_year,
  #   end_date
  # )
}

# ============================================================================
# FUTURE EXTENSION POINTS
# ============================================================================

# Placeholder for ANGATA-specific utilities that may be added in future:
# - Custom yield models based on ANGATA historical data
# - Field condition thresholds specific to ANGATA growing practices
# - Integration with ANGATA harvest scheduling system
# - WhatsApp messaging templates for ANGATA supply chain stakeholders
# - Cost/benefit analysis for ANGATA operational decisions

# These functions can be added here as ANGATA requirements evolve.