SmartCane/r_app/80_utils_agronomic_support.R

1185 lines
42 KiB
R

# 80_UTILS_AGRONOMIC_SUPPORT.R
# ============================================================================
# AURA-SPECIFIC KPI UTILITIES (SCRIPT 80 - CLIENT TYPE: agronomic_support)
#
# Contains all 6 AURA KPI calculation functions and helpers:
# - Field uniformity KPI (CV-based, spatial autocorrelation)
# - Area change KPI (week-over-week CI changes)
# - TCH forecasted KPI (tonnage projections from harvest data)
# - Growth decline KPI (trend analysis)
# - Weed presence KPI (field fragmentation detection)
# - Gap filling KPI (interpolation quality)
# - KPI reporting (summary tables, field details, text interpretation)
# - KPI export (Excel, RDS, data export)
#
# Orchestrator: calculate_all_field_analysis_agronomic_support()
# Dependencies: 00_common_utils.R (safe_log), sourced from common
# Used by: 80_calculate_kpis.R (when client_type == "agronomic_support")
# ============================================================================
library(terra)
library(sf)
library(dplyr)
library(tidyr)
library(readxl)
library(writexl)
library(spdep)
# ============================================================================
# SHARED HELPER FUNCTIONS (NOW IN 80_UTILS_COMMON.R)
# ============================================================================
# The following helper functions have been moved to 80_utils_common.R:
# - calculate_cv()
# - calculate_change_percentages()
# - calculate_spatial_autocorrelation()
# - extract_ci_values()
# - calculate_week_numbers()
# - load_field_ci_raster()
# - load_weekly_ci_mosaic()
# - prepare_predictions()
#
# These are now sourced from common utils and shared by all client types.
# ============================================================================
# ============================================================================
# AURA KPI CALCULATION FUNCTIONS (6 KPIS)
# ============================================================================
#' KPI 1: Calculate field uniformity based on CV and spatial autocorrelation
#'
#' Measures how uniform crop development is across the field.
#' Low CV + high positive Moran's I = excellent uniformity
#'
#' @param ci_pixels_by_field List of CI pixel arrays for each field
#' @param field_boundaries_sf SF object with field geometries
#' @param ci_band Raster band with CI values
#'
#' @return Data frame with field_idx, cv_value, morans_i, uniformity_score, interpretation
calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_sf, ci_band = NULL,
mosaic_dir = NULL, week_file = NULL) {
result <- data.frame(
field_idx = integer(),
cv_value = numeric(),
morans_i = numeric(),
uniformity_score = numeric(),
uniformity_category = character(),
interpretation = character(),
stringsAsFactors = FALSE
)
# Determine if we're using per-field structure
is_per_field <- !is.null(mosaic_dir) && !is.null(week_file)
for (field_idx in seq_len(nrow(field_boundaries_sf))) {
ci_pixels <- ci_pixels_by_field[[field_idx]]
if (is.null(ci_pixels) || length(ci_pixels) == 0) {
result <- rbind(result, data.frame(
field_idx = field_idx,
cv_value = NA_real_,
morans_i = NA_real_,
uniformity_score = NA_real_,
uniformity_category = "No data",
interpretation = "No data",
stringsAsFactors = FALSE
))
next
}
cv_val <- calculate_cv(ci_pixels)
# Calculate Moran's I
morans_i <- NA_real_
if (is_per_field) {
# Load individual field raster for per-field structure
field_name <- field_boundaries_sf$field[field_idx]
field_mosaic_path <- file.path(mosaic_dir, field_name, week_file)
if (file.exists(field_mosaic_path)) {
tryCatch({
field_raster <- terra::rast(field_mosaic_path)[["CI"]]
single_field <- field_boundaries_sf[field_idx, ]
morans_result <- calculate_spatial_autocorrelation(field_raster, single_field)
if (is.list(morans_result)) {
morans_i <- morans_result$morans_i
} else {
morans_i <- morans_result
}
}, error = function(e) {
message(paste(" Warning: Spatial autocorrelation failed for field", field_name, ":", e$message))
})
}
} else if (!is.null(ci_band) && inherits(ci_band, "SpatRaster")) {
# Use single raster for single-file structure
tryCatch({
single_field <- field_boundaries_sf[field_idx, ]
morans_result <- calculate_spatial_autocorrelation(ci_band, single_field)
if (is.list(morans_result)) {
morans_i <- morans_result$morans_i
} else {
morans_i <- morans_result
}
}, error = function(e) {
message(paste(" Warning: Spatial autocorrelation failed for field", field_idx, ":", e$message))
})
}
# Normalize CV (0-1 scale, invert so lower CV = higher score)
cv_normalized <- min(cv_val / 0.3, 1)
cv_score <- 1 - cv_normalized
# Normalize Moran's I (-1 to 1 scale, shift to 0-1)
morans_normalized <- if (!is.na(morans_i)) {
(morans_i + 1) / 2
} else {
0.5
}
uniformity_score <- 0.7 * cv_score + 0.3 * morans_normalized
# Interpretation
if (is.na(cv_val)) {
interpretation <- "No data"
uniformity_category <- "No data"
} else if (cv_val < 0.08) {
interpretation <- "Excellent uniformity"
uniformity_category <- "Excellent"
} else if (cv_val < 0.15) {
interpretation <- "Good uniformity"
uniformity_category <- "Good"
} else if (cv_val < 0.25) {
interpretation <- "Acceptable uniformity"
uniformity_category <- "Acceptable"
} else if (cv_val < 0.4) {
interpretation <- "Poor uniformity"
uniformity_category <- "Poor"
} else {
interpretation <- "Very poor uniformity"
uniformity_category <- "Very poor"
}
result <- rbind(result, data.frame(
field_idx = field_idx,
cv_value = cv_val,
morans_i = morans_i,
uniformity_score = round(uniformity_score, 3),
uniformity_category = uniformity_category,
interpretation = interpretation,
stringsAsFactors = FALSE
))
}
return(result)
}
#' KPI 2: Calculate area change metric (week-over-week CI changes)
#'
#' Tracks the percentage change in CI between current and previous week
#'
#' @param current_stats Current week field statistics (from extract_field_statistics_from_ci)
#' @param previous_stats Previous week field statistics
#'
#' @return Data frame with field-level CI changes
calculate_area_change_kpi <- function(current_stats, previous_stats, field_boundaries_sf = NULL) {
# Initialize field index vector
field_idx_vec <- seq_len(nrow(current_stats))
if (!is.null(field_boundaries_sf) && "Field_id" %in% names(current_stats)) {
field_idx_vec <- match(current_stats$Field_id, field_boundaries_sf$field)
}
# Initialize result data frame
result <- data.frame(
field_idx = field_idx_vec,
mean_ci_abs_change = NA_real_,
interpretation = NA_character_,
stringsAsFactors = FALSE
)
# Handle case where previous stats is NULL or empty
if (is.null(previous_stats) || nrow(previous_stats) == 0) {
result$interpretation <- "No previous data"
return(result)
}
# Match fields between current and previous stats
# Handle both naming conventions (Field_id vs field_idx)
if ("Field_id" %in% names(current_stats)) {
current_field_col <- "Field_id"
prev_field_col <- "Field_id"
ci_col <- "Mean_CI"
} else {
current_field_col <- "field_idx"
prev_field_col <- "field_idx"
ci_col <- "mean_ci"
}
# Create lookup for previous stats
prev_lookup <- setNames(
previous_stats[[ci_col]],
previous_stats[[prev_field_col]]
)
# Calculate percentage change for each field
for (i in seq_len(nrow(current_stats))) {
current_field_id <- current_stats[[current_field_col]][i]
current_ci <- current_stats[[ci_col]][i]
# Find matching previous CI value
prev_ci <- prev_lookup[[as.character(current_field_id)]]
if (!is.null(prev_ci) && !is.na(prev_ci) && !is.na(current_ci)) {
# Calculate absolute change (CI units)
abs_change <- current_ci - prev_ci
result$mean_ci_abs_change[i] <- round(abs_change, 2)
# Add interpretation
if (abs_change > 0.5) {
result$interpretation[i] <- "Rapid growth"
} else if (abs_change > 0.2) {
result$interpretation[i] <- "Positive growth"
} else if (abs_change >= -0.2) {
result$interpretation[i] <- "Stable"
} else if (abs_change >= -0.5) {
result$interpretation[i] <- "Declining"
} else {
result$interpretation[i] <- "Rapid decline"
}
} else {
result$interpretation[i] <- "No previous data"
}
}
return(result)
}
#' KPI 3: Calculate TCH forecasted (tonnes of cane per hectare)
#'
#' Projects final harvest tonnage based on historical yield data and CI growth trajectory.
#' Uses a Random Forest model trained on harvest data to predict yields for mature fields.
#' Delegates to calculate_yield_prediction_kpi() in 80_utils_common.R.
#'
#' @param field_statistics Current field statistics (dataframe with Mean_CI or mean_ci column)
#' @param harvesting_data Historical harvest data frame (with tonnage_ha column)
#' @param field_boundaries_sf SF object with field geometries
#' @param cumulative_CI_vals_dir Directory with combined CI RDS files (optional)
#' @param data_dir Project data directory (from setup_project_directories or parameters_project.R)
#' Used to build cumulative_CI_vals_dir path if not provided directly (optional)
#' @param project_dir Deprecated: only used if data_dir not provided (optional)
#'
#' @return Data frame with field-level yield forecasts ready for orchestrator
#' Columns: field_idx, tch_forecasted (yields in t/ha)
calculate_tch_forecasted_kpi <- function(field_statistics, harvesting_data = NULL,
field_boundaries_sf = NULL,
cumulative_CI_vals_dir = NULL,
data_dir = NULL,
project_dir = NULL) {
# Use common utils yield prediction function (handles all ML logic)
# This replaces the previous linear model (TCH = 50 + CI*10) with proper ML prediction
# Validate required parameters
if (is.null(field_boundaries_sf)) {
safe_log("field_boundaries_sf is NULL in calculate_tch_forecasted_kpi", "WARNING")
return(data.frame(
field_idx = integer(),
tch_forecasted = numeric(),
stringsAsFactors = FALSE
))
}
# Determine cumulative CI directory
if (is.null(cumulative_CI_vals_dir)) {
# Priority 1: Use provided data_dir parameter
if (!is.null(data_dir)) {
cumulative_CI_vals_dir <- file.path(data_dir, "extracted_ci", "cumulative_vals")
} else if (exists("data_dir", envir = .GlobalEnv)) {
# Priority 2: Fallback to global data_dir from parameters_project.R
cumulative_CI_vals_dir <- file.path(get("data_dir", envir = .GlobalEnv), "extracted_ci", "cumulative_vals")
} else {
# Priority 3: Last resort - log warning and fail gracefully
safe_log("Missing project data directory configuration: provide data_dir parameter or ensure parameters_project.R has set data_dir globally", "WARNING")
safe_log("No training data available for yield prediction", "WARNING")
return(data.frame(
field_idx = integer(),
tch_forecasted = numeric(),
stringsAsFactors = FALSE
))
}
}
# Call the shared yield prediction function from common utils
yield_result <- calculate_yield_prediction_kpi(field_boundaries_sf, harvesting_data, cumulative_CI_vals_dir)
# Extract field-level results from the list
field_results <- yield_result$field_results
# Convert to format expected by orchestrator
# If no predictions, return empty data frame
if (is.null(field_results) || nrow(field_results) == 0) {
return(data.frame(
field_idx = integer(),
tch_forecasted = numeric(),
stringsAsFactors = FALSE
))
}
# Map field names to field_idx using field_boundaries_sf
result <- field_results %>%
mutate(
field_idx = match(field, field_boundaries_sf$field),
tch_forecasted = yield_forecast_t_ha
) %>%
filter(!is.na(field_idx)) %>%
select(field_idx, tch_forecasted)
# Ensure result has proper structure even if empty
if (nrow(result) == 0) {
return(data.frame(
field_idx = integer(),
tch_forecasted = numeric(),
stringsAsFactors = FALSE
))
}
return(result)
}
#' KPI 4: Calculate growth decline indicator
#'
#' Identifies fields with negative growth trajectory
#'
#' @param ci_values_list List of CI values for each field (multiple weeks)
#'
#' @return Data frame with field-level decline indicators
calculate_growth_decline_kpi <- function(ci_values_list) {
result <- data.frame(
field_idx = seq_len(length(ci_values_list)),
four_week_trend = NA_real_,
trend_interpretation = NA_character_,
decline_severity = NA_character_,
stringsAsFactors = FALSE
)
for (field_idx in seq_len(length(ci_values_list))) {
ci_vals <- ci_values_list[[field_idx]]
if (is.null(ci_vals) || length(ci_vals) < 2) {
result$trend_interpretation[field_idx] <- "Insufficient data"
next
}
ci_vals <- ci_vals[!is.na(ci_vals)]
if (length(ci_vals) < 2) {
result$trend_interpretation[field_idx] <- "Insufficient data"
next
}
# Calculate linear trend
weeks <- seq_along(ci_vals)
lm_fit <- lm(ci_vals ~ weeks)
slope <- coef(lm_fit)["weeks"]
result$four_week_trend[field_idx] <- round(as.numeric(slope), 3)
if (slope > 0.1) {
result$trend_interpretation[field_idx] <- "Strong growth"
result$decline_severity[field_idx] <- "None"
} else if (slope > 0) {
result$trend_interpretation[field_idx] <- "Weak growth"
result$decline_severity[field_idx] <- "None"
} else if (slope > -0.1) {
result$trend_interpretation[field_idx] <- "Slight decline"
result$decline_severity[field_idx] <- "Low"
} else if (slope > -0.3) {
result$trend_interpretation[field_idx] <- "Moderate decline"
result$decline_severity[field_idx] <- "Medium"
} else {
result$trend_interpretation[field_idx] <- "Strong decline"
result$decline_severity[field_idx] <- "High"
}
}
return(result)
}
#'
#' Combines two complementary metrics for comprehensive heterogeneity assessment:
#' - Gini Coefficient: Distribution inequality of CI values (0=uniform, 1=unequal)
#' - Moran's I: Spatial autocorrelation (-1 to +1, indicates clustering vs dispersal)
#'
#' @param ci_pixels_by_field List of CI pixel arrays for each field
#' @param field_boundaries_sf SF object with field geometries
#' @param mosaic_dir Directory path to per-field mosaic files (for Moran's I)
#' @param week_file Week file pattern (for Moran's I calculation)
#' @param mean_ci_values Optional vector of mean CI values per field
#'
#' @return Data frame with gini_coefficient, morans_i, patchiness_risk, patchiness_interpretation
calculate_patchiness_kpi <- function(ci_pixels_by_field, field_boundaries_sf = NULL,
mosaic_dir = NULL, week_file = NULL, mean_ci_values = NULL) {
n_fields <- length(ci_pixels_by_field)
result <- data.frame(
field_idx = seq_len(n_fields),
gini_coefficient = NA_real_,
morans_i = NA_real_,
patchiness_risk = NA_character_,
patchiness_interpretation = NA_character_,
stringsAsFactors = FALSE
)
# Determine if per-field structure available
is_per_field <- !is.null(mosaic_dir) && !is.null(week_file) && !is.null(field_boundaries_sf)
for (i in seq_len(n_fields)) {
ci_pixels <- ci_pixels_by_field[[i]]
if (is.null(ci_pixels) || length(ci_pixels) == 0) {
result$patchiness_risk[i] <- "No data"
result$patchiness_interpretation[i] <- "No data"
next
}
ci_pixels <- ci_pixels[!is.na(ci_pixels)]
if (length(ci_pixels) == 0) {
result$patchiness_risk[i] <- "No data"
result$patchiness_interpretation[i] <- "No data"
next
}
# =========================================
# METRIC 1: Calculate Gini Coefficient
# =========================================
gini <- NA_real_
if (length(ci_pixels) > 1) {
ci_sorted <- sort(ci_pixels)
n <- length(ci_sorted)
numerator <- 2 * sum(seq_len(n) * ci_sorted)
denominator <- n * sum(ci_sorted)
gini <- (numerator / denominator) - (n + 1) / n
gini <- max(0, min(1, gini)) # Clamp to 0-1
}
result$gini_coefficient[i] <- gini
# =========================================
# METRIC 2: Calculate Moran's I (spatial clustering)
# =========================================
morans_i <- NA_real_
if (is_per_field) {
field_name <- field_boundaries_sf$field[i]
field_mosaic_path <- file.path(mosaic_dir, field_name, week_file)
if (file.exists(field_mosaic_path)) {
tryCatch({
field_raster <- terra::rast(field_mosaic_path)[["CI"]]
single_field <- field_boundaries_sf[i, ]
morans_result <- calculate_spatial_autocorrelation(field_raster, single_field)
if (is.list(morans_result)) {
morans_i <- morans_result$morans_i
} else {
morans_i <- morans_result
}
}, error = function(e) {
safe_log(paste("Warning: Moran's I failed for field", field_name, ":", e$message), "WARNING")
})
}
}
result$morans_i[i] <- morans_i
# =========================================
# RISK DETERMINATION: Gini + Moran's I combination
# =========================================
# Logic:
# - High Gini (>0.3) + High Moran's I (>0.85) = High patchiness (localized clusters)
# - High Gini + Low Moran's I = Medium patchiness (scattered heterogeneity)
# - Low Gini (<0.15) = Minimal patchiness (uniform)
# - Moderate Gini = Low to Medium patchiness
if (is.na(gini)) {
result$patchiness_risk[i] <- "No data"
} else if (gini < 0.15) {
result$patchiness_risk[i] <- "Minimal"
} else if (gini < 0.30) {
# Low-to-moderate Gini
if (!is.na(morans_i) && morans_i > 0.85) {
result$patchiness_risk[i] <- "Medium" # Some clustering
} else {
result$patchiness_risk[i] <- "Low"
}
} else if (gini < 0.50) {
# High Gini
if (!is.na(morans_i) && morans_i > 0.85) {
result$patchiness_risk[i] <- "High" # Localized problem clusters
} else {
result$patchiness_risk[i] <- "Medium" # Scattered issues
}
} else {
# Very high Gini (>0.5)
result$patchiness_risk[i] <- "High"
}
# =========================================
# INTERPRETATION: Combined Gini + Moran's I narrative
# =========================================
result$patchiness_interpretation[i] <- dplyr::case_when(
is.na(gini) ~ "No data",
gini < 0.15 & (is.na(morans_i) | morans_i < 0.75) ~
"Excellent uniformity - minimal patchiness",
gini < 0.30 & (is.na(morans_i) | morans_i < 0.75) ~
"Good uniformity - low patchiness",
gini < 0.30 & !is.na(morans_i) & morans_i > 0.85 ~
"Moderate uniformity with localized clustering",
gini < 0.50 & (is.na(morans_i) | morans_i < 0.75) ~
"Poor uniformity - scattered heterogeneity",
gini < 0.50 & !is.na(morans_i) & morans_i > 0.85 ~
"Poor uniformity with clustered problem areas",
gini >= 0.50 ~
"Severe heterogeneity - requires field investigation",
TRUE ~ "Mixed heterogeneity"
)
}
return(result)
}
# ============================================================================
# KPI ORCHESTRATOR AND REPORTING
# ============================================================================
#' Create summary tables for all 6 KPIs
#'
#' @param all_kpis List containing results from all 6 KPI functions
#'
#' @return List of summary data frames ready for reporting
create_summary_tables <- function(all_kpis) {
kpi_summary <- list(
uniformity = all_kpis$uniformity %>%
select(field_idx, cv_value, uniformity_category, interpretation),
area_change = all_kpis$area_change %>%
select(field_idx, mean_ci_abs_change, interpretation),
tch_forecast = all_kpis$tch_forecasted %>%
select(field_idx, tch_forecasted),
growth_decline = all_kpis$growth_decline %>%
select(field_idx, four_week_trend, trend_interpretation, decline_severity),
patchiness = all_kpis$patchiness %>%
select(field_idx, gini_coefficient, morans_i, patchiness_interpretation, patchiness_risk),
gap_filling = if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) {
all_kpis$gap_filling %>%
select(field_idx, gap_score, gap_level)
} else {
NULL
}
)
return(kpi_summary)
}
#' Create detailed field-by-field KPI report (ALL KPIs in one row)
#'
#' @param field_boundaries_sf SF object with field boundaries
#' @param all_kpis List with all KPI results
#' @param current_week Current week number
#' @param current_year Current year
#'
#' @return Data frame with one row per field, all KPI columns
create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_week, current_year, current_stats = NULL) {
# Start with field identifiers AND field_idx for joining
result <- field_boundaries_sf %>%
sf::st_drop_geometry() %>%
mutate(
field_idx = row_number(),
Field_id = field,
Field_name = field,
Week = current_week,
Year = current_year
) %>%
select(field_idx, Field_id, Field_name, Week, Year)
# ============================================
# GROUP 0: MEAN CI (from field statistics)
# ============================================
if (!is.null(current_stats)) {
result <- result %>%
left_join(
current_stats %>%
select(Field_id, Mean_CI),
by = "Field_id"
)
} else {
result$Mean_CI <- NA_real_
}
# ============================================
# GROUP 1: FIELD UNIFORMITY (KPI 1)
# ============================================
result <- result %>%
left_join(
all_kpis$uniformity %>%
select(field_idx, CV = cv_value,
Uniformity_Category = uniformity_category,
Uniformity_Interpretation = interpretation),
by = "field_idx"
)
# ============================================
# GROUP 2: GROWTH & TREND ANALYSIS (KPI 2 + KPI 4)
# ============================================
# KPI 2: Area Change
result <- result %>%
left_join(
all_kpis$area_change %>%
select(field_idx, Weekly_CI_Change = mean_ci_abs_change,
Area_Change_Interpretation = interpretation),
by = "field_idx"
)
# KPI 4: Growth Decline
result <- result %>%
left_join(
all_kpis$growth_decline %>%
select(field_idx, Four_Week_Trend = four_week_trend,
Trend_Interpretation = trend_interpretation,
Decline_Severity = decline_severity),
by = "field_idx"
)
# ============================================
# GROUP 3: FIELD HETEROGENEITY/PATCHINESS (KPI 5)
# ============================================
# KPI 5: Field Patchiness (Gini + Moran's I combination)
result <- result %>%
left_join(
all_kpis$patchiness %>%
select(field_idx, Gini_Coefficient = gini_coefficient,
Morans_I = morans_i,
Patchiness_Interpretation = patchiness_interpretation,
Patchiness_Risk = patchiness_risk),
by = "field_idx"
)
# ============================================
# GROUP 4: YIELD FORECAST (KPI 3)
# ============================================
result <- result %>%
left_join(
all_kpis$tch_forecasted %>%
select(field_idx, TCH_Forecasted = tch_forecasted),
by = "field_idx"
)
# ============================================
# GROUP 5: DATA QUALITY / GAP FILLING (KPI 6)
# ============================================
# Add gap filling if available
if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) {
result <- result %>%
left_join(
all_kpis$gap_filling %>%
select(field_idx, Gap_Score = gap_score, Gap_Level = gap_level),
by = "field_idx"
)
}
# Remove field_idx from final output
result <- result %>%
select(-field_idx)
# Round numeric columns
result <- result %>%
mutate(across(where(is.numeric), ~ round(., 2)))
return(result)
}
#' Generate KPI text interpretation for inclusion in Word report
#'
#' @param all_kpis List with all KPI results
#'
#' @return Character string with formatted KPI summary text
create_field_kpi_text <- function(all_kpis) {
text_parts <- c(
"## AURA KPI ANALYSIS SUMMARY\n",
"### Field Uniformity\n",
paste(all_kpis$uniformity$interpretation, collapse = "; "), "\n",
"### Growth Trends\n",
paste(all_kpis$growth_decline$trend_interpretation, collapse = "; "), "\n",
"### Weed/Pest Pressure\n",
paste(all_kpis$weed_presence$weed_pressure_risk, collapse = "; "), "\n"
)
return(paste(text_parts, collapse = ""))
}
#' Export detailed KPI data to Excel/RDS
#'
#' @param field_detail_df Data frame with all KPI columns (one row per field)
#' @param kpi_summary List with summary tables (optional, for metadata)
#' @param output_dir Directory for output files
#' @param week Week number
#' @param year Year
#' @param project_dir Project name
#' @return List of output file paths
export_kpi_data <- function(field_detail_df, kpi_summary, output_dir, week, year, project_dir) {
# Use the common export function from 80_utils_common.R
export_paths <- export_field_analysis_excel(
field_df = field_detail_df,
summary_df = NULL, # No separate summary sheet for agronomic support
project_dir = project_dir,
current_week = week,
year = year,
reports_dir = output_dir
)
return(export_paths)
}
# ============================================================================
# ORCHESTRATOR FUNCTION
# ============================================================================
#' Calculate all 6 AURA KPIs
#'
#' Main entry point for AURA KPI calculation.
#' This function orchestrates the 6 KPI calculations and returns all results.
#'
#' @param field_boundaries_sf SF object with field geometries
#' @param current_week ISO week number (1-53)
#' @param current_year ISO week year
#' @param current_mosaic_dir Directory containing current week's mosaic
#' @param previous_mosaic_dir Directory containing previous week's mosaic (optional)
#' @param ci_rds_path Path to combined CI RDS file
#' @param harvesting_data Data frame with harvest data (optional)
#' @param output_dir Directory for KPI exports
#'
#' @return List with results from all 6 KPI functions
#'
#' @details
#' This function:
#' 1. Loads current week mosaic and extracts field statistics
#' 2. (Optionally) loads previous week mosaic for comparison metrics
#' 3. Calculates all 6 AURA KPIs
#' 4. Creates summary tables
#' 5. Exports results to Excel/RDS
#'
calculate_all_field_analysis_agronomic_support <- function(
field_boundaries_sf,
current_week,
current_year,
current_mosaic_dir,
previous_mosaic_dir = NULL,
ci_rds_path = NULL,
harvesting_data = NULL,
output_dir = NULL,
data_dir = NULL,
project_dir = NULL
) {
message("\n============ AURA KPI CALCULATION (6 KPIs) ============")
# DETECT STRUCTURE FIRST - before any use of is_per_field
week_file <- sprintf("week_%02d_%d.tif", current_week, current_year)
# Safely identify immediate child directories (not including root)
# Use list.files + dir.exists filter instead of list.dirs for robustness
all_entries <- list.files(current_mosaic_dir, full.names = FALSE)
field_dirs <- all_entries[sapply(
file.path(current_mosaic_dir, all_entries),
dir.exists
)]
is_per_field <- length(field_dirs) > 0 &&
file.exists(file.path(current_mosaic_dir, field_dirs[1], week_file))
if (is_per_field) {
message("Detected per-field mosaic structure...")
message("Using field-by-field extraction (similar to cane supply workflow)...")
# Use the same extraction method as cane supply
current_stats <- calculate_field_statistics(
field_boundaries_sf,
current_week,
current_year,
current_mosaic_dir,
report_date = Sys.Date()
)
# Extract CI pixels for each field from their individual mosaics
ci_pixels_by_field <- list()
for (i in seq_len(nrow(field_boundaries_sf))) {
field_name <- field_boundaries_sf$field[i]
field_mosaic_path <- file.path(current_mosaic_dir, field_name, week_file)
if (file.exists(field_mosaic_path)) {
tryCatch({
field_raster <- terra::rast(field_mosaic_path)
ci_band <- field_raster[["CI"]]
field_vect <- terra::vect(field_boundaries_sf[i, ])
ci_pixels_by_field[[i]] <- extract_ci_values(ci_band, field_vect)
}, error = function(e) {
message(paste(" Warning: Could not extract CI for field", field_name, ":", e$message))
ci_pixels_by_field[[i]] <- NULL
})
} else {
ci_pixels_by_field[[i]] <- NULL
}
}
# For uniformity calculations that need a reference raster, load first available
current_mosaic <- NULL
for (field_name in field_dirs) {
field_mosaic_path <- file.path(current_mosaic_dir, field_name, week_file)
if (file.exists(field_mosaic_path)) {
tryCatch({
current_mosaic <- terra::rast(field_mosaic_path)[["CI"]]
break
}, error = function(e) {
next
})
}
}
} else {
stop("ERROR: Per-field mosaic structure required (weekly_mosaic/{FIELD_NAME}/week_WW_YYYY.tif)")
}
# Load previous week mosaic (if available)
previous_stats <- NULL
target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1)
message(paste("Loading previous week mosaic (week", target_prev$week, target_prev$year, ")..."))
# Try loading previous week from the same per-field directory structure
prev_week_file <- sprintf("week_%02d_%d.tif", target_prev$week, target_prev$year)
prev_field_exists <- any(sapply(field_dirs, function(field) {
file.exists(file.path(current_mosaic_dir, field, prev_week_file))
}))
if (prev_field_exists) {
message(" Found previous week per-field mosaics, calculating statistics...")
previous_stats <- calculate_field_statistics(
field_boundaries_sf,
target_prev$week,
target_prev$year,
current_mosaic_dir,
report_date = Sys.Date() - 7
)
} else {
message(" Previous week mosaic not available - skipping area change KPI")
}
# Calculate 6 KPIs
message("\nCalculating KPI 1: Field Uniformity...")
uniformity_kpi <- calculate_field_uniformity_kpi(
ci_pixels_by_field,
field_boundaries_sf,
ci_band = NULL,
mosaic_dir = current_mosaic_dir,
week_file = week_file
)
message("Calculating KPI 2: Area Change...")
if (!is.null(previous_stats)) {
area_change_kpi <- calculate_area_change_kpi(current_stats, previous_stats, field_boundaries_sf)
} else {
area_change_kpi <- data.frame(
field_idx = seq_len(nrow(field_boundaries_sf)),
mean_ci_abs_change = NA_real_,
interpretation = rep("No previous data", nrow(field_boundaries_sf))
)
}
message("Calculating KPI 3: TCH Forecasted...")
tch_kpi <- calculate_tch_forecasted_kpi(current_stats, harvesting_data, field_boundaries_sf,
data_dir = data_dir, project_dir = project_dir)
message("Calculating KPI 4: Growth Decline...")
# Load historical field statistics to build weekly mean CI time series per field
# (growth_decline_kpi expects temporal series, not spatial pixel arrays)
weekly_mean_ci_by_field <- list()
# Build list of weekly mean CI values for each field (4-week lookback)
for (field_idx in seq_len(nrow(field_boundaries_sf))) {
weekly_ci_values <- c()
}
# Try to load historical data for trend calculation
if (!is.null(output_dir) && !is.null(project_dir)) {
tryCatch({
historical_data <- load_historical_field_data(
project_dir = project_dir,
current_week = current_week,
current_year = current_year,
reports_dir = output_dir,
num_weeks = 4,
auto_generate = FALSE,
field_boundaries_sf = field_boundaries_sf
)
if (!is.null(historical_data) && length(historical_data) > 0) {
message(" Building weekly mean CI time series from historical data...")
# Initialize list with empty vectors for each field
for (field_idx in seq_len(nrow(field_boundaries_sf))) {
weekly_mean_ci_by_field[[field_idx]] <- c()
}
# Extract Mean_CI from each historical week (reverse order to go chronologically)
for (hist_idx in rev(seq_along(historical_data))) {
hist_week <- historical_data[[hist_idx]]
hist_data <- hist_week$data
# Extract Mean_CI column if available
if ("Mean_CI" %in% names(hist_data)) {
# Match fields between historical data and field_boundaries
for (field_idx in seq_len(nrow(field_boundaries_sf))) {
field_name <- field_boundaries_sf$field[field_idx]
# Find matching row in historical data by field name/ID
field_row <- which(
(hist_data$Field_id == field_name | hist_data$Field_name == field_name) &
!is.na(hist_data$Mean_CI)
)
if (length(field_row) > 0) {
mean_ci_val <- as.numeric(hist_data$Mean_CI[field_row[1]])
if (!is.na(mean_ci_val)) {
weekly_mean_ci_by_field[[field_idx]] <- c(weekly_mean_ci_by_field[[field_idx]], mean_ci_val)
}
}
}
}
}
message(paste(" ✓ Loaded weekly Mean_CI for", sum(sapply(weekly_mean_ci_by_field, length) > 0), "fields"))
}
}, error = function(e) {
message(paste(" Note: Could not load historical field data for trend analysis:", e$message))
})
}
# If no historical data available, create empty vectors (will result in "Insufficient data")
if (length(weekly_mean_ci_by_field) == 0 || all(sapply(weekly_mean_ci_by_field, length) == 0)) {
message(" Warning: No historical weekly CI data available - attempting to load from RDS cache...")
# LOAD HISTORICAL WEEKS FOR TREND CALCULATION (4-week and 8-week analysis)
# Try to load previous weeks from cache (same as cane_supply workflow)
ci_values_4week_per_field <- list()
for (field_idx in seq_len(nrow(field_boundaries_sf))) {
ci_values_4week <- numeric()
field_name <- field_boundaries_sf$field[field_idx]
# Load up to 4 previous weeks
for (lookback in 3:0) {
target_week <- current_week - lookback
target_year <- current_year
if (target_week < 1) {
target_week <- target_week + 52
target_year <- target_year - 1
}
# Try to load from cached RDS files
if (!is.null(output_dir)) {
rds_filename <- sprintf("%s_field_analysis_week%02d_%d.rds", project_dir, target_week, target_year)
rds_path <- file.path(output_dir, rds_filename)
# Normalize path safely (for cases where file may not exist yet)
rds_path_normalized <- tryCatch(
normalizePath(rds_path, winslash = "/"),
error = function(e) rds_path # Fall back to original path if normalizePath fails
)
message(paste(" Trying to load:", rds_path_normalized))
if (file.exists(rds_path_normalized)) {
tryCatch({
rds_content <- readRDS(rds_path_normalized)
# RDS files are saved as lists with 'field_analysis' key (not 'field_data')
if (is.list(rds_content) && "field_analysis" %in% names(rds_content)) {
cached_data <- rds_content$field_analysis
} else if (is.list(rds_content) && "field_data" %in% names(rds_content)) {
# Fallback for older RDS format
cached_data <- rds_content$field_data
} else if (is.data.frame(rds_content)) {
cached_data <- rds_content
} else {
cached_data <- NULL
}
if (!is.null(cached_data) && is.data.frame(cached_data) && "Mean_CI" %in% names(cached_data)) {
matching_row <- which(cached_data$Field_id == field_name | cached_data$Field_name == field_name)
if (length(matching_row) > 0 && !is.na(cached_data$Mean_CI[matching_row[1]])) {
ci_val <- as.numeric(cached_data$Mean_CI[matching_row[1]])
message(paste(" ✓ Loaded week", target_week, "CI =", ci_val))
ci_values_4week <- c(ci_values_4week, ci_val)
}
}
}, error = function(e) {
message(paste(" Error loading:", e$message))
})
}
}
}
# Add current week CI
if (!is.null(current_stats) && nrow(current_stats) > 0) {
matching_row <- which(
(current_stats$Field_id == field_name | current_stats$Field_name == field_name) &
!is.na(current_stats$Mean_CI)
)
if (length(matching_row) > 0) {
ci_val <- as.numeric(current_stats$Mean_CI[matching_row[1]])
message(paste(" ✓ Loaded current week CI =", ci_val))
ci_values_4week <- c(ci_values_4week, ci_val)
}
}
ci_values_4week_per_field[[field_idx]] <- ci_values_4week
}
# Use 4-week CI series for trend calculation (or current week if unavailable)
for (field_idx in seq_len(nrow(field_boundaries_sf))) {
weekly_mean_ci_by_field[[field_idx]] <- ci_values_4week_per_field[[field_idx]]
if (length(weekly_mean_ci_by_field[[field_idx]]) == 0) {
weekly_mean_ci_by_field[[field_idx]] <- NA_real_
}
}
message(paste(" ✓ Loaded trend data for", sum(sapply(weekly_mean_ci_by_field, length) > 0), "fields"))
}
# Calculate growth decline using weekly time series (not spatial pixel arrays)
growth_decline_kpi <- calculate_growth_decline_kpi(weekly_mean_ci_by_field)
message("Calculating KPI 5: Field Patchiness...")
# Calculate patchiness using both Gini coefficient and Moran's I spatial clustering
patchiness_kpi <- calculate_patchiness_kpi(
ci_pixels_by_field,
field_boundaries_sf = field_boundaries_sf,
mosaic_dir = current_mosaic_dir,
week_file = week_file,
mean_ci_values = current_stats$Mean_CI
)
message("Calculating KPI 6: Gap Filling...")
# Build list of per-field files for this week
per_field_files <- c()
for (field_name in field_dirs) {
field_mosaic_path <- file.path(current_mosaic_dir, field_name, week_file)
if (file.exists(field_mosaic_path)) {
per_field_files <- c(per_field_files, field_mosaic_path)
}
}
if (length(per_field_files) > 0) {
# Use the common wrapper function (same as cane supply)
gap_scores_result <- calculate_gap_scores(per_field_files, field_boundaries_sf)
# Guard against NULL or empty result from calculate_gap_scores
if (is.null(gap_scores_result) || nrow(gap_scores_result) == 0) {
message(" Warning: calculate_gap_scores returned NULL/empty - creating fallback")
gap_scores_result <- data.frame(
Field_id = field_boundaries_sf$field,
gap_score = NA_real_,
stringsAsFactors = FALSE
)
}
# Convert to the format expected by orchestrator
gap_filling_kpi <- gap_scores_result %>%
mutate(field_idx = match(Field_id, field_boundaries_sf$field)) %>%
select(field_idx, gap_score) %>%
mutate(
gap_level = dplyr::case_when(
gap_score < 10 ~ "Minimal",
gap_score < 25 ~ "Moderate",
TRUE ~ "Significant"
),
mean_ci = NA_real_,
outlier_threshold = NA_real_
)
} else {
# Fallback: no per-field files
gap_filling_kpi <- data.frame(
field_idx = seq_len(nrow(field_boundaries_sf)),
gap_score = NA_real_,
gap_level = NA_character_,
mean_ci = NA_real_,
outlier_threshold = NA_real_
)
}
# Compile results
all_kpis <- list(
uniformity = uniformity_kpi,
area_change = area_change_kpi,
tch_forecasted = tch_kpi,
growth_decline = growth_decline_kpi,
patchiness = patchiness_kpi,
gap_filling = gap_filling_kpi
)
# Deduplicate KPI dataframes to ensure one row per field_idx
# (sometimes joins or calculations can create duplicate rows)
message("Deduplicating KPI results (keeping first occurrence per field)...")
all_kpis$uniformity <- all_kpis$uniformity %>%
distinct(field_idx, .keep_all = TRUE)
all_kpis$area_change <- all_kpis$area_change %>%
distinct(field_idx, .keep_all = TRUE)
all_kpis$tch_forecasted <- all_kpis$tch_forecasted %>%
distinct(field_idx, .keep_all = TRUE)
all_kpis$growth_decline <- all_kpis$growth_decline %>%
distinct(field_idx, .keep_all = TRUE)
all_kpis$patchiness <- all_kpis$patchiness %>%
distinct(field_idx, .keep_all = TRUE)
all_kpis$gap_filling <- all_kpis$gap_filling %>%
distinct(field_idx, .keep_all = TRUE)
# Built single-sheet field detail table with all KPIs
message("\nBuilding comprehensive field detail table...")
field_detail_df <- create_field_detail_table(
field_boundaries_sf = field_boundaries_sf,
all_kpis = all_kpis,
current_week = current_week,
current_year = current_year,
current_stats = current_stats
)
# Create summary tables
message("\nCreating summary tables...")
kpi_summary <- create_summary_tables(all_kpis)
# Export
message("\nExporting KPI data (single-sheet format)...")
export_paths <- export_kpi_data(
field_detail_df = field_detail_df,
kpi_summary = kpi_summary,
output_dir = output_dir,
week = current_week,
year = current_year,
project_dir = project_dir
)
message(paste("\n✓ AURA KPI calculation complete. Week", current_week, current_year))
return(list(
field_analysis_df = field_detail_df,
kpis = all_kpis,
summary_tables = kpi_summary,
export_paths = export_paths,
metadata = list(
week = current_week,
year = current_year,
project = project_dir
)
))
}