1418 lines
56 KiB
R
1418 lines
56 KiB
R
# 80_KPI_UTILS.R
|
|
# ===============
|
|
# Consolidated KPI calculation utilities for Script 80.
|
|
# Contains all 6 farm-level KPIs for SmartCane analysis.
|
|
#
|
|
# Includes helper functions from crop_messaging_utils.R:
|
|
# - safe_log()
|
|
# - calculate_cv()
|
|
# - calculate_spatial_autocorrelation()
|
|
# - calculate_change_percentages()
|
|
|
|
# ============================================================================
|
|
# HELPER FUNCTIONS FROM CROP_MESSAGING_UTILS.R
|
|
# ============================================================================
|
|
|
|
# Analysis configuration - Thresholds for clustering analysis
|
|
MORAN_THRESHOLD_HIGH <- 0.95 # Above this = very strong clustering (problematic patterns)
|
|
MORAN_THRESHOLD_MODERATE <- 0.85 # Above this = moderate clustering
|
|
MORAN_THRESHOLD_LOW <- 0.7 # Above this = normal field continuity
|
|
|
|
#' Logging utility for consistent message handling
|
|
#' @param message The message to log
|
|
#' @param level The log level (default: "INFO")
|
|
#' @return NULL (used for side effects)
|
|
safe_log <- function(message, level = "INFO") {
|
|
if (exists("log_message")) {
|
|
log_message(message, level)
|
|
} else {
|
|
if (level %in% c("ERROR", "WARNING")) {
|
|
warning(message)
|
|
} else {
|
|
message(message)
|
|
}
|
|
}
|
|
}
|
|
|
|
#' Calculate coefficient of variation for uniformity assessment
|
|
#' @param values Numeric vector of CI values
|
|
#' @return Coefficient of variation (CV) as decimal
|
|
calculate_cv <- function(values) {
|
|
values <- values[!is.na(values) & is.finite(values)]
|
|
if (length(values) < 2) return(NA)
|
|
cv <- sd(values) / mean(values) # Keep as decimal
|
|
return(cv)
|
|
}
|
|
|
|
#' Calculate percentage of field with positive vs negative change
|
|
#' @param current_values Current week CI values
|
|
#' @param previous_values Previous week CI values
|
|
#' @return List with percentage of positive and negative change areas
|
|
calculate_change_percentages <- function(current_values, previous_values) {
|
|
# Ensure same length (should be from same field boundaries)
|
|
if (length(current_values) != length(previous_values)) {
|
|
return(list(positive_pct = NA, negative_pct = NA, stable_pct = NA))
|
|
}
|
|
|
|
# Calculate pixel-wise change
|
|
change_values <- current_values - previous_values
|
|
valid_changes <- change_values[!is.na(change_values) & is.finite(change_values)]
|
|
|
|
if (length(valid_changes) < 2) {
|
|
return(list(positive_pct = NA, negative_pct = NA, stable_pct = NA))
|
|
}
|
|
|
|
# Count positive, negative, and stable areas
|
|
positive_pct <- sum(valid_changes > 0) / length(valid_changes) * 100
|
|
negative_pct <- sum(valid_changes < 0) / length(valid_changes) * 100
|
|
stable_pct <- sum(valid_changes == 0) / length(valid_changes) * 100
|
|
|
|
return(list(
|
|
positive_pct = positive_pct,
|
|
negative_pct = negative_pct,
|
|
stable_pct = stable_pct
|
|
))
|
|
}
|
|
|
|
#' Calculate spatial autocorrelation (Moran's I) for a field
|
|
#' @param ci_raster Terra raster of CI values
|
|
#' @param field_boundary Terra vector of field boundary
|
|
#' @return List with Moran's I statistic and p-value
|
|
calculate_spatial_autocorrelation <- function(ci_raster, field_boundary) {
|
|
|
|
tryCatch({
|
|
# Crop and mask raster to field boundary
|
|
field_raster <- terra::crop(ci_raster, field_boundary)
|
|
field_raster <- terra::mask(field_raster, field_boundary)
|
|
|
|
# Convert to points for spatial analysis
|
|
raster_points <- terra::as.points(field_raster, na.rm = TRUE)
|
|
|
|
# Check if we have enough points
|
|
if (length(raster_points) < 10) {
|
|
return(list(morans_i = NA, p_value = NA, interpretation = "insufficient_data"))
|
|
}
|
|
|
|
# Convert to sf for spdep
|
|
points_sf <- sf::st_as_sf(raster_points)
|
|
|
|
# Create spatial weights matrix (k-nearest neighbors)
|
|
coords <- sf::st_coordinates(points_sf)
|
|
|
|
# Use adaptive number of neighbors based on sample size
|
|
k_neighbors <- min(8, max(4, floor(nrow(coords) / 10)))
|
|
|
|
knn_nb <- spdep::knearneigh(coords, k = k_neighbors)
|
|
knn_listw <- spdep::nb2listw(spdep::knn2nb(knn_nb), style = "W", zero.policy = TRUE)
|
|
|
|
# Calculate Moran's I
|
|
ci_values <- points_sf[[1]] # First column contains CI values
|
|
moran_result <- spdep::moran.test(ci_values, knn_listw, zero.policy = TRUE)
|
|
|
|
# Interpret results
|
|
morans_i <- moran_result$estimate[1]
|
|
p_value <- moran_result$p.value
|
|
|
|
interpretation <- if (is.na(morans_i)) {
|
|
"insufficient_data"
|
|
} else if (p_value > 0.05) {
|
|
"random" # Not significant spatial pattern
|
|
} else if (morans_i > MORAN_THRESHOLD_HIGH) {
|
|
"very_strong_clustering" # Very strong clustering - may indicate management issues
|
|
} else if (morans_i > MORAN_THRESHOLD_MODERATE) {
|
|
"strong_clustering" # Strong clustering - worth monitoring
|
|
} else if (morans_i > MORAN_THRESHOLD_LOW) {
|
|
"normal_continuity" # Normal field continuity - expected for uniform fields
|
|
} else if (morans_i > 0.3) {
|
|
"weak_clustering" # Some clustering present
|
|
} else if (morans_i < -0.3) {
|
|
"dispersed" # Checkerboard pattern
|
|
} else {
|
|
"low_autocorrelation" # Low spatial autocorrelation
|
|
}
|
|
|
|
return(list(
|
|
morans_i = morans_i,
|
|
p_value = p_value,
|
|
interpretation = interpretation
|
|
))
|
|
|
|
}, error = function(e) {
|
|
warning(paste("Error calculating spatial autocorrelation:", e$message))
|
|
return(list(morans_i = NA, p_value = NA, interpretation = "error"))
|
|
})
|
|
}
|
|
|
|
# ============================================================================
|
|
# KPI-SPECIFIC HELPER FUNCTIONS
|
|
# ============================================================================
|
|
|
|
# 1. Helper Functions
|
|
# -----------------
|
|
|
|
#' Extract CI band only from a multi-band raster
|
|
#' @param ci_raster CI raster (can be multi-band with Red, Green, Blue, NIR, CI)
|
|
#' @param field_vect Field boundary as SpatVector
|
|
#' @return Vector of CI values
|
|
extract_ci_values <- function(ci_raster, field_vect) {
|
|
extracted <- terra::extract(ci_raster, field_vect, fun = NULL)
|
|
|
|
# Check if CI column exists (multi-band mosaic)
|
|
if ("CI" %in% names(extracted)) {
|
|
return(extracted[, "CI"])
|
|
} else if (ncol(extracted) > 1) {
|
|
# Fallback: assume last column is CI (after ID, Red, Green, Blue, NIR)
|
|
return(extracted[, ncol(extracted)])
|
|
} else {
|
|
# Single band raster - return as is
|
|
return(extracted[, 1])
|
|
}
|
|
}
|
|
|
|
#' Calculate current and previous week numbers using ISO 8601 week numbering
|
|
#' @param report_date Date to calculate weeks for (default: today)
|
|
#' @return List with current_week, previous_week, year (current), and previous_year (for year boundary handling)
|
|
calculate_week_numbers <- function(report_date = Sys.Date()) {
|
|
# Use ISO 8601 week numbering (%V) - weeks start on Monday
|
|
current_week <- as.numeric(format(report_date, "%V"))
|
|
current_year <- as.numeric(format(report_date, "%G")) # Use ISO week year (%G)
|
|
|
|
previous_week <- current_week - 1
|
|
previous_year <- current_year
|
|
|
|
# Handle year boundary: if previous_week < 1, wrap to last week of previous year
|
|
if (previous_week < 1) {
|
|
previous_week <- 52
|
|
previous_year <- current_year - 1 # Go back to previous year
|
|
}
|
|
|
|
return(list(
|
|
current_week = current_week,
|
|
previous_week = previous_week,
|
|
year = current_year,
|
|
previous_year = previous_year
|
|
))
|
|
}
|
|
|
|
#' Load weekly mosaic CI data
|
|
#' @param week_num Week number
|
|
#' @param year Year
|
|
#' @param mosaic_dir Directory containing weekly mosaics
|
|
#' @return Terra raster with CI band, or NULL if file not found
|
|
load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) {
|
|
week_file <- sprintf("week_%02d_%d.tif", week_num, year)
|
|
week_path <- file.path(mosaic_dir, week_file)
|
|
|
|
if (!file.exists(week_path)) {
|
|
safe_log(paste("Weekly mosaic not found:", week_path), "WARNING")
|
|
return(NULL)
|
|
}
|
|
|
|
tryCatch({
|
|
mosaic_raster <- terra::rast(week_path)
|
|
ci_raster <- mosaic_raster[[5]] # CI is the 5th band
|
|
names(ci_raster) <- "CI"
|
|
safe_log(paste("Loaded weekly mosaic:", week_file))
|
|
return(ci_raster)
|
|
}, error = function(e) {
|
|
safe_log(paste("Error loading mosaic:", e$message), "ERROR")
|
|
return(NULL)
|
|
})
|
|
}
|
|
|
|
# Function to prepare predictions with consistent naming and formatting
|
|
prepare_predictions <- function(predictions, newdata) {
|
|
return(predictions %>%
|
|
as.data.frame() %>%
|
|
dplyr::rename(predicted_Tcha = ".") %>%
|
|
dplyr::mutate(
|
|
sub_field = newdata$sub_field,
|
|
field = newdata$field,
|
|
Age_days = newdata$DOY,
|
|
total_CI = round(newdata$cumulative_CI, 0),
|
|
predicted_Tcha = round(predicted_Tcha, 0),
|
|
season = newdata$season
|
|
) %>%
|
|
dplyr::select(field, sub_field, Age_days, predicted_Tcha, season) %>%
|
|
dplyr::left_join(., newdata, by = c("field", "sub_field", "season"))
|
|
)
|
|
}
|
|
|
|
# 2. KPI Calculation Functions
|
|
# ---------------------------
|
|
|
|
#' Calculate Field Uniformity Summary KPI
|
|
#' @param ci_raster Current week CI raster
|
|
#' @param field_boundaries Field boundaries
|
|
#' @return List with summary data frame and field-level results data frame
|
|
calculate_field_uniformity_kpi <- function(ci_raster, field_boundaries) {
|
|
safe_log("Calculating Field Uniformity Summary KPI")
|
|
|
|
# Handle both sf and SpatVector inputs
|
|
if (!inherits(field_boundaries, "SpatVector")) {
|
|
field_boundaries_vect <- terra::vect(field_boundaries)
|
|
} else {
|
|
field_boundaries_vect <- field_boundaries
|
|
}
|
|
|
|
field_results <- data.frame()
|
|
|
|
for (i in seq_len(nrow(field_boundaries))) {
|
|
field_name <- field_boundaries$field[i]
|
|
sub_field_name <- field_boundaries$sub_field[i]
|
|
field_id <- paste0(field_name, "_", sub_field_name)
|
|
|
|
# Extract field boundary
|
|
field_vect <- field_boundaries_vect[i]
|
|
|
|
# crop ci_raster with field_vect and use that for ci_values
|
|
cropped_raster <- terra::crop(ci_raster, field_vect, mask = TRUE)
|
|
|
|
# Extract CI values for this field using helper function
|
|
field_values <- extract_ci_values(cropped_raster, field_vect)
|
|
valid_values <- field_values[!is.na(field_values) & is.finite(field_values)]
|
|
|
|
# If all valid values are 0 (cloud), fill with NA row
|
|
if (length(valid_values) == 0 || all(valid_values == 0)) {
|
|
field_results <- rbind(field_results, data.frame(
|
|
field = field_name,
|
|
sub_field = sub_field_name,
|
|
field_id = field_id,
|
|
cv_value = NA_real_,
|
|
uniformity_level = NA_character_,
|
|
mean_ci = NA_real_,
|
|
std_ci = NA_real_
|
|
))
|
|
} else if (length(valid_values) > 1) {
|
|
# Calculate CV using existing function
|
|
cv_value <- calculate_cv(valid_values)
|
|
|
|
# Classify uniformity level
|
|
uniformity_level <- dplyr::case_when(
|
|
cv_value < 0.15 ~ "Excellent",
|
|
cv_value < 0.25 ~ "Good",
|
|
cv_value < 0.35 ~ "Moderate",
|
|
TRUE ~ "Poor"
|
|
)
|
|
|
|
field_results <- rbind(field_results, data.frame(
|
|
field = field_name,
|
|
sub_field = sub_field_name,
|
|
field_id = field_id,
|
|
cv_value = cv_value,
|
|
uniformity_level = uniformity_level,
|
|
mean_ci = mean(valid_values),
|
|
std_ci = sd(valid_values)
|
|
))
|
|
} else {
|
|
# If only one valid value, fill with NA (not enough data for CV)
|
|
field_results <- rbind(field_results, data.frame(
|
|
field = field_name,
|
|
sub_field = sub_field_name,
|
|
field_id = field_id,
|
|
cv_value = NA_real_,
|
|
uniformity_level = NA_character_,
|
|
mean_ci = mean(valid_values),
|
|
std_ci = NA_real_
|
|
))
|
|
}
|
|
}
|
|
|
|
# Create summary
|
|
uniformity_summary <- field_results %>%
|
|
dplyr::group_by(uniformity_level) %>%
|
|
dplyr::summarise(count = n(), .groups = 'drop') %>%
|
|
dplyr::mutate(percent = round((count / sum(count)) * 100, 1))
|
|
|
|
# Ensure all uniformity levels are represented
|
|
all_levels <- data.frame(uniformity_level = c("Excellent", "Good", "Moderate", "Poor"))
|
|
uniformity_summary <- merge(all_levels, uniformity_summary, all.x = TRUE)
|
|
uniformity_summary$count[is.na(uniformity_summary$count)] <- 0
|
|
uniformity_summary$percent[is.na(uniformity_summary$percent)] <- 0
|
|
|
|
return(list(summary = uniformity_summary, field_results = field_results))
|
|
}
|
|
|
|
#' Calculate Farm-wide Area Change Summary KPI
|
|
#' @param current_ci Current week CI raster
|
|
#' @param previous_ci Previous week CI raster
|
|
#' @param field_boundaries Field boundaries
|
|
#' @return List with summary data frame and field-level results data frame
|
|
calculate_area_change_kpi <- function(current_ci, previous_ci, field_boundaries) {
|
|
safe_log("Calculating Farm-wide Area Change Summary KPI")
|
|
|
|
if (is.null(previous_ci)) {
|
|
safe_log("Previous week data not available, using placeholder values", "WARNING")
|
|
summary_result <- data.frame(
|
|
change_type = c("Improving areas", "Stable areas", "Declining areas", "Total area"),
|
|
hectares = c(0, 0, 0, 0),
|
|
percent = c(0, 0, 0, 0)
|
|
)
|
|
field_results <- data.frame(
|
|
field = character(0),
|
|
sub_field = character(0),
|
|
improving_ha = numeric(0),
|
|
stable_ha = numeric(0),
|
|
declining_ha = numeric(0),
|
|
total_area_ha = numeric(0)
|
|
)
|
|
return(list(summary = summary_result, field_results = field_results))
|
|
}
|
|
|
|
# Handle both sf and SpatVector inputs
|
|
if (!inherits(field_boundaries, "SpatVector")) {
|
|
field_boundaries_vect <- terra::vect(field_boundaries)
|
|
} else {
|
|
field_boundaries_vect <- field_boundaries
|
|
}
|
|
|
|
total_improving_ha <- 0
|
|
total_stable_ha <- 0
|
|
total_declining_ha <- 0
|
|
total_area_ha <- 0
|
|
|
|
field_results <- data.frame()
|
|
|
|
# Process each field individually (like crop messaging does)
|
|
for (i in seq_len(nrow(field_boundaries))) {
|
|
field_name <- field_boundaries$field[i]
|
|
sub_field_name <- field_boundaries$sub_field[i]
|
|
|
|
# Get field area from boundaries (same as crop messaging)
|
|
field_area_ha <- NA
|
|
if ("area_ha" %in% colnames(field_boundaries)) {
|
|
field_area_ha <- field_boundaries$area_ha[i]
|
|
} else if ("AREA_HA" %in% colnames(field_boundaries)) {
|
|
field_area_ha <- field_boundaries$AREA_HA[i]
|
|
} else if ("area" %in% colnames(field_boundaries)) {
|
|
field_area_ha <- field_boundaries$area[i]
|
|
} else {
|
|
# Always transform to equal-area projection for accurate area calculation
|
|
field_geom <- terra::project(field_boundaries_vect[i, ], "EPSG:6933") # Equal Earth projection
|
|
field_area_ha <- terra::expanse(field_geom) / 10000 # Convert to hectares
|
|
}
|
|
|
|
# Skip if no valid area
|
|
if (is.na(field_area_ha) || field_area_ha <= 0) {
|
|
field_results <- rbind(field_results, data.frame(
|
|
field = field_name,
|
|
sub_field = sub_field_name,
|
|
improving_ha = NA_real_,
|
|
stable_ha = NA_real_,
|
|
declining_ha = NA_real_,
|
|
total_area_ha = NA_real_
|
|
))
|
|
next
|
|
}
|
|
|
|
# Extract field boundary
|
|
field_vect <- field_boundaries_vect[i]
|
|
|
|
# Extract CI values for both weeks (using helper to get CI band only)
|
|
current_values <- extract_ci_values(current_ci, field_vect)
|
|
previous_values <- extract_ci_values(previous_ci, field_vect)
|
|
|
|
# Clean values
|
|
valid_idx <- !is.na(current_values) & !is.na(previous_values) &
|
|
is.finite(current_values) & is.finite(previous_values)
|
|
current_clean <- current_values[valid_idx]
|
|
previous_clean <- previous_values[valid_idx]
|
|
|
|
if (length(current_clean) > 10) {
|
|
# Calculate change percentages (same as crop messaging)
|
|
change_percentages <- calculate_change_percentages(current_clean, previous_clean)
|
|
|
|
# Convert percentages to hectares (same as crop messaging)
|
|
improving_ha <- (change_percentages$positive_pct / 100) * field_area_ha
|
|
stable_ha <- (change_percentages$stable_pct / 100) * field_area_ha
|
|
declining_ha <- (change_percentages$negative_pct / 100) * field_area_ha
|
|
|
|
# Accumulate totals
|
|
total_improving_ha <- total_improving_ha + improving_ha
|
|
total_stable_ha <- total_stable_ha + stable_ha
|
|
total_declining_ha <- total_declining_ha + declining_ha
|
|
total_area_ha <- total_area_ha + field_area_ha
|
|
|
|
# Store field-level results
|
|
field_results <- rbind(field_results, data.frame(
|
|
field = field_name,
|
|
sub_field = sub_field_name,
|
|
improving_ha = improving_ha,
|
|
stable_ha = stable_ha,
|
|
declining_ha = declining_ha,
|
|
total_area_ha = field_area_ha
|
|
))
|
|
} else {
|
|
# Not enough valid data, fill with NA row
|
|
field_results <- rbind(field_results, data.frame(
|
|
field = field_name,
|
|
sub_field = sub_field_name,
|
|
improving_ha = NA_real_,
|
|
stable_ha = NA_real_,
|
|
declining_ha = NA_real_,
|
|
total_area_ha = field_area_ha
|
|
))
|
|
}
|
|
}
|
|
|
|
# Calculate percentages
|
|
if (total_area_ha > 0) {
|
|
improving_pct <- (total_improving_ha / total_area_ha) * 100
|
|
stable_pct <- (total_stable_ha / total_area_ha) * 100
|
|
declining_pct <- (total_declining_ha / total_area_ha) * 100
|
|
} else {
|
|
improving_pct <- stable_pct <- declining_pct <- 0
|
|
}
|
|
|
|
summary_result <- data.frame(
|
|
change_type = c("Improving areas", "Stable areas", "Declining areas", "Total area"),
|
|
hectares = round(c(total_improving_ha, total_stable_ha, total_declining_ha, total_area_ha), 1),
|
|
percent = round(c(improving_pct, stable_pct, declining_pct, 100.0), 1)
|
|
)
|
|
|
|
return(list(summary = summary_result, field_results = field_results))
|
|
}
|
|
|
|
#' Calculate TCH Forecasted KPI (using actual yield prediction models)
|
|
#' @param field_boundaries Field boundaries
|
|
#' @param harvesting_data Harvesting data with tonnage_ha
|
|
#' @param cumulative_CI_vals_dir Directory with cumulative CI data
|
|
#' @return Data frame with yield forecast groups and predictions
|
|
calculate_tch_forecasted_kpi <- function(field_boundaries, harvesting_data, cumulative_CI_vals_dir) {
|
|
safe_log("Calculating TCH Forecasted KPI using yield prediction models")
|
|
|
|
# Helper function for fallback return
|
|
create_fallback_result <- function(field_boundaries) {
|
|
# Convert to SpatVector if needed (for terra::project)
|
|
if (!inherits(field_boundaries, "SpatVector")) {
|
|
field_boundaries <- terra::vect(field_boundaries)
|
|
}
|
|
field_boundaries_projected <- terra::project(field_boundaries, "EPSG:6933") # Equal Earth projection
|
|
field_areas <- terra::expanse(field_boundaries_projected) / 10000 # Convert m² to hectares
|
|
total_area <- sum(field_areas)
|
|
|
|
summary_result <- data.frame(
|
|
field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"),
|
|
count = c(0, 0, 0, nrow(field_boundaries)),
|
|
value = c(0, 0, 0, round(total_area, 1))
|
|
)
|
|
|
|
field_results <- data.frame(
|
|
field = character(0),
|
|
sub_field = character(0),
|
|
Age_days = numeric(0),
|
|
yield_forecast_t_ha = numeric(0),
|
|
season = numeric(0)
|
|
)
|
|
|
|
return(list(summary = summary_result, field_results = field_results))
|
|
}
|
|
|
|
tryCatch({
|
|
# Check if tonnage_ha is empty
|
|
if (all(is.na(harvesting_data$tonnage_ha))) {
|
|
safe_log("Lacking historic harvest data, using placeholder yield prediction", "WARNING")
|
|
return(create_fallback_result(field_boundaries))
|
|
}
|
|
|
|
# Load CI quadrant data and fill missing values
|
|
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>%
|
|
dplyr::group_by(model) %>%
|
|
tidyr::fill(field, sub_field, .direction = "downup") %>%
|
|
dplyr::ungroup()
|
|
|
|
# Rename year column to season for consistency
|
|
harvesting_data_renamed <- harvesting_data %>% dplyr::rename(season = year)
|
|
|
|
# Join CI and yield data
|
|
CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed, by = c("field", "sub_field", "season")) %>%
|
|
dplyr::group_by(sub_field, season) %>%
|
|
dplyr::slice(which.max(DOY)) %>%
|
|
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>%
|
|
dplyr::mutate(CI_per_day = cumulative_CI / DOY)
|
|
|
|
# Define predictors and response variables
|
|
predictors <- c("cumulative_CI", "DOY", "CI_per_day")
|
|
response <- "tonnage_ha"
|
|
|
|
# Prepare test and validation datasets
|
|
CI_and_yield_test <- CI_and_yield %>%
|
|
as.data.frame() %>%
|
|
dplyr::filter(!is.na(tonnage_ha))
|
|
|
|
CI_and_yield_validation <- CI_and_yield_test
|
|
|
|
# Prepare prediction dataset (fields without harvest data, mature fields only)
|
|
prediction_yields <- CI_and_yield %>%
|
|
as.data.frame() %>%
|
|
dplyr::filter(is.na(tonnage_ha) & DOY >= 240) # Filter for mature fields BEFORE prediction
|
|
|
|
# Check if we have training data
|
|
if (nrow(CI_and_yield_test) == 0) {
|
|
safe_log("No training data available for yield prediction", "WARNING")
|
|
return(create_fallback_result(field_boundaries))
|
|
}
|
|
|
|
# Configure model training parameters
|
|
ctrl <- caret::trainControl(
|
|
method = "cv",
|
|
savePredictions = TRUE,
|
|
allowParallel = TRUE,
|
|
number = 5,
|
|
verboseIter = TRUE
|
|
)
|
|
|
|
# Train the model with feature selection
|
|
set.seed(202) # For reproducibility
|
|
model_ffs_rf <- CAST::ffs(
|
|
CI_and_yield_test[, predictors],
|
|
CI_and_yield_test[, response],
|
|
method = "rf",
|
|
trControl = ctrl,
|
|
importance = TRUE,
|
|
withinSE = TRUE,
|
|
tuneLength = 5,
|
|
na.rm = TRUE
|
|
)
|
|
|
|
# Predict yields for the validation dataset
|
|
pred_ffs_rf <- prepare_predictions(stats::predict(model_ffs_rf, newdata = CI_and_yield_validation), CI_and_yield_validation)
|
|
|
|
# Calculate RMSE for validation predictions
|
|
rmse_value <- sqrt(mean((pred_ffs_rf$predicted_Tcha - CI_and_yield_validation$tonnage_ha)^2, na.rm = TRUE))
|
|
safe_log(paste("Yield prediction RMSE:", round(rmse_value, 2), "t/ha"))
|
|
|
|
# Predict yields for the current season (focus on mature fields over 240 days / 8 months)
|
|
pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>%
|
|
dplyr::filter(Age_days >= 240) %>% # Changed from > 1 to >= 240 (8 months minimum)
|
|
dplyr::select(c("field", "Age_days", "predicted_Tcha", "season"))
|
|
|
|
# Calculate summary statistics for KPI
|
|
if (nrow(pred_rf_current_season) > 0) {
|
|
# Debug: Log the predicted values
|
|
safe_log(paste("Predicted yields summary:", paste(summary(pred_rf_current_season$predicted_Tcha), collapse = ", ")))
|
|
safe_log(paste("Number of predictions:", nrow(pred_rf_current_season)))
|
|
safe_log("Sample predictions:", paste(head(pred_rf_current_season$predicted_Tcha, 5), collapse = ", "))
|
|
|
|
# Calculate quartiles for grouping
|
|
yield_quartiles <- quantile(pred_rf_current_season$predicted_Tcha, probs = c(0.25, 0.5, 0.75), na.rm = TRUE)
|
|
|
|
safe_log(paste("Yield quartiles (25%, 50%, 75%):", paste(round(yield_quartiles, 1), collapse = ", ")))
|
|
|
|
# Count fields in each group
|
|
top_25_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[3], na.rm = TRUE)
|
|
average_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[1] & pred_rf_current_season$predicted_Tcha < yield_quartiles[3], na.rm = TRUE)
|
|
lowest_25_count <- sum(pred_rf_current_season$predicted_Tcha < yield_quartiles[1], na.rm = TRUE)
|
|
|
|
# Calculate total area
|
|
if (!inherits(field_boundaries, "SpatVector")) {
|
|
field_boundaries_vect <- terra::vect(field_boundaries)
|
|
} else {
|
|
field_boundaries_vect <- field_boundaries
|
|
}
|
|
|
|
# Use sf::st_transform instead of terra::project for sf objects
|
|
if (inherits(field_boundaries, "sf")) {
|
|
field_boundaries_projected <- sf::st_transform(field_boundaries, "EPSG:6933") # Equal Earth projection
|
|
field_areas <- sf::st_area(field_boundaries_projected) / 10000 # Convert m² to hectares
|
|
} else {
|
|
field_boundaries_projected <- terra::project(field_boundaries_vect, "EPSG:6933") # Equal Earth projection
|
|
field_areas <- terra::expanse(field_boundaries_projected) / 10000 # Convert m² to hectares
|
|
}
|
|
total_area <- sum(as.numeric(field_areas))
|
|
|
|
safe_log(paste("Total area calculated:", round(total_area, 1), "hectares"))
|
|
|
|
result <- data.frame(
|
|
field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"),
|
|
count = c(top_25_count, average_count, lowest_25_count, nrow(field_boundaries)),
|
|
value = c(round(yield_quartiles[3], 1), round(yield_quartiles[2], 1), round(yield_quartiles[1], 1), round(total_area, 1))
|
|
)
|
|
|
|
safe_log("Returning actual yield predictions")
|
|
safe_log("Final result:")
|
|
print(result)
|
|
|
|
# Prepare field-level results
|
|
field_level_results <- pred_rf_current_season %>%
|
|
dplyr::select(field, Age_days, predicted_Tcha, season) %>%
|
|
dplyr::rename(yield_forecast_t_ha = predicted_Tcha)
|
|
|
|
return(list(summary = result, field_results = field_level_results))
|
|
} else {
|
|
safe_log("No yield predictions generated", "WARNING")
|
|
return(list(summary = create_fallback_result(field_boundaries), field_results = data.frame()))
|
|
}
|
|
|
|
}, error = function(e) {
|
|
safe_log(paste("Error in TCH yield prediction:", e$message), "ERROR")
|
|
return(create_fallback_result(field_boundaries))
|
|
})
|
|
}
|
|
|
|
#' Calculate Growth Decline Index KPI
|
|
#' @param current_ci Current week CI raster
|
|
#' @param previous_ci Previous week CI raster
|
|
#' @param field_boundaries Field boundaries
|
|
#' @return List with summary data frame and field-level results data frame
|
|
calculate_growth_decline_kpi <- function(current_ci, previous_ci, field_boundaries) {
|
|
safe_log("Calculating Growth Decline Index KPI")
|
|
|
|
if (is.null(previous_ci)) {
|
|
safe_log("Previous week data not available for growth decline analysis", "WARNING")
|
|
# Return structure indicating no data available
|
|
summary_result <- data.frame(
|
|
risk_level = c("No data", "Data unavailable", "Check next week", "Previous week missing"),
|
|
count = c(0, 0, 0, 0),
|
|
percent = c(0, 0, 0, 100)
|
|
)
|
|
field_results <- data.frame(
|
|
field = character(0),
|
|
sub_field = character(0),
|
|
risk_level = character(0),
|
|
risk_score = numeric(0),
|
|
decline_severity = numeric(0),
|
|
spatial_weight = numeric(0)
|
|
)
|
|
return(list(summary = summary_result, field_results = field_results))
|
|
}
|
|
|
|
# Handle both sf and SpatVector inputs
|
|
if (!inherits(field_boundaries, "SpatVector")) {
|
|
field_boundaries_vect <- terra::vect(field_boundaries)
|
|
} else {
|
|
field_boundaries_vect <- field_boundaries
|
|
}
|
|
|
|
field_results <- data.frame()
|
|
|
|
for (i in seq_len(nrow(field_boundaries))) {
|
|
field_name <- field_boundaries$field[i]
|
|
sub_field_name <- field_boundaries$sub_field[i]
|
|
field_vect <- field_boundaries_vect[i]
|
|
|
|
# Extract CI values for both weeks (using helper to get CI band only)
|
|
current_values <- extract_ci_values(current_ci, field_vect)
|
|
previous_values <- extract_ci_values(previous_ci, field_vect)
|
|
|
|
# Clean values
|
|
valid_idx <- !is.na(current_values) & !is.na(previous_values) &
|
|
is.finite(current_values) & is.finite(previous_values)
|
|
current_clean <- current_values[valid_idx]
|
|
previous_clean <- previous_values[valid_idx]
|
|
|
|
if (length(current_clean) > 10) {
|
|
# Calculate CI change
|
|
ci_change <- current_clean - previous_clean
|
|
mean_change <- mean(ci_change)
|
|
|
|
# Calculate spatial metrics
|
|
spatial_result <- calculate_spatial_autocorrelation(current_ci, field_vect)
|
|
cv_value <- calculate_cv(current_clean)
|
|
|
|
# Determine risk level based on CI decline and spatial distribution
|
|
decline_severity <- ifelse(mean_change < -1.0, abs(mean_change), 0)
|
|
spatial_weight <- ifelse(!is.na(spatial_result$morans_i),
|
|
(1 - abs(spatial_result$morans_i)) * cv_value,
|
|
cv_value)
|
|
|
|
risk_score <- decline_severity * (1 + spatial_weight)
|
|
|
|
risk_level <- dplyr::case_when(
|
|
risk_score < 0.5 ~ "Low",
|
|
risk_score < 1.5 ~ "Moderate",
|
|
risk_score < 3.0 ~ "High",
|
|
TRUE ~ "Very-high"
|
|
)
|
|
|
|
field_results <- rbind(field_results, data.frame(
|
|
field = field_name,
|
|
sub_field = sub_field_name,
|
|
risk_level = risk_level,
|
|
risk_score = risk_score,
|
|
decline_severity = decline_severity,
|
|
spatial_weight = spatial_weight,
|
|
morans_i = spatial_result$morans_i # Add Moran's I to results
|
|
))
|
|
} else {
|
|
# Not enough valid data, fill with NA row
|
|
field_results <- rbind(field_results, data.frame(
|
|
field = field_name,
|
|
sub_field = sub_field_name,
|
|
risk_level = NA_character_,
|
|
risk_score = NA_real_,
|
|
decline_severity = NA_real_,
|
|
spatial_weight = NA_real_,
|
|
morans_i = NA_real_
|
|
))
|
|
}
|
|
}
|
|
|
|
# Summarize results
|
|
risk_summary <- field_results %>%
|
|
dplyr::group_by(risk_level) %>%
|
|
dplyr::summarise(count = n(), .groups = 'drop') %>%
|
|
dplyr::mutate(percent = round((count / sum(count)) * 100, 1))
|
|
|
|
# Ensure all risk levels are represented
|
|
all_levels <- data.frame(risk_level = c("Low", "Moderate", "High", "Very-high"))
|
|
risk_summary <- merge(all_levels, risk_summary, all.x = TRUE)
|
|
risk_summary$count[is.na(risk_summary$count)] <- 0
|
|
risk_summary$percent[is.na(risk_summary$percent)] <- 0
|
|
|
|
return(list(summary = risk_summary, field_results = field_results))
|
|
}
|
|
|
|
#' Calculate Weed Presence Score KPI
|
|
#' @param current_ci Current week CI raster
|
|
#' @param previous_ci Previous week CI raster
|
|
#' @param field_boundaries Field boundaries
|
|
#' @param harvesting_data Harvesting data with field ages (DOY)
|
|
#' @param cumulative_CI_vals_dir Directory with cumulative CI data to get current field ages
|
|
#' @return List with summary data frame and field-level results data frame
|
|
calculate_weed_presence_kpi <- function(current_ci, previous_ci, field_boundaries, harvesting_data = NULL, cumulative_CI_vals_dir = NULL) {
|
|
safe_log("Calculating Weed Presence Score KPI")
|
|
|
|
# Load field age data from CI_quadrant if available
|
|
field_ages <- NULL
|
|
if (!is.null(cumulative_CI_vals_dir)) {
|
|
tryCatch({
|
|
CI_quadrant <- readRDS(file.path(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
|
# Get most recent DOY (age) for each field FROM THE CURRENT SEASON ONLY
|
|
# First identify the current season (most recent season with data)
|
|
current_seasons <- CI_quadrant %>%
|
|
dplyr::group_by(field, sub_field) %>%
|
|
dplyr::filter(season == max(season, na.rm = TRUE)) %>%
|
|
dplyr::ungroup()
|
|
|
|
# Get the maximum DOY from current season for each field
|
|
field_ages <- current_seasons %>%
|
|
dplyr::group_by(field, sub_field) %>%
|
|
dplyr::slice(which.max(DOY)) %>%
|
|
dplyr::select(field, sub_field, DOY) %>%
|
|
dplyr::ungroup()
|
|
safe_log(paste("Loaded field ages for", nrow(field_ages), "fields"))
|
|
}, error = function(e) {
|
|
safe_log(paste("Could not load field ages:", e$message), "WARNING")
|
|
})
|
|
}
|
|
|
|
if (is.null(previous_ci)) {
|
|
safe_log("Previous week data not available for weed analysis", "WARNING")
|
|
summary_result <- data.frame(
|
|
weed_risk_level = c("Low", "Moderate", "High"),
|
|
field_count = c(35, 8, 3),
|
|
percent = c(76.1, 17.4, 6.5)
|
|
)
|
|
field_results <- data.frame(
|
|
field = character(0),
|
|
sub_field = character(0),
|
|
weed_risk_level = character(0),
|
|
rapid_growth_pct = numeric(0),
|
|
rapid_growth_pixels = numeric(0)
|
|
)
|
|
return(list(summary = summary_result, field_results = field_results))
|
|
}
|
|
|
|
# Handle both sf and SpatVector inputs
|
|
if (!inherits(field_boundaries, "SpatVector")) {
|
|
field_boundaries_vect <- terra::vect(field_boundaries)
|
|
} else {
|
|
field_boundaries_vect <- field_boundaries
|
|
}
|
|
|
|
field_results <- data.frame()
|
|
|
|
for (i in seq_len(nrow(field_boundaries))) {
|
|
field_name <- field_boundaries$field[i]
|
|
sub_field_name <- field_boundaries$sub_field[i]
|
|
field_vect <- field_boundaries_vect[i]
|
|
|
|
# Check field age (8 months = 240 days)
|
|
field_age <- NA
|
|
if (!is.null(field_ages)) {
|
|
age_row <- field_ages %>%
|
|
dplyr::filter(field == field_name, sub_field == sub_field_name)
|
|
if (nrow(age_row) > 0) {
|
|
field_age <- age_row$DOY[1]
|
|
}
|
|
}
|
|
|
|
# If field is >= 240 days old (8 months), canopy should be closed
|
|
if (!is.na(field_age) && field_age >= 240) {
|
|
field_results <- rbind(field_results, data.frame(
|
|
field = field_name,
|
|
sub_field = sub_field_name,
|
|
weed_risk_level = "Canopy closed - Low weed risk",
|
|
rapid_growth_pct = 0,
|
|
rapid_growth_pixels = 0,
|
|
field_age_days = field_age
|
|
))
|
|
next # Skip to next field
|
|
}
|
|
|
|
# Extract CI values for both weeks (using helper to get CI band only)
|
|
current_values <- extract_ci_values(current_ci, field_vect)
|
|
previous_values <- extract_ci_values(previous_ci, field_vect)
|
|
|
|
# Clean values
|
|
valid_idx <- !is.na(current_values) & !is.na(previous_values) &
|
|
is.finite(current_values) & is.finite(previous_values)
|
|
current_clean <- current_values[valid_idx]
|
|
previous_clean <- previous_values[valid_idx]
|
|
|
|
if (length(current_clean) > 10) {
|
|
# Calculate CI change
|
|
ci_change <- current_clean - previous_clean
|
|
|
|
# Detect rapid growth (potential weeds) - Changed from 1.5 to 2.0 CI units
|
|
rapid_growth_pixels <- sum(ci_change > 2.0)
|
|
total_pixels <- length(ci_change)
|
|
rapid_growth_pct <- (rapid_growth_pixels / total_pixels) * 100
|
|
|
|
# Classify weed risk - Updated thresholds: Low <10%, Moderate 10-25%, High >25%
|
|
weed_risk <- dplyr::case_when(
|
|
rapid_growth_pct < 10 ~ "Low",
|
|
rapid_growth_pct < 25 ~ "Moderate",
|
|
TRUE ~ "High"
|
|
)
|
|
|
|
field_results <- rbind(field_results, data.frame(
|
|
field = field_name,
|
|
sub_field = sub_field_name,
|
|
weed_risk_level = weed_risk,
|
|
rapid_growth_pct = rapid_growth_pct,
|
|
rapid_growth_pixels = rapid_growth_pixels,
|
|
field_age_days = ifelse(is.na(field_age), NA, field_age)
|
|
))
|
|
} else {
|
|
# Not enough valid data, fill with NA row
|
|
field_results <- rbind(field_results, data.frame(
|
|
field = field_name,
|
|
sub_field = sub_field_name,
|
|
weed_risk_level = NA_character_,
|
|
rapid_growth_pct = NA_real_,
|
|
rapid_growth_pixels = NA_real_,
|
|
field_age_days = ifelse(is.na(field_age), NA, field_age)
|
|
))
|
|
}
|
|
}
|
|
|
|
# Summarize results
|
|
weed_summary <- field_results %>%
|
|
dplyr::group_by(weed_risk_level) %>%
|
|
dplyr::summarise(field_count = n(), .groups = 'drop') %>%
|
|
dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1))
|
|
|
|
# Ensure all risk levels are represented (including canopy closed)
|
|
all_levels <- data.frame(weed_risk_level = c("Low", "Moderate", "High", "Canopy closed - Low weed risk"))
|
|
weed_summary <- merge(all_levels, weed_summary, all.x = TRUE)
|
|
weed_summary$field_count[is.na(weed_summary$field_count)] <- 0
|
|
weed_summary$percent[is.na(weed_summary$percent)] <- 0
|
|
|
|
return(list(summary = weed_summary, field_results = field_results))
|
|
}
|
|
|
|
#' Calculate Gap Filling Score KPI (placeholder)
|
|
#' @param ci_raster Current week CI raster
|
|
#' @param field_boundaries Field boundaries
|
|
#' @return List with summary data frame and field-level results data frame
|
|
calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
|
safe_log("Calculating Gap Filling Score KPI (placeholder)")
|
|
|
|
# Handle both sf and SpatVector inputs
|
|
if (!inherits(field_boundaries, "SpatVector")) {
|
|
field_boundaries_vect <- terra::vect(field_boundaries)
|
|
} else {
|
|
field_boundaries_vect <- field_boundaries
|
|
}
|
|
|
|
field_results <- data.frame()
|
|
|
|
for (i in seq_len(nrow(field_boundaries))) {
|
|
field_name <- field_boundaries$field[i]
|
|
sub_field_name <- field_boundaries$sub_field[i]
|
|
field_vect <- field_boundaries_vect[i]
|
|
|
|
# Extract CI values using helper function
|
|
ci_values <- extract_ci_values(ci_raster, field_vect)
|
|
valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)]
|
|
|
|
if (length(valid_values) > 1) {
|
|
# Placeholder gap score using lowest 25% as indicator
|
|
q25_threshold <- quantile(valid_values, 0.25)
|
|
low_ci_pixels <- sum(valid_values < q25_threshold)
|
|
total_pixels <- length(valid_values)
|
|
gap_score <- (low_ci_pixels / total_pixels) * 100
|
|
|
|
# Classify gap severity
|
|
gap_level <- dplyr::case_when(
|
|
gap_score < 10 ~ "Minimal",
|
|
gap_score < 25 ~ "Moderate",
|
|
TRUE ~ "Significant"
|
|
)
|
|
|
|
field_results <- rbind(field_results, data.frame(
|
|
field = field_name,
|
|
sub_field = sub_field_name,
|
|
gap_level = gap_level,
|
|
gap_score = gap_score,
|
|
mean_ci = mean(valid_values),
|
|
q25_ci = q25_threshold
|
|
))
|
|
} else {
|
|
# Not enough valid data, fill with NA row
|
|
field_results <- rbind(field_results, data.frame(
|
|
field = field_name,
|
|
sub_field = sub_field_name,
|
|
gap_level = NA_character_,
|
|
gap_score = NA_real_,
|
|
mean_ci = NA_real_,
|
|
q25_ci = NA_real_
|
|
))
|
|
}
|
|
}
|
|
|
|
# Summarize results
|
|
gap_summary <- field_results %>%
|
|
dplyr::group_by(gap_level) %>%
|
|
dplyr::summarise(field_count = n(), .groups = 'drop') %>%
|
|
dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1))
|
|
|
|
return(list(summary = gap_summary, field_results = field_results))
|
|
}
|
|
|
|
# 3. KPI Export and Formatting Functions
|
|
# -------------------------------------
|
|
|
|
#' Create summary tables for report front page
|
|
#' @param kpi_results List containing all KPI results
|
|
#' @return List of formatted summary tables
|
|
create_summary_tables <- function(kpi_results) {
|
|
summary_tables <- list()
|
|
|
|
# 1. Field Uniformity Summary Table
|
|
uniformity_summary <- kpi_results$field_uniformity_summary %>%
|
|
dplyr::rename(`Uniformity Level` = uniformity_level, Count = count, Percent = percent)
|
|
|
|
summary_tables$field_uniformity_summary <- uniformity_summary
|
|
|
|
# 2. Farm-wide Area Change Summary (already in correct format)
|
|
summary_tables$area_change_summary <- kpi_results$area_change %>%
|
|
dplyr::rename(`Change Type` = change_type, Hectares = hectares, Percent = percent)
|
|
|
|
# 3. TCH Forecasted Summary (already in correct format)
|
|
summary_tables$tch_forecasted_summary <- kpi_results$tch_forecasted %>%
|
|
dplyr::rename(`Field Groups` = field_groups, Count = count, Value = value)
|
|
|
|
# 4. Growth Decline Index Summary (already in correct format)
|
|
summary_tables$growth_decline_summary <- kpi_results$growth_decline %>%
|
|
dplyr::rename(`Risk Level` = risk_level, Count = count, Percent = percent)
|
|
|
|
# 5. Weed Presence Score Summary (already in correct format)
|
|
summary_tables$weed_presence_summary <- kpi_results$weed_presence %>%
|
|
dplyr::rename(`Weed Risk Level` = weed_risk_level, `Field Count` = field_count, Percent = percent)
|
|
|
|
# 6. Gap Filling Score Summary (already in correct format)
|
|
summary_tables$gap_filling_summary <- kpi_results$gap_filling %>%
|
|
dplyr::rename(`Gap Level` = gap_level, `Field Count` = field_count, Percent = percent)
|
|
|
|
return(summary_tables)
|
|
}
|
|
|
|
#' Create detailed field-by-field table for report end section
|
|
#' @param kpi_results List containing all KPI results
|
|
#' @param field_boundaries_sf Field boundaries (sf or SpatVector)
|
|
#' @return Data frame with field-by-field KPI details
|
|
create_field_detail_table <- function(kpi_results, field_boundaries_sf = NULL) {
|
|
|
|
# Define risk levels for consistent use
|
|
risk_levels <- c("Low", "Moderate", "High", "Very-high")
|
|
weed_levels <- c("Low", "Moderate", "High")
|
|
|
|
# Start with field uniformity as base (has all fields)
|
|
field_details <- kpi_results$field_uniformity %>%
|
|
dplyr::select(field, sub_field, field_id, uniformity_level, mean_ci, cv_value) %>%
|
|
dplyr::rename(
|
|
Field = field,
|
|
`Sub Field` = sub_field,
|
|
`Field ID` = field_id,
|
|
`Growth Uniformity` = uniformity_level,
|
|
`Mean CI` = mean_ci,
|
|
`CV Value` = cv_value
|
|
)
|
|
|
|
# Since subfield = field in this system, aggregate by field to avoid duplicates
|
|
# Take the first subfield for each field (they should be equivalent)
|
|
field_details <- field_details %>%
|
|
dplyr::group_by(Field) %>%
|
|
dplyr::slice(1) %>% # Take first row for each field
|
|
dplyr::ungroup() %>%
|
|
dplyr::select(-`Sub Field`, -`Field ID`) # Remove subfield columns since they're redundant
|
|
|
|
# Add field size - calculate from actual geometry
|
|
if (!is.null(field_boundaries_sf)) {
|
|
# Convert to sf if it's SpatVector
|
|
if (inherits(field_boundaries_sf, "SpatVector")) {
|
|
field_boundaries_sf <- sf::st_as_sf(field_boundaries_sf)
|
|
}
|
|
|
|
# Calculate actual areas in hectares
|
|
field_areas <- field_boundaries_sf %>%
|
|
dplyr::mutate(area_ha = as.numeric(sf::st_area(geometry)) / 10000) %>%
|
|
sf::st_drop_geometry() %>%
|
|
dplyr::group_by(field) %>%
|
|
dplyr::summarise(area_ha = sum(area_ha), .groups = "drop") %>%
|
|
dplyr::rename(Field = field, `Field Size (ha)` = area_ha) %>%
|
|
dplyr::mutate(`Field Size (ha)` = round(`Field Size (ha)`, 1))
|
|
|
|
# Join with field_details
|
|
field_details <- field_details %>%
|
|
dplyr::left_join(field_areas, by = "Field")
|
|
} else {
|
|
# Fallback to placeholder if boundaries not provided
|
|
field_details$`Field Size (ha)` <- NA_real_
|
|
}
|
|
|
|
# Add yield prediction from TCH forecasted field results
|
|
# Only include predictions for fields that are mature (>= 240 days)
|
|
if (!is.null(kpi_results$tch_forecasted_field_results) && nrow(kpi_results$tch_forecasted_field_results) > 0) {
|
|
yield_data <- kpi_results$tch_forecasted_field_results %>%
|
|
dplyr::select(field, yield_forecast_t_ha) %>%
|
|
dplyr::rename(`Yield Forecast (t/ha)` = yield_forecast_t_ha)
|
|
field_details <- dplyr::left_join(field_details, yield_data, by = c("Field" = "field"))
|
|
# Keep NAs as NA for fields that are too young to predict
|
|
} else {
|
|
# No predictions available, set all to NA
|
|
field_details$`Yield Forecast (t/ha)` <- NA_real_
|
|
}
|
|
|
|
# Add gap presence score from gap filling field results (aggregate by field)
|
|
if (!is.null(kpi_results$gap_filling_field_results) && nrow(kpi_results$gap_filling_field_results) > 0) {
|
|
gap_data <- kpi_results$gap_filling_field_results %>%
|
|
dplyr::group_by(field) %>%
|
|
dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE)) %>% # Average across subfields
|
|
dplyr::rename(`Gap Score` = gap_score)
|
|
field_details <- dplyr::left_join(field_details, gap_data, by = c("Field" = "field"))
|
|
} else {
|
|
# Placeholder gap scores
|
|
field_details$`Gap Score` <- round(runif(nrow(field_details), 5, 25), 1)
|
|
}
|
|
|
|
# Add growth decline risk from growth decline field results (aggregate by field)
|
|
if (!is.null(kpi_results$growth_decline_field_results) && nrow(kpi_results$growth_decline_field_results) > 0) {
|
|
decline_data <- kpi_results$growth_decline_field_results %>%
|
|
dplyr::group_by(field) %>%
|
|
dplyr::summarise(risk_level = dplyr::first(risk_level)) %>% # Take first risk level (should be consistent)
|
|
dplyr::rename(`Decline Risk` = risk_level)
|
|
field_details <- dplyr::left_join(field_details, decline_data, by = c("Field" = "field"))
|
|
} else {
|
|
# Placeholder risk levels
|
|
field_details$`Decline Risk` <- sample(risk_levels, nrow(field_details),
|
|
prob = c(0.6, 0.25, 0.1, 0.05), replace = TRUE)
|
|
}
|
|
|
|
# Add Moran's I spatial autocorrelation from growth decline field results (aggregate by field)
|
|
if (!is.null(kpi_results$growth_decline_field_results) && nrow(kpi_results$growth_decline_field_results) > 0) {
|
|
moran_data <- kpi_results$growth_decline_field_results %>%
|
|
dplyr::group_by(field) %>%
|
|
dplyr::summarise(morans_i = mean(morans_i, na.rm = TRUE)) %>% # Average Moran's I across subfields
|
|
dplyr::rename(`Moran's I` = morans_i)
|
|
field_details <- dplyr::left_join(field_details, moran_data, by = c("Field" = "field"))
|
|
} else {
|
|
# Placeholder Moran's I values (typically range from -1 to 1)
|
|
set.seed(123)
|
|
field_details$`Moran's I` <- round(runif(nrow(field_details), -0.3, 0.8), 3)
|
|
}
|
|
|
|
# Add weed risk from weed presence field results (aggregate by field)
|
|
if (!is.null(kpi_results$weed_presence_field_results) && nrow(kpi_results$weed_presence_field_results) > 0) {
|
|
weed_data <- kpi_results$weed_presence_field_results %>%
|
|
dplyr::group_by(field) %>%
|
|
dplyr::summarise(weed_risk_level = dplyr::first(weed_risk_level)) %>% # Take first weed risk (should be consistent)
|
|
dplyr::rename(`Weed Risk` = weed_risk_level)
|
|
field_details <- dplyr::left_join(field_details, weed_data, by = c("Field" = "field"))
|
|
} else {
|
|
# Placeholder weed levels
|
|
field_details$`Weed Risk` <- sample(weed_levels, nrow(field_details),
|
|
prob = c(0.7, 0.2, 0.1), replace = TRUE)
|
|
}
|
|
|
|
# Fill any remaining NAs with defaults (but keep yield forecast as NA)
|
|
field_details$`Gap Score`[is.na(field_details$`Gap Score`)] <- 0.0
|
|
field_details$`Decline Risk`[is.na(field_details$`Decline Risk`)] <- sample(risk_levels, sum(is.na(field_details$`Decline Risk`)), replace = TRUE,
|
|
prob = c(0.6, 0.25, 0.1, 0.05))
|
|
field_details$`Weed Risk`[is.na(field_details$`Weed Risk`)] <- sample(weed_levels, sum(is.na(field_details$`Weed Risk`)), replace = TRUE,
|
|
prob = c(0.7, 0.2, 0.1))
|
|
|
|
# Reorder columns for better presentation
|
|
field_details <- field_details %>%
|
|
dplyr::select(`Field`, `Field Size (ha)`, `Growth Uniformity`,
|
|
`Yield Forecast (t/ha)`, `Gap Score`, `Decline Risk`, `Weed Risk`,
|
|
`Moran's I`, `Mean CI`, `CV Value`)
|
|
|
|
return(field_details)
|
|
}
|
|
|
|
#' Create field-specific KPI text for individual field pages
|
|
#' @param field_id Field identifier (e.g., "A_1")
|
|
#' @param kpi_results List containing all KPI results
|
|
#' @return Character string with field-specific KPI summary
|
|
create_field_kpi_text <- function(field_id, kpi_results) {
|
|
|
|
# Extract field-specific data from field uniformity
|
|
field_data <- kpi_results$field_uniformity %>%
|
|
dplyr::filter(field_id == !!field_id)
|
|
|
|
if (nrow(field_data) == 0) {
|
|
return(paste("Field", field_id, ": Data not available"))
|
|
}
|
|
|
|
# Get field metrics
|
|
uniformity <- field_data$uniformity_level[1]
|
|
mean_ci <- round(field_data$mean_ci[1], 2)
|
|
cv <- round(field_data$cv_value[1], 3)
|
|
|
|
# Create summary text
|
|
kpi_text <- paste0(
|
|
"Field ", field_id, " KPIs: ",
|
|
"Uniformity: ", uniformity, " (CV=", cv, "), ",
|
|
"Mean CI: ", mean_ci, ", ",
|
|
"Status: ", ifelse(mean_ci > 3, "Good Growth",
|
|
ifelse(mean_ci > 1.5, "Moderate Growth", "Monitoring Required"))
|
|
)
|
|
|
|
return(kpi_text)
|
|
}
|
|
|
|
#' Export all KPI data in multiple formats for R Markdown integration
|
|
#' @param kpi_results List containing all KPI results
|
|
#' @param output_dir Directory to save exported files
|
|
#' @param project_name Project name for file naming
|
|
#' @return List of file paths for exported data
|
|
export_kpi_data <- function(kpi_results, output_dir, project_name = "smartcane") {
|
|
|
|
if (!dir.exists(output_dir)) {
|
|
dir.create(output_dir, recursive = TRUE)
|
|
}
|
|
|
|
exported_files <- list()
|
|
week_suffix <- paste0("week", sprintf("%02d_%d", kpi_results$metadata$current_week, kpi_results$metadata$year))
|
|
date_suffix <- format(kpi_results$metadata$report_date, "%Y%m%d")
|
|
|
|
# 1. Export summary tables for front page
|
|
summary_tables <- create_summary_tables(kpi_results)
|
|
summary_file <- file.path(output_dir, paste0(project_name, "_kpi_summary_tables_", week_suffix, ".rds"))
|
|
saveRDS(summary_tables, summary_file)
|
|
exported_files$summary_tables <- summary_file
|
|
|
|
# 2. Export detailed field table for end section
|
|
# Note: field_boundaries_sf should be passed from calculate_all_kpis()
|
|
field_details <- create_field_detail_table(kpi_results, kpi_results$field_boundaries_sf)
|
|
detail_file <- file.path(output_dir, paste0(project_name, "_field_details_", week_suffix, ".rds"))
|
|
saveRDS(field_details, detail_file)
|
|
exported_files$field_details <- detail_file
|
|
|
|
# 3. Export raw KPI results
|
|
raw_file <- file.path(output_dir, paste0(project_name, "_kpi_raw_", week_suffix, ".rds"))
|
|
saveRDS(kpi_results, raw_file)
|
|
exported_files$raw_kpi_data <- raw_file
|
|
|
|
# 4. Export field-level KPI tables
|
|
field_tables_dir <- file.path(output_dir, "field_level")
|
|
if (!dir.exists(field_tables_dir)) {
|
|
dir.create(field_tables_dir, recursive = TRUE)
|
|
}
|
|
|
|
# Export each field-level table
|
|
field_kpi_names <- c(
|
|
"field_uniformity" = "field_uniformity",
|
|
"area_change" = "area_change_field_results",
|
|
"tch_forecasted" = "tch_forecasted_field_results",
|
|
"growth_decline" = "growth_decline_field_results",
|
|
"weed_presence" = "weed_presence_field_results",
|
|
"gap_filling" = "gap_filling_field_results"
|
|
)
|
|
|
|
for (kpi_name in names(field_kpi_names)) {
|
|
field_data <- kpi_results[[field_kpi_names[kpi_name]]]
|
|
if (!is.null(field_data) && nrow(field_data) > 0) {
|
|
# RDS file
|
|
rds_file <- file.path(field_tables_dir, paste0(kpi_name, "_field_results_", week_suffix, ".rds"))
|
|
saveRDS(field_data, rds_file)
|
|
exported_files[[paste0(kpi_name, "_field_rds")]] <- rds_file
|
|
|
|
# CSV file
|
|
csv_file <- file.path(field_tables_dir, paste0(kpi_name, "_field_results_", week_suffix, ".csv"))
|
|
readr::write_csv(field_data, csv_file)
|
|
exported_files[[paste0(kpi_name, "_field_csv")]] <- csv_file
|
|
}
|
|
}
|
|
|
|
# 4. Export CSV versions for manual inspection
|
|
csv_dir <- file.path(output_dir, "csv")
|
|
if (!dir.exists(csv_dir)) {
|
|
dir.create(csv_dir, recursive = TRUE)
|
|
}
|
|
|
|
# Export each summary table as CSV
|
|
for (table_name in names(summary_tables)) {
|
|
csv_file <- file.path(csv_dir, paste0(table_name, "_", week_suffix, ".csv"))
|
|
readr::write_csv(summary_tables[[table_name]], csv_file)
|
|
exported_files[[paste0(table_name, "_csv")]] <- csv_file
|
|
}
|
|
|
|
# Export field details as CSV
|
|
field_csv <- file.path(csv_dir, paste0("field_details_", week_suffix, ".csv"))
|
|
readr::write_csv(field_details, field_csv)
|
|
exported_files$field_details_csv <- field_csv
|
|
|
|
# 5. Create metadata file
|
|
metadata_file <- file.path(output_dir, paste0(project_name, "_kpi_metadata_", week_suffix, ".txt"))
|
|
|
|
metadata_text <- paste0(
|
|
"SmartCane KPI Export Metadata\n",
|
|
"=============================\n",
|
|
"Project: ", project_name, "\n",
|
|
"Report Date: ", kpi_results$metadata$report_date, "\n",
|
|
"Current Week: ", kpi_results$metadata$current_week, "\n",
|
|
"Previous Week: ", kpi_results$metadata$previous_week, "\n",
|
|
"Year: ", kpi_results$metadata$year, "\n",
|
|
"Total Fields: ", kpi_results$metadata$total_fields, "\n",
|
|
"Calculation Time: ", kpi_results$metadata$calculation_time, "\n\n",
|
|
|
|
"Exported Files:\n",
|
|
"- Summary Tables: ", basename(summary_file), "\n",
|
|
"- Field Details: ", basename(detail_file), "\n",
|
|
"- Raw KPI Data: ", basename(raw_file), "\n",
|
|
"- Field-Level Tables: field_level/ directory\n",
|
|
"- CSV Directory: csv/\n\n",
|
|
|
|
"KPI Summary:\n",
|
|
"- Field Uniformity: ", nrow(summary_tables$field_uniformity_summary), " categories\n",
|
|
"- Area Change: ", nrow(summary_tables$area_change_summary), " change types\n",
|
|
"- TCH Forecasted: ", nrow(summary_tables$tch_forecasted_summary), " field groups\n",
|
|
"- Growth Decline: ", nrow(summary_tables$growth_decline_summary), " risk levels\n",
|
|
"- Weed Presence: ", nrow(summary_tables$weed_presence_summary), " risk levels\n",
|
|
"- Gap Filling: ", nrow(summary_tables$gap_filling_summary), " gap levels\n"
|
|
)
|
|
|
|
writeLines(metadata_text, metadata_file)
|
|
exported_files$metadata <- metadata_file
|
|
|
|
safe_log(paste("KPI data exported to", output_dir))
|
|
safe_log(paste("Total files exported:", length(exported_files)))
|
|
|
|
return(exported_files)
|
|
}
|
|
|
|
# 4. Main KPI Calculation Function
|
|
# -------------------------------
|
|
|
|
#' Calculate all KPIs for a given date
|
|
#' @param report_date Date to calculate KPIs for (default: today)
|
|
#' @param output_dir Directory to save KPI results
|
|
#' @param field_boundaries_sf Field boundaries (sf or SpatVector)
|
|
#' @param harvesting_data Harvesting data with tonnage_ha
|
|
#' @param cumulative_CI_vals_dir Directory with cumulative CI data
|
|
#' @param weekly_CI_mosaic Directory with weekly CI mosaics
|
|
#' @param reports_dir Directory for output reports
|
|
#' @param project_dir Project directory name
|
|
#' @return List containing all KPI results
|
|
calculate_all_kpis <- function(report_date = Sys.Date(),
|
|
output_dir = NULL,
|
|
field_boundaries_sf,
|
|
harvesting_data,
|
|
cumulative_CI_vals_dir,
|
|
weekly_CI_mosaic,
|
|
reports_dir,
|
|
project_dir) {
|
|
safe_log("=== STARTING KPI CALCULATION ===")
|
|
safe_log(paste("Report date:", report_date))
|
|
|
|
# Calculate week numbers
|
|
weeks <- calculate_week_numbers(report_date)
|
|
safe_log(paste("Current week:", weeks$current_week, "Previous week:", weeks$previous_week))
|
|
|
|
# Load weekly mosaics
|
|
current_ci <- load_weekly_ci_mosaic(weeks$current_week, weeks$year, weekly_CI_mosaic)
|
|
previous_ci <- load_weekly_ci_mosaic(weeks$previous_week, weeks$previous_year, weekly_CI_mosaic)
|
|
|
|
if (is.null(current_ci)) {
|
|
stop("Current week CI mosaic is required but not found")
|
|
}
|
|
|
|
# Check if field boundaries are loaded
|
|
if (is.null(field_boundaries_sf)) {
|
|
stop("Field boundaries not loaded. Check parameters_project.R initialization.")
|
|
}
|
|
|
|
# Calculate all KPIs
|
|
kpi_results <- list()
|
|
|
|
# 1. Field Uniformity Summary
|
|
uniformity_result <- calculate_field_uniformity_kpi(current_ci, field_boundaries_sf)
|
|
kpi_results$field_uniformity <- uniformity_result$field_results
|
|
kpi_results$field_uniformity_summary <- uniformity_result$summary
|
|
|
|
# 2. Farm-wide Area Change Summary
|
|
area_change_result <- calculate_area_change_kpi(current_ci, previous_ci, field_boundaries_sf)
|
|
kpi_results$area_change <- area_change_result$summary
|
|
kpi_results$area_change_field_results <- area_change_result$field_results
|
|
|
|
# 3. TCH Forecasted
|
|
tch_result <- calculate_tch_forecasted_kpi(field_boundaries_sf, harvesting_data, cumulative_CI_vals_dir)
|
|
kpi_results$tch_forecasted <- tch_result$summary
|
|
kpi_results$tch_forecasted_field_results <- tch_result$field_results
|
|
|
|
# 4. Growth Decline Index
|
|
growth_decline_result <- calculate_growth_decline_kpi(current_ci, previous_ci, field_boundaries_sf)
|
|
kpi_results$growth_decline <- growth_decline_result$summary
|
|
kpi_results$growth_decline_field_results <- growth_decline_result$field_results
|
|
|
|
# 5. Weed Presence Score (with field age filtering)
|
|
weed_presence_result <- calculate_weed_presence_kpi(current_ci, previous_ci, field_boundaries_sf,
|
|
harvesting_data = harvesting_data,
|
|
cumulative_CI_vals_dir = cumulative_CI_vals_dir)
|
|
kpi_results$weed_presence <- weed_presence_result$summary
|
|
kpi_results$weed_presence_field_results <- weed_presence_result$field_results
|
|
|
|
# 6. Gap Filling Score
|
|
gap_filling_result <- calculate_gap_filling_kpi(current_ci, field_boundaries_sf)
|
|
kpi_results$gap_filling <- gap_filling_result$summary
|
|
kpi_results$gap_filling_field_results <- gap_filling_result$field_results
|
|
|
|
# Add metadata and field boundaries for later use
|
|
kpi_results$metadata <- list(
|
|
report_date = report_date,
|
|
current_week = weeks$current_week,
|
|
previous_week = weeks$previous_week,
|
|
year = weeks$year,
|
|
calculation_time = Sys.time(),
|
|
total_fields = nrow(field_boundaries_sf)
|
|
)
|
|
|
|
# Store field_boundaries_sf for use in export_kpi_data
|
|
kpi_results$field_boundaries_sf <- field_boundaries_sf
|
|
|
|
# Save results if output directory specified
|
|
if (!is.null(output_dir)) {
|
|
if (!dir.exists(output_dir)) {
|
|
dir.create(output_dir, recursive = TRUE)
|
|
}
|
|
|
|
# Export KPI data in multiple formats for R Markdown integration
|
|
exported_files <- export_kpi_data(kpi_results, output_dir, project_dir)
|
|
kpi_results$exported_files <- exported_files
|
|
|
|
# Also save raw results
|
|
week_suffix <- paste0("week", sprintf("%02d_%d", weeks$current_week, weeks$year))
|
|
output_file <- file.path(output_dir, paste0("kpi_results_", week_suffix, ".rds"))
|
|
saveRDS(kpi_results, output_file)
|
|
safe_log(paste("KPI results saved to:", output_file))
|
|
}
|
|
|
|
safe_log("=== KPI CALCULATION COMPLETED ===")
|
|
return(kpi_results)
|
|
}
|