SmartCane/r_app/old_scripts/kpi_utils.R

1394 lines
55 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# KPI_UTILS.R
# ===========
# Utility functions for SmartCane KPI calculation workflow.
# These functions support the calculation of 6 key performance indicators:
# 1. Field Uniformity Summary
# 2. Farm-wide Area Change Summary
# 3. TCH Forecasted
# 4. Growth Decline Index
# 5. Weed Presence Score
# 6. Gap Filling Score
# Note: This file depends on functions from crop_messaging_utils.R:
# - safe_log()
# - calculate_cv()
# - calculate_spatial_autocorrelation()
# - calculate_change_percentages()
# 1. Helper Functions
# -----------------
#' Extract CI band only from a multi-band raster
#' @param ci_raster CI raster (can be multi-band with Red, Green, Blue, NIR, CI)
#' @param field_vect Field boundary as SpatVector
#' @return Vector of CI values
extract_ci_values <- function(ci_raster, field_vect) {
extracted <- terra::extract(ci_raster, field_vect, fun = NULL)
# Check if CI column exists (multi-band mosaic)
if ("CI" %in% names(extracted)) {
return(extracted[, "CI"])
} else if (ncol(extracted) > 1) {
# Fallback: assume last column is CI (after ID, Red, Green, Blue, NIR)
return(extracted[, ncol(extracted)])
} else {
# Single band raster - return as is
return(extracted[, 1])
}
}
#' Calculate current and previous week numbers using ISO 8601 week numbering
#' @param report_date Date to calculate weeks for (default: today)
#' @return List with current_week and previous_week numbers
calculate_week_numbers <- function(report_date = Sys.Date()) {
# Use ISO 8601 week and year numbering - weeks start on Monday
# This matches the date-math approach in mosaic_creation.R
report_date <- as.Date(report_date)
# Get ISO week and year for current date
current_week <- lubridate::isoweek(report_date)
current_iso_year <- lubridate::isoyear(report_date)
# Calculate previous week by subtracting 7 days and recalculating
previous_date <- report_date - 7
previous_week <- lubridate::isoweek(previous_date)
previous_iso_year <- lubridate::isoyear(previous_date)
return(list(
current_week = current_week,
current_iso_year = current_iso_year,
previous_week = previous_week,
previous_iso_year = previous_iso_year,
report_date = report_date
))
}
#' Load weekly mosaic CI data
#' @param week_num Week number
#' @param year Year
#' @param mosaic_dir Directory containing weekly mosaics
#' @return Terra raster with CI band, or NULL if file not found
# Helper function to load CI raster for a specific field (handles both single-file and per-field architectures)
load_field_ci_raster <- function(ci_raster_or_obj, field_name, field_vect = NULL) {
# Check if this is per-field loading mode
is_per_field <- !is.null(attr(ci_raster_or_obj, "is_per_field")) && attr(ci_raster_or_obj, "is_per_field")
if (is_per_field) {
# Per-field architecture: load this specific field's mosaic
per_field_dir <- attr(ci_raster_or_obj, "per_field_dir")
week_file <- attr(ci_raster_or_obj, "week_file")
field_mosaic_path <- file.path(per_field_dir, field_name, week_file)
if (file.exists(field_mosaic_path)) {
tryCatch({
field_mosaic <- terra::rast(field_mosaic_path)
# Extract CI band (5th band) if multi-band, otherwise use as-is
if (terra::nlyr(field_mosaic) >= 5) {
return(field_mosaic[[5]])
} else {
return(field_mosaic[[1]])
}
}, error = function(e) {
safe_log(paste("Error loading per-field mosaic for", field_name, ":", e$message), "WARNING")
return(NULL)
})
} else {
safe_log(paste("Per-field mosaic not found for", field_name), "WARNING")
return(NULL)
}
} else {
# Single-file architecture: crop from loaded raster
if (!is.null(field_vect)) {
return(terra::crop(ci_raster_or_obj, field_vect, mask = TRUE))
} else {
return(ci_raster_or_obj)
}
}
}
load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) {
week_file <- sprintf("week_%02d_%d.tif", week_num, year)
week_path <- file.path(mosaic_dir, week_file)
# FIRST: Try to load single-file mosaic (legacy approach)
if (file.exists(week_path)) {
tryCatch({
mosaic_raster <- terra::rast(week_path)
ci_raster <- mosaic_raster[[5]] # CI is the 5th band
names(ci_raster) <- "CI"
safe_log(paste("Loaded weekly mosaic (single-file):", week_file))
return(ci_raster)
}, error = function(e) {
safe_log(paste("Error loading mosaic:", e$message), "ERROR")
return(NULL)
})
}
# SECOND: Per-field architecture - store mosaic_dir path for later per-field loading
# Don't try to merge - just return the directory path so field-level functions can load per-field
if (dir.exists(mosaic_dir)) {
field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE)
field_dirs <- field_dirs[field_dirs != ""]
# Check if any field has this week's mosaic
found_any <- FALSE
for (field in field_dirs) {
field_mosaic_path <- file.path(mosaic_dir, field, week_file)
if (file.exists(field_mosaic_path)) {
found_any <- TRUE
break
}
}
if (found_any) {
safe_log(paste("Found per-field mosaics for week", sprintf("%02d", week_num), year,
"- will load per-field on demand"))
# Return a special object that indicates per-field loading is needed
# Store the mosaic_dir path in the raster's metadata
dummy_raster <- terra::rast(nrow=1, ncol=1, vals=NA)
attr(dummy_raster, "per_field_dir") <- mosaic_dir
attr(dummy_raster, "week_file") <- week_file
attr(dummy_raster, "is_per_field") <- TRUE
return(dummy_raster)
}
}
# If we get here, no mosaic found
safe_log(paste("Weekly mosaic not found for week", sprintf("%02d", week_num), year), "WARNING")
return(NULL)
}
# Function to prepare predictions with consistent naming and formatting
prepare_predictions <- function(predictions, newdata) {
return(predictions %>%
as.data.frame() %>%
dplyr::rename(predicted_Tcha = ".") %>%
dplyr::mutate(
sub_field = newdata$sub_field,
field = newdata$field,
Age_days = newdata$DOY,
total_CI = round(newdata$cumulative_CI, 0),
predicted_Tcha = round(predicted_Tcha, 0),
season = newdata$season
) %>%
dplyr::select(field, sub_field, Age_days, predicted_Tcha, season) %>%
dplyr::left_join(., newdata, by = c("field", "sub_field", "season"))
)
}
# 2. KPI Calculation Functions
# ---------------------------
#' Calculate Field Uniformity Summary KPI
#' @param ci_raster Current week CI raster
#' @param field_boundaries Field boundaries
#' @return List with summary data frame and field-level results data frame
calculate_field_uniformity_kpi <- function(ci_raster, field_boundaries) {
safe_log("Calculating Field Uniformity Summary KPI")
# Handle both sf and SpatVector inputs
if (!inherits(field_boundaries, "SpatVector")) {
field_boundaries_vect <- terra::vect(field_boundaries)
} else {
field_boundaries_vect <- field_boundaries
}
field_results_list <- vector("list", nrow(field_boundaries))
for (i in seq_len(nrow(field_boundaries))) {
field_name <- field_boundaries$field[i]
sub_field_name <- field_boundaries$sub_field[i]
field_id <- paste0(field_name, "_", sub_field_name)
# Extract field boundary
field_vect <- field_boundaries_vect[i]
# Load appropriate CI raster using helper function
cropped_raster <- load_field_ci_raster(ci_raster, field_name, field_vect)
# Extract CI values for this field using helper function
if (!is.null(cropped_raster)) {
field_values <- extract_ci_values(cropped_raster, field_vect)
valid_values <- field_values[!is.na(field_values) & is.finite(field_values)]
} else {
valid_values <- c()
}
# If all valid values are 0 (cloud), fill with NA row
if (length(valid_values) == 0 || all(valid_values == 0)) {
field_results_list[[i]] <- data.frame(
field = field_name,
sub_field = sub_field_name,
field_id = field_id,
cv_value = NA_real_,
uniformity_level = NA_character_,
mean_ci = NA_real_,
std_ci = NA_real_
)
} else if (length(valid_values) > 1) {
# Calculate CV using existing function
cv_value <- calculate_cv(valid_values)
# Classify uniformity level
uniformity_level <- dplyr::case_when(
cv_value < 0.15 ~ "Excellent",
cv_value < 0.25 ~ "Good",
cv_value < 0.35 ~ "Moderate",
TRUE ~ "Poor"
)
field_results_list[[i]] <- data.frame(
field = field_name,
sub_field = sub_field_name,
field_id = field_id,
cv_value = cv_value,
uniformity_level = uniformity_level,
mean_ci = mean(valid_values),
std_ci = sd(valid_values)
)
} else {
# If only one valid value, fill with NA (not enough data for CV)
field_results_list[[i]] <- data.frame(
field = field_name,
sub_field = sub_field_name,
field_id = field_id,
cv_value = NA_real_,
uniformity_level = NA_character_,
mean_ci = mean(valid_values),
std_ci = NA_real_
)
}
}
field_results <- do.call(rbind, field_results_list)
# Create summary
uniformity_summary <- field_results %>%
dplyr::group_by(uniformity_level) %>%
dplyr::summarise(count = n(), .groups = 'drop') %>%
dplyr::mutate(percent = round((count / sum(count)) * 100, 1))
# Ensure all uniformity levels are represented
all_levels <- data.frame(uniformity_level = c("Excellent", "Good", "Moderate", "Poor"))
uniformity_summary <- merge(all_levels, uniformity_summary, all.x = TRUE)
uniformity_summary$count[is.na(uniformity_summary$count)] <- 0
uniformity_summary$percent[is.na(uniformity_summary$percent)] <- 0
return(list(summary = uniformity_summary, field_results = field_results))
}
#' Calculate Farm-wide Area Change Summary KPI
#' @param current_ci Current week CI raster
#' @param previous_ci Previous week CI raster
#' @param field_boundaries Field boundaries
#' @return List with summary data frame and field-level results data frame
calculate_area_change_kpi <- function(current_ci, previous_ci, field_boundaries) {
safe_log("Calculating Farm-wide Area Change Summary KPI")
if (is.null(previous_ci)) {
safe_log("Previous week data not available, using placeholder values", "WARNING")
summary_result <- data.frame(
change_type = c("Improving areas", "Stable areas", "Declining areas", "Total area"),
hectares = c(0, 0, 0, 0),
percent = c(0, 0, 0, 0)
)
field_results <- data.frame(
field = character(0),
sub_field = character(0),
improving_ha = numeric(0),
stable_ha = numeric(0),
declining_ha = numeric(0),
total_area_ha = numeric(0)
)
return(list(summary = summary_result, field_results = field_results))
}
# Handle both sf and SpatVector inputs
if (!inherits(field_boundaries, "SpatVector")) {
field_boundaries_vect <- terra::vect(field_boundaries)
} else {
field_boundaries_vect <- field_boundaries
}
total_improving_ha <- 0
total_stable_ha <- 0
total_declining_ha <- 0
total_area_ha <- 0
field_results <- data.frame()
# Process each field individually (like crop messaging does)
for (i in seq_len(nrow(field_boundaries))) {
field_name <- field_boundaries$field[i]
sub_field_name <- field_boundaries$sub_field[i]
# Get field area from boundaries (same as crop messaging)
field_area_ha <- NA
if ("area_ha" %in% colnames(field_boundaries)) {
field_area_ha <- field_boundaries$area_ha[i]
} else if ("AREA_HA" %in% colnames(field_boundaries)) {
field_area_ha <- field_boundaries$AREA_HA[i]
} else if ("area" %in% colnames(field_boundaries)) {
field_area_ha <- field_boundaries$area[i]
} else {
# Always transform to equal-area projection for accurate area calculation
field_geom <- terra::project(field_boundaries_vect[i, ], "EPSG:6933") # Equal Earth projection
field_area_ha <- terra::expanse(field_geom) / 10000 # Convert to hectares
}
# Skip if no valid area
if (is.na(field_area_ha) || field_area_ha <= 0) {
field_results <- rbind(field_results, data.frame(
field = field_name,
sub_field = sub_field_name,
improving_ha = NA_real_,
stable_ha = NA_real_,
declining_ha = NA_real_,
total_area_ha = NA_real_
))
next
}
# Extract field boundary
field_vect <- field_boundaries_vect[i]
# Load appropriate CI rasters using helper function
current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect)
previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect)
# Extract CI values for both weeks
if (!is.null(current_field_ci) && !is.null(previous_field_ci)) {
current_values <- extract_ci_values(current_field_ci, field_vect)
previous_values <- extract_ci_values(previous_field_ci, field_vect)
} else {
current_values <- c()
previous_values <- c()
}
# Clean values
valid_idx <- !is.na(current_values) & !is.na(previous_values) &
is.finite(current_values) & is.finite(previous_values)
current_clean <- current_values[valid_idx]
previous_clean <- previous_values[valid_idx]
if (length(current_clean) > 10) {
# Calculate change percentages (same as crop messaging)
change_percentages <- calculate_change_percentages(current_clean, previous_clean)
# Convert percentages to hectares (same as crop messaging)
improving_ha <- (change_percentages$positive_pct / 100) * field_area_ha
stable_ha <- (change_percentages$stable_pct / 100) * field_area_ha
declining_ha <- (change_percentages$negative_pct / 100) * field_area_ha
# Accumulate totals
total_improving_ha <- total_improving_ha + improving_ha
total_stable_ha <- total_stable_ha + stable_ha
total_declining_ha <- total_declining_ha + declining_ha
total_area_ha <- total_area_ha + field_area_ha
# Store field-level results
field_results <- rbind(field_results, data.frame(
field = field_name,
sub_field = sub_field_name,
improving_ha = improving_ha,
stable_ha = stable_ha,
declining_ha = declining_ha,
total_area_ha = field_area_ha
))
} else {
# Not enough valid data, fill with NA row
field_results <- rbind(field_results, data.frame(
field = field_name,
sub_field = sub_field_name,
improving_ha = NA_real_,
stable_ha = NA_real_,
declining_ha = NA_real_,
total_area_ha = field_area_ha
))
}
}
# Calculate percentages
if (total_area_ha > 0) {
improving_pct <- (total_improving_ha / total_area_ha) * 100
stable_pct <- (total_stable_ha / total_area_ha) * 100
declining_pct <- (total_declining_ha / total_area_ha) * 100
} else {
improving_pct <- stable_pct <- declining_pct <- 0
}
summary_result <- data.frame(
change_type = c("Improving areas", "Stable areas", "Declining areas", "Total area"),
hectares = round(c(total_improving_ha, total_stable_ha, total_declining_ha, total_area_ha), 1),
percent = round(c(improving_pct, stable_pct, declining_pct, 100.0), 1)
)
return(list(summary = summary_result, field_results = field_results))
}
#' Calculate TCH Forecasted KPI (using actual yield prediction models)
#' @param field_boundaries Field boundaries
#' @param harvesting_data Harvesting data with tonnage_ha
#' @param cumulative_CI_vals_dir Directory with cumulative CI data
#' @return Data frame with yield forecast groups and predictions
calculate_tch_forecasted_kpi <- function(field_boundaries, harvesting_data, cumulative_CI_vals_dir) {
safe_log("Calculating TCH Forecasted KPI using yield prediction models")
# Helper function for fallback return
create_fallback_result <- function(field_boundaries) {
# Convert to SpatVector if needed (for terra::project)
if (!inherits(field_boundaries, "SpatVector")) {
field_boundaries <- terra::vect(field_boundaries)
}
field_boundaries_projected <- terra::project(field_boundaries, "EPSG:6933") # Equal Earth projection
field_areas <- terra::expanse(field_boundaries_projected) / 10000 # Convert m² to hectares
total_area <- sum(field_areas)
summary_result <- data.frame(
field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"),
count = c(0, 0, 0, nrow(field_boundaries)),
value = c(0, 0, 0, round(total_area, 1))
)
field_results <- data.frame(
field = character(0),
sub_field = character(0),
Age_days = numeric(0),
yield_forecast_t_ha = numeric(0),
season = numeric(0)
)
return(list(summary = summary_result, field_results = field_results))
}
tryCatch({
# Check if tonnage_ha is empty
if (is.null(harvesting_data) || !("tonnage_ha" %in% names(harvesting_data)) || all(is.na(harvesting_data$tonnage_ha))) {
safe_log("Lacking historic harvest data, using placeholder yield prediction", "WARNING")
return(create_fallback_result(field_boundaries)) }
# Load CI quadrant data and fill missing values
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>%
dplyr::group_by(model) %>%
tidyr::fill(field, sub_field, .direction = "downup") %>%
dplyr::ungroup()
# Rename year column to season for consistency
harvesting_data_renamed <- harvesting_data %>% dplyr::rename(season = year)
# Join CI and yield data
CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed, by = c("field", "sub_field", "season")) %>%
dplyr::group_by(sub_field, season) %>%
dplyr::slice(which.max(DOY)) %>%
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>%
dplyr::mutate(CI_per_day = cumulative_CI / DOY)
# Define predictors and response variables
predictors <- c("cumulative_CI", "DOY", "CI_per_day")
response <- "tonnage_ha"
# Prepare test and validation datasets
CI_and_yield_test <- CI_and_yield %>%
as.data.frame() %>%
dplyr::filter(!is.na(tonnage_ha))
CI_and_yield_validation <- CI_and_yield_test
# Prepare prediction dataset (fields without harvest data, mature fields only)
prediction_yields <- CI_and_yield %>%
as.data.frame() %>%
dplyr::filter(is.na(tonnage_ha) & DOY >= 240) # Filter for mature fields BEFORE prediction
# Check if we have training data
if (nrow(CI_and_yield_test) == 0) {
safe_log("No training data available for yield prediction", "WARNING")
return(create_fallback_result(field_boundaries))
}
# Configure model training parameters
ctrl <- caret::trainControl(
method = "cv",
savePredictions = TRUE,
allowParallel = TRUE,
number = 5,
verboseIter = TRUE
)
# Train the model with feature selection
set.seed(202) # For reproducibility
model_ffs_rf <- CAST::ffs(
CI_and_yield_test[, predictors],
CI_and_yield_test[, response],
method = "rf",
trControl = ctrl,
importance = TRUE,
withinSE = TRUE,
tuneLength = 5,
na.rm = TRUE
)
# Predict yields for the validation dataset
pred_ffs_rf <- prepare_predictions(stats::predict(model_ffs_rf, newdata = CI_and_yield_validation), CI_and_yield_validation)
# Calculate RMSE for validation predictions
rmse_value <- sqrt(mean((pred_ffs_rf$predicted_Tcha - CI_and_yield_validation$tonnage_ha)^2, na.rm = TRUE))
safe_log(paste("Yield prediction RMSE:", round(rmse_value, 2), "t/ha"))
# Predict yields for the current season (focus on mature fields over 240 days / 8 months)
pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>%
dplyr::filter(Age_days >= 240) %>% # Changed from > 1 to >= 240 (8 months minimum)
dplyr::select(c("field", "Age_days", "predicted_Tcha", "season"))
# Calculate summary statistics for KPI
if (nrow(pred_rf_current_season) > 0) {
# Debug: Log the predicted values
safe_log(paste("Predicted yields summary:", paste(summary(pred_rf_current_season$predicted_Tcha), collapse = ", ")))
safe_log(paste("Number of predictions:", nrow(pred_rf_current_season)))
safe_log("Sample predictions:", paste(head(pred_rf_current_season$predicted_Tcha, 5), collapse = ", "))
# Calculate quartiles for grouping
yield_quartiles <- quantile(pred_rf_current_season$predicted_Tcha, probs = c(0.25, 0.5, 0.75), na.rm = TRUE)
safe_log(paste("Yield quartiles (25%, 50%, 75%):", paste(round(yield_quartiles, 1), collapse = ", ")))
# Count fields in each group
top_25_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[3], na.rm = TRUE)
average_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[1] & pred_rf_current_season$predicted_Tcha < yield_quartiles[3], na.rm = TRUE)
lowest_25_count <- sum(pred_rf_current_season$predicted_Tcha < yield_quartiles[1], na.rm = TRUE)
# Calculate total area
if (!inherits(field_boundaries, "SpatVector")) {
field_boundaries_vect <- terra::vect(field_boundaries)
} else {
field_boundaries_vect <- field_boundaries
}
# Use sf::st_transform instead of terra::project for sf objects
if (inherits(field_boundaries, "sf")) {
field_boundaries_projected <- sf::st_transform(field_boundaries, "EPSG:6933") # Equal Earth projection
field_areas <- sf::st_area(field_boundaries_projected) / 10000 # Convert m² to hectares
} else {
field_boundaries_projected <- terra::project(field_boundaries_vect, "EPSG:6933") # Equal Earth projection
field_areas <- terra::expanse(field_boundaries_projected) / 10000 # Convert m² to hectares
}
total_area <- sum(as.numeric(field_areas))
safe_log(paste("Total area calculated:", round(total_area, 1), "hectares"))
result <- data.frame(
field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"),
count = c(top_25_count, average_count, lowest_25_count, nrow(field_boundaries)),
value = c(round(yield_quartiles[3], 1), round(yield_quartiles[2], 1), round(yield_quartiles[1], 1), round(total_area, 1))
)
safe_log("Returning actual yield predictions")
safe_log("Final result:")
print(result)
# Prepare field-level results
field_level_results <- pred_rf_current_season %>%
dplyr::select(field, Age_days, predicted_Tcha, season) %>%
dplyr::rename(yield_forecast_t_ha = predicted_Tcha)
return(list(summary = result, field_results = field_level_results))
} else {
safe_log("No yield predictions generated", "WARNING")
return(list(summary = create_fallback_result(field_boundaries), field_results = data.frame()))
}
}, error = function(e) {
safe_log(paste("Error in TCH yield prediction:", e$message), "ERROR")
return(create_fallback_result(field_boundaries))
})
}
#' Calculate Growth Decline Index KPI
#' @param current_ci Current week CI raster
#' @param previous_ci Previous week CI raster
#' @param field_boundaries Field boundaries
#' @return List with summary data frame and field-level results data frame
calculate_growth_decline_kpi <- function(current_ci, previous_ci, field_boundaries) {
safe_log("Calculating Growth Decline Index KPI")
if (is.null(previous_ci)) {
safe_log("Previous week data not available for growth decline analysis", "WARNING")
# Return structure indicating no data available
summary_result <- data.frame(
risk_level = c("No data", "Data unavailable", "Check next week", "Previous week missing"),
count = c(0, 0, 0, 0),
percent = c(0, 0, 0, 100)
)
field_results <- data.frame(
field = character(0),
sub_field = character(0),
risk_level = character(0),
risk_score = numeric(0),
decline_severity = numeric(0),
spatial_weight = numeric(0)
)
return(list(summary = summary_result, field_results = field_results))
}
# Handle both sf and SpatVector inputs
if (!inherits(field_boundaries, "SpatVector")) {
field_boundaries_vect <- terra::vect(field_boundaries)
} else {
field_boundaries_vect <- field_boundaries
}
field_results <- data.frame()
for (i in seq_len(nrow(field_boundaries))) {
field_name <- field_boundaries$field[i]
sub_field_name <- field_boundaries$sub_field[i]
field_vect <- field_boundaries_vect[i]
# Load appropriate CI rasters using helper function
current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect)
previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect)
# Extract CI values for both weeks
if (!is.null(current_field_ci) && !is.null(previous_field_ci)) {
current_values <- extract_ci_values(current_field_ci, field_vect)
previous_values <- extract_ci_values(previous_field_ci, field_vect)
} else {
current_values <- c()
previous_values <- c()
}
# Extract CI values for both weeks
if (!is.null(current_field_ci) && !is.null(previous_field_ci)) {
current_values <- extract_ci_values(current_field_ci, field_vect)
previous_values <- extract_ci_values(previous_field_ci, field_vect)
} else {
current_values <- c()
previous_values <- c()
}
# Clean values
valid_idx <- !is.na(current_values) & !is.na(previous_values) &
is.finite(current_values) & is.finite(previous_values)
current_clean <- current_values[valid_idx]
previous_clean <- previous_values[valid_idx]
if (length(current_clean) > 10) {
# Calculate CI change
ci_change <- current_clean - previous_clean
mean_change <- mean(ci_change)
# Calculate spatial metrics
spatial_result <- calculate_spatial_autocorrelation(current_ci, field_vect)
cv_value <- calculate_cv(current_clean)
# Determine risk level based on CI decline and spatial distribution
decline_severity <- ifelse(mean_change < -1.0, abs(mean_change), 0)
spatial_weight <- ifelse(!is.na(spatial_result$morans_i),
(1 - abs(spatial_result$morans_i)) * cv_value,
cv_value)
risk_score <- decline_severity * (1 + spatial_weight)
risk_level <- dplyr::case_when(
risk_score < 0.5 ~ "Low",
risk_score < 1.5 ~ "Moderate",
risk_score < 3.0 ~ "High",
TRUE ~ "Very-high"
)
field_results <- rbind(field_results, data.frame(
field = field_name,
sub_field = sub_field_name,
risk_level = risk_level,
risk_score = risk_score,
decline_severity = decline_severity,
spatial_weight = spatial_weight,
morans_i = spatial_result$morans_i # Add Moran's I to results
))
} else {
# Not enough valid data, fill with NA row
field_results <- rbind(field_results, data.frame(
field = field_name,
sub_field = sub_field_name,
risk_level = NA_character_,
risk_score = NA_real_,
decline_severity = NA_real_,
spatial_weight = NA_real_,
morans_i = NA_real_
))
}
}
# Summarize results
risk_summary <- field_results %>%
dplyr::group_by(risk_level) %>%
dplyr::summarise(count = n(), .groups = 'drop') %>%
dplyr::mutate(percent = round((count / sum(count)) * 100, 1))
# Ensure all risk levels are represented
all_levels <- data.frame(risk_level = c("Low", "Moderate", "High", "Very-high"))
risk_summary <- merge(all_levels, risk_summary, all.x = TRUE)
risk_summary$count[is.na(risk_summary$count)] <- 0
risk_summary$percent[is.na(risk_summary$percent)] <- 0
return(list(summary = risk_summary, field_results = field_results))
}
#' Calculate Weed Presence Score KPI
#' @param current_ci Current week CI raster
#' @param previous_ci Previous week CI raster
#' @param field_boundaries Field boundaries
#' @param harvesting_data Harvesting data with field ages (DOY)
#' @param cumulative_CI_vals_dir Directory with cumulative CI data to get current field ages
#' @return List with summary data frame and field-level results data frame
calculate_weed_presence_kpi <- function(current_ci, previous_ci, field_boundaries, harvesting_data = NULL, cumulative_CI_vals_dir = NULL) {
safe_log("Calculating Weed Presence Score KPI")
# Load field age data from CI_quadrant if available
field_ages <- NULL
if (!is.null(cumulative_CI_vals_dir)) {
tryCatch({
CI_quadrant <- readRDS(file.path(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
# Get most recent DOY (age) for each field FROM THE CURRENT SEASON ONLY
# First identify the current season (most recent season with data)
current_seasons <- CI_quadrant %>%
dplyr::group_by(field, sub_field) %>%
dplyr::filter(season == max(season, na.rm = TRUE)) %>%
dplyr::ungroup()
# Get the maximum DOY from current season for each field
field_ages <- current_seasons %>%
dplyr::group_by(field, sub_field) %>%
dplyr::slice(which.max(DOY)) %>%
dplyr::select(field, sub_field, DOY) %>%
dplyr::ungroup()
safe_log(paste("Loaded field ages for", nrow(field_ages), "fields"))
}, error = function(e) {
safe_log(paste("Could not load field ages:", e$message), "WARNING")
})
}
if (is.null(previous_ci)) {
safe_log("Previous week data not available for weed analysis", "WARNING")
summary_result <- data.frame(
weed_risk_level = c("Low", "Moderate", "High"),
field_count = c(0, 0, 0),
percent = c(NA_real_, NA_real_, NA_real_)
)
field_results <- data.frame(
field = character(0),
sub_field = character(0),
weed_risk_level = character(0),
rapid_growth_pct = numeric(0),
rapid_growth_pixels = numeric(0)
)
return(list(summary = summary_result, field_results = field_results))
}
# Handle both sf and SpatVector inputs
if (!inherits(field_boundaries, "SpatVector")) {
field_boundaries_vect <- terra::vect(field_boundaries)
} else {
field_boundaries_vect <- field_boundaries
}
field_results <- data.frame()
for (i in seq_len(nrow(field_boundaries))) {
field_name <- field_boundaries$field[i]
sub_field_name <- field_boundaries$sub_field[i]
field_vect <- field_boundaries_vect[i]
# Check field age (8 months = 240 days)
field_age <- NA
if (!is.null(field_ages)) {
age_row <- field_ages %>%
dplyr::filter(field == field_name, sub_field == sub_field_name)
if (nrow(age_row) > 0) {
field_age <- age_row$DOY[1]
}
}
# If field is >= 240 days old (8 months), canopy should be closed
if (!is.na(field_age) && field_age >= 240) {
field_results <- rbind(field_results, data.frame(
field = field_name,
sub_field = sub_field_name,
weed_risk_level = "Canopy closed - Low weed risk",
rapid_growth_pct = 0,
rapid_growth_pixels = 0,
field_age_days = field_age
))
next # Skip to next field
}
# Extract CI values for both weeks (using helper to get CI band only)
current_field_ci <- load_field_ci_raster(current_ci, field_name, field_vect)
previous_field_ci <- load_field_ci_raster(previous_ci, field_name, field_vect)
# Extract CI values for both weeks
if (!is.null(current_field_ci) && !is.null(previous_field_ci)) {
current_values <- extract_ci_values(current_field_ci, field_vect)
previous_values <- extract_ci_values(previous_field_ci, field_vect)
} else {
current_values <- c()
previous_values <- c()
}
# Clean values
valid_idx <- !is.na(current_values) & !is.na(previous_values) &
is.finite(current_values) & is.finite(previous_values)
current_clean <- current_values[valid_idx]
previous_clean <- previous_values[valid_idx]
if (length(current_clean) > 10) {
# Calculate CI change
ci_change <- current_clean - previous_clean
# Detect rapid growth (potential weeds) - Changed from 1.5 to 2.0 CI units
rapid_growth_pixels <- sum(ci_change > 2.0)
total_pixels <- length(ci_change)
rapid_growth_pct <- (rapid_growth_pixels / total_pixels) * 100
# Classify weed risk - Updated thresholds: Low <10%, Moderate 10-25%, High >25%
weed_risk <- dplyr::case_when(
rapid_growth_pct < 10 ~ "Low",
rapid_growth_pct < 25 ~ "Moderate",
TRUE ~ "High"
)
field_results <- rbind(field_results, data.frame(
field = field_name,
sub_field = sub_field_name,
weed_risk_level = weed_risk,
rapid_growth_pct = rapid_growth_pct,
rapid_growth_pixels = rapid_growth_pixels,
field_age_days = ifelse(is.na(field_age), NA, field_age)
))
} else {
# Not enough valid data, fill with NA row
field_results <- rbind(field_results, data.frame(
field = field_name,
sub_field = sub_field_name,
weed_risk_level = NA_character_,
rapid_growth_pct = NA_real_,
rapid_growth_pixels = NA_real_,
field_age_days = ifelse(is.na(field_age), NA, field_age)
))
}
}
# Summarize results
weed_summary <- field_results %>%
dplyr::group_by(weed_risk_level) %>%
dplyr::summarise(field_count = n(), .groups = 'drop') %>%
dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1))
# Ensure all risk levels are represented (including canopy closed)
all_levels <- data.frame(weed_risk_level = c("Low", "Moderate", "High", "Canopy closed - Low weed risk"))
weed_summary <- merge(all_levels, weed_summary, all.x = TRUE)
weed_summary$field_count[is.na(weed_summary$field_count)] <- 0
weed_summary$percent[is.na(weed_summary$percent)] <- 0
return(list(summary = weed_summary, field_results = field_results))
}
#' Calculate Gap Filling Score KPI (placeholder)
#' @param ci_raster Current week CI raster
#' @param field_boundaries Field boundaries
#' @return List with summary data frame and field-level results data frame
calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
safe_log("Calculating Gap Filling Score KPI (placeholder)")
# Handle both sf and SpatVector inputs
if (!inherits(field_boundaries, "SpatVector")) {
field_boundaries_vect <- terra::vect(field_boundaries)
} else {
field_boundaries_vect <- field_boundaries
}
field_results <- data.frame()
for (i in seq_len(nrow(field_boundaries))) {
field_name <- field_boundaries$field[i]
sub_field_name <- field_boundaries$sub_field[i]
field_vect <- field_boundaries_vect[i]
# Load appropriate CI raster using helper function
field_ci <- load_field_ci_raster(ci_raster, field_name, field_vect)
# Extract CI values using helper function
if (!is.null(field_ci)) {
ci_values <- extract_ci_values(field_ci, field_vect)
} else {
ci_values <- c()
}
valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)]
if (length(valid_values) > 1) {
# Gap score using 2σ below median to detect outliers
median_ci <- median(valid_values)
sd_ci <- sd(valid_values)
outlier_threshold <- median_ci - (2 * sd_ci)
low_ci_pixels <- sum(valid_values < outlier_threshold)
total_pixels <- length(valid_values)
gap_score <- (low_ci_pixels / total_pixels) * 100
# Classify gap severity
gap_level <- dplyr::case_when(
gap_score < 10 ~ "Minimal",
gap_score < 25 ~ "Moderate",
TRUE ~ "Significant"
)
field_results <- rbind(field_results, data.frame(
field = field_name,
sub_field = sub_field_name,
gap_level = gap_level,
gap_score = gap_score,
mean_ci = mean(valid_values),
outlier_threshold = outlier_threshold
))
} else {
# Not enough valid data, fill with NA row
field_results <- rbind(field_results, data.frame(
field = field_name,
sub_field = sub_field_name,
gap_level = NA_character_,
gap_score = NA_real_,
mean_ci = NA_real_,
outlier_threshold = NA_real_
))
}
}
# Summarize results
gap_summary <- field_results %>%
dplyr::group_by(gap_level) %>%
dplyr::summarise(field_count = n(), .groups = 'drop') %>%
dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1))
return(list(summary = gap_summary, field_results = field_results))
}
# 3. KPI Export and Formatting Functions
# -------------------------------------
#' Create summary tables for report front page
#' @param kpi_results List containing all KPI results
#' @return List of formatted summary tables
create_summary_tables <- function(kpi_results) {
summary_tables <- list()
# 1. Field Uniformity Summary Table
uniformity_summary <- kpi_results$field_uniformity_summary %>%
dplyr::rename(`Uniformity Level` = uniformity_level, Count = count, Percent = percent)
summary_tables$field_uniformity_summary <- uniformity_summary
# 2. Farm-wide Area Change Summary (already in correct format)
summary_tables$area_change_summary <- kpi_results$area_change %>%
dplyr::rename(`Change Type` = change_type, Hectares = hectares, Percent = percent)
# 3. TCH Forecasted Summary (already in correct format)
summary_tables$tch_forecasted_summary <- kpi_results$tch_forecasted %>%
dplyr::rename(`Field Groups` = field_groups, Count = count, Value = value)
# 4. Growth Decline Index Summary (already in correct format)
summary_tables$growth_decline_summary <- kpi_results$growth_decline %>%
dplyr::rename(`Risk Level` = risk_level, Count = count, Percent = percent)
# 5. Weed Presence Score Summary (already in correct format)
summary_tables$weed_presence_summary <- kpi_results$weed_presence %>%
dplyr::rename(`Weed Risk Level` = weed_risk_level, `Field Count` = field_count, Percent = percent)
# 6. Gap Filling Score Summary (already in correct format)
summary_tables$gap_filling_summary <- kpi_results$gap_filling %>%
dplyr::rename(`Gap Level` = gap_level, `Field Count` = field_count, Percent = percent)
return(summary_tables)
}
#' Create detailed field-by-field table for report end section
#' @param kpi_results List containing all KPI results
#' @param field_boundaries_sf Field boundaries (sf or SpatVector)
#' @return Data frame with field-by-field KPI details
create_field_detail_table <- function(kpi_results, field_boundaries_sf = NULL) {
# Define risk levels for consistent use
risk_levels <- c("Low", "Moderate", "High", "Very-high")
weed_levels <- c("Low", "Moderate", "High")
# Start with field uniformity as base (has all fields)
field_details <- kpi_results$field_uniformity %>%
dplyr::select(field, sub_field, field_id, uniformity_level, mean_ci, cv_value) %>%
dplyr::rename(
Field = field,
`Sub Field` = sub_field,
`Field ID` = field_id,
`Growth Uniformity` = uniformity_level,
`Mean CI` = mean_ci,
`CV Value` = cv_value
)
# Since subfield = field in this system, aggregate by field to avoid duplicates
# Take the first subfield for each field (they should be equivalent)
field_details <- field_details %>%
dplyr::group_by(Field) %>%
dplyr::slice(1) %>% # Take first row for each field
dplyr::ungroup() %>%
dplyr::select(-`Sub Field`, -`Field ID`) # Remove subfield columns since they're redundant
# Add field size - calculate from actual geometry
if (!is.null(field_boundaries_sf)) {
# Convert to sf if it's SpatVector
if (inherits(field_boundaries_sf, "SpatVector")) {
field_boundaries_sf <- sf::st_as_sf(field_boundaries_sf)
}
# Calculate actual areas in hectares
field_areas <- field_boundaries_sf %>%
dplyr::mutate(area_ha = as.numeric(sf::st_area(geometry)) / 10000) %>%
sf::st_drop_geometry() %>%
dplyr::group_by(field) %>%
dplyr::summarise(area_ha = sum(area_ha), .groups = "drop") %>%
dplyr::rename(Field = field, `Field Size (ha)` = area_ha) %>%
dplyr::mutate(`Field Size (ha)` = round(`Field Size (ha)`, 1))
# Join with field_details
field_details <- field_details %>%
dplyr::left_join(field_areas, by = "Field")
} else {
# Fallback to placeholder if boundaries not provided
field_details$`Field Size (ha)` <- NA_real_
}
# Add yield prediction from TCH forecasted field results
# Only include predictions for fields that are mature (>= 240 days)
if (!is.null(kpi_results$tch_forecasted_field_results) && nrow(kpi_results$tch_forecasted_field_results) > 0) {
yield_data <- kpi_results$tch_forecasted_field_results %>%
dplyr::select(field, yield_forecast_t_ha) %>%
dplyr::rename(`Yield Forecast (t/ha)` = yield_forecast_t_ha)
field_details <- dplyr::left_join(field_details, yield_data, by = c("Field" = "field"))
# Keep NAs as NA for fields that are too young to predict
} else {
# No predictions available, set all to NA
field_details$`Yield Forecast (t/ha)` <- NA_real_
}
# Add gap presence score from gap filling field results (aggregate by field)
if (!is.null(kpi_results$gap_filling_field_results) && nrow(kpi_results$gap_filling_field_results) > 0) {
gap_data <- kpi_results$gap_filling_field_results %>%
dplyr::group_by(field) %>%
dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE)) %>% # Average across subfields
dplyr::rename(`Gap Score` = gap_score)
field_details <- dplyr::left_join(field_details, gap_data, by = c("Field" = "field"))
} else {
# Gap score data unavailable - set to NA_real_ for deterministic handling
field_details$`Gap Score` <- NA_real_
}
# Add growth decline risk from growth decline field results (aggregate by field)
if (!is.null(kpi_results$growth_decline_field_results) && nrow(kpi_results$growth_decline_field_results) > 0) {
decline_data <- kpi_results$growth_decline_field_results %>%
dplyr::group_by(field) %>%
dplyr::summarise(risk_level = dplyr::first(risk_level)) %>% # Take first risk level (should be consistent)
dplyr::rename(`Decline Risk` = risk_level)
field_details <- dplyr::left_join(field_details, decline_data, by = c("Field" = "field"))
} else {
# Decline risk data unavailable - set to NA for deterministic handling
field_details$`Decline Risk` <- NA
}
# Add Moran's I spatial autocorrelation from growth decline field results (aggregate by field)
if (!is.null(kpi_results$growth_decline_field_results) && nrow(kpi_results$growth_decline_field_results) > 0) {
moran_data <- kpi_results$growth_decline_field_results %>%
dplyr::group_by(field) %>%
dplyr::summarise(morans_i = mean(morans_i, na.rm = TRUE)) %>% # Average Moran's I across subfields
dplyr::rename(`Moran's I` = morans_i)
field_details <- dplyr::left_join(field_details, moran_data, by = c("Field" = "field"))
} else {
# Moran's I data unavailable - set to NA_real_ for deterministic handling
field_details$`Moran's I` <- NA_real_
}
# Add weed risk from weed presence field results (aggregate by field)
if (!is.null(kpi_results$weed_presence_field_results) && nrow(kpi_results$weed_presence_field_results) > 0) {
weed_data <- kpi_results$weed_presence_field_results %>%
dplyr::group_by(field) %>%
dplyr::summarise(weed_risk_level = dplyr::first(weed_risk_level)) %>% # Take first weed risk (should be consistent)
dplyr::rename(`Weed Risk` = weed_risk_level)
field_details <- dplyr::left_join(field_details, weed_data, by = c("Field" = "field"))
} else {
# Weed risk data unavailable - set to NA for deterministic handling
field_details$`Weed Risk` <- NA
}
# Keep any remaining NAs as-is for all fields (NA indicates data unavailable)
# Do not fill with random values - let downstream code handle NA values deterministically
# Reorder columns for better presentation
field_details <- field_details %>%
dplyr::select(`Field`, `Field Size (ha)`, `Growth Uniformity`,
`Yield Forecast (t/ha)`, `Gap Score`, `Decline Risk`, `Weed Risk`,
`Moran's I`, `Mean CI`, `CV Value`)
return(field_details)
}
#' Create field-specific KPI text for individual field pages
#' @param field_id Field identifier (e.g., "A_1")
#' @param kpi_results List containing all KPI results
#' @return Character string with field-specific KPI summary
create_field_kpi_text <- function(field_id, kpi_results) {
# Extract field-specific data from field uniformity
field_data <- kpi_results$field_uniformity %>%
dplyr::filter(field_id == !!field_id)
if (nrow(field_data) == 0) {
return(paste("Field", field_id, ": Data not available"))
}
# Get field metrics
uniformity <- field_data$uniformity_level[1]
mean_ci <- round(field_data$mean_ci[1], 2)
cv <- round(field_data$cv_value[1], 3)
# Create summary text
kpi_text <- paste0(
"Field ", field_id, " KPIs: ",
"Uniformity: ", uniformity, " (CV=", cv, "), ",
"Mean CI: ", mean_ci, ", ",
"Status: ", ifelse(mean_ci > 3, "Good Growth",
ifelse(mean_ci > 1.5, "Moderate Growth", "Monitoring Required"))
)
return(kpi_text)
}
#' Export all KPI data in multiple formats for R Markdown integration
#' @param kpi_results List containing all KPI results
#' @param output_dir Directory to save exported files
#' @param project_name Project name for file naming
#' @return List of file paths for exported data
export_kpi_data <- function(kpi_results, output_dir, project_name = "smartcane") {
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
exported_files <- list()
week_suffix <- paste0("week", kpi_results$metadata$current_week)
date_suffix <- format(kpi_results$metadata$report_date, "%Y%m%d")
# 1. Export summary tables for front page
summary_tables <- create_summary_tables(kpi_results)
summary_file <- file.path(output_dir, paste0(project_name, "_kpi_summary_tables_", week_suffix, ".rds"))
saveRDS(summary_tables, summary_file)
exported_files$summary_tables <- summary_file
# 2. Export detailed field table for end section
# Note: field_boundaries_sf should be passed from calculate_all_kpis()
field_details <- create_field_detail_table(kpi_results, kpi_results$field_boundaries_sf)
detail_file <- file.path(output_dir, paste0(project_name, "_field_details_", week_suffix, ".rds"))
saveRDS(field_details, detail_file)
exported_files$field_details <- detail_file
# 3. Export raw KPI results
raw_file <- file.path(output_dir, paste0(project_name, "_kpi_raw_", week_suffix, ".rds"))
saveRDS(kpi_results, raw_file)
exported_files$raw_kpi_data <- raw_file
# 4. Export field-level KPI tables
field_tables_dir <- file.path(output_dir, "field_level")
if (!dir.exists(field_tables_dir)) {
dir.create(field_tables_dir, recursive = TRUE)
}
# Export each field-level table
field_kpi_names <- c(
"field_uniformity" = "field_uniformity",
"area_change" = "area_change_field_results",
"tch_forecasted" = "tch_forecasted_field_results",
"growth_decline" = "growth_decline_field_results",
"weed_presence" = "weed_presence_field_results",
"gap_filling" = "gap_filling_field_results"
)
for (kpi_name in names(field_kpi_names)) {
field_data <- kpi_results[[field_kpi_names[kpi_name]]]
if (!is.null(field_data) && nrow(field_data) > 0) {
# RDS file
rds_file <- file.path(field_tables_dir, paste0(kpi_name, "_field_results_", week_suffix, ".rds"))
saveRDS(field_data, rds_file)
exported_files[[paste0(kpi_name, "_field_rds")]] <- rds_file
# CSV file
csv_file <- file.path(field_tables_dir, paste0(kpi_name, "_field_results_", week_suffix, ".csv"))
readr::write_csv(field_data, csv_file)
exported_files[[paste0(kpi_name, "_field_csv")]] <- csv_file
}
}
# 4. Export CSV versions for manual inspection
csv_dir <- file.path(output_dir, "csv")
if (!dir.exists(csv_dir)) {
dir.create(csv_dir, recursive = TRUE)
}
# Export each summary table as CSV
for (table_name in names(summary_tables)) {
csv_file <- file.path(csv_dir, paste0(table_name, "_", week_suffix, ".csv"))
readr::write_csv(summary_tables[[table_name]], csv_file)
exported_files[[paste0(table_name, "_csv")]] <- csv_file
}
# Export field details as CSV
field_csv <- file.path(csv_dir, paste0("field_details_", week_suffix, ".csv"))
readr::write_csv(field_details, field_csv)
exported_files$field_details_csv <- field_csv
# 5. Create metadata file
metadata_file <- file.path(output_dir, paste0(project_name, "_kpi_metadata_", week_suffix, ".txt"))
metadata_text <- paste0(
"SmartCane KPI Export Metadata\n",
"=============================\n",
"Project: ", project_name, "\n",
"Report Date: ", kpi_results$metadata$report_date, "\n",
"Current Week: ", kpi_results$metadata$current_week, "\n",
"Previous Week: ", kpi_results$metadata$previous_week, "\n",
"Year: ", kpi_results$metadata$year, "\n",
"Total Fields: ", kpi_results$metadata$total_fields, "\n",
"Calculation Time: ", kpi_results$metadata$calculation_time, "\n\n",
"Exported Files:\n",
"- Summary Tables: ", basename(summary_file), "\n",
"- Field Details: ", basename(detail_file), "\n",
"- Raw KPI Data: ", basename(raw_file), "\n",
"- Field-Level Tables: field_level/ directory\n",
"- CSV Directory: csv/\n\n",
"KPI Summary:\n",
"- Field Uniformity: ", nrow(summary_tables$field_uniformity_summary), " categories\n",
"- Area Change: ", nrow(summary_tables$area_change_summary), " change types\n",
"- TCH Forecasted: ", nrow(summary_tables$tch_forecasted_summary), " field groups\n",
"- Growth Decline: ", nrow(summary_tables$growth_decline_summary), " risk levels\n",
"- Weed Presence: ", nrow(summary_tables$weed_presence_summary), " risk levels\n",
"- Gap Filling: ", nrow(summary_tables$gap_filling_summary), " gap levels\n"
)
writeLines(metadata_text, metadata_file)
exported_files$metadata <- metadata_file
safe_log(paste("KPI data exported to", output_dir))
safe_log(paste("Total files exported:", length(exported_files)))
return(exported_files)
}
# 4. Main KPI Calculation Function
# -------------------------------
#' Calculate all KPIs for a given date
#' @param report_date Date to calculate KPIs for (default: today)
#' @param output_dir Directory to save KPI results
#' @param field_boundaries_sf Field boundaries (sf or SpatVector)
#' @param harvesting_data Harvesting data with tonnage_ha
#' @param cumulative_CI_vals_dir Directory with cumulative CI data
#' @param weekly_CI_mosaic Directory with weekly CI mosaics
#' @param reports_dir Directory for output reports
#' @param project_dir Project directory name
#' @return List containing all KPI results
calculate_all_kpis <- function(report_date = Sys.Date(),
output_dir = NULL,
field_boundaries_sf,
harvesting_data,
cumulative_CI_vals_dir,
weekly_CI_mosaic,
reports_dir,
project_dir) {
safe_log("=== STARTING KPI CALCULATION ===")
safe_log(paste("Report date:", report_date))
weeks <- calculate_week_numbers(report_date)
safe_log(paste("Current week:", weeks$current_week, "Previous week:", weeks$previous_week))
# Load weekly mosaics
current_ci <- load_weekly_ci_mosaic(weeks$current_week, weeks$current_iso_year, weekly_CI_mosaic)
previous_ci <- load_weekly_ci_mosaic(weeks$previous_week, weeks$previous_iso_year, weekly_CI_mosaic)
if (is.null(current_ci)) {
stop("Current week CI mosaic is required but not found")
}
if (is.null(field_boundaries_sf)) {
stop("Field boundaries not loaded. Check parameters_project.R initialization.")
}
# Calculate all KPIs
kpi_results <- list()
# 1. Field Uniformity Summary
uniformity_result <- calculate_field_uniformity_kpi(current_ci, field_boundaries_sf)
kpi_results$field_uniformity <- uniformity_result$field_results
kpi_results$field_uniformity_summary <- uniformity_result$summary
# 2. Farm-wide Area Change Summary
area_change_result <- calculate_area_change_kpi(current_ci, previous_ci, field_boundaries_sf)
kpi_results$area_change <- area_change_result$summary
kpi_results$area_change_field_results <- area_change_result$field_results
# 3. TCH Forecasted
tch_result <- calculate_tch_forecasted_kpi(field_boundaries_sf, harvesting_data, cumulative_CI_vals_dir)
kpi_results$tch_forecasted <- tch_result$summary
kpi_results$tch_forecasted_field_results <- tch_result$field_results
# 4. Growth Decline Index
growth_decline_result <- calculate_growth_decline_kpi(current_ci, previous_ci, field_boundaries_sf)
kpi_results$growth_decline <- growth_decline_result$summary
kpi_results$growth_decline_field_results <- growth_decline_result$field_results
# 5. Weed Presence Score (with field age filtering)
weed_presence_result <- calculate_weed_presence_kpi(current_ci, previous_ci, field_boundaries_sf,
harvesting_data = harvesting_data,
cumulative_CI_vals_dir = cumulative_CI_vals_dir)
kpi_results$weed_presence <- weed_presence_result$summary
kpi_results$weed_presence_field_results <- weed_presence_result$field_results
# 6. Gap Filling Score
gap_filling_result <- calculate_gap_filling_kpi(current_ci, field_boundaries_sf)
kpi_results$gap_filling <- gap_filling_result$summary
kpi_results$gap_filling_field_results <- gap_filling_result$field_results
# Add metadata and field boundaries for later use
kpi_results$metadata <- list(
report_date = report_date,
current_week = weeks$current_week,
previous_week = weeks$previous_week,
year = weeks$current_iso_year,
calculation_time = Sys.time(),
total_fields = nrow(field_boundaries_sf)
)
kpi_results$field_boundaries_sf <- field_boundaries_sf
# Save results if output directory specified
if (!is.null(output_dir)) {
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
# Export KPI data in multiple formats for R Markdown integration
exported_files <- export_kpi_data(kpi_results, output_dir, project_dir)
kpi_results$exported_files <- exported_files
# Also save raw results
week_suffix <- paste0("week", weeks$current_week)
output_file <- file.path(output_dir, paste0("kpi_results_", week_suffix, ".rds"))
saveRDS(kpi_results, output_file)
safe_log(paste("KPI results saved to:", output_file))
}
safe_log("=== KPI CALCULATION COMPLETED ===")
return(kpi_results)
}