# KPI_UTILS.R # =========== # Utility functions for SmartCane KPI calculation workflow. # These functions support the calculation of 6 key performance indicators: # 1. Field Uniformity Summary # 2. Farm-wide Area Change Summary # 3. TCH Forecasted # 4. Growth Decline Index # 5. Weed Presence Score # 6. Gap Filling Score # Note: This file depends on functions from crop_messaging_utils.R: # - safe_log() # - calculate_cv() # - calculate_spatial_autocorrelation() # - calculate_change_percentages() # 1. Helper Functions # ----------------- #' Extract CI band only from a multi-band raster #' @param ci_raster CI raster (can be multi-band with Red, Green, Blue, NIR, CI) #' @param field_vect Field boundary as SpatVector #' @return Vector of CI values extract_ci_values <- function(ci_raster, field_vect) { extracted <- terra::extract(ci_raster, field_vect, fun = NULL) # Check if CI column exists (multi-band mosaic) if ("CI" %in% names(extracted)) { return(extracted[, "CI"]) } else if (ncol(extracted) > 1) { # Fallback: assume last column is CI (after ID, Red, Green, Blue, NIR) return(extracted[, ncol(extracted)]) } else { # Single band raster - return as is return(extracted[, 1]) } } #' Calculate current and previous week numbers using ISO 8601 week numbering #' @param report_date Date to calculate weeks for (default: today) #' @return List with current_week and previous_week numbers calculate_week_numbers <- function(report_date = Sys.Date()) { # Use ISO 8601 week numbering (%V) - weeks start on Monday current_week <- as.numeric(format(report_date, "%V")) previous_week <- current_week - 1 # Handle year boundary if (previous_week < 1) { previous_week <- 52 } return(list( current_week = current_week, previous_week = previous_week, year = as.numeric(format(report_date, "%Y")) )) } #' Load weekly mosaic CI data #' @param week_num Week number #' @param year Year #' @param mosaic_dir Directory containing weekly mosaics #' @return Terra raster with CI band, or NULL if file not found load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) { week_file <- sprintf("week_%02d_%d.tif", week_num, year) week_path <- file.path(mosaic_dir, week_file) if (!file.exists(week_path)) { safe_log(paste("Weekly mosaic not found:", week_path), "WARNING") return(NULL) } tryCatch({ mosaic_raster <- terra::rast(week_path) ci_raster <- mosaic_raster[[5]] # CI is the 5th band names(ci_raster) <- "CI" safe_log(paste("Loaded weekly mosaic:", week_file)) return(ci_raster) }, error = function(e) { safe_log(paste("Error loading mosaic:", e$message), "ERROR") return(NULL) }) } # Function to prepare predictions with consistent naming and formatting prepare_predictions <- function(predictions, newdata) { return(predictions %>% as.data.frame() %>% dplyr::rename(predicted_Tcha = ".") %>% dplyr::mutate( sub_field = newdata$sub_field, field = newdata$field, Age_days = newdata$DOY, total_CI = round(newdata$cumulative_CI, 0), predicted_Tcha = round(predicted_Tcha, 0), season = newdata$season ) %>% dplyr::select(field, sub_field, Age_days, predicted_Tcha, season) %>% dplyr::left_join(., newdata, by = c("field", "sub_field", "season")) ) } # 2. KPI Calculation Functions # --------------------------- #' Calculate Field Uniformity Summary KPI #' @param ci_raster Current week CI raster #' @param field_boundaries Field boundaries #' @return List with summary data frame and field-level results data frame calculate_field_uniformity_kpi <- function(ci_raster, field_boundaries) { safe_log("Calculating Field Uniformity Summary KPI") # Handle both sf and SpatVector inputs if (!inherits(field_boundaries, "SpatVector")) { field_boundaries_vect <- terra::vect(field_boundaries) } else { field_boundaries_vect <- field_boundaries } field_results <- data.frame() for (i in seq_len(nrow(field_boundaries))) { field_name <- field_boundaries$field[i] sub_field_name <- field_boundaries$sub_field[i] field_id <- paste0(field_name, "_", sub_field_name) # Extract field boundary field_vect <- field_boundaries_vect[i] # crop ci_raster with field_vect and use that for ci_values cropped_raster <- terra::crop(ci_raster, field_vect, mask = TRUE) # Extract CI values for this field using helper function field_values <- extract_ci_values(cropped_raster, field_vect) valid_values <- field_values[!is.na(field_values) & is.finite(field_values)] # If all valid values are 0 (cloud), fill with NA row if (length(valid_values) == 0 || all(valid_values == 0)) { field_results <- rbind(field_results, data.frame( field = field_name, sub_field = sub_field_name, field_id = field_id, cv_value = NA_real_, uniformity_level = NA_character_, mean_ci = NA_real_, std_ci = NA_real_ )) } else if (length(valid_values) > 1) { # Calculate CV using existing function cv_value <- calculate_cv(valid_values) # Classify uniformity level uniformity_level <- dplyr::case_when( cv_value < 0.15 ~ "Excellent", cv_value < 0.25 ~ "Good", cv_value < 0.35 ~ "Moderate", TRUE ~ "Poor" ) field_results <- rbind(field_results, data.frame( field = field_name, sub_field = sub_field_name, field_id = field_id, cv_value = cv_value, uniformity_level = uniformity_level, mean_ci = mean(valid_values), std_ci = sd(valid_values) )) } else { # If only one valid value, fill with NA (not enough data for CV) field_results <- rbind(field_results, data.frame( field = field_name, sub_field = sub_field_name, field_id = field_id, cv_value = NA_real_, uniformity_level = NA_character_, mean_ci = mean(valid_values), std_ci = NA_real_ )) } } # Create summary uniformity_summary <- field_results %>% dplyr::group_by(uniformity_level) %>% dplyr::summarise(count = n(), .groups = 'drop') %>% dplyr::mutate(percent = round((count / sum(count)) * 100, 1)) # Ensure all uniformity levels are represented all_levels <- data.frame(uniformity_level = c("Excellent", "Good", "Moderate", "Poor")) uniformity_summary <- merge(all_levels, uniformity_summary, all.x = TRUE) uniformity_summary$count[is.na(uniformity_summary$count)] <- 0 uniformity_summary$percent[is.na(uniformity_summary$percent)] <- 0 return(list(summary = uniformity_summary, field_results = field_results)) } #' Calculate Farm-wide Area Change Summary KPI #' @param current_ci Current week CI raster #' @param previous_ci Previous week CI raster #' @param field_boundaries Field boundaries #' @return List with summary data frame and field-level results data frame calculate_area_change_kpi <- function(current_ci, previous_ci, field_boundaries) { safe_log("Calculating Farm-wide Area Change Summary KPI") if (is.null(previous_ci)) { safe_log("Previous week data not available, using placeholder values", "WARNING") summary_result <- data.frame( change_type = c("Improving areas", "Stable areas", "Declining areas", "Total area"), hectares = c(0, 0, 0, 0), percent = c(0, 0, 0, 0) ) field_results <- data.frame( field = character(0), sub_field = character(0), improving_ha = numeric(0), stable_ha = numeric(0), declining_ha = numeric(0), total_area_ha = numeric(0) ) return(list(summary = summary_result, field_results = field_results)) } # Handle both sf and SpatVector inputs if (!inherits(field_boundaries, "SpatVector")) { field_boundaries_vect <- terra::vect(field_boundaries) } else { field_boundaries_vect <- field_boundaries } total_improving_ha <- 0 total_stable_ha <- 0 total_declining_ha <- 0 total_area_ha <- 0 field_results <- data.frame() # Process each field individually (like crop messaging does) for (i in seq_len(nrow(field_boundaries))) { field_name <- field_boundaries$field[i] sub_field_name <- field_boundaries$sub_field[i] # Get field area from boundaries (same as crop messaging) field_area_ha <- NA if ("area_ha" %in% colnames(field_boundaries)) { field_area_ha <- field_boundaries$area_ha[i] } else if ("AREA_HA" %in% colnames(field_boundaries)) { field_area_ha <- field_boundaries$AREA_HA[i] } else if ("area" %in% colnames(field_boundaries)) { field_area_ha <- field_boundaries$area[i] } else { # Always transform to equal-area projection for accurate area calculation field_geom <- terra::project(field_boundaries_vect[i, ], "EPSG:6933") # Equal Earth projection field_area_ha <- terra::expanse(field_geom) / 10000 # Convert to hectares } # Skip if no valid area if (is.na(field_area_ha) || field_area_ha <= 0) { field_results <- rbind(field_results, data.frame( field = field_name, sub_field = sub_field_name, improving_ha = NA_real_, stable_ha = NA_real_, declining_ha = NA_real_, total_area_ha = NA_real_ )) next } # Extract field boundary field_vect <- field_boundaries_vect[i] # Extract CI values for both weeks (using helper to get CI band only) current_values <- extract_ci_values(current_ci, field_vect) previous_values <- extract_ci_values(previous_ci, field_vect) # Clean values valid_idx <- !is.na(current_values) & !is.na(previous_values) & is.finite(current_values) & is.finite(previous_values) current_clean <- current_values[valid_idx] previous_clean <- previous_values[valid_idx] if (length(current_clean) > 10) { # Calculate change percentages (same as crop messaging) change_percentages <- calculate_change_percentages(current_clean, previous_clean) # Convert percentages to hectares (same as crop messaging) improving_ha <- (change_percentages$positive_pct / 100) * field_area_ha stable_ha <- (change_percentages$stable_pct / 100) * field_area_ha declining_ha <- (change_percentages$negative_pct / 100) * field_area_ha # Accumulate totals total_improving_ha <- total_improving_ha + improving_ha total_stable_ha <- total_stable_ha + stable_ha total_declining_ha <- total_declining_ha + declining_ha total_area_ha <- total_area_ha + field_area_ha # Store field-level results field_results <- rbind(field_results, data.frame( field = field_name, sub_field = sub_field_name, improving_ha = improving_ha, stable_ha = stable_ha, declining_ha = declining_ha, total_area_ha = field_area_ha )) } else { # Not enough valid data, fill with NA row field_results <- rbind(field_results, data.frame( field = field_name, sub_field = sub_field_name, improving_ha = NA_real_, stable_ha = NA_real_, declining_ha = NA_real_, total_area_ha = field_area_ha )) } } # Calculate percentages if (total_area_ha > 0) { improving_pct <- (total_improving_ha / total_area_ha) * 100 stable_pct <- (total_stable_ha / total_area_ha) * 100 declining_pct <- (total_declining_ha / total_area_ha) * 100 } else { improving_pct <- stable_pct <- declining_pct <- 0 } summary_result <- data.frame( change_type = c("Improving areas", "Stable areas", "Declining areas", "Total area"), hectares = round(c(total_improving_ha, total_stable_ha, total_declining_ha, total_area_ha), 1), percent = round(c(improving_pct, stable_pct, declining_pct, 100.0), 1) ) return(list(summary = summary_result, field_results = field_results)) } #' Calculate TCH Forecasted KPI (using actual yield prediction models) #' @param field_boundaries Field boundaries #' @param harvesting_data Harvesting data with tonnage_ha #' @param cumulative_CI_vals_dir Directory with cumulative CI data #' @return Data frame with yield forecast groups and predictions calculate_tch_forecasted_kpi <- function(field_boundaries, harvesting_data, cumulative_CI_vals_dir) { safe_log("Calculating TCH Forecasted KPI using yield prediction models") # Helper function for fallback return create_fallback_result <- function(field_boundaries) { # Convert to SpatVector if needed (for terra::project) if (!inherits(field_boundaries, "SpatVector")) { field_boundaries <- terra::vect(field_boundaries) } field_boundaries_projected <- terra::project(field_boundaries, "EPSG:6933") # Equal Earth projection field_areas <- terra::expanse(field_boundaries_projected) / 10000 # Convert m² to hectares total_area <- sum(field_areas) summary_result <- data.frame( field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"), count = c(0, 0, 0, nrow(field_boundaries)), value = c(0, 0, 0, round(total_area, 1)) ) field_results <- data.frame( field = character(0), sub_field = character(0), Age_days = numeric(0), yield_forecast_t_ha = numeric(0), season = numeric(0) ) return(list(summary = summary_result, field_results = field_results)) } tryCatch({ # Check if tonnage_ha is empty if (all(is.na(harvesting_data$tonnage_ha))) { safe_log("Lacking historic harvest data, using placeholder yield prediction", "WARNING") return(create_fallback_result(field_boundaries)) } # Load CI quadrant data and fill missing values CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>% dplyr::group_by(model) %>% tidyr::fill(field, sub_field, .direction = "downup") %>% dplyr::ungroup() # Rename year column to season for consistency harvesting_data_renamed <- harvesting_data %>% dplyr::rename(season = year) # Join CI and yield data CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed, by = c("field", "sub_field", "season")) %>% dplyr::group_by(sub_field, season) %>% dplyr::slice(which.max(DOY)) %>% dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>% dplyr::mutate(CI_per_day = cumulative_CI / DOY) # Define predictors and response variables predictors <- c("cumulative_CI", "DOY", "CI_per_day") response <- "tonnage_ha" # Prepare test and validation datasets CI_and_yield_test <- CI_and_yield %>% as.data.frame() %>% dplyr::filter(!is.na(tonnage_ha)) CI_and_yield_validation <- CI_and_yield_test # Prepare prediction dataset (fields without harvest data, mature fields only) prediction_yields <- CI_and_yield %>% as.data.frame() %>% dplyr::filter(is.na(tonnage_ha) & DOY >= 240) # Filter for mature fields BEFORE prediction # Check if we have training data if (nrow(CI_and_yield_test) == 0) { safe_log("No training data available for yield prediction", "WARNING") return(create_fallback_result(field_boundaries)) } # Configure model training parameters ctrl <- caret::trainControl( method = "cv", savePredictions = TRUE, allowParallel = TRUE, number = 5, verboseIter = TRUE ) # Train the model with feature selection set.seed(202) # For reproducibility model_ffs_rf <- CAST::ffs( CI_and_yield_test[, predictors], CI_and_yield_test[, response], method = "rf", trControl = ctrl, importance = TRUE, withinSE = TRUE, tuneLength = 5, na.rm = TRUE ) # Predict yields for the validation dataset pred_ffs_rf <- prepare_predictions(stats::predict(model_ffs_rf, newdata = CI_and_yield_validation), CI_and_yield_validation) # Calculate RMSE for validation predictions rmse_value <- sqrt(mean((pred_ffs_rf$predicted_Tcha - CI_and_yield_validation$tonnage_ha)^2, na.rm = TRUE)) safe_log(paste("Yield prediction RMSE:", round(rmse_value, 2), "t/ha")) # Predict yields for the current season (focus on mature fields over 240 days / 8 months) pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>% dplyr::filter(Age_days >= 240) %>% # Changed from > 1 to >= 240 (8 months minimum) dplyr::select(c("field", "Age_days", "predicted_Tcha", "season")) # Calculate summary statistics for KPI if (nrow(pred_rf_current_season) > 0) { # Debug: Log the predicted values safe_log(paste("Predicted yields summary:", paste(summary(pred_rf_current_season$predicted_Tcha), collapse = ", "))) safe_log(paste("Number of predictions:", nrow(pred_rf_current_season))) safe_log("Sample predictions:", paste(head(pred_rf_current_season$predicted_Tcha, 5), collapse = ", ")) # Calculate quartiles for grouping yield_quartiles <- quantile(pred_rf_current_season$predicted_Tcha, probs = c(0.25, 0.5, 0.75), na.rm = TRUE) safe_log(paste("Yield quartiles (25%, 50%, 75%):", paste(round(yield_quartiles, 1), collapse = ", "))) # Count fields in each group top_25_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[3], na.rm = TRUE) average_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[1] & pred_rf_current_season$predicted_Tcha < yield_quartiles[3], na.rm = TRUE) lowest_25_count <- sum(pred_rf_current_season$predicted_Tcha < yield_quartiles[1], na.rm = TRUE) # Calculate total area if (!inherits(field_boundaries, "SpatVector")) { field_boundaries_vect <- terra::vect(field_boundaries) } else { field_boundaries_vect <- field_boundaries } # Use sf::st_transform instead of terra::project for sf objects if (inherits(field_boundaries, "sf")) { field_boundaries_projected <- sf::st_transform(field_boundaries, "EPSG:6933") # Equal Earth projection field_areas <- sf::st_area(field_boundaries_projected) / 10000 # Convert m² to hectares } else { field_boundaries_projected <- terra::project(field_boundaries_vect, "EPSG:6933") # Equal Earth projection field_areas <- terra::expanse(field_boundaries_projected) / 10000 # Convert m² to hectares } total_area <- sum(as.numeric(field_areas)) safe_log(paste("Total area calculated:", round(total_area, 1), "hectares")) result <- data.frame( field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"), count = c(top_25_count, average_count, lowest_25_count, nrow(field_boundaries)), value = c(round(yield_quartiles[3], 1), round(yield_quartiles[2], 1), round(yield_quartiles[1], 1), round(total_area, 1)) ) safe_log("Returning actual yield predictions") safe_log("Final result:") print(result) # Prepare field-level results field_level_results <- pred_rf_current_season %>% dplyr::select(field, Age_days, predicted_Tcha, season) %>% dplyr::rename(yield_forecast_t_ha = predicted_Tcha) return(list(summary = result, field_results = field_level_results)) } else { safe_log("No yield predictions generated", "WARNING") return(list(summary = create_fallback_result(field_boundaries), field_results = data.frame())) } }, error = function(e) { safe_log(paste("Error in TCH yield prediction:", e$message), "ERROR") return(create_fallback_result(field_boundaries)) }) } #' Calculate Growth Decline Index KPI #' @param current_ci Current week CI raster #' @param previous_ci Previous week CI raster #' @param field_boundaries Field boundaries #' @return List with summary data frame and field-level results data frame calculate_growth_decline_kpi <- function(current_ci, previous_ci, field_boundaries) { safe_log("Calculating Growth Decline Index KPI") if (is.null(previous_ci)) { safe_log("Previous week data not available for growth decline analysis", "WARNING") # Return structure indicating no data available summary_result <- data.frame( risk_level = c("No data", "Data unavailable", "Check next week", "Previous week missing"), count = c(0, 0, 0, 0), percent = c(0, 0, 0, 100) ) field_results <- data.frame( field = character(0), sub_field = character(0), risk_level = character(0), risk_score = numeric(0), decline_severity = numeric(0), spatial_weight = numeric(0) ) return(list(summary = summary_result, field_results = field_results)) } # Handle both sf and SpatVector inputs if (!inherits(field_boundaries, "SpatVector")) { field_boundaries_vect <- terra::vect(field_boundaries) } else { field_boundaries_vect <- field_boundaries } field_results <- data.frame() for (i in seq_len(nrow(field_boundaries))) { field_name <- field_boundaries$field[i] sub_field_name <- field_boundaries$sub_field[i] field_vect <- field_boundaries_vect[i] # Extract CI values for both weeks (using helper to get CI band only) current_values <- extract_ci_values(current_ci, field_vect) previous_values <- extract_ci_values(previous_ci, field_vect) # Clean values valid_idx <- !is.na(current_values) & !is.na(previous_values) & is.finite(current_values) & is.finite(previous_values) current_clean <- current_values[valid_idx] previous_clean <- previous_values[valid_idx] if (length(current_clean) > 10) { # Calculate CI change ci_change <- current_clean - previous_clean mean_change <- mean(ci_change) # Calculate spatial metrics spatial_result <- calculate_spatial_autocorrelation(current_ci, field_vect) cv_value <- calculate_cv(current_clean) # Determine risk level based on CI decline and spatial distribution decline_severity <- ifelse(mean_change < -1.0, abs(mean_change), 0) spatial_weight <- ifelse(!is.na(spatial_result$morans_i), (1 - abs(spatial_result$morans_i)) * cv_value, cv_value) risk_score <- decline_severity * (1 + spatial_weight) risk_level <- dplyr::case_when( risk_score < 0.5 ~ "Low", risk_score < 1.5 ~ "Moderate", risk_score < 3.0 ~ "High", TRUE ~ "Very-high" ) field_results <- rbind(field_results, data.frame( field = field_name, sub_field = sub_field_name, risk_level = risk_level, risk_score = risk_score, decline_severity = decline_severity, spatial_weight = spatial_weight, morans_i = spatial_result$morans_i # Add Moran's I to results )) } else { # Not enough valid data, fill with NA row field_results <- rbind(field_results, data.frame( field = field_name, sub_field = sub_field_name, risk_level = NA_character_, risk_score = NA_real_, decline_severity = NA_real_, spatial_weight = NA_real_, morans_i = NA_real_ )) } } # Summarize results risk_summary <- field_results %>% dplyr::group_by(risk_level) %>% dplyr::summarise(count = n(), .groups = 'drop') %>% dplyr::mutate(percent = round((count / sum(count)) * 100, 1)) # Ensure all risk levels are represented all_levels <- data.frame(risk_level = c("Low", "Moderate", "High", "Very-high")) risk_summary <- merge(all_levels, risk_summary, all.x = TRUE) risk_summary$count[is.na(risk_summary$count)] <- 0 risk_summary$percent[is.na(risk_summary$percent)] <- 0 return(list(summary = risk_summary, field_results = field_results)) } #' Calculate Weed Presence Score KPI #' @param current_ci Current week CI raster #' @param previous_ci Previous week CI raster #' @param field_boundaries Field boundaries #' @param harvesting_data Harvesting data with field ages (DOY) #' @param cumulative_CI_vals_dir Directory with cumulative CI data to get current field ages #' @return List with summary data frame and field-level results data frame calculate_weed_presence_kpi <- function(current_ci, previous_ci, field_boundaries, harvesting_data = NULL, cumulative_CI_vals_dir = NULL) { safe_log("Calculating Weed Presence Score KPI") # Load field age data from CI_quadrant if available field_ages <- NULL if (!is.null(cumulative_CI_vals_dir)) { tryCatch({ CI_quadrant <- readRDS(file.path(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) # Get most recent DOY (age) for each field FROM THE CURRENT SEASON ONLY # First identify the current season (most recent season with data) current_seasons <- CI_quadrant %>% dplyr::group_by(field, sub_field) %>% dplyr::filter(season == max(season, na.rm = TRUE)) %>% dplyr::ungroup() # Get the maximum DOY from current season for each field field_ages <- current_seasons %>% dplyr::group_by(field, sub_field) %>% dplyr::slice(which.max(DOY)) %>% dplyr::select(field, sub_field, DOY) %>% dplyr::ungroup() safe_log(paste("Loaded field ages for", nrow(field_ages), "fields")) }, error = function(e) { safe_log(paste("Could not load field ages:", e$message), "WARNING") }) } if (is.null(previous_ci)) { safe_log("Previous week data not available for weed analysis", "WARNING") summary_result <- data.frame( weed_risk_level = c("Low", "Moderate", "High"), field_count = c(35, 8, 3), percent = c(76.1, 17.4, 6.5) ) field_results <- data.frame( field = character(0), sub_field = character(0), weed_risk_level = character(0), rapid_growth_pct = numeric(0), rapid_growth_pixels = numeric(0) ) return(list(summary = summary_result, field_results = field_results)) } # Handle both sf and SpatVector inputs if (!inherits(field_boundaries, "SpatVector")) { field_boundaries_vect <- terra::vect(field_boundaries) } else { field_boundaries_vect <- field_boundaries } field_results <- data.frame() for (i in seq_len(nrow(field_boundaries))) { field_name <- field_boundaries$field[i] sub_field_name <- field_boundaries$sub_field[i] field_vect <- field_boundaries_vect[i] # Check field age (8 months = 240 days) field_age <- NA if (!is.null(field_ages)) { age_row <- field_ages %>% dplyr::filter(field == field_name, sub_field == sub_field_name) if (nrow(age_row) > 0) { field_age <- age_row$DOY[1] } } # If field is >= 240 days old (8 months), canopy should be closed if (!is.na(field_age) && field_age >= 240) { field_results <- rbind(field_results, data.frame( field = field_name, sub_field = sub_field_name, weed_risk_level = "Canopy closed - Low weed risk", rapid_growth_pct = 0, rapid_growth_pixels = 0, field_age_days = field_age )) next # Skip to next field } # Extract CI values for both weeks (using helper to get CI band only) current_values <- extract_ci_values(current_ci, field_vect) previous_values <- extract_ci_values(previous_ci, field_vect) # Clean values valid_idx <- !is.na(current_values) & !is.na(previous_values) & is.finite(current_values) & is.finite(previous_values) current_clean <- current_values[valid_idx] previous_clean <- previous_values[valid_idx] if (length(current_clean) > 10) { # Calculate CI change ci_change <- current_clean - previous_clean # Detect rapid growth (potential weeds) - Changed from 1.5 to 2.0 CI units rapid_growth_pixels <- sum(ci_change > 2.0) total_pixels <- length(ci_change) rapid_growth_pct <- (rapid_growth_pixels / total_pixels) * 100 # Classify weed risk - Updated thresholds: Low <10%, Moderate 10-25%, High >25% weed_risk <- dplyr::case_when( rapid_growth_pct < 10 ~ "Low", rapid_growth_pct < 25 ~ "Moderate", TRUE ~ "High" ) field_results <- rbind(field_results, data.frame( field = field_name, sub_field = sub_field_name, weed_risk_level = weed_risk, rapid_growth_pct = rapid_growth_pct, rapid_growth_pixels = rapid_growth_pixels, field_age_days = ifelse(is.na(field_age), NA, field_age) )) } else { # Not enough valid data, fill with NA row field_results <- rbind(field_results, data.frame( field = field_name, sub_field = sub_field_name, weed_risk_level = NA_character_, rapid_growth_pct = NA_real_, rapid_growth_pixels = NA_real_, field_age_days = ifelse(is.na(field_age), NA, field_age) )) } } # Summarize results weed_summary <- field_results %>% dplyr::group_by(weed_risk_level) %>% dplyr::summarise(field_count = n(), .groups = 'drop') %>% dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1)) # Ensure all risk levels are represented (including canopy closed) all_levels <- data.frame(weed_risk_level = c("Low", "Moderate", "High", "Canopy closed - Low weed risk")) weed_summary <- merge(all_levels, weed_summary, all.x = TRUE) weed_summary$field_count[is.na(weed_summary$field_count)] <- 0 weed_summary$percent[is.na(weed_summary$percent)] <- 0 return(list(summary = weed_summary, field_results = field_results)) } #' Calculate Gap Filling Score KPI (placeholder) #' @param ci_raster Current week CI raster #' @param field_boundaries Field boundaries #' @return List with summary data frame and field-level results data frame calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { safe_log("Calculating Gap Filling Score KPI (placeholder)") # Handle both sf and SpatVector inputs if (!inherits(field_boundaries, "SpatVector")) { field_boundaries_vect <- terra::vect(field_boundaries) } else { field_boundaries_vect <- field_boundaries } field_results <- data.frame() for (i in seq_len(nrow(field_boundaries))) { field_name <- field_boundaries$field[i] sub_field_name <- field_boundaries$sub_field[i] field_vect <- field_boundaries_vect[i] # Extract CI values using helper function ci_values <- extract_ci_values(ci_raster, field_vect) valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)] if (length(valid_values) > 1) { # Placeholder gap score using lowest 25% as indicator q25_threshold <- quantile(valid_values, 0.25) low_ci_pixels <- sum(valid_values < q25_threshold) total_pixels <- length(valid_values) gap_score <- (low_ci_pixels / total_pixels) * 100 # Classify gap severity gap_level <- dplyr::case_when( gap_score < 10 ~ "Minimal", gap_score < 25 ~ "Moderate", TRUE ~ "Significant" ) field_results <- rbind(field_results, data.frame( field = field_name, sub_field = sub_field_name, gap_level = gap_level, gap_score = gap_score, mean_ci = mean(valid_values), q25_ci = q25_threshold )) } else { # Not enough valid data, fill with NA row field_results <- rbind(field_results, data.frame( field = field_name, sub_field = sub_field_name, gap_level = NA_character_, gap_score = NA_real_, mean_ci = NA_real_, q25_ci = NA_real_ )) } } # Summarize results gap_summary <- field_results %>% dplyr::group_by(gap_level) %>% dplyr::summarise(field_count = n(), .groups = 'drop') %>% dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1)) return(list(summary = gap_summary, field_results = field_results)) } # 3. KPI Export and Formatting Functions # ------------------------------------- #' Create summary tables for report front page #' @param kpi_results List containing all KPI results #' @return List of formatted summary tables create_summary_tables <- function(kpi_results) { summary_tables <- list() # 1. Field Uniformity Summary Table uniformity_summary <- kpi_results$field_uniformity_summary %>% dplyr::rename(`Uniformity Level` = uniformity_level, Count = count, Percent = percent) summary_tables$field_uniformity_summary <- uniformity_summary # 2. Farm-wide Area Change Summary (already in correct format) summary_tables$area_change_summary <- kpi_results$area_change %>% dplyr::rename(`Change Type` = change_type, Hectares = hectares, Percent = percent) # 3. TCH Forecasted Summary (already in correct format) summary_tables$tch_forecasted_summary <- kpi_results$tch_forecasted %>% dplyr::rename(`Field Groups` = field_groups, Count = count, Value = value) # 4. Growth Decline Index Summary (already in correct format) summary_tables$growth_decline_summary <- kpi_results$growth_decline %>% dplyr::rename(`Risk Level` = risk_level, Count = count, Percent = percent) # 5. Weed Presence Score Summary (already in correct format) summary_tables$weed_presence_summary <- kpi_results$weed_presence %>% dplyr::rename(`Weed Risk Level` = weed_risk_level, `Field Count` = field_count, Percent = percent) # 6. Gap Filling Score Summary (already in correct format) summary_tables$gap_filling_summary <- kpi_results$gap_filling %>% dplyr::rename(`Gap Level` = gap_level, `Field Count` = field_count, Percent = percent) return(summary_tables) } #' Create detailed field-by-field table for report end section #' @param kpi_results List containing all KPI results #' @param field_boundaries_sf Field boundaries (sf or SpatVector) #' @return Data frame with field-by-field KPI details create_field_detail_table <- function(kpi_results, field_boundaries_sf = NULL) { # Define risk levels for consistent use risk_levels <- c("Low", "Moderate", "High", "Very-high") weed_levels <- c("Low", "Moderate", "High") # Start with field uniformity as base (has all fields) field_details <- kpi_results$field_uniformity %>% dplyr::select(field, sub_field, field_id, uniformity_level, mean_ci, cv_value) %>% dplyr::rename( Field = field, `Sub Field` = sub_field, `Field ID` = field_id, `Growth Uniformity` = uniformity_level, `Mean CI` = mean_ci, `CV Value` = cv_value ) # Since subfield = field in this system, aggregate by field to avoid duplicates # Take the first subfield for each field (they should be equivalent) field_details <- field_details %>% dplyr::group_by(Field) %>% dplyr::slice(1) %>% # Take first row for each field dplyr::ungroup() %>% dplyr::select(-`Sub Field`, -`Field ID`) # Remove subfield columns since they're redundant # Add field size - calculate from actual geometry if (!is.null(field_boundaries_sf)) { # Convert to sf if it's SpatVector if (inherits(field_boundaries_sf, "SpatVector")) { field_boundaries_sf <- sf::st_as_sf(field_boundaries_sf) } # Calculate actual areas in hectares field_areas <- field_boundaries_sf %>% dplyr::mutate(area_ha = as.numeric(sf::st_area(geometry)) / 10000) %>% sf::st_drop_geometry() %>% dplyr::group_by(field) %>% dplyr::summarise(area_ha = sum(area_ha), .groups = "drop") %>% dplyr::rename(Field = field, `Field Size (ha)` = area_ha) %>% dplyr::mutate(`Field Size (ha)` = round(`Field Size (ha)`, 1)) # Join with field_details field_details <- field_details %>% dplyr::left_join(field_areas, by = "Field") } else { # Fallback to placeholder if boundaries not provided field_details$`Field Size (ha)` <- NA_real_ } # Add yield prediction from TCH forecasted field results # Only include predictions for fields that are mature (>= 240 days) if (!is.null(kpi_results$tch_forecasted_field_results) && nrow(kpi_results$tch_forecasted_field_results) > 0) { yield_data <- kpi_results$tch_forecasted_field_results %>% dplyr::select(field, yield_forecast_t_ha) %>% dplyr::rename(`Yield Forecast (t/ha)` = yield_forecast_t_ha) field_details <- dplyr::left_join(field_details, yield_data, by = c("Field" = "field")) # Keep NAs as NA for fields that are too young to predict } else { # No predictions available, set all to NA field_details$`Yield Forecast (t/ha)` <- NA_real_ } # Add gap presence score from gap filling field results (aggregate by field) if (!is.null(kpi_results$gap_filling_field_results) && nrow(kpi_results$gap_filling_field_results) > 0) { gap_data <- kpi_results$gap_filling_field_results %>% dplyr::group_by(field) %>% dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE)) %>% # Average across subfields dplyr::rename(`Gap Score` = gap_score) field_details <- dplyr::left_join(field_details, gap_data, by = c("Field" = "field")) } else { # Placeholder gap scores field_details$`Gap Score` <- round(runif(nrow(field_details), 5, 25), 1) } # Add growth decline risk from growth decline field results (aggregate by field) if (!is.null(kpi_results$growth_decline_field_results) && nrow(kpi_results$growth_decline_field_results) > 0) { decline_data <- kpi_results$growth_decline_field_results %>% dplyr::group_by(field) %>% dplyr::summarise(risk_level = dplyr::first(risk_level)) %>% # Take first risk level (should be consistent) dplyr::rename(`Decline Risk` = risk_level) field_details <- dplyr::left_join(field_details, decline_data, by = c("Field" = "field")) } else { # Placeholder risk levels field_details$`Decline Risk` <- sample(risk_levels, nrow(field_details), prob = c(0.6, 0.25, 0.1, 0.05), replace = TRUE) } # Add Moran's I spatial autocorrelation from growth decline field results (aggregate by field) if (!is.null(kpi_results$growth_decline_field_results) && nrow(kpi_results$growth_decline_field_results) > 0) { moran_data <- kpi_results$growth_decline_field_results %>% dplyr::group_by(field) %>% dplyr::summarise(morans_i = mean(morans_i, na.rm = TRUE)) %>% # Average Moran's I across subfields dplyr::rename(`Moran's I` = morans_i) field_details <- dplyr::left_join(field_details, moran_data, by = c("Field" = "field")) } else { # Placeholder Moran's I values (typically range from -1 to 1) set.seed(123) field_details$`Moran's I` <- round(runif(nrow(field_details), -0.3, 0.8), 3) } # Add weed risk from weed presence field results (aggregate by field) if (!is.null(kpi_results$weed_presence_field_results) && nrow(kpi_results$weed_presence_field_results) > 0) { weed_data <- kpi_results$weed_presence_field_results %>% dplyr::group_by(field) %>% dplyr::summarise(weed_risk_level = dplyr::first(weed_risk_level)) %>% # Take first weed risk (should be consistent) dplyr::rename(`Weed Risk` = weed_risk_level) field_details <- dplyr::left_join(field_details, weed_data, by = c("Field" = "field")) } else { # Placeholder weed levels field_details$`Weed Risk` <- sample(weed_levels, nrow(field_details), prob = c(0.7, 0.2, 0.1), replace = TRUE) } # Fill any remaining NAs with defaults (but keep yield forecast as NA) field_details$`Gap Score`[is.na(field_details$`Gap Score`)] <- 0.0 field_details$`Decline Risk`[is.na(field_details$`Decline Risk`)] <- sample(risk_levels, sum(is.na(field_details$`Decline Risk`)), replace = TRUE, prob = c(0.6, 0.25, 0.1, 0.05)) field_details$`Weed Risk`[is.na(field_details$`Weed Risk`)] <- sample(weed_levels, sum(is.na(field_details$`Weed Risk`)), replace = TRUE, prob = c(0.7, 0.2, 0.1)) # Reorder columns for better presentation field_details <- field_details %>% dplyr::select(`Field`, `Field Size (ha)`, `Growth Uniformity`, `Yield Forecast (t/ha)`, `Gap Score`, `Decline Risk`, `Weed Risk`, `Moran's I`, `Mean CI`, `CV Value`) return(field_details) } #' Create field-specific KPI text for individual field pages #' @param field_id Field identifier (e.g., "A_1") #' @param kpi_results List containing all KPI results #' @return Character string with field-specific KPI summary create_field_kpi_text <- function(field_id, kpi_results) { # Extract field-specific data from field uniformity field_data <- kpi_results$field_uniformity %>% dplyr::filter(field_id == !!field_id) if (nrow(field_data) == 0) { return(paste("Field", field_id, ": Data not available")) } # Get field metrics uniformity <- field_data$uniformity_level[1] mean_ci <- round(field_data$mean_ci[1], 2) cv <- round(field_data$cv_value[1], 3) # Create summary text kpi_text <- paste0( "Field ", field_id, " KPIs: ", "Uniformity: ", uniformity, " (CV=", cv, "), ", "Mean CI: ", mean_ci, ", ", "Status: ", ifelse(mean_ci > 3, "Good Growth", ifelse(mean_ci > 1.5, "Moderate Growth", "Monitoring Required")) ) return(kpi_text) } #' Export all KPI data in multiple formats for R Markdown integration #' @param kpi_results List containing all KPI results #' @param output_dir Directory to save exported files #' @param project_name Project name for file naming #' @return List of file paths for exported data export_kpi_data <- function(kpi_results, output_dir, project_name = "smartcane") { if (!dir.exists(output_dir)) { dir.create(output_dir, recursive = TRUE) } exported_files <- list() week_suffix <- paste0("week", kpi_results$metadata$current_week) date_suffix <- format(kpi_results$metadata$report_date, "%Y%m%d") # 1. Export summary tables for front page summary_tables <- create_summary_tables(kpi_results) summary_file <- file.path(output_dir, paste0(project_name, "_kpi_summary_tables_", week_suffix, ".rds")) saveRDS(summary_tables, summary_file) exported_files$summary_tables <- summary_file # 2. Export detailed field table for end section # Note: field_boundaries_sf should be passed from calculate_all_kpis() field_details <- create_field_detail_table(kpi_results, kpi_results$field_boundaries_sf) detail_file <- file.path(output_dir, paste0(project_name, "_field_details_", week_suffix, ".rds")) saveRDS(field_details, detail_file) exported_files$field_details <- detail_file # 3. Export raw KPI results raw_file <- file.path(output_dir, paste0(project_name, "_kpi_raw_", week_suffix, ".rds")) saveRDS(kpi_results, raw_file) exported_files$raw_kpi_data <- raw_file # 4. Export field-level KPI tables field_tables_dir <- file.path(output_dir, "field_level") if (!dir.exists(field_tables_dir)) { dir.create(field_tables_dir, recursive = TRUE) } # Export each field-level table field_kpi_names <- c( "field_uniformity" = "field_uniformity", "area_change" = "area_change_field_results", "tch_forecasted" = "tch_forecasted_field_results", "growth_decline" = "growth_decline_field_results", "weed_presence" = "weed_presence_field_results", "gap_filling" = "gap_filling_field_results" ) for (kpi_name in names(field_kpi_names)) { field_data <- kpi_results[[field_kpi_names[kpi_name]]] if (!is.null(field_data) && nrow(field_data) > 0) { # RDS file rds_file <- file.path(field_tables_dir, paste0(kpi_name, "_field_results_", week_suffix, ".rds")) saveRDS(field_data, rds_file) exported_files[[paste0(kpi_name, "_field_rds")]] <- rds_file # CSV file csv_file <- file.path(field_tables_dir, paste0(kpi_name, "_field_results_", week_suffix, ".csv")) readr::write_csv(field_data, csv_file) exported_files[[paste0(kpi_name, "_field_csv")]] <- csv_file } } # 4. Export CSV versions for manual inspection csv_dir <- file.path(output_dir, "csv") if (!dir.exists(csv_dir)) { dir.create(csv_dir, recursive = TRUE) } # Export each summary table as CSV for (table_name in names(summary_tables)) { csv_file <- file.path(csv_dir, paste0(table_name, "_", week_suffix, ".csv")) readr::write_csv(summary_tables[[table_name]], csv_file) exported_files[[paste0(table_name, "_csv")]] <- csv_file } # Export field details as CSV field_csv <- file.path(csv_dir, paste0("field_details_", week_suffix, ".csv")) readr::write_csv(field_details, field_csv) exported_files$field_details_csv <- field_csv # 5. Create metadata file metadata_file <- file.path(output_dir, paste0(project_name, "_kpi_metadata_", week_suffix, ".txt")) metadata_text <- paste0( "SmartCane KPI Export Metadata\n", "=============================\n", "Project: ", project_name, "\n", "Report Date: ", kpi_results$metadata$report_date, "\n", "Current Week: ", kpi_results$metadata$current_week, "\n", "Previous Week: ", kpi_results$metadata$previous_week, "\n", "Year: ", kpi_results$metadata$year, "\n", "Total Fields: ", kpi_results$metadata$total_fields, "\n", "Calculation Time: ", kpi_results$metadata$calculation_time, "\n\n", "Exported Files:\n", "- Summary Tables: ", basename(summary_file), "\n", "- Field Details: ", basename(detail_file), "\n", "- Raw KPI Data: ", basename(raw_file), "\n", "- Field-Level Tables: field_level/ directory\n", "- CSV Directory: csv/\n\n", "KPI Summary:\n", "- Field Uniformity: ", nrow(summary_tables$field_uniformity_summary), " categories\n", "- Area Change: ", nrow(summary_tables$area_change_summary), " change types\n", "- TCH Forecasted: ", nrow(summary_tables$tch_forecasted_summary), " field groups\n", "- Growth Decline: ", nrow(summary_tables$growth_decline_summary), " risk levels\n", "- Weed Presence: ", nrow(summary_tables$weed_presence_summary), " risk levels\n", "- Gap Filling: ", nrow(summary_tables$gap_filling_summary), " gap levels\n" ) writeLines(metadata_text, metadata_file) exported_files$metadata <- metadata_file safe_log(paste("KPI data exported to", output_dir)) safe_log(paste("Total files exported:", length(exported_files))) return(exported_files) } # 4. Main KPI Calculation Function # ------------------------------- #' Calculate all KPIs for a given date #' @param report_date Date to calculate KPIs for (default: today) #' @param output_dir Directory to save KPI results #' @param field_boundaries_sf Field boundaries (sf or SpatVector) #' @param harvesting_data Harvesting data with tonnage_ha #' @param cumulative_CI_vals_dir Directory with cumulative CI data #' @param weekly_CI_mosaic Directory with weekly CI mosaics #' @param reports_dir Directory for output reports #' @param project_dir Project directory name #' @return List containing all KPI results calculate_all_kpis <- function(report_date = Sys.Date(), output_dir = NULL, field_boundaries_sf, harvesting_data, cumulative_CI_vals_dir, weekly_CI_mosaic, reports_dir, project_dir) { safe_log("=== STARTING KPI CALCULATION ===") safe_log(paste("Report date:", report_date)) # Calculate week numbers weeks <- calculate_week_numbers(report_date) safe_log(paste("Current week:", weeks$current_week, "Previous week:", weeks$previous_week)) # Load weekly mosaics current_ci <- load_weekly_ci_mosaic(weeks$current_week, weeks$year, weekly_CI_mosaic) previous_ci <- load_weekly_ci_mosaic(weeks$previous_week, weeks$year, weekly_CI_mosaic) if (is.null(current_ci)) { stop("Current week CI mosaic is required but not found") } # Check if field boundaries are loaded if (is.null(field_boundaries_sf)) { stop("Field boundaries not loaded. Check parameters_project.R initialization.") } # Calculate all KPIs kpi_results <- list() # 1. Field Uniformity Summary uniformity_result <- calculate_field_uniformity_kpi(current_ci, field_boundaries_sf) kpi_results$field_uniformity <- uniformity_result$field_results kpi_results$field_uniformity_summary <- uniformity_result$summary # 2. Farm-wide Area Change Summary area_change_result <- calculate_area_change_kpi(current_ci, previous_ci, field_boundaries_sf) kpi_results$area_change <- area_change_result$summary kpi_results$area_change_field_results <- area_change_result$field_results # 3. TCH Forecasted tch_result <- calculate_tch_forecasted_kpi(field_boundaries_sf, harvesting_data, cumulative_CI_vals_dir) kpi_results$tch_forecasted <- tch_result$summary kpi_results$tch_forecasted_field_results <- tch_result$field_results # 4. Growth Decline Index growth_decline_result <- calculate_growth_decline_kpi(current_ci, previous_ci, field_boundaries_sf) kpi_results$growth_decline <- growth_decline_result$summary kpi_results$growth_decline_field_results <- growth_decline_result$field_results # 5. Weed Presence Score (with field age filtering) weed_presence_result <- calculate_weed_presence_kpi(current_ci, previous_ci, field_boundaries_sf, harvesting_data = harvesting_data, cumulative_CI_vals_dir = cumulative_CI_vals_dir) kpi_results$weed_presence <- weed_presence_result$summary kpi_results$weed_presence_field_results <- weed_presence_result$field_results # 6. Gap Filling Score gap_filling_result <- calculate_gap_filling_kpi(current_ci, field_boundaries_sf) kpi_results$gap_filling <- gap_filling_result$summary kpi_results$gap_filling_field_results <- gap_filling_result$field_results # Add metadata and field boundaries for later use kpi_results$metadata <- list( report_date = report_date, current_week = weeks$current_week, previous_week = weeks$previous_week, year = weeks$year, calculation_time = Sys.time(), total_fields = nrow(field_boundaries_sf) ) # Store field_boundaries_sf for use in export_kpi_data kpi_results$field_boundaries_sf <- field_boundaries_sf # Save results if output directory specified if (!is.null(output_dir)) { if (!dir.exists(output_dir)) { dir.create(output_dir, recursive = TRUE) } # Export KPI data in multiple formats for R Markdown integration exported_files <- export_kpi_data(kpi_results, output_dir, project_dir) kpi_results$exported_files <- exported_files # Also save raw results week_suffix <- paste0("week", weeks$current_week) output_file <- file.path(output_dir, paste0("kpi_results_", week_suffix, ".rds")) saveRDS(kpi_results, output_file) safe_log(paste("KPI results saved to:", output_file)) } safe_log("=== KPI CALCULATION COMPLETED ===") return(kpi_results) }