Refactor KPI calculation functions and enhance batch processing script for agronomic support

This commit is contained in:
Timon 2026-02-19 10:38:27 +01:00
parent 5d462dad63
commit 5bbdbda049
5 changed files with 214 additions and 233 deletions

View file

@ -850,33 +850,15 @@ calculate_all_field_analysis_agronomic_support <- function(
}
} else {
# Single-file mosaic (original behavior)
message("Loading current week mosaic...")
current_mosaic <- load_weekly_ci_mosaic(current_week, current_year, current_mosaic_dir)
if (is.null(current_mosaic)) {
stop("Could not load current week mosaic")
}
message("Extracting field statistics from current mosaic...")
current_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf)
# Extract CI pixels for each field individually
ci_pixels_by_field <- list()
for (i in seq_len(nrow(field_boundaries_sf))) {
field_vect <- terra::vect(field_boundaries_sf[i, ])
ci_pixels_by_field[[i]] <- extract_ci_values(current_mosaic, field_vect)
}
stop("ERROR: Per-field mosaic structure required (weekly_mosaic/{FIELD_NAME}/week_WW_YYYY.tif)")
}
# Load previous week mosaic (if available)
previous_stats <- NULL
if (!is.null(previous_mosaic_dir) || is_per_field) {
target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1)
message(paste("Loading previous week mosaic (week", target_prev$week, target_prev$year, ")..."))
if (is_per_field) {
# Try loading previous week from the same directory structure
# Try loading previous week from the same per-field directory structure
prev_week_file <- sprintf("week_%02d_%d.tif", target_prev$week, target_prev$year)
prev_field_exists <- any(sapply(field_dirs, function(field) {
file.exists(file.path(current_mosaic_dir, field, prev_week_file))
@ -894,20 +876,9 @@ calculate_all_field_analysis_agronomic_support <- function(
} else {
message(" Previous week mosaic not available - skipping area change KPI")
}
} else if (!is.null(previous_mosaic_dir)) {
previous_mosaic <- load_weekly_ci_mosaic(target_prev$week, target_prev$year, previous_mosaic_dir)
if (!is.null(previous_mosaic)) {
previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf)
} else {
message(" Previous week mosaic not available - skipping area change KPI")
}
}
}
# Calculate 6 KPIs
message("\nCalculating KPI 1: Field Uniformity...")
if (is_per_field) {
uniformity_kpi <- calculate_field_uniformity_kpi(
ci_pixels_by_field,
field_boundaries_sf,
@ -915,13 +886,6 @@ calculate_all_field_analysis_agronomic_support <- function(
mosaic_dir = current_mosaic_dir,
week_file = week_file
)
} else {
uniformity_kpi <- calculate_field_uniformity_kpi(
ci_pixels_by_field,
field_boundaries_sf,
current_mosaic
)
}
message("Calculating KPI 2: Area Change...")
if (!is.null(previous_stats)) {

View file

@ -101,67 +101,31 @@ calculate_age_week <- function(planting_date, reference_date) {
round(as.numeric(difftime(reference_date, planting_date, units = "weeks")), 0)
}
#' Assign crop phase based on age in weeks
#'
#' Determines crop phase from age in weeks using canonical PHASE_DEFINITIONS
#' from 80_utils_common.R for consistency across all workflows.
#'
#' @param age_week Numeric age in weeks
#' @return Character phase name (from PHASE_DEFINITIONS or "Unknown")
#'
#' @details
#' Uses the shared PHASE_DEFINITIONS to ensure identical phase boundaries
#' across all scripts. This wrapper delegates to get_phase_by_age() which
#' is the authoritative phase lookup function.
#'
#' Phase boundaries (from PHASE_DEFINITIONS):
#' - Germination: 0-6 weeks
#' - Tillering: 4-16 weeks
#' - Grand Growth: 17-39 weeks
#' - Maturation: 39+ weeks
calculate_phase <- function(age_week) {
# Delegate to canonical get_phase_by_age() from 80_utils_common.R
# This ensures all phase boundaries are consistent across workflows
get_phase_by_age(age_week)
}
#' Bin percentage into 10% intervals with special handling for 90-100%
#'
#' @param pct Numeric percentage value (0-100)
#' @return Character bin label
bin_percentage <- function(pct) {
if (is.na(pct)) return(NA_character_)
if (pct >= 95) return("95-100%")
else if (pct >= 90) return("90-95%")
else if (pct >= 80) return("80-90%")
else if (pct >= 70) return("70-80%")
else if (pct >= 60) return("60-70%")
else if (pct >= 50) return("50-60%")
else if (pct >= 40) return("40-50%")
else if (pct >= 30) return("30-40%")
else if (pct >= 20) return("20-30%")
else if (pct >= 10) return("10-20%")
else return("0-10%")
}
#' Calculate germination progress from CI threshold percentage
#'
#' @param pct_pixels_ci_gte_2 Percentage of pixels with CI >= 2
#' @return Character bin label
calculate_germination_progress <- function(pct_pixels_ci_gte_2) {
bin_percentage(pct_pixels_ci_gte_2)
}
#' Categorize CV trend (long-term slope) into qualitative labels
#' Categorize regression slope into field uniformity interpretation
#'
#' @param cv_slope Numeric slope from CV trend analysis
#' @return Character category: "More uniform", "Stable uniformity", or "Less uniform"
categorize_cv_trend_long_term <- function(cv_slope) {
if (is.na(cv_slope)) {
#' **Input**: Numeric slope from `calculate_regression_slope()` applied to CV values
#' **Output**: 3-category labels for field uniformity trend
#'
#' **Categories**:
#' - "More uniform" (slope < -0.01): Field becoming more homogeneous
#' - "Stable uniformity" (-0.01 ≤ slope ≤ 0.01): Field uniformity stable
#' - "Less uniform" (slope > 0.01): Field becoming more patchy/heterogeneous
#'
#' @param slope Numeric slope from `calculate_regression_slope(cv_values)`
#' @return Character category or NA
#'
categorize_cv_trend_long_term <- function(slope) {
if (is.na(slope)) {
return(NA_character_)
} else if (cv_slope < -0.01) {
} else if (slope < -0.01) {
return("More uniform")
} else if (cv_slope > 0.01) {
} else if (slope > 0.01) {
return("Less uniform")
} else {
return("Stable uniformity")
@ -235,58 +199,7 @@ calculate_status_alert <- function(imminent_prob, age_week, mean_ci,
NA_character_
}
#' Calculate yield prediction for CANE_SUPPLY workflows (Wrapper)
#'
#' This function wraps the shared yield prediction model from 80_utils_common.R
#' to provide CANE_SUPPLY clients (e.g., ANGATA) with ML-based yield forecasting.
#'
#' Uses Random Forest with Forward Feature Selection trained on:
#' - Cumulative Canopy Index (CI) from growth model
#' - Days After Harvest (DAH) / crop age
#' - CI-per-day (growth velocity)
#'
#' Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD, ~8 months) into quartiles:
#' - Top 25%: High-yield fields
#' - Average: Mid-range yield fields
#' - Lowest 25%: Lower-yield fields
#'
#' @param field_boundaries_sf SF object with field geometries
#' @param harvesting_data Data frame with harvest history (must have tonnage_ha column)
#' @param cumulative_CI_vals_dir Directory with combined CI RDS files
#'
#' @return List with:
#' - summary: Data frame with field_groups, count, and yield quartile predictions
#' - field_results: Data frame with field-level forecasts (yield_forecast_t_ha in t/ha)
#'
#' @details
#' **Data Requirements:**
#' - harvesting_data must include tonnage_ha column (yield in t/ha) for training
#' - cumulative_CI_vals_dir must contain "All_pivots_Cumulative_CI_quadrant_year_v2.rds"
#' - If either is missing, returns graceful fallback with NA values (not fake numbers)
#'
#' **Integration:**
#' This can be called from calculate_all_field_kpis() in cane_supply workflow to add
#' a new "Yield_Forecast" column to the 22-column KPI output.
#'
#' **Example:**
#' ```r
#' yield_result <- calculate_yield_prediction_kpi_cane_supply(
#' field_boundaries_sf,
#' harvesting_data,
#' file.path(data_dir, "combined_CI")
#' )
#' # yield_result$summary has quartiles
#' # yield_result$field_results has per-field forecasts
#' ```
calculate_yield_prediction_kpi_cane_supply <- function(field_boundaries_sf,
harvesting_data,
cumulative_CI_vals_dir) {
# Call the shared yield prediction function from 80_utils_common.R
result <- calculate_yield_prediction_kpi(field_boundaries_sf, harvesting_data, cumulative_CI_vals_dir)
return(result)
}
#' Build complete per-field KPI dataframe with all 22 columns
@ -336,10 +249,10 @@ calculate_all_field_kpis <- function(current_stats,
},
# Column 10: Phase (based on Age_week)
Phase = sapply(Age_week, calculate_phase),
Phase = sapply(Age_week, get_phase_by_age),
# Column 12: Germination_progress (binned Pct_pixels_CI_gte_2)
Germination_progress = sapply(Pct_pixels_CI_gte_2, calculate_germination_progress),
Germination_progress = sapply(Pct_pixels_CI_gte_2, bin_percentage),
# Column 13: Imminent_prob (from script 31 or NA)
Imminent_prob = {

View file

@ -276,23 +276,34 @@ categorize_four_week_trend <- function(ci_values_list) {
}
}
#' Round cloud coverage to interval categories
round_cloud_to_intervals <- function(cloud_pct_clear) {
if (is.na(cloud_pct_clear)) {
return(NA_character_)
}
if (cloud_pct_clear < 10) return("0-10%")
if (cloud_pct_clear < 20) return("10-20%")
if (cloud_pct_clear < 30) return("20-30%")
if (cloud_pct_clear < 40) return("30-40%")
if (cloud_pct_clear < 50) return("40-50%")
if (cloud_pct_clear < 60) return("50-60%")
if (cloud_pct_clear < 70) return("60-70%")
if (cloud_pct_clear < 80) return("70-80%")
if (cloud_pct_clear < 90) return("80-90%")
if (cloud_pct_clear < 95) return("90-95%")
return("95-100%")
#' Bin a percentage value into labeled intervals
#'
#' Universal binning function for percentages (0-100) with configurable precision.
#'
#' **Used for**:
#' - Cloud coverage binning: `bin_percentage(cloud_pct_clear)` → "0-10%", "90-95%", etc.
#' - Germination progress: `bin_percentage(pct_pixels_ci_gte_2)` → percentage bins
#' - Field acreage reporting: `bin_percentage(pct_area)` → standardized bins
#'
#' **Precision**: High-precision binning for 90-100% range (split at 95%)
#' to distinguish excellent conditions from marginal ones.
#'
#' @param pct Numeric value (0-100, typically a percentage)
#' @return Character bin label (e.g., "80-90%", "95-100%") or NA
#'
bin_percentage <- function(pct) {
if (is.na(pct)) return(NA_character_)
if (pct >= 95) return("95-100%")
else if (pct >= 90) return("90-95%")
else if (pct >= 80) return("80-90%")
else if (pct >= 70) return("70-80%")
else if (pct >= 60) return("60-70%")
else if (pct >= 50) return("50-60%")
else if (pct >= 40) return("40-50%")
else if (pct >= 30) return("30-40%")
else if (pct >= 20) return("20-30%")
else if (pct >= 10) return("10-20%")
else return("0-10%")
}
#' Get CI percentile range (10th to 90th)
@ -320,63 +331,45 @@ calculate_cv_trend <- function(cv_current, cv_previous) {
return(round(cv_current - cv_previous, 4))
}
#' Calculate four-week CI trend
calculate_four_week_trend <- function(mean_ci_values) {
if (is.null(mean_ci_values) || length(mean_ci_values) == 0) {
#' Calculate regression slope for temporal trend analysis
#'
#' Uses linear regression (lm) to compute trend slope over a time series.
#' Automatically handles NA values, short series, and errors.
#'
#' **Used for**:
#' - 4-week CI trends (growth trajectories): `calculate_regression_slope(ci_values_4week, 2)`
#' - 8-week CV trends (field uniformity change): `calculate_regression_slope(cv_values_8week, 2)`
#' - Any other multi-week trend analysis
#'
#' **Pairing**: Output slope is categorized by `categorize_cv_trend_long_term(slope)`
#' to produce labels: "More uniform" / "Stable uniformity" / "Less uniform"
#'
#' @param values Numeric vector of values (variable length, auto-determines weeks)
#' @param decimal_places Rounding precision (default: 2 for slope values)
#'
#' @return Numeric slope (negative = improving, positive = worsening) or NA
#'
#' @details
#' Implements: lm(values ~ seq_along(values)), returns coefficient for sequence index
#' with error handling for degenerate cases (NULL, empty, <2 values).
#'
calculate_regression_slope <- function(values, decimal_places = 2) {
if (is.null(values) || length(values) == 0) {
return(NA_real_)
}
ci_clean <- mean_ci_values[!is.na(mean_ci_values)]
clean_values <- values[!is.na(values)]
if (length(ci_clean) < 2) {
if (length(clean_values) < 2) {
return(NA_real_)
}
# Use linear regression slope (like agronomic_support workflow) instead of simple difference
weeks <- seq_along(ci_clean)
lm_fit <- lm(ci_clean ~ weeks)
slope <- coef(lm_fit)["weeks"]
return(round(as.numeric(slope), 3))
}
#' Categorize CV slope (8-week regression) into field uniformity interpretation
categorize_cv_slope <- function(slope) {
if (is.na(slope)) {
return(NA_character_)
}
if (slope <= CV_SLOPE_IMPROVEMENT_MIN) {
return("Excellent uniformity")
} else if (slope < CV_SLOPE_HOMOGENOUS_MIN) {
return("Homogenous growth")
} else if (slope <= CV_SLOPE_HOMOGENOUS_MAX) {
return("Homogenous growth")
} else if (slope <= CV_SLOPE_PATCHINESS_MAX) {
return("Minor patchiness")
} else {
return("Severe fragmentation")
}
}
#' Calculate 8-week CV trend via linear regression slope
calculate_cv_trend_long_term <- function(cv_values) {
if (is.null(cv_values) || length(cv_values) == 0) {
return(NA_real_)
}
cv_clean <- cv_values[!is.na(cv_values)]
if (length(cv_clean) < 2) {
return(NA_real_)
}
weeks <- seq_along(cv_clean)
weeks <- seq_along(clean_values)
tryCatch({
lm_fit <- lm(cv_clean ~ weeks)
lm_fit <- lm(clean_values ~ weeks)
slope <- coef(lm_fit)["weeks"]
return(round(as.numeric(slope), 4))
return(round(as.numeric(slope), decimal_places))
}, error = function(e) {
return(NA_real_)
})
@ -1266,7 +1259,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
ci_values_4week <- c(ci_values_4week, current_stats$Mean_CI[i])
if (length(ci_values_4week) >= 2) {
current_stats$Four_week_trend[i] <- calculate_four_week_trend(ci_values_4week)
current_stats$Four_week_trend[i] <- calculate_regression_slope(ci_values_4week, 2)
four_week_trends_calculated <- four_week_trends_calculated + 1
}
}
@ -1283,7 +1276,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
cv_values_8week <- c(cv_values_8week, current_stats$CV[i])
if (length(cv_values_8week) >= 2) {
slope <- calculate_cv_trend_long_term(cv_values_8week)
slope <- calculate_regression_slope(cv_values_8week, 2)
current_stats$CV_Trend_Long_Term[i] <- slope
cv_long_term_calculated <- cv_long_term_calculated + 1
}

View file

@ -962,7 +962,7 @@ tryCatch({
## 1. About This Document
This document is the support document to the SmartCane data file. It includes the definitions, explanatory calculations and suggestions for interpretations of the data as provided. For additional questions please feel free to contact SmartCane support, through your contact person, or via info@smartcane.org.
This document is the support document to the SmartCane data file. It includes the definitions, explanatory calculations and suggestions for interpretations of the data as provided. For additional questions please feel free to contact SmartCane support, through your contact person, or via info@smartcane.ag.
## 2. About the Data File

111
r_app/batch_pipeline_aura.R Normal file
View file

@ -0,0 +1,111 @@
# ============================================================================
# BATCH PIPELINE RUNNER: Scripts 40, 80, 90 for Multiple Dates
# ============================================================================
# Purpose: Run weekly reporting pipeline for multiple dates (Dec 3, 2025 - Feb 4, 2026)
# Project: aura
# Usage: source("r_app/batch_pipeline_aura.R")
# ============================================================================
suppressPackageStartupMessages({
library(lubridate)
library(rmarkdown)
})
# Configuration
PROJECT <- "aura"
START_DATE <- as.Date("2025-12-03")
END_DATE <- as.Date("2026-02-04")
OFFSET <- 7
# Generate date sequence (every 7 days)
date_sequence <- seq(START_DATE, END_DATE, by = "7 days")
cat("\n========================================================\n")
cat("BATCH PIPELINE RUNNER for AURA Project\n")
cat("========================================================\n")
cat(sprintf("Project: %s\n", PROJECT))
cat(sprintf("Date range: %s to %s\n", format(START_DATE), format(END_DATE)))
cat(sprintf("Interval: Every %d days\n", OFFSET))
cat(sprintf("Total dates to process: %d\n", length(date_sequence)))
cat(sprintf("Dates: %s\n", paste(format(date_sequence), collapse = ", ")))
cat("========================================================\n\n")
# Process each date
for (i in seq_along(date_sequence)) {
current_date <- date_sequence[i]
date_str <- format(current_date, "%Y-%m-%d")
cat("\n")
cat(strrep("=", 70), "\n")
cat(sprintf("PROCESSING DATE: %s (%d of %d)\n", date_str, i, length(date_sequence)))
cat(strrep("=", 70), "\n\n")
# ==== SCRIPT 40: Create Weekly Mosaic ====
cat(sprintf("[%s] Running Script 40: Weekly Mosaic Creation\n", Sys.time()))
tryCatch({
r_path <- "C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe"
script_40 <- "r_app/40_mosaic_creation_per_field.R"
cmd_40 <- c(script_40, date_str, as.character(OFFSET), PROJECT)
result_40 <- system2(r_path, args = cmd_40)
if (result_40 == 0) {
cat(sprintf("[%s] ✓ Script 40 completed successfully\n\n", Sys.time()))
} else {
cat(sprintf("[%s] ✗ Script 40 failed with exit code %d (continuing anyway)\n\n", Sys.time(), result_40))
}
}, error = function(e) {
cat(sprintf("[ERROR] Script 40 error: %s (continuing anyway)\n\n", e$message))
})
# ==== SCRIPT 80: Calculate KPIs ====
cat(sprintf("[%s] Running Script 80: Calculate KPIs\n", Sys.time()))
tryCatch({
r_path <- "C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe"
script_80 <- "r_app/80_calculate_kpis.R"
# Note: R80 argument order is [END_DATE] [PROJECT] [OFFSET]
cmd_80 <- c(script_80, date_str, PROJECT, as.character(OFFSET))
result_80 <- system2(r_path, args = cmd_80)
if (result_80 == 0) {
cat(sprintf("[%s] ✓ Script 80 completed successfully\n\n", Sys.time()))
} else {
cat(sprintf("[%s] ✗ Script 80 failed with exit code %d (continuing anyway)\n\n", Sys.time(), result_80))
}
}, error = function(e) {
cat(sprintf("[ERROR] Script 80 error: %s (continuing anyway)\n\n", e$message))
})
# ==== SCRIPT 90: Generate Report ====
cat(sprintf("[%s] Running Script 90: Generate Agronomic Support Report\n", Sys.time()))
tryCatch({
output_filename <- sprintf("SmartCane_Report_agronomic_support_%s_%s.docx", PROJECT, date_str)
render(
"r_app/90_CI_report_with_kpis_agronomic_support.Rmd",
params = list(data_dir = PROJECT, report_date = as.Date(date_str)),
output_file = output_filename,
output_dir = file.path("laravel_app/storage/app", PROJECT, "reports"),
quiet = FALSE
)
cat(sprintf("[%s] ✓ Script 90 completed successfully\n", Sys.time()))
cat(sprintf(" Output: laravel_app/storage/app/%s/reports/%s\n\n", PROJECT, output_filename))
}, error = function(e) {
cat(sprintf("[%s] ✗ Script 90 failed: %s (continuing anyway)\n\n", Sys.time(), e$message))
})
}
# Summary
cat("\n")
cat(strrep("=", 70), "\n")
cat("BATCH PROCESSING COMPLETE\n")
cat(strrep("=", 70), "\n")
cat(sprintf("Processed %d dates from %s to %s\n",
length(date_sequence),
format(START_DATE),
format(END_DATE)))
cat("Check output directory for generated reports\n")
cat(sprintf("Reports location: laravel_app/storage/app/%s/reports/\n", PROJECT))
cat(strrep("=", 70), "\n\n")