From 5bbdbda049170c0323bb6971453e067b7b1ec436 Mon Sep 17 00:00:00 2001 From: Timon Date: Thu, 19 Feb 2026 10:38:27 +0100 Subject: [PATCH] Refactor KPI calculation functions and enhance batch processing script for agronomic support --- r_app/80_utils_agronomic_support.R | 78 +++-------- r_app/80_utils_cane_supply.R | 131 ++++--------------- r_app/80_utils_common.R | 125 +++++++++--------- r_app/91_CI_report_with_kpis_cane_supply.Rmd | 2 +- r_app/batch_pipeline_aura.R | 111 ++++++++++++++++ 5 files changed, 214 insertions(+), 233 deletions(-) create mode 100644 r_app/batch_pipeline_aura.R diff --git a/r_app/80_utils_agronomic_support.R b/r_app/80_utils_agronomic_support.R index 54bd14e..8010fed 100644 --- a/r_app/80_utils_agronomic_support.R +++ b/r_app/80_utils_agronomic_support.R @@ -850,64 +850,35 @@ calculate_all_field_analysis_agronomic_support <- function( } } else { - # Single-file mosaic (original behavior) - message("Loading current week mosaic...") - current_mosaic <- load_weekly_ci_mosaic(current_week, current_year, current_mosaic_dir) - - if (is.null(current_mosaic)) { - stop("Could not load current week mosaic") - } - - message("Extracting field statistics from current mosaic...") - current_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf) - - # Extract CI pixels for each field individually - ci_pixels_by_field <- list() - for (i in seq_len(nrow(field_boundaries_sf))) { - field_vect <- terra::vect(field_boundaries_sf[i, ]) - ci_pixels_by_field[[i]] <- extract_ci_values(current_mosaic, field_vect) - } + stop("ERROR: Per-field mosaic structure required (weekly_mosaic/{FIELD_NAME}/week_WW_YYYY.tif)") } # Load previous week mosaic (if available) previous_stats <- NULL - if (!is.null(previous_mosaic_dir) || is_per_field) { - target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1) - message(paste("Loading previous week mosaic (week", target_prev$week, target_prev$year, ")...")) - - if (is_per_field) { - # Try loading previous week from the same directory structure - prev_week_file <- sprintf("week_%02d_%d.tif", target_prev$week, target_prev$year) - prev_field_exists <- any(sapply(field_dirs, function(field) { - file.exists(file.path(current_mosaic_dir, field, prev_week_file)) - })) - - if (prev_field_exists) { - message(" Found previous week per-field mosaics, calculating statistics...") - previous_stats <- calculate_field_statistics( - field_boundaries_sf, - target_prev$week, - target_prev$year, - current_mosaic_dir, - report_date = Sys.Date() - 7 - ) - } else { - message(" Previous week mosaic not available - skipping area change KPI") - } - } else if (!is.null(previous_mosaic_dir)) { - previous_mosaic <- load_weekly_ci_mosaic(target_prev$week, target_prev$year, previous_mosaic_dir) - - if (!is.null(previous_mosaic)) { - previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf) - } else { - message(" Previous week mosaic not available - skipping area change KPI") - } - } + target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1) + message(paste("Loading previous week mosaic (week", target_prev$week, target_prev$year, ")...")) + + # Try loading previous week from the same per-field directory structure + prev_week_file <- sprintf("week_%02d_%d.tif", target_prev$week, target_prev$year) + prev_field_exists <- any(sapply(field_dirs, function(field) { + file.exists(file.path(current_mosaic_dir, field, prev_week_file)) + })) + + if (prev_field_exists) { + message(" Found previous week per-field mosaics, calculating statistics...") + previous_stats <- calculate_field_statistics( + field_boundaries_sf, + target_prev$week, + target_prev$year, + current_mosaic_dir, + report_date = Sys.Date() - 7 + ) + } else { + message(" Previous week mosaic not available - skipping area change KPI") } # Calculate 6 KPIs message("\nCalculating KPI 1: Field Uniformity...") - if (is_per_field) { uniformity_kpi <- calculate_field_uniformity_kpi( ci_pixels_by_field, field_boundaries_sf, @@ -915,13 +886,6 @@ calculate_all_field_analysis_agronomic_support <- function( mosaic_dir = current_mosaic_dir, week_file = week_file ) - } else { - uniformity_kpi <- calculate_field_uniformity_kpi( - ci_pixels_by_field, - field_boundaries_sf, - current_mosaic - ) - } message("Calculating KPI 2: Area Change...") if (!is.null(previous_stats)) { diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R index 7c75958..4172d37 100644 --- a/r_app/80_utils_cane_supply.R +++ b/r_app/80_utils_cane_supply.R @@ -101,67 +101,31 @@ calculate_age_week <- function(planting_date, reference_date) { round(as.numeric(difftime(reference_date, planting_date, units = "weeks")), 0) } -#' Assign crop phase based on age in weeks -#' -#' Determines crop phase from age in weeks using canonical PHASE_DEFINITIONS -#' from 80_utils_common.R for consistency across all workflows. -#' -#' @param age_week Numeric age in weeks -#' @return Character phase name (from PHASE_DEFINITIONS or "Unknown") -#' -#' @details -#' Uses the shared PHASE_DEFINITIONS to ensure identical phase boundaries -#' across all scripts. This wrapper delegates to get_phase_by_age() which -#' is the authoritative phase lookup function. -#' -#' Phase boundaries (from PHASE_DEFINITIONS): -#' - Germination: 0-6 weeks -#' - Tillering: 4-16 weeks -#' - Grand Growth: 17-39 weeks -#' - Maturation: 39+ weeks -calculate_phase <- function(age_week) { - # Delegate to canonical get_phase_by_age() from 80_utils_common.R - # This ensures all phase boundaries are consistent across workflows - get_phase_by_age(age_week) -} -#' Bin percentage into 10% intervals with special handling for 90-100% -#' -#' @param pct Numeric percentage value (0-100) -#' @return Character bin label -bin_percentage <- function(pct) { - if (is.na(pct)) return(NA_character_) - if (pct >= 95) return("95-100%") - else if (pct >= 90) return("90-95%") - else if (pct >= 80) return("80-90%") - else if (pct >= 70) return("70-80%") - else if (pct >= 60) return("60-70%") - else if (pct >= 50) return("50-60%") - else if (pct >= 40) return("40-50%") - else if (pct >= 30) return("30-40%") - else if (pct >= 20) return("20-30%") - else if (pct >= 10) return("10-20%") - else return("0-10%") -} -#' Calculate germination progress from CI threshold percentage -#' -#' @param pct_pixels_ci_gte_2 Percentage of pixels with CI >= 2 -#' @return Character bin label -calculate_germination_progress <- function(pct_pixels_ci_gte_2) { - bin_percentage(pct_pixels_ci_gte_2) -} -#' Categorize CV trend (long-term slope) into qualitative labels + + + +#' Categorize regression slope into field uniformity interpretation #' -#' @param cv_slope Numeric slope from CV trend analysis -#' @return Character category: "More uniform", "Stable uniformity", or "Less uniform" -categorize_cv_trend_long_term <- function(cv_slope) { - if (is.na(cv_slope)) { +#' **Input**: Numeric slope from `calculate_regression_slope()` applied to CV values +#' **Output**: 3-category labels for field uniformity trend +#' +#' **Categories**: +#' - "More uniform" (slope < -0.01): Field becoming more homogeneous +#' - "Stable uniformity" (-0.01 ≤ slope ≤ 0.01): Field uniformity stable +#' - "Less uniform" (slope > 0.01): Field becoming more patchy/heterogeneous +#' +#' @param slope Numeric slope from `calculate_regression_slope(cv_values)` +#' @return Character category or NA +#' +categorize_cv_trend_long_term <- function(slope) { + if (is.na(slope)) { return(NA_character_) - } else if (cv_slope < -0.01) { + } else if (slope < -0.01) { return("More uniform") - } else if (cv_slope > 0.01) { + } else if (slope > 0.01) { return("Less uniform") } else { return("Stable uniformity") @@ -235,58 +199,7 @@ calculate_status_alert <- function(imminent_prob, age_week, mean_ci, NA_character_ } -#' Calculate yield prediction for CANE_SUPPLY workflows (Wrapper) -#' -#' This function wraps the shared yield prediction model from 80_utils_common.R -#' to provide CANE_SUPPLY clients (e.g., ANGATA) with ML-based yield forecasting. -#' -#' Uses Random Forest with Forward Feature Selection trained on: -#' - Cumulative Canopy Index (CI) from growth model -#' - Days After Harvest (DAH) / crop age -#' - CI-per-day (growth velocity) -#' -#' Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD, ~8 months) into quartiles: -#' - Top 25%: High-yield fields -#' - Average: Mid-range yield fields -#' - Lowest 25%: Lower-yield fields -#' -#' @param field_boundaries_sf SF object with field geometries -#' @param harvesting_data Data frame with harvest history (must have tonnage_ha column) -#' @param cumulative_CI_vals_dir Directory with combined CI RDS files -#' -#' @return List with: -#' - summary: Data frame with field_groups, count, and yield quartile predictions -#' - field_results: Data frame with field-level forecasts (yield_forecast_t_ha in t/ha) -#' -#' @details -#' **Data Requirements:** -#' - harvesting_data must include tonnage_ha column (yield in t/ha) for training -#' - cumulative_CI_vals_dir must contain "All_pivots_Cumulative_CI_quadrant_year_v2.rds" -#' - If either is missing, returns graceful fallback with NA values (not fake numbers) -#' -#' **Integration:** -#' This can be called from calculate_all_field_kpis() in cane_supply workflow to add -#' a new "Yield_Forecast" column to the 22-column KPI output. -#' -#' **Example:** -#' ```r -#' yield_result <- calculate_yield_prediction_kpi_cane_supply( -#' field_boundaries_sf, -#' harvesting_data, -#' file.path(data_dir, "combined_CI") -#' ) -#' # yield_result$summary has quartiles -#' # yield_result$field_results has per-field forecasts -#' ``` -calculate_yield_prediction_kpi_cane_supply <- function(field_boundaries_sf, - harvesting_data, - cumulative_CI_vals_dir) { - - # Call the shared yield prediction function from 80_utils_common.R - result <- calculate_yield_prediction_kpi(field_boundaries_sf, harvesting_data, cumulative_CI_vals_dir) - - return(result) -} + #' Build complete per-field KPI dataframe with all 22 columns @@ -336,10 +249,10 @@ calculate_all_field_kpis <- function(current_stats, }, # Column 10: Phase (based on Age_week) - Phase = sapply(Age_week, calculate_phase), + Phase = sapply(Age_week, get_phase_by_age), # Column 12: Germination_progress (binned Pct_pixels_CI_gte_2) - Germination_progress = sapply(Pct_pixels_CI_gte_2, calculate_germination_progress), + Germination_progress = sapply(Pct_pixels_CI_gte_2, bin_percentage), # Column 13: Imminent_prob (from script 31 or NA) Imminent_prob = { diff --git a/r_app/80_utils_common.R b/r_app/80_utils_common.R index 7bb5c8b..c0f9ce0 100644 --- a/r_app/80_utils_common.R +++ b/r_app/80_utils_common.R @@ -276,23 +276,34 @@ categorize_four_week_trend <- function(ci_values_list) { } } -#' Round cloud coverage to interval categories -round_cloud_to_intervals <- function(cloud_pct_clear) { - if (is.na(cloud_pct_clear)) { - return(NA_character_) - } - - if (cloud_pct_clear < 10) return("0-10%") - if (cloud_pct_clear < 20) return("10-20%") - if (cloud_pct_clear < 30) return("20-30%") - if (cloud_pct_clear < 40) return("30-40%") - if (cloud_pct_clear < 50) return("40-50%") - if (cloud_pct_clear < 60) return("50-60%") - if (cloud_pct_clear < 70) return("60-70%") - if (cloud_pct_clear < 80) return("70-80%") - if (cloud_pct_clear < 90) return("80-90%") - if (cloud_pct_clear < 95) return("90-95%") - return("95-100%") +#' Bin a percentage value into labeled intervals +#' +#' Universal binning function for percentages (0-100) with configurable precision. +#' +#' **Used for**: +#' - Cloud coverage binning: `bin_percentage(cloud_pct_clear)` → "0-10%", "90-95%", etc. +#' - Germination progress: `bin_percentage(pct_pixels_ci_gte_2)` → percentage bins +#' - Field acreage reporting: `bin_percentage(pct_area)` → standardized bins +#' +#' **Precision**: High-precision binning for 90-100% range (split at 95%) +#' to distinguish excellent conditions from marginal ones. +#' +#' @param pct Numeric value (0-100, typically a percentage) +#' @return Character bin label (e.g., "80-90%", "95-100%") or NA +#' +bin_percentage <- function(pct) { + if (is.na(pct)) return(NA_character_) + if (pct >= 95) return("95-100%") + else if (pct >= 90) return("90-95%") + else if (pct >= 80) return("80-90%") + else if (pct >= 70) return("70-80%") + else if (pct >= 60) return("60-70%") + else if (pct >= 50) return("50-60%") + else if (pct >= 40) return("40-50%") + else if (pct >= 30) return("30-40%") + else if (pct >= 20) return("20-30%") + else if (pct >= 10) return("10-20%") + else return("0-10%") } #' Get CI percentile range (10th to 90th) @@ -320,63 +331,45 @@ calculate_cv_trend <- function(cv_current, cv_previous) { return(round(cv_current - cv_previous, 4)) } -#' Calculate four-week CI trend -calculate_four_week_trend <- function(mean_ci_values) { - if (is.null(mean_ci_values) || length(mean_ci_values) == 0) { +#' Calculate regression slope for temporal trend analysis +#' +#' Uses linear regression (lm) to compute trend slope over a time series. +#' Automatically handles NA values, short series, and errors. +#' +#' **Used for**: +#' - 4-week CI trends (growth trajectories): `calculate_regression_slope(ci_values_4week, 2)` +#' - 8-week CV trends (field uniformity change): `calculate_regression_slope(cv_values_8week, 2)` +#' - Any other multi-week trend analysis +#' +#' **Pairing**: Output slope is categorized by `categorize_cv_trend_long_term(slope)` +#' to produce labels: "More uniform" / "Stable uniformity" / "Less uniform" +#' +#' @param values Numeric vector of values (variable length, auto-determines weeks) +#' @param decimal_places Rounding precision (default: 2 for slope values) +#' +#' @return Numeric slope (negative = improving, positive = worsening) or NA +#' +#' @details +#' Implements: lm(values ~ seq_along(values)), returns coefficient for sequence index +#' with error handling for degenerate cases (NULL, empty, <2 values). +#' +calculate_regression_slope <- function(values, decimal_places = 2) { + if (is.null(values) || length(values) == 0) { return(NA_real_) } - ci_clean <- mean_ci_values[!is.na(mean_ci_values)] + clean_values <- values[!is.na(values)] - if (length(ci_clean) < 2) { + if (length(clean_values) < 2) { return(NA_real_) } - # Use linear regression slope (like agronomic_support workflow) instead of simple difference - weeks <- seq_along(ci_clean) - lm_fit <- lm(ci_clean ~ weeks) - slope <- coef(lm_fit)["weeks"] - - return(round(as.numeric(slope), 3)) -} - -#' Categorize CV slope (8-week regression) into field uniformity interpretation -categorize_cv_slope <- function(slope) { - if (is.na(slope)) { - return(NA_character_) - } - - if (slope <= CV_SLOPE_IMPROVEMENT_MIN) { - return("Excellent uniformity") - } else if (slope < CV_SLOPE_HOMOGENOUS_MIN) { - return("Homogenous growth") - } else if (slope <= CV_SLOPE_HOMOGENOUS_MAX) { - return("Homogenous growth") - } else if (slope <= CV_SLOPE_PATCHINESS_MAX) { - return("Minor patchiness") - } else { - return("Severe fragmentation") - } -} - -#' Calculate 8-week CV trend via linear regression slope -calculate_cv_trend_long_term <- function(cv_values) { - if (is.null(cv_values) || length(cv_values) == 0) { - return(NA_real_) - } - - cv_clean <- cv_values[!is.na(cv_values)] - - if (length(cv_clean) < 2) { - return(NA_real_) - } - - weeks <- seq_along(cv_clean) + weeks <- seq_along(clean_values) tryCatch({ - lm_fit <- lm(cv_clean ~ weeks) + lm_fit <- lm(clean_values ~ weeks) slope <- coef(lm_fit)["weeks"] - return(round(as.numeric(slope), 4)) + return(round(as.numeric(slope), decimal_places)) }, error = function(e) { return(NA_real_) }) @@ -1266,7 +1259,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL, ci_values_4week <- c(ci_values_4week, current_stats$Mean_CI[i]) if (length(ci_values_4week) >= 2) { - current_stats$Four_week_trend[i] <- calculate_four_week_trend(ci_values_4week) + current_stats$Four_week_trend[i] <- calculate_regression_slope(ci_values_4week, 2) four_week_trends_calculated <- four_week_trends_calculated + 1 } } @@ -1283,7 +1276,7 @@ calculate_kpi_trends <- function(current_stats, prev_stats = NULL, cv_values_8week <- c(cv_values_8week, current_stats$CV[i]) if (length(cv_values_8week) >= 2) { - slope <- calculate_cv_trend_long_term(cv_values_8week) + slope <- calculate_regression_slope(cv_values_8week, 2) current_stats$CV_Trend_Long_Term[i] <- slope cv_long_term_calculated <- cv_long_term_calculated + 1 } diff --git a/r_app/91_CI_report_with_kpis_cane_supply.Rmd b/r_app/91_CI_report_with_kpis_cane_supply.Rmd index f9edca8..bda2d71 100644 --- a/r_app/91_CI_report_with_kpis_cane_supply.Rmd +++ b/r_app/91_CI_report_with_kpis_cane_supply.Rmd @@ -962,7 +962,7 @@ tryCatch({ ## 1. About This Document -This document is the support document to the SmartCane data file. It includes the definitions, explanatory calculations and suggestions for interpretations of the data as provided. For additional questions please feel free to contact SmartCane support, through your contact person, or via info@smartcane.org. +This document is the support document to the SmartCane data file. It includes the definitions, explanatory calculations and suggestions for interpretations of the data as provided. For additional questions please feel free to contact SmartCane support, through your contact person, or via info@smartcane.ag. ## 2. About the Data File diff --git a/r_app/batch_pipeline_aura.R b/r_app/batch_pipeline_aura.R new file mode 100644 index 0000000..1b97fc7 --- /dev/null +++ b/r_app/batch_pipeline_aura.R @@ -0,0 +1,111 @@ +# ============================================================================ +# BATCH PIPELINE RUNNER: Scripts 40, 80, 90 for Multiple Dates +# ============================================================================ +# Purpose: Run weekly reporting pipeline for multiple dates (Dec 3, 2025 - Feb 4, 2026) +# Project: aura +# Usage: source("r_app/batch_pipeline_aura.R") +# ============================================================================ + +suppressPackageStartupMessages({ + library(lubridate) + library(rmarkdown) +}) + +# Configuration +PROJECT <- "aura" +START_DATE <- as.Date("2025-12-03") +END_DATE <- as.Date("2026-02-04") +OFFSET <- 7 + +# Generate date sequence (every 7 days) +date_sequence <- seq(START_DATE, END_DATE, by = "7 days") + +cat("\n========================================================\n") +cat("BATCH PIPELINE RUNNER for AURA Project\n") +cat("========================================================\n") +cat(sprintf("Project: %s\n", PROJECT)) +cat(sprintf("Date range: %s to %s\n", format(START_DATE), format(END_DATE))) +cat(sprintf("Interval: Every %d days\n", OFFSET)) +cat(sprintf("Total dates to process: %d\n", length(date_sequence))) +cat(sprintf("Dates: %s\n", paste(format(date_sequence), collapse = ", "))) +cat("========================================================\n\n") + +# Process each date +for (i in seq_along(date_sequence)) { + current_date <- date_sequence[i] + date_str <- format(current_date, "%Y-%m-%d") + + cat("\n") + cat(strrep("=", 70), "\n") + cat(sprintf("PROCESSING DATE: %s (%d of %d)\n", date_str, i, length(date_sequence))) + cat(strrep("=", 70), "\n\n") + + # ==== SCRIPT 40: Create Weekly Mosaic ==== + cat(sprintf("[%s] Running Script 40: Weekly Mosaic Creation\n", Sys.time())) + tryCatch({ + r_path <- "C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" + script_40 <- "r_app/40_mosaic_creation_per_field.R" + cmd_40 <- c(script_40, date_str, as.character(OFFSET), PROJECT) + + result_40 <- system2(r_path, args = cmd_40) + + if (result_40 == 0) { + cat(sprintf("[%s] ✓ Script 40 completed successfully\n\n", Sys.time())) + } else { + cat(sprintf("[%s] ✗ Script 40 failed with exit code %d (continuing anyway)\n\n", Sys.time(), result_40)) + } + }, error = function(e) { + cat(sprintf("[ERROR] Script 40 error: %s (continuing anyway)\n\n", e$message)) + }) + + # ==== SCRIPT 80: Calculate KPIs ==== + cat(sprintf("[%s] Running Script 80: Calculate KPIs\n", Sys.time())) + tryCatch({ + r_path <- "C:\\Program Files\\R\\R-4.4.3\\bin\\x64\\Rscript.exe" + script_80 <- "r_app/80_calculate_kpis.R" + # Note: R80 argument order is [END_DATE] [PROJECT] [OFFSET] + cmd_80 <- c(script_80, date_str, PROJECT, as.character(OFFSET)) + + result_80 <- system2(r_path, args = cmd_80) + + if (result_80 == 0) { + cat(sprintf("[%s] ✓ Script 80 completed successfully\n\n", Sys.time())) + } else { + cat(sprintf("[%s] ✗ Script 80 failed with exit code %d (continuing anyway)\n\n", Sys.time(), result_80)) + } + }, error = function(e) { + cat(sprintf("[ERROR] Script 80 error: %s (continuing anyway)\n\n", e$message)) + }) + + # ==== SCRIPT 90: Generate Report ==== + cat(sprintf("[%s] Running Script 90: Generate Agronomic Support Report\n", Sys.time())) + tryCatch({ + output_filename <- sprintf("SmartCane_Report_agronomic_support_%s_%s.docx", PROJECT, date_str) + + render( + "r_app/90_CI_report_with_kpis_agronomic_support.Rmd", + params = list(data_dir = PROJECT, report_date = as.Date(date_str)), + output_file = output_filename, + output_dir = file.path("laravel_app/storage/app", PROJECT, "reports"), + quiet = FALSE + ) + + cat(sprintf("[%s] ✓ Script 90 completed successfully\n", Sys.time())) + cat(sprintf(" Output: laravel_app/storage/app/%s/reports/%s\n\n", PROJECT, output_filename)) + }, error = function(e) { + cat(sprintf("[%s] ✗ Script 90 failed: %s (continuing anyway)\n\n", Sys.time(), e$message)) + }) +} + +# Summary +cat("\n") +cat(strrep("=", 70), "\n") +cat("BATCH PROCESSING COMPLETE\n") +cat(strrep("=", 70), "\n") +cat(sprintf("Processed %d dates from %s to %s\n", + length(date_sequence), + format(START_DATE), + format(END_DATE))) +cat("Check output directory for generated reports\n") +cat(sprintf("Reports location: laravel_app/storage/app/%s/reports/\n", PROJECT)) +cat(strrep("=", 70), "\n\n")