Merge remote-tracking branch 'origin/code-improvements' into translation

This commit is contained in:
Nik Verweel 2026-02-18 16:02:27 +01:00
commit 24fd5bb8b3
22 changed files with 3288 additions and 3709 deletions

20
.gitignore vendored
View file

@ -9,6 +9,14 @@
__pycache__/
*.pyc
*.pyo
*.py[cod]
myenv/
pip-wheel-metadata/
dist/
*.egg-info/
*.egg/
.ipynb_checkpoints
# R Output Files
*.Rout
@ -16,6 +24,7 @@ __pycache__/
*.RData
*.Rdata
.Rproj.user
.Rprofile
Rplots.pdf
*.pdf
@ -48,15 +57,24 @@ reports/
# Experiment Outputs (temporary plots, analysis artifacts)
python_app/harvest_detection_experiments/*/plots/
python_app/harvest_detection_experiments/*/*.ipynb_checkpoints/
CI_report_dashboard_planet_files/
# Cache Files
rosm.cache/
*.cache
renv/cache/
# Logs
*.log
package_manager.log
# Temporary Files
*.tmp
*.save
*.bak
*.swp
*.swo
package_manager.log
# Laravel Storage (contains user data and outputs)
laravel_app/storage/app/*/Data/
laravel_app/storage/app/*/reports/

View file

@ -5,9 +5,12 @@ laravel_app/
data_validation_tool/
python_app/harvest_detection_experiments/
python_app/experiments/
r_app/old_scripts/
r_app/experiments/
phase2_refinement/
webapps/
tools/
old_sh/
output/
renv/
*.py

43
python_app/.gitignore vendored
View file

@ -1,43 +0,0 @@
# .gitignore
# Ignore the virtual environment directory
myenv/
# Byte-compiled / optimized files
__pycache__/
*.py[cod]
*$py.class
# Ignore all .pyc files in any directory
**/*.pyc
# Ignore pip wheel metadata
pip-wheel-metadata/
# Ignore the `dist/` directory used for package distributions
dist/
# Ignore Python egg metadata, regenerated from source files by setuptools
*.egg-info/
*.egg-info/*
*.egg/
# Ignore Jupyter Notebook
.ipynb_checkpoints
# Ignore VSCode directory
.vscode/
# Ignore JetBrains IDEs directory
.idea/
# Ignore MacOS directory
.DS_Store
# Ignore other unwanted file
*.log
*.bak
*.swp
*.swo

View file

@ -111,7 +111,7 @@ def main():
# [3/4] Run model predictions with two-step detection
print("\n[3/4] Running two-step harvest detection...")
print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DOY reset)")
print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DAH reset)")
refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device,
phase1_threshold=0.3, phase1_consecutive=2)

View file

@ -144,7 +144,7 @@ def create_model(model_type: str, input_size: int, hidden_size: int = 128,
# FEATURE ENGINEERING (from src/feature_engineering.py, simplified for inline)
# ============================================================================
def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> pd.DataFrame:
def compute_ci_features(ci_series: pd.Series, dah_series: pd.Series = None) -> pd.DataFrame:
"""Compute all CI-based features (state, velocity, acceleration, min/max/range/std/CV)."""
features = pd.DataFrame(index=ci_series.index)
@ -177,9 +177,9 @@ def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> p
ma = ci_series.rolling(window=window, min_periods=1).mean()
features[f'{window}d_CV'] = features[f'{window}d_std'] / (ma + 1e-6)
# DOY normalized
if doy_series is not None:
features['DOY_normalized'] = doy_series / 450.0
# DAH normalized (Days After Harvest)
if dah_series is not None:
features['DAH_normalized'] = dah_series / 450.0
return features.fillna(0)
@ -193,8 +193,8 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column:
data_df: DataFrame with Date and CI data (may be a window after a harvest)
feature_names: List of feature names to extract
ci_column: Name of CI column
season_anchor_day: Day in FULL sequence where this season started (for DOY reset)
DOY will be recalculated as: 1, 2, 3, ... from this point
season_anchor_day: Day in FULL sequence where this season started (for DAH reset)
DAH will be recalculated as: 1, 2, 3, ... from this point
lookback_start: Starting index in original full data (for season reset calculation)
Returns:
@ -203,23 +203,23 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column:
# Compute all CI features
ci_series = data_df[ci_column].astype(float)
# Compute DOY (age/days since season start) - NOT day-of-year!
# DOY is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365)
# Compute DAH (age/days since season start) - NOT day-of-year!
# DAH is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365)
# It only resets to 1 after a harvest is detected (new season)
doy_series = None
if 'DOY_normalized' in feature_names:
dah_series = None
if 'DAH_normalized' in feature_names:
if season_anchor_day is not None and lookback_start >= season_anchor_day:
# Season was reset after harvest. Recalculate DOY as simple counter from 1
# This is a window starting at or after harvest, so DOY should be: 1, 2, 3, ...
doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
elif 'DOY' in data_df.columns:
# Use DOY directly from CSV - already calculated as continuous age counter
doy_series = pd.Series(data_df['DOY'].astype(float).values, index=data_df.index)
# Season was reset after harvest. Recalculate DAH as simple counter from 1
# This is a window starting at or after harvest, so DAH should be: 1, 2, 3, ...
dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
elif 'DAH' in data_df.columns:
# Use DAH directly from CSV - already calculated as continuous age counter
dah_series = pd.Series(data_df['DAH'].astype(float).values, index=data_df.index)
else:
# Fallback: create continuous age counter (1, 2, 3, ...)
doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
all_features = compute_ci_features(ci_series, doy_series)
all_features = compute_ci_features(ci_series, dah_series)
# Select requested features
requested = [f for f in feature_names if f in all_features.columns]
@ -303,9 +303,9 @@ def load_harvest_data(data_file: Path) -> pd.DataFrame:
def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device,
threshold=0.45, consecutive_days=2):
"""
Phase 1: Growing window detection with DOY season reset.
Phase 1: Growing window detection with DAH season reset.
For each detected harvest, reset DOY counter for the next season.
For each detected harvest, reset DAH counter for the next season.
This allows the model to detect multiple consecutive harvests in multi-year data.
"""
harvest_dates = []

View file

@ -44,7 +44,7 @@
"7d_std",
"14d_std",
"21d_std",
"DOY_normalized"
"DAH_normalized"
],
"model": {
"type": "LSTM",

20
r_app/.gitignore vendored
View file

@ -1,20 +0,0 @@
# .gitignore
# Ignore renv binary cache
renv/cache/
renv
# Ignore temporary files
*.tmp
*.swp
*.save
# Ignore files related to Rproj
.Rproj.user/
.Rhistory
.RData
.Rprofile
# Ignore OSX files
.DS_Store
CI_report_dashboard_planet_files/

View file

@ -48,10 +48,30 @@
# #' safe_log("Check input file", "WARNING")
# #' safe_log("Failed to load data", "ERROR")
# #'
# safe_log <- function(message, level = "INFO") {
# prefix <- sprintf("[%s]", level)
# cat(sprintf("%s %s\n", prefix, message))
# }
#' Log a message with a timestamp and severity level
#'
#' Formats the message as "[timestamp] [LEVEL] message", prints it to the
#' console (suppressed while knitr is rendering a document), and appends the
#' same line to the file named by the global variable `LOG_FILE` when that
#' variable is defined and non-empty. File-write failures are swallowed on
#' purpose so that logging can never abort a pipeline run.
#'
#' @param message Character string to log.
#' @param level Severity label, e.g. "INFO", "WARNING", "ERROR". Default "INFO".
#' @return The formatted log line, invisibly.
safe_log <- function(message, level = "INFO") {
  # Build the full log message with timestamp
  timestamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
  log_msg <- sprintf("[%s] [%s] %s", timestamp, level, message)

  # Only output to console if NOT rendering with knitr
  if (!isTRUE(getOption("knitr.in.progress"))) {
    cat(log_msg, "\n")
  }

  # Always write to the log file if available.
  # inherits = FALSE: look only in the global environment itself, not in
  # attached packages that might happen to export a LOG_FILE binding.
  if (exists("LOG_FILE", envir = .GlobalEnv, inherits = FALSE)) {
    log_file <- get("LOG_FILE", envir = .GlobalEnv, inherits = FALSE)
    if (!is.null(log_file) && nzchar(log_file)) {
      tryCatch({
        cat(log_msg, "\n", file = log_file, append = TRUE)
      }, error = function(e) {
        # Deliberately best-effort: silently skip if log file can't be written
      })
    }
  }

  invisible(log_msg)
}
# #' SmartCane Debug Logging (Conditional)
# #'

View file

@ -14,7 +14,7 @@
# OUTPUT DATA:
# - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/
# - Format: CSV (long format)
# - Columns: field, sub_field, Date, FitData, DOY, value
# - Columns: field, sub_field, Date, FitData, DAH, value
#
# USAGE:
# Rscript 21_convert_ci_rds_to_csv.R [project]
@ -38,7 +38,7 @@
# NOTES:
# - Data source: Uses interpolated CI data from Script 30 (growth model output)
# - Handles both wide format and long format inputs from growth model
# - DOY (Day of Year): Calculated from date for seasonal analysis
# - DAH (Days After Harvest): Calculated from date; represents crop age in days
# - Python integration: CSV format compatible with pandas/scikit-learn workflows
# - Used by: Python harvest detection models (harvest_date_prediction.py)
# - Exports complete growth curves with interpolated values for ML training
@ -82,13 +82,13 @@ wide_to_long_ci_data <- function(ci_data_wide) {
filter(!is.na(FitData))
}
#' Create daily interpolated sequences with DOY for each field
#' Create daily interpolated sequences with DAH for each field
#'
#' For each field/sub_field combination, creates complete daily sequences from first to last date,
#' fills in measurements, and interpolates missing dates.
#'
#' @param ci_data_long Long format tibble: field, sub_field, Date, FitData
#' @return Tibble with: field, sub_field, Date, FitData, DOY, value
#' @return Tibble with: field, sub_field, Date, FitData, DAH, value
create_interpolated_daily_sequences <- function(ci_data_long) {
ci_data_long %>%
group_by(field, sub_field) %>%
@ -106,7 +106,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) {
Date = date_seq,
value = NA_real_,
FitData = NA_real_,
DOY = seq_along(date_seq) # Continuous day counter: 1, 2, 3, ...
DAH = seq_along(date_seq) # Continuous day counter: 1, 2, 3, ...
)
# Fill in actual measurement values
@ -124,7 +124,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) {
})
) %>%
unnest(data) %>%
select(field, sub_field, Date, FitData, DOY, value) %>%
select(field, sub_field, Date, FitData, DAH, value) %>%
arrange(field, Date)
}

View file

@ -208,7 +208,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season,
# Add additional columns
CI <- CI %>%
dplyr::mutate(
DOY = seq(1, n(), 1),
DAH = seq(1, n(), 1),
model = paste0("Data", season, " : ", field_name),
season = season,
subField = field_name

View file

@ -141,12 +141,15 @@ suppressPackageStartupMessages({
library(writexl) # For writing Excel outputs (KPI summary tables)
library(progress) # For progress bars during field processing
# ML/Analysis (optional - only for harvest model inference)
tryCatch({
library(torch) # For PyTorch model inference (harvest readiness prediction)
}, error = function(e) {
message("Note: torch package not available - harvest model inference will be skipped")
})
# ML models (for yield prediction KPI)
library(caret) # For training Random Forest with cross-validation
library(CAST) # For Forward Feature Selection in caret models
# ML models (for yield prediction KPI)
library(caret) # For training Random Forest with cross-validation
library(CAST) # For Forward Feature Selection in caret models
})
# ============================================================================
@ -316,6 +319,9 @@ main <- function() {
# Use centralized paths from setup object (no need for file.path calls)
weekly_mosaic <- setup$weekly_mosaic_dir
daily_vals_dir <- setup$daily_ci_vals_dir
reports_dir <- setup$kpi_reports_dir
data_dir <- setup$data_dir
tryCatch({
source(here("r_app", "30_growth_model_utils.R"))
@ -332,8 +338,8 @@ main <- function() {
message("WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)")
message(strrep("=", 70))
# Prepare inputs for KPI calculation (already created by setup_project_directories)
reports_dir_kpi <- setup$kpi_reports_dir
# Prepare outputs and inputs for KPI calculation (already created by setup_project_directories)
reports_dir_kpi <- file.path(setup$reports_dir, "kpis")
cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir
# Load field boundaries for workflow (use data_dir from setup)
@ -356,18 +362,15 @@ main <- function() {
stop("ERROR loading field boundaries: ", e$message)
})
# Load harvesting data
if (!exists("harvesting_data")) {
warning("harvesting_data not loaded. TCH KPI will use placeholder values.")
harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric())
}
# Load harvesting data for yield prediction (using common helper function)
harvesting_data <- load_harvest_data(setup$data_dir)
# Extract current week/year from end_date
current_week <- as.numeric(format(end_date, "%V"))
current_year <- as.numeric(format(end_date, "%G"))
# Call with correct signature
kpi_results <- calculate_all_kpis(
kpi_results <- calculate_all_field_analysis_agronomic_support(
field_boundaries_sf = field_boundaries_sf,
current_week = current_week,
current_year = current_year,
@ -376,9 +379,14 @@ main <- function() {
ci_rds_path = NULL,
harvesting_data = harvesting_data,
output_dir = reports_dir_kpi,
data_dir = setup$data_dir,
project_dir = project_dir
)
# Extract results
field_analysis_df <- kpi_results$field_analysis_df
export_paths <- kpi_results$export_paths
cat("\n=== KPI CALCULATION COMPLETE ===\n")
cat("Summary tables saved for Script 90 integration\n")
cat("Output directory:", reports_dir_kpi, "\n\n")
@ -389,66 +397,13 @@ main <- function() {
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
message(strrep("=", 70))
# Set reports_dir for CANE_SUPPLY workflow (used by export functions)
reports_dir <- setup$kpi_reports_dir
# Define variables needed for workflow functions
data_dir <- setup$data_dir
# Continue with existing per-field analysis code below
message("\n", strrep("-", 70))
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ")
message(strrep("-", 70))
weeks <- calculate_week_numbers(end_date)
current_week <- weeks$current_week
current_year <- weeks$current_year
previous_week <- weeks$previous_week
previous_year <- weeks$previous_year
message(paste("Week:", current_week, "/ Year (ISO 8601):", current_year))
# Find per-field weekly mosaics
message("Finding per-field weekly mosaics...")
if (!dir.exists(weekly_mosaic)) {
stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic,
"\nScript 40 (mosaic creation) must be run first."))
}
field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE)
field_dirs <- field_dirs[field_dirs != ""]
if (length(field_dirs) == 0) {
stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic,
"\nScript 40 must create weekly_mosaic/{FIELD}/ structure."))
}
# Verify we have mosaics for this week
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year)
per_field_files <- c()
for (field in field_dirs) {
field_mosaic_dir <- file.path(weekly_mosaic, field)
files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
if (length(files) > 0) {
per_field_files <- c(per_field_files, files)
}
}
if (length(per_field_files) == 0) {
stop(paste("ERROR: No mosaics found for week", current_week, "year", current_year,
"\nExpected pattern:", single_file_pattern,
"\nChecked:", weekly_mosaic))
}
message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics"))
mosaic_mode <- "per-field"
mosaic_dir <- weekly_mosaic
# Load field boundaries
# Load field boundaries for workflow (use data_dir from setup)
message("\nLoading field boundaries for KPI calculation...")
tryCatch({
boundaries_result <- load_field_boundaries(data_dir)
boundaries_result <- load_field_boundaries(setup$data_dir)
if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
field_boundaries_sf <- boundaries_result$field_boundaries_sf
@ -460,524 +415,35 @@ main <- function() {
stop("No fields loaded from boundaries")
}
message(paste("    Loaded", nrow(field_boundaries_sf), "fields"))
}, error = function(e) {
stop("ERROR loading field boundaries: ", e$message)
})
message("Loading historical field data for trend calculations...")
# Load up to 8 weeks (max of 4-week and 8-week trend requirements)
# Function gracefully handles missing weeks and loads whatever exists
num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG) # Always 8
message(paste(" Attempting to load up to", num_weeks_to_load, "weeks of historical data..."))
# Load harvesting data for yield prediction (using common helper function)
harvesting_data <- load_harvest_data(setup$data_dir)
# Only auto-generate on first call (not in recursive calls from within load_historical_field_data)
allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv)
# Extract current week/year from end_date
current_week <- as.numeric(format(end_date, "%V"))
current_year <- as.numeric(format(end_date, "%G"))
historical_data <- load_historical_field_data(project_dir, current_week, current_year, reports_dir,
num_weeks = num_weeks_to_load,
auto_generate = allow_auto_gen,
field_boundaries_sf = field_boundaries_sf,
daily_vals_dir = daily_vals_dir)
# Load harvest.xlsx for planting dates (season_start)
message("\nLoading harvest data from harvest.xlsx for planting dates...")
harvest_file_path <- file.path(data_dir, "harvest.xlsx")
harvesting_data <- tryCatch({
if (file.exists(harvest_file_path)) {
harvest_raw <- readxl::read_excel(harvest_file_path)
harvest_raw$season_start <- as.Date(harvest_raw$season_start)
harvest_raw$season_end <- as.Date(harvest_raw$season_end)
message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows"))
harvest_raw
} else {
message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path))
NULL
}
}, error = function(e) {
message(paste(" ERROR loading harvest.xlsx:", e$message))
NULL
})
planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
# Validate planting_dates
if (is.null(planting_dates) || nrow(planting_dates) == 0) {
message("WARNING: No planting dates available. Using NA for all fields.")
planting_dates <- data.frame(
field_id = field_boundaries_sf$field,
planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)),
stringsAsFactors = FALSE
)
}
# Build per-field configuration
message("\nPreparing mosaic configuration for statistics calculation...")
message(" ✓ Using per-field mosaic architecture (1 TIFF per field)")
# Per-field mode: each field has its own TIFF in weekly_mosaic/{FIELD}/week_*.tif
field_grid <- list(
mosaic_dir = mosaic_dir,
mode = "per-field"
)
message("\nUsing modular RDS-based approach for weekly statistics...")
# Load/calculate CURRENT week stats (from tiles or RDS cache)
message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...")
current_stats <- load_or_calculate_weekly_stats(
week_num = current_week,
year = current_year,
# Call the main orchestrator function from kpi_calculation_utils.R
workflow_results <- calculate_field_analysis_cane_supply(
setup = setup,
client_config = client_config,
end_date = end_date,
project_dir = project_dir,
weekly_mosaic = weekly_mosaic,
daily_vals_dir = daily_vals_dir,
field_boundaries_sf = field_boundaries_sf,
mosaic_dir = field_grid$mosaic_dir,
reports_dir = reports_dir,
report_date = end_date
data_dir = data_dir
)
message(paste(" ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week"))
# Load/calculate PREVIOUS week stats (from RDS cache or tiles)
message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...")
# Calculate report date for previous week (7 days before current)
prev_report_date <- end_date - 7
prev_stats <- load_or_calculate_weekly_stats(
week_num = previous_week,
year = previous_year,
project_dir = project_dir,
field_boundaries_sf = field_boundaries_sf,
mosaic_dir = field_grid$mosaic_dir,
reports_dir = reports_dir,
report_date = prev_report_date
)
message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week"))
message(paste(" Columns in prev_stats:", paste(names(prev_stats), collapse = ", ")))
message(paste(" CV column non-NA values in prev_stats:", sum(!is.na(prev_stats$CV))))
# Apply trend calculations (requires both weeks)
message("\n3. Calculating trend columns...")
current_stats <- calculate_kpi_trends(current_stats, prev_stats,
project_dir = project_dir,
reports_dir = reports_dir,
current_week = current_week,
year = current_year)
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
# Load weekly harvest probabilities from script 31 (if available)
# Note: Script 31 saves to reports/kpis/field_stats/ (not field_level)
message("\n4. Loading harvest probabilities from script 31...")
harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats")
harvest_prob_file <- file.path(harvest_prob_dir,
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year))
message(paste(" Looking for:", harvest_prob_file))
imminent_prob_data <- tryCatch({
if (file.exists(harvest_prob_file)) {
prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE,
col_types = readr::cols(.default = readr::col_character()))
message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields"))
prob_df %>%
select(field, imminent_prob, detected_prob) %>%
rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob)
} else {
message(paste(" INFO: Harvest probabilities not available (script 31 not run)"))
NULL
}
}, error = function(e) {
message(paste(" WARNING: Could not load harvest probabilities:", e$message))
NULL
})
# ============================================================================
# CALCULATE GAP FILLING KPI (2σ method from kpi_utils.R)
# ============================================================================
message("\nCalculating gap filling scores (2σ method)...")
# Process per-field mosaics
message(paste(" Using per-field mosaics for", length(per_field_files), "fields"))
field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field)
process_gap_for_field <- function(field_file) {
field_id <- basename(dirname(field_file))
field_bounds <- field_boundaries_by_id[[field_id]]
if (is.null(field_bounds) || nrow(field_bounds) == 0) {
return(data.frame(Field_id = field_id, gap_score = NA_real_))
}
tryCatch({
field_raster <- terra::rast(field_file)
ci_band_name <- "CI"
if (!(ci_band_name %in% names(field_raster))) {
return(data.frame(Field_id = field_id, gap_score = NA_real_))
}
field_ci_band <- field_raster[[ci_band_name]]
names(field_ci_band) <- "CI"
gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds)
if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) {
return(data.frame(Field_id = field_id, gap_score = NA_real_))
}
gap_scores <- gap_result$field_results
gap_scores$Field_id <- gap_scores$field
gap_scores <- gap_scores[, c("Field_id", "gap_score")]
stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE))
}, error = function(e) {
message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message))
data.frame(Field_id = field_id, gap_score = NA_real_)
})
}
# Process fields sequentially with progress bar
message(" Processing gap scores for ", length(per_field_files), " fields...")
pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50)
results_list <- lapply(seq_along(per_field_files), function(idx) {
result <- process_gap_for_field(per_field_files[[idx]])
utils::setTxtProgressBar(pb, idx)
result
})
close(pb)
gap_scores_df <- dplyr::bind_rows(results_list)
if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
gap_scores_df <- gap_scores_df %>%
dplyr::group_by(Field_id) %>%
dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop")
message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields"))
message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%"))
} else {
message(" WARNING: No gap scores calculated from per-field mosaics")
gap_scores_df <- NULL
}
# ============================================================================
# Build final output dataframe with all 22 columns (including Gap_score)
# ============================================================================
message("\nBuilding final field analysis output...")
# Pre-calculate acreages with geometry validation
# This avoids geometry errors during field_analysis construction
acreage_lookup <- tryCatch({
lookup_df <- field_boundaries_sf %>%
sf::st_drop_geometry() %>%
as.data.frame() %>%
mutate(
geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) {
tryCatch({
sf::st_is_valid(field_boundaries_sf[idx, ])
}, error = function(e) FALSE)
}),
area_ha = 0
)
# Calculate area for valid geometries
for (idx in which(lookup_df$geometry_valid)) {
tryCatch({
area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ]))
lookup_df$area_ha[idx] <- area_m2 / 10000
}, error = function(e) {
lookup_df$area_ha[idx] <<- NA_real_
})
}
# Convert hectares to acres
lookup_df %>%
mutate(acreage = area_ha / 0.404686) %>%
select(field, acreage)
}, error = function(e) {
message(paste("Warning: Could not calculate acreages from geometries -", e$message))
data.frame(field = character(0), acreage = numeric(0))
})
field_analysis_df <- current_stats %>%
mutate(
# Column 2: Farm_Section (user fills)
Farm_Section = NA_character_,
# Column 3: Field_name (from GeoJSON - already have Field_id, can look up)
Field_name = Field_id,
# Column 4: Acreage (calculate from geometry)
Acreage = {
acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)]
if_else(is.na(acreages_vec), 0, acreages_vec)
},
# Columns 5-6: Already in current_stats (Mean_CI, Weekly_ci_change)
# Column 7: Four_week_trend (from current_stats)
# Column 8: Last_harvest_or_planting_date (from harvest.xlsx - season_start)
Last_harvest_or_planting_date = {
planting_dates$planting_date[match(Field_id, planting_dates$field_id)]
},
# Column 9: Age_week (calculated from report date and planting date)
Age_week = {
sapply(seq_len(nrow(current_stats)), function(idx) {
planting_dt <- Last_harvest_or_planting_date[idx]
if (is.na(planting_dt)) {
return(NA_real_)
}
round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 0)
})
},
# Column 10: Phase (recalculate based on updated Age_week)
Phase = {
sapply(Age_week, function(age) {
if (is.na(age)) return(NA_character_)
if (age >= 0 & age < 4) return("Germination")
if (age >= 4 & age < 17) return("Tillering")
if (age >= 17 & age < 39) return("Grand Growth")
if (age >= 39) return("Maturation")
NA_character_
})
},
# Column 11: nmr_of_weeks_analysed (already in current_stats from calculate_kpi_trends)
# Column 12: Germination_progress (calculated here from CI values)
# Bin Pct_pixels_CI_gte_2 into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100%
Germination_progress = sapply(Pct_pixels_CI_gte_2, function(pct) {
if (is.na(pct)) return(NA_character_)
if (pct >= 95) return("95-100%")
else if (pct >= 90) return("90-95%")
else if (pct >= 80) return("80-90%")
else if (pct >= 70) return("70-80%")
else if (pct >= 60) return("60-70%")
else if (pct >= 50) return("50-60%")
else if (pct >= 40) return("40-50%")
else if (pct >= 30) return("30-40%")
else if (pct >= 20) return("20-30%")
else if (pct >= 10) return("10-20%")
else return("0-10%")
}),
# Column 13: Imminent_prob (from script 31 or NA if not available)
Imminent_prob = {
if (!is.null(imminent_prob_data)) {
imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)]
} else {
rep(NA_real_, nrow(current_stats))
}
},
# Column 14: Status_Alert (based on harvest probability + crop health status)
# Priority order: Ready for harvest-check → Strong decline → Harvested/bare → NA
Status_Alert = {
sapply(seq_len(nrow(current_stats)), function(idx) {
imminent_prob <- Imminent_prob[idx]
age_w <- Age_week[idx]
weekly_ci_chg <- Weekly_ci_change[idx]
mean_ci_val <- Mean_CI[idx]
# Priority 1: Ready for harvest-check (imminent + mature cane ≥12 months)
if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_w) && age_w >= 52) {
return("Ready for harvest-check")
}
# Priority 2: Strong decline in crop health (drop ≥2 points but still >1.5)
if (!is.na(weekly_ci_chg) && weekly_ci_chg <= -2.0 && !is.na(mean_ci_val) && mean_ci_val > 1.5) {
return("Strong decline in crop health")
}
# Priority 3: Harvested/bare (Mean CI < 1.5)
if (!is.na(mean_ci_val) && mean_ci_val < 1.5) {
return("Harvested/bare")
}
# Fallback: no alert
NA_character_
})
},
# Columns 15-16: CI-based columns already in current_stats (CI_range, CI_Percentiles)
# Column 17: Already in current_stats (CV)
# Column 18: Already in current_stats (CV_Trend_Short_Term)
# Column 19: CV_Trend_Long_Term (from current_stats - raw slope value)
# Column 19b: CV_Trend_Long_Term_Category (categorical interpretation of slope)
# 3 classes: More uniform (slope < -0.01), Stable uniformity (-0.01 to 0.01), Less uniform (slope > 0.01)
CV_Trend_Long_Term_Category = {
sapply(current_stats$CV_Trend_Long_Term, function(slope) {
if (is.na(slope)) {
return(NA_character_)
} else if (slope < -0.01) {
return("More uniform")
} else if (slope > 0.01) {
return("Less uniform")
} else {
return("Stable uniformity")
}
})
},
# Columns 20-21: Already in current_stats (Cloud_pct_clear, Cloud_category)
# Bin Cloud_pct_clear into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100%
Cloud_pct_clear = sapply(Cloud_pct_clear, function(pct) {
if (is.na(pct)) return(NA_character_)
if (pct >= 95) return("95-100%")
else if (pct >= 90) return("90-95%")
else if (pct >= 80) return("80-90%")
else if (pct >= 70) return("70-80%")
else if (pct >= 60) return("60-70%")
else if (pct >= 50) return("50-60%")
else if (pct >= 40) return("40-50%")
else if (pct >= 30) return("30-40%")
else if (pct >= 20) return("20-30%")
else if (pct >= 10) return("10-20%")
else return("0-10%")
}),
# Column 22: Gap_score (2σ below median - from kpi_utils.R)
Gap_score = {
if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
# Debug: Print first few gap scores
message(sprintf(" Joining %d gap scores to field_analysis (first 3: %s)",
nrow(gap_scores_df),
paste(head(gap_scores_df$gap_score, 3), collapse=", ")))
message(sprintf(" First 3 Field_ids in gap_scores_df: %s",
paste(head(gap_scores_df$Field_id, 3), collapse=", ")))
message(sprintf(" First 3 Field_ids in current_stats: %s",
paste(head(current_stats$Field_id, 3), collapse=", ")))
gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)]
} else {
rep(NA_real_, nrow(current_stats))
}
}
) %>%
select(
all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert",
"Last_harvest_or_planting_date", "Age_week", "Phase",
"Germination_progress",
"Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles",
"CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
"Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score"))
)
message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns (including Gap_score)"))
export_paths <- export_field_analysis_excel(
field_analysis_df,
NULL,
project_dir,
current_week,
current_year,
reports_dir
)
cat("\n--- Per-field Results (first 10) ---\n")
available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category")
available_cols <- available_cols[available_cols %in% names(field_analysis_df)]
if (length(available_cols) > 0) {
print(head(field_analysis_df[, available_cols], 10))
}
# ========== FARM-LEVEL KPI AGGREGATION ==========
# Aggregate the per-field analysis into farm-level summary statistics
cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n")
# Filter to only fields that have actual data (non-NA CI and valid acreage)
field_data <- field_analysis_df %>%
filter(!is.na(Mean_CI) & !is.na(Acreage)) %>%
filter(Acreage > 0)
if (nrow(field_data) > 0) {
if (nrow(field_data) > 0) {
# Create summary statistics
farm_summary <- list()
# 1. PHASE DISTRIBUTION
phase_dist <- field_data %>%
group_by(Phase) %>%
summarise(
num_fields = n(),
acreage = sum(Acreage, na.rm = TRUE),
.groups = 'drop'
) %>%
rename(Category = Phase)
farm_summary$phase_distribution <- phase_dist
# 2. STATUS ALERT DISTRIBUTION
status_dist <- field_data %>%
group_by(Status_Alert) %>%
summarise(
num_fields = n(),
acreage = sum(Acreage, na.rm = TRUE),
.groups = 'drop'
) %>%
rename(Category = Status_Alert)
farm_summary$status_distribution <- status_dist
# 3. CLOUD COVERAGE DISTRIBUTION
cloud_dist <- field_data %>%
group_by(Cloud_category) %>%
summarise(
num_fields = n(),
acreage = sum(Acreage, na.rm = TRUE),
.groups = 'drop'
) %>%
rename(Category = Cloud_category)
farm_summary$cloud_distribution <- cloud_dist
# 4. OVERALL STATISTICS
farm_summary$overall_stats <- data.frame(
total_fields = nrow(field_data),
total_acreage = sum(field_data$Acreage, na.rm = TRUE),
mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2),
median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2),
mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4),
week = current_week,
year = current_year,
date = as.character(end_date)
)
# Print summaries
cat("\n--- PHASE DISTRIBUTION ---\n")
print(phase_dist)
cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n")
print(status_dist)
cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n")
print(cloud_dist)
cat("\n--- OVERALL FARM STATISTICS ---\n")
print(farm_summary$overall_stats)
farm_kpi_results <- farm_summary
} else {
farm_kpi_results <- NULL
}
} else {
farm_kpi_results <- NULL
}
# ========== FINAL SUMMARY ==========
cat("\n", strrep("=", 70), "\n")
cat("80_CALCULATE_KPIs.R - COMPLETION SUMMARY\n")
cat(strrep("=", 70), "\n")
cat("Per-field analysis fields analyzed:", nrow(field_analysis_df), "\n")
cat("Excel export:", export_paths$excel, "\n")
cat("RDS export:", export_paths$rds, "\n")
cat("CSV export:", export_paths$csv, "\n")
if (!is.null(farm_kpi_results)) {
cat("\nFarm-level KPIs: CALCULATED\n")
} else {
cat("\nFarm-level KPIs: SKIPPED (no valid tile data extracted)\n")
}
cat("\n✓ Consolidated KPI calculation complete!\n")
cat(" - Per-field data exported\n")
cat(" - Farm-level KPIs calculated\n")
cat(" - All outputs in:", reports_dir, "\n\n")
# Extract results
field_analysis_df <- workflow_results$field_analysis_df
farm_kpi_results <- workflow_results$farm_kpi_results
export_paths <- workflow_results$export_paths
} else {
# Unknown client type - log warning and exit

File diff suppressed because it is too large Load diff

View file

@ -27,173 +27,685 @@ library(tidyr)
library(readxl)
library(writexl)
# ============================================================================
# ALERT THRESHOLDS & CONFIGURATION CONSTANTS
# ============================================================================
# CI change thresholds for alert categorization and status determination.
# These values are project-standard and should be consistent across all workflows.
# Units: change in mean Chlorophyll Index (CI) per week (4-week trend values are
# slopes over the same scale) — assumed from usage in calculate_status_alert;
# TODO confirm against the project-wide KPI definitions.
CI_CHANGE_RAPID_GROWTH_THRESHOLD <- 0.5 # Weekly CI change for positive growth alert
CI_CHANGE_POSITIVE_GROWTH_THRESHOLD <- 0.2 # Weekly CI change for acceptable growth
CI_CHANGE_STABLE_THRESHOLD <- -0.2 # Weekly CI change for stable status (between -0.2 and +0.2)
CI_CHANGE_STRESS_TREND_THRESHOLD <- -0.3 # 4-week trend threshold for stress detection
CI_CHANGE_RAPID_DECLINE_THRESHOLD <- -0.5 # Weekly CI change for rapid decline alert
# Deprecated aliases (for backward compatibility if needed):
# Kept so older call sites keep working; new code should use the names above.
CI_CHANGE_DECLINE_THRESHOLD <- CI_CHANGE_RAPID_DECLINE_THRESHOLD # Weekly CI change threshold for decline alerts
CI_CHANGE_INCREASE_THRESHOLD <- CI_CHANGE_RAPID_GROWTH_THRESHOLD # Weekly CI change threshold for increase alerts
# ============================================================================
# ANGATA-SPECIFIC HELPER FUNCTIONS (Placeholder Section)
# ============================================================================
#' Placeholder: ANGATA harvest readiness assessment
#'
#' Future implementation will integrate ANGATA-specific harvest readiness criteria:
#' - Maturation phase detection (CI threshold-based)
#' - Field age tracking (days since planting)
#' - Weather-based ripeness indicators (if available)
#' - Historical yield correlations
#'
#' @param field_ci CI values for the field
#' @param field_age_days Days since planting
#'
#' @return Character string with harvest readiness assessment
assess_harvest_readiness <- function(field_ci, field_age_days = NULL) {
# Placeholder implementation
# Real version would check:
# - Mean CI > 3.5 (maturation threshold)
# - Age > 350 days
# - Weekly growth rate < threshold (mature plateau)
#' Calculate acreage for each field from geometry
#'
#' Projects the field boundaries to an equal-area CRS, computes the area of
#' every valid geometry, converts hectares to acres, and aggregates sub-field
#' rows into one acreage per field.
#'
#' NOTE: this removes two stray merge-artifact lines that referenced an
#' undefined `field_ci` variable and left an unbalanced brace inside the
#' tryCatch block, which made the original function syntactically invalid.
#'
#' @param field_boundaries_sf sf object with field geometries
#' @return data.frame with field and acreage columns; an empty data.frame of
#'   the same shape if area calculation fails entirely
calculate_field_acreages <- function(field_boundaries_sf) {
  tryCatch({
    # Project to equal-area CRS (EPSG:6933) for accurate area calculations
    field_boundaries_proj <- sf::st_transform(field_boundaries_sf, "EPSG:6933")
    lookup_df <- field_boundaries_proj %>%
      sf::st_drop_geometry() %>%
      as.data.frame() %>%
      mutate(
        # Per-row validity check; any error counts as an invalid geometry
        geometry_valid = sapply(seq_len(nrow(field_boundaries_proj)), function(idx) {
          tryCatch({
            sf::st_is_valid(field_boundaries_proj[idx, ])
          }, error = function(e) FALSE)
        }),
        area_ha = 0
      )
    # Calculate area (hectares) only for valid geometries; failures become NA
    valid_indices <- which(lookup_df$geometry_valid)
    areas_ha <- vapply(valid_indices, function(idx) {
      tryCatch({
        area_m2 <- as.numeric(sf::st_area(field_boundaries_proj[idx, ]))
        area_m2 / 10000
      }, error = function(e) NA_real_)
    }, numeric(1))
    lookup_df$area_ha[valid_indices] <- areas_ha
    # Convert hectares to acres (1 acre = 0.404686 ha)
    lookup_df %>%
      mutate(acreage = area_ha / 0.404686) %>%
      # Aggregate by field to handle multi-row fields (e.g., sub_fields)
      group_by(field) %>%
      summarise(acreage = sum(acreage, na.rm = TRUE), .groups = "drop") %>%
      select(field, acreage)
  }, error = function(e) {
    message(paste("Warning: Could not calculate acreages from geometries -", e$message))
    data.frame(field = character(0), acreage = numeric(0))
  })
}
#' Calculate age in weeks from planting date
#'
#' @param planting_date Date of planting (may be NA, NULL, or zero-length
#'   when unknown)
#' @param reference_date Date to calculate age relative to (typically end_date)
#' @return Numeric age in weeks (rounded to nearest whole week), or NA_real_
#'   when the planting date is missing
calculate_age_week <- function(planting_date, reference_date) {
  # Guard against NULL / zero-length input as well as NA: `is.na(NULL)`
  # returns logical(0), which would make the bare `if (is.na(...))` error.
  if (is.null(planting_date) || length(planting_date) != 1 || is.na(planting_date)) {
    return(NA_real_)
  }
  round(as.numeric(difftime(reference_date, planting_date, units = "weeks")), 0)
}
mean_ci <- mean(field_ci, na.rm = TRUE)
#' Assign crop phase based on age in weeks
#'
#' Thin delegation to the canonical get_phase_by_age() lookup (defined with
#' PHASE_DEFINITIONS in 80_utils_common.R) so that every workflow uses exactly
#' the same phase boundaries. Do not reimplement boundary logic here.
#'
#' Phase boundaries (per PHASE_DEFINITIONS):
#'   Germination 0-6 wk · Tillering 4-16 wk · Grand Growth 17-39 wk ·
#'   Maturation 39+ wk
#'
#' @param age_week Numeric age in weeks
#' @return Character phase name (from PHASE_DEFINITIONS or "Unknown")
calculate_phase <- function(age_week) {
  # Single source of truth for phase boundaries lives in 80_utils_common.R.
  get_phase_by_age(age_week)
}
if (mean_ci > 3.5) {
return("Ready for harvest")
} else if (mean_ci > 2.5) {
return("Approaching harvest readiness")
#' Bin a percentage into interval labels
#'
#' Maps a 0-100 percentage onto 10%-wide bins, with the top decile split into
#' "90-95%" and "95-100%". Values below 0 fall into "0-10%" and values of 95
#' or above into "95-100%", matching the original threshold chain exactly.
#'
#' @param pct Numeric percentage value (0-100)
#' @return Character bin label, or NA_character_ for NA input
bin_percentage <- function(pct) {
  if (is.na(pct)) {
    return(NA_character_)
  }
  # Lower edges of each bin above the first; findInterval() counts how many
  # edges pct has reached, which indexes the matching label (0 -> "0-10%").
  bin_edges <- c(10, 20, 30, 40, 50, 60, 70, 80, 90, 95)
  bin_labels <- c("0-10%", "10-20%", "20-30%", "30-40%", "40-50%",
                  "50-60%", "60-70%", "70-80%", "80-90%", "90-95%", "95-100%")
  bin_labels[findInterval(pct, bin_edges) + 1]
}
#' Calculate germination progress from CI threshold percentage
#'
#' Thin wrapper over bin_percentage(): expresses germination progress as the
#' binned share of pixels whose CI is at or above 2. (Presumably CI >= 2 marks
#' emerged vegetation — TODO confirm against the project CI threshold docs.)
#'
#' @param pct_pixels_ci_gte_2 Percentage of pixels with CI >= 2 (0-100)
#' @return Character bin label (e.g. "80-90%"), or NA_character_ for NA input
calculate_germination_progress <- function(pct_pixels_ci_gte_2) {
  bin_percentage(pct_pixels_ci_gte_2)
}
#' Categorize CV trend (long-term slope) into qualitative labels
#'
#' A negative slope in the coefficient of variation means the field is becoming
#' more uniform over time; a positive slope means less uniform. Slopes within
#' ±0.01 are treated as stable.
#'
#' NOTE: removes a stray merge-artifact line (`return("Not ready - continue
#' monitoring")`) that shadowed the documented "Stable uniformity" return,
#' making it unreachable.
#'
#' @param cv_slope Numeric slope from CV trend analysis
#' @return Character category: "More uniform", "Stable uniformity", or
#'   "Less uniform"; NA_character_ for NA input
categorize_cv_trend_long_term <- function(cv_slope) {
  if (is.na(cv_slope)) {
    return(NA_character_)
  } else if (cv_slope < -0.01) {
    return("More uniform")
  } else if (cv_slope > 0.01) {
    return("Less uniform")
  } else {
    return("Stable uniformity")
  }
}
#' Determine status alert for CANE_SUPPLY client (harvest/milling workflow)
#'
#' Alerts focus on: harvest readiness, crop health monitoring, exception
#' detection. Uses WEEKLY trends (Four_week_trend), not daily noise, for
#' consistency. Designed for harvest/milling clients who manage expectation,
#' not daily operations.
#'
#' NOTE: this replaces a diff/merge-garbled span that interleaved the removed
#' placeholder `assess_supply_chain_status()` with this function — including a
#' stray `return(field_analysis)` (an undefined name) directly after the
#' signature, which made every priority branch unreachable.
#'
#' Priority order:
#' 1. harvest_ready → Schedule harvest operations
#' 2. harvested_bare → Record completion / detect unexpected harvest
#' 3. stress_detected → Monitor crop health (consistent decline)
#' 4. germination_delayed → Early warning for young fields
#' 5. growth_on_track → Positive signal (no action needed)
#' 6. NA → Normal growth (no alert)
#'
#' @param imminent_prob Numeric harvest probability (0-1)
#' @param age_week Numeric age in weeks since planting/harvest
#' @param mean_ci Numeric mean Chlorophyll Index
#' @param four_week_trend Numeric 4-week trend in CI (slope of growth)
#' @param weekly_ci_change Numeric week-over-week CI change
#' @param cv Numeric coefficient of variation (field uniformity)
#' @return Character status alert code, or NA_character_ for normal growth
calculate_status_alert <- function(imminent_prob, age_week, mean_ci,
                                   four_week_trend, weekly_ci_change, cv) {
  # Priority 1: HARVEST READY - highest business priority.
  # Field is mature (>= 52 weeks) AND harvest model predicts imminent harvest.
  if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_week) && age_week >= 52) {
    return("harvest_ready")
  }
  # Priority 2: HARVESTED/BARE - indicator of completion (or unexpected harvest).
  # Mean CI dropped below the vegetative threshold (1.5).
  if (!is.na(mean_ci) && mean_ci < 1.5) {
    return("harvested_bare")
  }
  # Priority 3: STRESS DETECTED - consistent health decline (weekly trend).
  # Uses Four_week_trend (smooth trend), NOT daily fluctuation, to avoid noise.
  # Crop declining but not yet bare -> opportunity to investigate.
  if (!is.na(four_week_trend) && four_week_trend < CI_CHANGE_STRESS_TREND_THRESHOLD &&
      !is.na(mean_ci) && mean_ci > 1.5) {
    return("stress_detected")
  }
  # Priority 4: GERMINATION DELAYED - early warning for young fields.
  # Age 4-8 weeks is the typical germination window; low CI = slow start.
  # NOTE(review): as ordered this branch is unreachable — priority 2 already
  # returns "harvested_bare" for any mean_ci < 1.5. Confirm whether young
  # fields should be checked before the bare-field test.
  if (!is.na(age_week) && age_week >= 4 && age_week <= 8 &&
      !is.na(mean_ci) && mean_ci < 1.5) {
    return("germination_delayed")
  }
  # Priority 5: GROWTH ON TRACK - positive operational status.
  # Healthy, uniform (CV < 0.15) and stable/positive on both weekly and
  # 4-week trends (>= CI_CHANGE_STABLE_THRESHOLD).
  if (!is.na(cv) && cv < 0.15 &&
      !is.na(four_week_trend) && four_week_trend >= CI_CHANGE_STABLE_THRESHOLD &&
      !is.na(weekly_ci_change) && weekly_ci_change >= CI_CHANGE_STABLE_THRESHOLD) {
    return("growth_on_track")
  }
  # Default: NORMAL GROWTH (no specific alert) - continue normal monitoring.
  NA_character_
}
#' Calculate yield prediction for CANE_SUPPLY workflows (wrapper)
#'
#' Delegates to the shared yield prediction model in 80_utils_common.R so that
#' CANE_SUPPLY clients (e.g., ANGATA) get the same ML-based yield forecasting
#' as other workflows.
#'
#' The shared model is a Random Forest with Forward Feature Selection trained
#' on cumulative Canopy Index (CI), Days After Harvest (DAH, crop age), and
#' CI-per-day (growth velocity). Mature fields (DAH >= DAH_MATURITY_THRESHOLD,
#' roughly 8 months) are bucketed into quartiles: Top 25% (high yield),
#' Average (mid-range), Lowest 25% (lower yield).
#'
#' Data requirements:
#' - harvesting_data must include a tonnage_ha column (yield in t/ha) for
#'   training.
#' - cumulative_CI_vals_dir must contain
#'   "All_pivots_Cumulative_CI_quadrant_year_v2.rds".
#' - If either is missing the shared function returns a graceful fallback with
#'   NA values (never fabricated numbers).
#'
#' Integration: can be called from calculate_all_field_kpis() in the
#' cane_supply workflow to add a "Yield_Forecast" column to the 22-column KPI
#' output.
#'
#' @param field_boundaries_sf SF object with field geometries
#' @param harvesting_data Data frame with harvest history (tonnage_ha column)
#' @param cumulative_CI_vals_dir Directory with combined CI RDS files
#' @return List with:
#'   - summary: data frame with field_groups, count, and yield quartile
#'     predictions
#'   - field_results: data frame with field-level forecasts
#'     (yield_forecast_t_ha in t/ha)
calculate_yield_prediction_kpi_cane_supply <- function(field_boundaries_sf,
                                                       harvesting_data,
                                                       cumulative_CI_vals_dir) {
  # Pure delegation: all modelling logic lives in 80_utils_common.R.
  calculate_yield_prediction_kpi(field_boundaries_sf, harvesting_data,
                                 cumulative_CI_vals_dir)
}
#' Build complete per-field KPI dataframe with all 22 columns
#'
#' Assembles the final per-field report row-set from the current week's
#' statistics plus the auxiliary inputs (planting dates, harvest probabilities,
#' gap scores, geometries). Columns are added in one sequential mutate():
#' dplyr evaluates mutate expressions in order, so later columns (Age_week,
#' Status_Alert) deliberately reference columns created earlier in the same
#' call (Last_harvest_or_planting_date, Imminent_prob). Do not reorder the
#' mutate expressions.
#'
#' @param current_stats data.frame with current week statistics from load_or_calculate_weekly_stats
#' @param planting_dates data.frame with field_id and planting_date columns
#' @param imminent_prob_data data.frame with Field_id and Imminent_prob_actual columns (or NULL)
#' @param gap_scores_df data.frame with Field_id and gap_score columns (or NULL)
#' @param field_boundaries_sf sf object with field geometries
#' @param end_date Date object for current report date
#' @return data.frame with all 22 KPI columns
calculate_all_field_kpis <- function(current_stats,
                                     planting_dates,
                                     imminent_prob_data,
                                     gap_scores_df,
                                     field_boundaries_sf,
                                     end_date) {
  message("\nBuilding final field analysis output...")
  # Pre-calculate acreages once (geometry work is relatively expensive)
  acreage_lookup <- calculate_field_acreages(field_boundaries_sf)
  field_analysis_df <- current_stats %>%
    mutate(
      # Column 2: Farm_Section (user fills manually)
      Farm_Section = NA_character_,
      # Column 3: Field_name (from GeoJSON)
      Field_name = Field_id,
      # Column 4: Acreage (from geometry); fields with no geometry match get 0
      Acreage = {
        acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)]
        if_else(is.na(acreages_vec), 0, acreages_vec)
      },
      # Column 8: Last_harvest_or_planting_date (from harvest.xlsx);
      # unmatched fields yield NA dates
      Last_harvest_or_planting_date = {
        planting_dates$planting_date[match(Field_id, planting_dates$field_id)]
      },
      # Column 9: Age_week (calculated per row; depends on the
      # Last_harvest_or_planting_date column created just above)
      Age_week = {
        sapply(seq_len(nrow(current_stats)), function(idx) {
          calculate_age_week(Last_harvest_or_planting_date[idx], end_date)
        })
      },
      # Column 10: Phase (based on Age_week)
      Phase = sapply(Age_week, calculate_phase),
      # Column 12: Germination_progress (binned Pct_pixels_CI_gte_2)
      Germination_progress = sapply(Pct_pixels_CI_gte_2, calculate_germination_progress),
      # Column 13: Imminent_prob (from script 31 or NA)
      Imminent_prob = {
        if (!is.null(imminent_prob_data)) {
          as.numeric(imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)])
        } else {
          rep(NA_real_, nrow(current_stats))
        }
      },
      # Column 14: Status_Alert (multi-priority logic for harvest/milling
      # workflow); reads the Imminent_prob / Age_week columns created above
      Status_Alert = {
        sapply(seq_len(nrow(current_stats)), function(idx) {
          calculate_status_alert(
            imminent_prob = Imminent_prob[idx],
            age_week = Age_week[idx],
            mean_ci = Mean_CI[idx],
            four_week_trend = Four_week_trend[idx],
            weekly_ci_change = Weekly_ci_change[idx],
            cv = CV[idx]
          )
        })
      },
      # Column 19b: CV_Trend_Long_Term_Category (categorical slope); note the
      # explicit current_stats$ access, taken from the input (pre-mutate) frame
      CV_Trend_Long_Term_Category = sapply(current_stats$CV_Trend_Long_Term, categorize_cv_trend_long_term),
      # Column 21: Cloud_pct_clear (binned into intervals; overwrites the
      # numeric input column with its character bin label)
      Cloud_pct_clear = sapply(Cloud_pct_clear, bin_percentage),
      # Column 22: Gap_score (2σ method); NA when script output is absent
      Gap_score = {
        if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
          gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)]
        } else {
          rep(NA_real_, nrow(current_stats))
        }
      }
    ) %>%
    # Fix the 22-column output order expected by downstream export/report code
    select(
      all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert",
               "Last_harvest_or_planting_date", "Age_week", "Phase",
               "Germination_progress",
               "Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles",
               "CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
               "Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score"))
    )
  message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns"))
  return(field_analysis_df)
}
#' Aggregate per-field data into farm-level KPI summary
#'
#' Produces three category distributions (phase, status alert, cloud coverage)
#' plus overall farm statistics from the per-field KPI table. The three
#' distributions share identical logic, so they are built by one local helper
#' rather than three copy-pasted pipelines.
#'
#' @param field_analysis_df data.frame with per-field KPI data
#' @param current_week Numeric current week number
#' @param current_year Numeric current year
#' @param end_date Date object for current report date
#' @return List with phase_distribution, status_distribution,
#'   cloud_distribution, overall_stats; NULL when no field has usable data
calculate_farm_level_kpis <- function(field_analysis_df, current_week, current_year, end_date) {
  cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n")
  # Filter to only fields with actual data (a CI value and a positive acreage)
  field_data <- field_analysis_df %>%
    filter(!is.na(Mean_CI) & !is.na(Acreage)) %>%
    filter(Acreage > 0)
  if (nrow(field_data) == 0) {
    message("No valid field data for farm-level aggregation")
    return(NULL)
  }
  # Helper: field count and total acreage per level of one category column.
  # `group_col` is the column name as a string; the grouping column is then
  # renamed to the uniform "Category" expected by downstream consumers.
  summarise_distribution <- function(group_col) {
    dist <- field_data %>%
      group_by(.data[[group_col]]) %>%
      summarise(
        num_fields = n(),
        acreage = sum(Acreage, na.rm = TRUE),
        .groups = 'drop'
      )
    names(dist)[names(dist) == group_col] <- "Category"
    dist
  }
  farm_summary <- list(
    # 1. PHASE DISTRIBUTION
    phase_distribution = summarise_distribution("Phase"),
    # 2. STATUS ALERT DISTRIBUTION
    status_distribution = summarise_distribution("Status_Alert"),
    # 3. CLOUD COVERAGE DISTRIBUTION
    cloud_distribution = summarise_distribution("Cloud_category"),
    # 4. OVERALL STATISTICS
    overall_stats = data.frame(
      total_fields = nrow(field_data),
      total_acreage = sum(field_data$Acreage, na.rm = TRUE),
      mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2),
      median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2),
      mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4),
      week = current_week,
      year = current_year,
      date = as.character(end_date)
    )
  )
  # Print summaries (console report, kept identical to the original output)
  cat("\n--- PHASE DISTRIBUTION ---\n")
  print(farm_summary$phase_distribution)
  cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n")
  print(farm_summary$status_distribution)
  cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n")
  print(farm_summary$cloud_distribution)
  cat("\n--- OVERALL FARM STATISTICS ---\n")
  print(farm_summary$overall_stats)
  return(farm_summary)
}
# ============================================================================
# ORCHESTRATOR FOR CANE_SUPPLY WORKFLOWS
# ============================================================================
#' Orchestrate ANGATA weekly field analysis and reporting
#' Main orchestrator for CANE_SUPPLY per-field KPI workflow
#'
#' Main entry point for CANE_SUPPLY (ANGATA, etc.) workflows.
#' Currently uses common utilities; future versions will add client-specific logic.
#' This function coordinates all KPI calculations for the per-field analysis workflow.
#' It loads historical data, calculates current/previous week statistics, computes
#' all 22 KPI columns, and aggregates farm-level summaries.
#'
#' @param field_boundaries_sf SF object with field geometries
#' @param current_week ISO week number (1-53)
#' @param current_year ISO week year
#' @param mosaic_dir Directory containing weekly mosaics
#' @param field_boundaries_path Path to field GeoJSON
#' @param harvesting_data Data frame with harvest data (optional)
#' @param output_dir Directory for exports
#' @param data_dir Base data directory
#'
#' @return List with field analysis results
#'
#' @details
#' This function:
#' 1. Loads weekly mosaic and extracts field statistics
#' 2. Calculates field statistics (using common utilities)
#' 3. Prepares field analysis summary
#' 4. Exports to Excel/CSV/RDS
#' 5. (Future) Applies ANGATA-specific assessments
#'
calculate_field_analysis_cane_supply <- function(
#' @param setup List with directory paths (kpi_reports_dir, data_dir, etc.)
#' @param client_config List with workflow configuration (script_91_compatible, outputs)
#' @param end_date Date object for current report date
#' @param project_dir Character project identifier
#' @param weekly_mosaic Character path to weekly mosaic directory
#' @param daily_vals_dir Character path to daily values directory
#' @param field_boundaries_sf sf object with field geometries
#' @param data_dir Character path to data directory
#' @return List with field_analysis_df, farm_kpi_results, export_paths
calculate_field_analysis_cane_supply <- function(setup,
client_config,
end_date,
project_dir,
weekly_mosaic,
daily_vals_dir,
field_boundaries_sf,
current_week,
current_year,
mosaic_dir,
field_boundaries_path = NULL,
harvesting_data = NULL,
output_dir = file.path(PROJECT_DIR, "output"),
data_dir = NULL
) {
data_dir) {
message("\n============ CANE SUPPLY FIELD ANALYSIS (ANGATA, etc.) ============")
message("\n", strrep("=", 70))
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
message(strrep("=", 70))
# Load current week mosaic
message("Loading current week mosaic...")
current_mosaic <- load_weekly_ci_mosaic(mosaic_dir, current_week, current_year)
reports_dir <- file.path(setup$reports_dir, "kpis")
if (is.null(current_mosaic)) {
warning(paste("Could not load current week mosaic for week", current_week, current_year))
return(NULL)
# ========== PHASE 1: WEEKLY ANALYSIS SETUP ==========
message("\n", strrep("-", 70))
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ")
message(strrep("-", 70))
weeks <- calculate_week_numbers(end_date)
current_week <- weeks$current_week
current_year <- weeks$current_year
previous_week <- weeks$previous_week
previous_year <- weeks$previous_year
message(paste("Week:", current_week, "/ Year (ISO 8601):", current_year))
# Find per-field weekly mosaics
message("Finding per-field weekly mosaics...")
if (!dir.exists(weekly_mosaic)) {
stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic,
"\nScript 40 (mosaic creation) must be run first."))
}
# Extract field statistics
message("Extracting field statistics from current mosaic...")
field_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf)
field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE)
field_dirs <- field_dirs[field_dirs != ""]
# Load previous week stats for comparison
message("Loading historical data for trends...")
target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1)
previous_stats <- NULL
previous_mosaic <- load_weekly_ci_mosaic(mosaic_dir, target_prev$week, target_prev$year)
if (!is.null(previous_mosaic)) {
previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf)
if (length(field_dirs) == 0) {
stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic,
"\nScript 40 must create weekly_mosaic/{FIELD}/ structure."))
}
# Calculate 4-week historical trend
message("Calculating field trends...")
ci_rds_path <- file.path(data_dir, "combined_CI", "combined_CI_data.rds")
# Verify we have mosaics for this week
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year)
per_field_files <- c()
for (field in field_dirs) {
field_mosaic_dir <- file.path(weekly_mosaic, field)
files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
if (length(files) > 0) {
per_field_files <- c(per_field_files, files)
}
}
field_analysis <- calculate_field_statistics(
field_stats = field_stats,
previous_stats = previous_stats,
if (length(per_field_files) == 0) {
stop(paste("ERROR: No mosaics found for week", current_week, "year", current_year,
"\nExpected pattern:", single_file_pattern,
"\nChecked:", weekly_mosaic))
}
message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics"))
# ========== PHASE 2: LOAD HISTORICAL DATA ==========
message("\nLoading historical field data for trend calculations...")
num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG)
message(paste(" Attempting to load up to", num_weeks_to_load, "weeks of historical data..."))
allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv)
historical_data <- load_historical_field_data(
project_dir, current_week, current_year, reports_dir,
num_weeks = num_weeks_to_load,
auto_generate = allow_auto_gen,
field_boundaries_sf = field_boundaries_sf,
daily_vals_dir = daily_vals_dir
)
# ========== PHASE 3: LOAD PLANTING DATES ==========
message("\nLoading harvest data from harvest.xlsx for planting dates...")
# Use load_harvest_data() from 80_utils_common.R for consistency with 80_calculate_kpis.R
harvesting_data <- load_harvest_data(data_dir)
planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
if (is.null(planting_dates) || nrow(planting_dates) == 0) {
message("WARNING: No planting dates available. Using NA for all fields.")
planting_dates <- data.frame(
field_id = field_boundaries_sf$field,
planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)),
stringsAsFactors = FALSE
)
}
# ========== PHASE 4: CALCULATE WEEKLY STATISTICS ==========
message("\nUsing modular RDS-based approach for weekly statistics...")
# Current week
message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...")
current_stats <- load_or_calculate_weekly_stats(
week_num = current_week,
year = current_year,
ci_rds_path = ci_rds_path,
project_dir = project_dir,
field_boundaries_sf = field_boundaries_sf,
harvesting_data = harvesting_data
mosaic_dir = weekly_mosaic,
reports_dir = reports_dir,
report_date = end_date
)
message(paste(" ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week"))
# Previous week
message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...")
prev_report_date <- end_date - 7
prev_stats <- load_or_calculate_weekly_stats(
week_num = previous_week,
year = previous_year,
project_dir = project_dir,
field_boundaries_sf = field_boundaries_sf,
mosaic_dir = weekly_mosaic,
reports_dir = reports_dir,
report_date = prev_report_date
)
message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week"))
# ========== PHASE 5: CALCULATE TRENDS ==========
message("\n3. Calculating trend columns...")
current_stats <- calculate_kpi_trends(
current_stats, prev_stats,
project_dir = project_dir,
reports_dir = reports_dir,
current_week = current_week,
year = current_year
)
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
# ========== PHASE 6: LOAD HARVEST PROBABILITIES ==========
message("\n4. Loading harvest probabilities from script 31...")
# Use get_harvest_output_path() to safely build path (stored in kpi_reports_dir)
harvest_prob_file <- get_harvest_output_path(project_dir, current_week, current_year)
message(paste(" Looking for:", harvest_prob_file))
imminent_prob_data <- tryCatch({
if (file.exists(harvest_prob_file)) {
prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE,
col_types = readr::cols(.default = readr::col_character()))
message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields"))
prob_df %>%
select(field, imminent_prob, detected_prob) %>%
rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob)
} else {
message(paste(" INFO: Harvest probabilities not available (script 31 not run)"))
NULL
}
}, error = function(e) {
message(paste(" WARNING: Could not load harvest probabilities:", e$message))
NULL
})
# ========== PHASE 7: CALCULATE GAP SCORES ==========
gap_scores_df <- calculate_gap_scores(per_field_files, field_boundaries_sf)
# ========== PHASE 8: BUILD FINAL PER-FIELD DATAFRAME ==========
field_analysis_df <- calculate_all_field_kpis(
current_stats = current_stats,
planting_dates = planting_dates,
imminent_prob_data = imminent_prob_data,
gap_scores_df = gap_scores_df,
field_boundaries_sf = field_boundaries_sf,
end_date = end_date
)
if (is.null(field_analysis)) {
message("Could not generate field analysis")
return(NULL)
}
# Generate summary
message("Generating summary statistics...")
summary_df <- generate_field_analysis_summary(field_analysis)
# Export
message("Exporting field analysis...")
# ========== PHASE 9: EXPORT PER-FIELD RESULTS ==========
export_paths <- export_field_analysis_excel(
field_analysis,
summary_df,
PROJECT_DIR,
field_analysis_df,
NULL,
project_dir,
current_week,
current_year,
output_dir
reports_dir
)
message(paste("\n✓ CANE_SUPPLY field analysis complete. Week", current_week, current_year, "\n"))
# cat("\n--- Per-field Results (first 10) ---\n")
# available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category")
# available_cols <- available_cols[available_cols %in% names(field_analysis_df)]
# if (length(available_cols) > 0) {
# print(head(field_analysis_df[, available_cols], 10))
# }
result <- list(
field_analysis = field_analysis,
summary = summary_df,
exports = export_paths
)
# ========== PHASE 10: CALCULATE FARM-LEVEL KPIS ==========
# farm_kpi_results <- calculate_farm_level_kpis(
# field_analysis_df,
# current_week,
# current_year,
# end_date
# )
return(result)
# For now, farm-level KPIs are not implemented in CANE_SUPPLY workflow
farm_kpi_results <- NULL
# ========== RETURN RESULTS ==========
return(list(
field_analysis_df = field_analysis_df,
farm_kpi_results = farm_kpi_results,
export_paths = export_paths
))
}
# ============================================================================

View file

@ -8,10 +8,34 @@
# - Field statistics extraction
# - Week/year calculations for consistent date handling
# - Excel/CSV/RDS export utilities
# - Yield prediction using ML models (Random Forest with Feature Selection)
#
# Used by: 80_calculate_kpis.R, all client-specific utils files
#
# NOTE: Libraries required by yield prediction (caret, CAST, here) are loaded
# in the main script 80_calculate_kpis.R, not here. This keeps dependencies
# centralized in the orchestrator script.
# ============================================================================
# ============================================================================
# LOAD PROJECT CONFIGURATION (Guard against re-sourcing)
# ============================================================================
# Ensure parameters_project.R has been sourced to provide global configuration
# (PROJECT, data_dir, field_boundaries_path, etc.). Use a sentinel to avoid double-sourcing.
if (!exists("PROJECT", envir = .GlobalEnv)) {
tryCatch({
source(here::here("r_app", "parameters_project.R"))
}, error = function(e) {
# Fallback: try relative path if here() doesn't work
tryCatch({
source("parameters_project.R")
}, error = function(e2) {
warning(paste("Could not source parameters_project.R:", e2$message,
"- using defaults or expecting caller to set PROJECT/data_dir"))
})
})
}
# ============================================================================
# CONSTANTS (from 80_calculate_kpis.R)
# ============================================================================
@ -355,11 +379,10 @@ calculate_cv_trend_long_term <- function(cv_values) {
}
#' Calculate Gap Filling Score KPI (2σ method)
#' @param ci_raster Current week CI raster
#' @param field_boundaries Field boundaries
#' @return Data frame with field-level gap filling scores
#' @param ci_raster Current week CI raster (single band)
#' @param field_boundaries Field boundaries (sf or SpatVector)
#' @return List with summary data frame and field-level results data frame
calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
safe_log("Calculating Gap Filling Score KPI (placeholder)")
# Handle both sf and SpatVector inputs
if (!inherits(field_boundaries, "SpatVector")) {
@ -368,19 +391,11 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
field_boundaries_vect <- field_boundaries
}
# Ensure field_boundaries_vect is valid and matches field_boundaries dimensions
n_fields_vect <- length(field_boundaries_vect)
n_fields_sf <- nrow(field_boundaries)
if (n_fields_sf != n_fields_vect) {
warning(paste("Field boundary mismatch: nrow(field_boundaries)=", n_fields_sf, "vs length(field_boundaries_vect)=", n_fields_vect, ". Using actual SpatVector length."))
}
field_results <- data.frame()
for (i in seq_len(nrow(field_boundaries))) {
field_name <- field_boundaries$field[i]
sub_field_name <- field_boundaries$sub_field[i]
field_name <- if ("field" %in% names(field_boundaries)) field_boundaries$field[i] else NA_character_
sub_field_name <- if ("sub_field" %in% names(field_boundaries)) field_boundaries$sub_field[i] else NA_character_
field_vect <- field_boundaries_vect[i]
# Extract CI values using helper function
@ -391,16 +406,17 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
# Gap score using 2σ below median to detect outliers
median_ci <- median(valid_values)
sd_ci <- sd(valid_values)
outlier_threshold <- median_ci - (2 * sd_ci)
outlier_threshold <- median_ci - (1 * sd_ci)
low_ci_pixels <- sum(valid_values < outlier_threshold)
total_pixels <- length(valid_values)
gap_score <- (low_ci_pixels / total_pixels) * 100
gap_score <- round((low_ci_pixels / total_pixels) * 100, 2)
# Classify gap severity
gap_level <- dplyr::case_when(
gap_score < 10 ~ "Minimal",
gap_score < 25 ~ "Moderate",
TRUE ~ "Significant"
gap_score >= 25 ~ "Significant",
TRUE ~ NA_character_
)
field_results <- rbind(field_results, data.frame(
@ -412,7 +428,6 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
outlier_threshold = outlier_threshold
))
} else {
# Not enough valid data, fill with NA row
field_results <- rbind(field_results, data.frame(
field = field_name,
sub_field = sub_field_name,
@ -423,9 +438,99 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
))
}
}
# Summarize results
gap_summary <- field_results %>%
dplyr::group_by(gap_level) %>%
dplyr::summarise(field_count = n(), .groups = 'drop') %>%
dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1))
return(list(summary = gap_summary, field_results = field_results))
}
#' Compute gap filling scores across all per-field CI mosaics
#'
#' Wrapper around calculate_gap_filling_kpi() that walks the set of
#' per-field mosaic TIFFs, scores each field individually, then collapses
#' the per-quadrant results into a single mean score per field.
#'
#' @param per_field_files Character vector of paths to per-field mosaic TIFFs
#' @param field_boundaries_sf sf object with field geometries
#' @return data.frame with Field_id and gap_score columns, or NULL when no
#'   scores could be produced
calculate_gap_scores <- function(per_field_files, field_boundaries_sf) {
  message("\nCalculating gap filling scores (2σ method)...")
  message(paste(" Using per-field mosaics for", length(per_field_files), "fields"))

  # Index boundaries by field id for O(1) lookup per mosaic file
  bounds_lookup <- split(field_boundaries_sf, field_boundaries_sf$field)

  # Score one mosaic; always yields a data frame (NA score on any failure)
  score_single_field <- function(mosaic_path) {
    fid <- basename(dirname(mosaic_path))
    bounds <- bounds_lookup[[fid]]
    if (is.null(bounds) || nrow(bounds) == 0) {
      return(data.frame(Field_id = fid, gap_score = NA_real_))
    }
    tryCatch({
      ras <- terra::rast(mosaic_path)
      if (!("CI" %in% names(ras))) {
        return(data.frame(Field_id = fid, gap_score = NA_real_))
      }
      ci_layer <- ras[["CI"]]
      names(ci_layer) <- "CI"
      kpi <- calculate_gap_filling_kpi(ci_layer, bounds)
      if (is.null(kpi) || is.null(kpi$field_results) || nrow(kpi$field_results) == 0) {
        return(data.frame(Field_id = fid, gap_score = NA_real_))
      }
      per_quadrant <- kpi$field_results
      per_quadrant$Field_id <- per_quadrant$field
      per_quadrant <- per_quadrant[, c("Field_id", "gap_score")]
      # Average quadrant-level scores down to one row per field
      stats::aggregate(gap_score ~ Field_id, data = per_quadrant, FUN = function(x) mean(x, na.rm = TRUE))
    }, error = function(e) {
      message(paste(" WARNING: Gap score failed for field", fid, ":", e$message))
      data.frame(Field_id = fid, gap_score = NA_real_)
    })
  }

  # Sequential processing with a console progress bar
  message(" Processing gap scores for ", length(per_field_files), " fields...")
  pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50)
  collected <- vector("list", length(per_field_files))
  for (idx in seq_along(per_field_files)) {
    collected[[idx]] <- score_single_field(per_field_files[[idx]])
    utils::setTxtProgressBar(pb, idx)
  }
  close(pb)

  scores <- dplyr::bind_rows(collected)
  if (is.null(scores) || nrow(scores) == 0) {
    message(" WARNING: No gap scores calculated from per-field mosaics")
    return(NULL)
  }

  # A field can appear more than once (several mosaics) — average duplicates
  scores <- scores %>%
    dplyr::group_by(Field_id) %>%
    dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop")
  message(paste(" ✓ Calculated gap scores for", nrow(scores), "fields"))

  # All-NA guard: min()/max() with na.rm on an empty set would warn and give Inf
  if (any(is.finite(scores$gap_score))) {
    lo <- round(min(scores$gap_score, na.rm = TRUE), 2)
    hi <- round(max(scores$gap_score, na.rm = TRUE), 2)
    message(paste(" Gap score range:", lo, "-", hi, "%"))
  } else {
    message(" Gap score range: All values are NA (no valid gap scores)")
  }
  scores
}
# ============================================================================
# HELPER FUNCTIONS
@ -517,6 +622,88 @@ extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL)
})
}
# ============================================================================
# DATA LOADING HELPERS
# ============================================================================
#' Load and validate harvest data from harvest.xlsx
#'
#' Reads `file.path(data_dir, "harvest.xlsx")`, validates that the required
#' columns (field, year, tonnage_ha) exist, and coerces column types. On any
#' failure (missing file, missing columns, read error) a status message is
#' emitted and an empty but correctly-structured data frame is returned, so
#' callers never receive NULL.
#'
#' @param data_dir Path to data directory containing harvest.xlsx
#'
#' @return data.frame with columns: field (character), year (numeric),
#'   tonnage_ha (numeric). Zero rows when loading fails.
load_harvest_data <- function(data_dir) {
  harvest_file <- file.path(data_dir, "harvest.xlsx")
  loaded <- NULL

  if (!file.exists(harvest_file)) {
    message(paste(" NOTE: harvest.xlsx not found at", harvest_file))
  } else {
    loaded <- tryCatch({
      raw <- readxl::read_excel(harvest_file)
      needed <- c("field", "year", "tonnage_ha")
      if (!all(needed %in% names(raw))) {
        message(paste(" WARNING: harvest.xlsx missing required columns. Expected: field, year, tonnage_ha"))
        NULL
      } else {
        clean <- as.data.frame(raw)
        # Field ids must stay character so leading zeros (e.g. "01") survive
        clean$field <- as.character(clean$field)
        clean$year <- as.numeric(clean$year)
        clean$tonnage_ha <- as.numeric(clean$tonnage_ha)
        message(paste(" ✓ Loaded harvest data:", nrow(clean), "records from harvest.xlsx"))
        clean
      }
    }, error = function(e) {
      message(paste(" WARNING: Could not read harvest.xlsx:", e$message))
      NULL
    })
  }

  # Fallback: empty frame with the contract's structure when loading failed
  if (is.null(loaded)) {
    message(" WARNING: No harvest data available. TCH yield prediction will use graceful fallback (NA values)")
    loaded <- data.frame(
      field = character(),     # character to preserve leading zeros when data is added
      year = numeric(),
      tonnage_ha = numeric(),
      stringsAsFactors = FALSE
    )
  }
  loaded
}
# ============================================================================
# FIELD STATISTICS EXTRACTION
# ============================================================================
@ -678,13 +865,13 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
NULL
}
output_subdir <- file.path(reports_dir, "field_analysis")
if (!dir.exists(output_subdir)) {
dir.create(output_subdir, recursive = TRUE)
output_dir <- file.path(reports_dir)
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".xlsx")
excel_path <- file.path(output_subdir, excel_filename)
excel_path <- file.path(output_dir, excel_filename)
excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
# Build sheets list dynamically
@ -709,14 +896,14 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
)
)
rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds")
rds_path <- file.path(reports_dir, rds_filename)
rds_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".rds")
rds_path <- file.path(output_dir, rds_filename)
saveRDS(kpi_data, rds_path)
message(paste("✓ Field analysis RDS exported to:", rds_path))
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".csv")
csv_path <- file.path(output_subdir, csv_filename)
csv_path <- file.path(output_dir, csv_filename)
write_csv(field_df_rounded, csv_path)
message(paste("✓ Field analysis CSV exported to:", csv_path))
@ -1304,7 +1491,7 @@ prepare_predictions <- function(predictions, newdata) {
dplyr::mutate(
sub_field = newdata$sub_field,
field = newdata$field,
Age_days = newdata$DOY,
Age_days = newdata$DAH,
total_CI = round(newdata$cumulative_CI, 0),
predicted_Tcha = round(predicted_Tcha, 0),
season = newdata$season
@ -1313,3 +1500,258 @@ prepare_predictions <- function(predictions, newdata) {
dplyr::left_join(., newdata, by = c("field", "sub_field", "season"))
)
}
# ============================================================================
# YIELD PREDICTION KPI (SHARED ML-BASED MODEL FOR ALL CLIENT TYPES)
# ============================================================================
#' Build an all-NA yield KPI result when the model cannot be trained
#'
#' Mirrors the structure returned by the yield prediction KPI so downstream
#' consumers never special-case a missing model: the summary carries NA yield
#' values (but the real total area), and the field-level table is empty.
#'
#' @param field_boundaries Field boundaries (sf or SpatVector)
#' @return List with summary and field_results (both with NA values)
create_fallback_result <- function(field_boundaries) {
  # terra::project() requires a SpatVector, so coerce sf input first
  if (!inherits(field_boundaries, "SpatVector")) {
    field_boundaries <- terra::vect(field_boundaries)
  }

  # Total area in hectares, measured in an equal-area CRS
  # (EPSG:6933, Equal Area Cylindrical / EASE-Grid 2.0)
  projected <- terra::project(field_boundaries, "EPSG:6933")
  hectares <- terra::expanse(projected) / 10000  # m² to ha
  area_total <- sum(hectares)

  fallback_summary <- data.frame(
    field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"),
    count = c(0, 0, 0, nrow(field_boundaries)),
    value = c(NA_real_, NA_real_, NA_real_, round(area_total, 1)),
    stringsAsFactors = FALSE
  )

  empty_field_results <- data.frame(
    field = character(0),
    sub_field = character(0),
    Age_days = numeric(0),
    yield_forecast_t_ha = numeric(0),
    season = numeric(0),
    stringsAsFactors = FALSE
  )

  list(summary = fallback_summary, field_results = empty_field_results)
}
#' Calculate yield prediction KPI using Random Forest with Feature Selection
#'
#' Trains a Random Forest model on historical harvest data with cumulative CI,
#' days after harvest (DAH), and CI-per-day as predictors. Uses CAST::ffs() for
#' Forward Feature Selection. Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD).
#'
#' @param field_boundaries Field boundaries (sf or SpatVector)
#' @param harvesting_data Data frame with harvest data including tonnage_ha column
#' @param cumulative_CI_vals_dir Directory containing "All_pivots_Cumulative_CI_quadrant_year_v2.rds"
#'
#' @return List with:
#' - summary: Data frame with field_groups, count, value (quartiles and total area)
#' - field_results: Data frame with field-level yield forecasts (yield_forecast_t_ha)
#'
#' @details
#' **Training Data Requirements:**
#' - cumulative_CI_vals_dir must contain "All_pivots_Cumulative_CI_quadrant_year_v2.rds"
#' - harvesting_data must have tonnage_ha column with numeric yield values
#' - Training stops gracefully if either is missing (returns NA values, not fake numbers)
#'
#' **Model Specifications:**
#' - Algorithm: Random Forest (caret + CAST)
#' - Feature Selection: Forward Feature Selection (CAST::ffs)
#' - Cross-validation: 5-fold CV
#' - Predictors: cumulative_CI, DAH, CI_per_day
#' - Mature field threshold: DAH >= DAH_MATURITY_THRESHOLD (8 months, ~240 days)
#' - Output: Field-level yield forecasts grouped by quartile
#'
#' **Error Handling:**
#' - Missing tonnage_ha: Returns graceful fallback with NA (not zero) values
#' - No training data: Logs WARNING, returns empty field_results
#' - RDS file missing: Returns graceful fallback
#' - Prediction errors: Wrapped in tryCatch, returns fallback on failure
calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cumulative_CI_vals_dir) {
  safe_log("Calculating yield prediction KPI using Random Forest with Feature Selection")
  tryCatch({
    # Check if tonnage_ha column is present and has valid data;
    # without any observed yields there is nothing to train on.
    if (is.null(harvesting_data) ||
        !("tonnage_ha" %in% names(harvesting_data)) ||
        all(is.na(harvesting_data$tonnage_ha))) {
      safe_log("No harvest data available: lacking tonnage_ha column or all values are NA", "WARNING")
      return(create_fallback_result(field_boundaries))
    }
    # Check if CI quadrant RDS file exists (the predictor source)
    ci_quadrant_path <- file.path(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")
    if (!file.exists(ci_quadrant_path)) {
      safe_log(paste("CI quadrant file not found at:", ci_quadrant_path), "WARNING")
      return(create_fallback_result(field_boundaries))
    }
    # Load CI quadrant data and fill missing field/sub_field values
    # within each model group (both directions, so gaps at either end fill)
    CI_quadrant <- readRDS(ci_quadrant_path) %>%
      dplyr::group_by(model) %>%
      tidyr::fill(field, sub_field, .direction = "downup") %>%
      dplyr::ungroup()
    # Rename year column to season for consistency with CI_quadrant
    harvesting_data_renamed <- harvesting_data %>% dplyr::rename(season = year)
    # Join CI and yield data; slice(which.max(DAH)) keeps only the latest
    # observation per sub_field/season. CI_per_day guards DAH == 0 with NA.
    CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed,
                                     by = c("field", "sub_field", "season")) %>%
      dplyr::group_by(sub_field, season) %>%
      dplyr::slice(which.max(DAH)) %>%
      dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DAH, season, sub_area) %>%
      dplyr::mutate(CI_per_day = ifelse(DAH > 0, cumulative_CI / DAH, NA_real_))
    # Define predictors and response variables
    predictors <- c("cumulative_CI", "DAH", "CI_per_day")
    response <- "tonnage_ha"
    # Prepare training dataset (fields with harvest data)
    CI_and_yield_test <- CI_and_yield %>%
      as.data.frame() %>%
      dplyr::filter(!is.na(tonnage_ha))
    # Check if we have minimum training data
    if (nrow(CI_and_yield_test) == 0) {
      safe_log("No training data available: no fields with tonnage_ha observations", "WARNING")
      return(create_fallback_result(field_boundaries))
    }
    # Pre-clean training data: remove rows with any NAs in predictors or response
    # This is required because CAST::ffs doesn't support na.rm parameter
    CI_and_yield_test <- CI_and_yield_test %>%
      dplyr::filter(!dplyr::if_any(dplyr::all_of(c(predictors, response)), is.na))
    if (nrow(CI_and_yield_test) == 0) {
      safe_log("No complete training data after removing NAs in predictors/response", "WARNING")
      return(create_fallback_result(field_boundaries))
    }
    # Prepare prediction dataset (fields without harvest data, mature fields only)
    prediction_yields <- CI_and_yield %>%
      as.data.frame() %>%
      dplyr::filter(is.na(tonnage_ha) & DAH >= DAH_MATURITY_THRESHOLD)  # Mature fields only
    # Configure model training parameters: 5-fold cross-validation
    ctrl <- caret::trainControl(
      method = "cv",
      savePredictions = TRUE,
      allowParallel = TRUE,
      number = 5,
      verboseIter = FALSE
    )
    # Train the model with forward feature selection
    set.seed(202)  # For reproducibility of CV folds and RF randomness
    safe_log("Training Random Forest model with Forward Feature Selection...")
    model_ffs_rf <- CAST::ffs(
      CI_and_yield_test[, predictors],
      CI_and_yield_test[, response],
      method = "rf",
      trControl = ctrl,
      importance = TRUE,
      withinSE = TRUE,
      tuneLength = 5
    )
    # Predict yields on validation data (same as training data for RMSE calculation)
    pred_ffs_rf <- prepare_predictions(
      stats::predict(model_ffs_rf, newdata = CI_and_yield_test),
      CI_and_yield_test
    )
    # Extract cross-validated RMSE from the model object (more honest than in-sample RMSE)
    # The CAST::ffs model stores CV results in $results data frame
    # We extract the RMSE from the best model (lowest RMSE across folds)
    if (!is.null(model_ffs_rf$results) && "RMSE" %in% names(model_ffs_rf$results)) {
      # Get minimum RMSE from cross-validation results (best model from feature selection)
      rmse_value <- min(model_ffs_rf$results$RMSE, na.rm = TRUE)
      safe_log(paste("Yield prediction RMSE (cross-validated):", round(rmse_value, 2), "t/ha"))
    } else {
      # Fallback: compute in-sample RMSE if CV results unavailable, but label it clearly
      rmse_value <- sqrt(mean((pred_ffs_rf$predicted_Tcha - CI_and_yield_test$tonnage_ha)^2, na.rm = TRUE))
      safe_log(paste("Yield prediction RMSE (in-sample/training):", round(rmse_value, 2), "t/ha"))
    }
    # Predict yields for current season (mature fields >= DAH_MATURITY_THRESHOLD days)
    if (nrow(prediction_yields) > 0) {
      pred_rf_current_season <- prepare_predictions(
        stats::predict(model_ffs_rf, newdata = prediction_yields),
        prediction_yields
      ) %>%
        dplyr::filter(Age_days >= DAH_MATURITY_THRESHOLD) %>%
        dplyr::select(c("field", "Age_days", "predicted_Tcha", "season"))
    } else {
      pred_rf_current_season <- data.frame()
    }
    # Calculate summary statistics for KPI
    if (nrow(pred_rf_current_season) > 0) {
      safe_log(paste("Number of fields with yield predictions:", nrow(pred_rf_current_season)))
      safe_log(paste("Predicted yield range:",
                     round(min(pred_rf_current_season$predicted_Tcha, na.rm = TRUE), 1),
                     "-",
                     round(max(pred_rf_current_season$predicted_Tcha, na.rm = TRUE), 1),
                     "t/ha"))
      # Calculate quartiles for grouping fields into Top/Average/Lowest bands
      yield_quartiles <- quantile(pred_rf_current_season$predicted_Tcha,
                                  probs = c(0.25, 0.5, 0.75), na.rm = TRUE)
      # Count fields in each quartile band
      top_25_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[3], na.rm = TRUE)
      average_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[1] &
                             pred_rf_current_season$predicted_Tcha < yield_quartiles[3], na.rm = TRUE)
      lowest_25_count <- sum(pred_rf_current_season$predicted_Tcha < yield_quartiles[1], na.rm = TRUE)
      # Calculate total area (coerce to SpatVector for the terra code path)
      if (!inherits(field_boundaries, "SpatVector")) {
        field_boundaries_vect <- terra::vect(field_boundaries)
      } else {
        field_boundaries_vect <- field_boundaries
      }
      # Handle both sf and SpatVector inputs for area calculation
      # (EPSG:6933 is an equal-area CRS so areas are comparable)
      if (inherits(field_boundaries, "sf")) {
        field_boundaries_projected <- sf::st_transform(field_boundaries, "EPSG:6933")
        field_areas <- sf::st_area(field_boundaries_projected) / 10000  # m² to hectares
      } else {
        field_boundaries_projected <- terra::project(field_boundaries_vect, "EPSG:6933")
        field_areas <- terra::expanse(field_boundaries_projected) / 10000
      }
      total_area <- sum(as.numeric(field_areas))
      safe_log(paste("Total area:", round(total_area, 1), "hectares"))
      # Build summary result (same shape as create_fallback_result())
      result <- data.frame(
        field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"),
        count = c(top_25_count, average_count, lowest_25_count, nrow(field_boundaries)),
        value = c(round(yield_quartiles[3], 1), round(yield_quartiles[2], 1),
                  round(yield_quartiles[1], 1), round(total_area, 1)),
        stringsAsFactors = FALSE
      )
      # Prepare field-level results
      field_level_results <- pred_rf_current_season %>%
        dplyr::select(field, Age_days, predicted_Tcha, season) %>%
        dplyr::rename(yield_forecast_t_ha = predicted_Tcha)
      safe_log("✓ Yield prediction complete")
      return(list(summary = result, field_results = field_level_results))
    } else {
      safe_log(paste("No fields meet maturity threshold (DAH >=", DAH_MATURITY_THRESHOLD, ") for prediction"), "WARNING")
      return(list(summary = create_fallback_result(field_boundaries)$summary,
                  field_results = data.frame()))
    }
  }, error = function(e) {
    safe_log(paste("Error in yield prediction:", e$message), "ERROR")
    return(create_fallback_result(field_boundaries))
  })
}

File diff suppressed because it is too large Load diff

View file

@ -2,7 +2,7 @@
params:
ref: "word-styles-reference-var1.docx"
output_file: CI_report.docx
report_date: "2025-09-30"
report_date: "2026-02-04"
data_dir: "angata"
mail_day: "Wednesday"
borders: FALSE
@ -61,6 +61,10 @@ suppressPackageStartupMessages({
library(flextable) # For formatted tables in Word output (professional table styling)
})
# Configure tmap for static plotting (required for legend.outside to work)
tmap_mode("plot") # CRITICAL: Must be "plot" mode for legends outside to render properly
tmap_options(component.autoscale = FALSE)
# Load custom utility functions
tryCatch({
source("r_app/report_utils.R")
@ -140,12 +144,12 @@ week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year)
# Candidate filenames we expect (exact and common variants)
expected_summary_names <- c(
paste0(project_dir, "_kpi_summary_tables_", week_suffix, ".rds"),
paste0(project_dir, "_kpi_summary_tables_", date_suffix, ".rds"),
paste0(project_dir, "_kpi_summary_tables.rds"),
"kpi_summary_tables.rds",
paste0("kpi_summary_tables_", week_suffix, ".rds"),
paste0("kpi_summary_tables_", date_suffix, ".rds")
paste0(project_dir, "_field_analysis_", week_suffix, ".rds"),
paste0(project_dir, "_field_analysis_", date_suffix, ".rds"),
paste0(project_dir, "_field_analysis.rds"),
"field_analysis.rds",
paste0("field_analysis_", week_suffix, ".rds"),
paste0("field_analysis_", date_suffix, ".rds")
)
expected_field_details_names <- c(
@ -165,13 +169,26 @@ try_load_from_dir <- function(dir, candidates) {
return(NULL)
}
# Try primary directory first
# Try primary directory first (field_level/)
summary_file <- try_load_from_dir(kpi_data_dir, expected_summary_names)
field_details_file <- try_load_from_dir(kpi_data_dir, expected_field_details_names)
# If not found, perform a workspace-wide search (slower) limited to laravel_app storage
# If not found, try parent directory (kpis/) where RDS is often saved by Script 80
if (is.null(summary_file) || is.null(field_details_file)) {
safe_log(paste("KPI files not found in", kpi_data_dir, "—searching workspace for RDS files"))
parent_dir <- dirname(kpi_data_dir) # One level up: reports/kpis/
safe_log(paste("KPI files not found in", kpi_data_dir, "—trying parent directory:", parent_dir))
if (is.null(summary_file)) {
summary_file <- try_load_from_dir(parent_dir, expected_summary_names)
}
if (is.null(field_details_file)) {
field_details_file <- try_load_from_dir(parent_dir, expected_field_details_names)
}
}
# If still not found, perform a workspace-wide search (slower) limited to laravel_app storage
if (is.null(summary_file) || is.null(field_details_file)) {
safe_log(paste("KPI files not found in", kpi_data_dir, "or parent directory—searching workspace for RDS files"))
# List rds files under laravel_app/storage/app recursively
files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "\\.rds$", recursive = TRUE, full.names = TRUE)
# Try to match by expected names
@ -483,7 +500,7 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
## 1.1 Overview of cane supply area, showing zones with number of acres being harvest ready
```{r overview_map, fig.width=9, fig.height=7, fig.align="center", echo=FALSE, message=FALSE, warning=FALSE}
```{r overview_map, fig.width=7, fig.height=6, fig.align="center", echo=FALSE, message=FALSE, warning=FALSE}
# Create a hexbin overview map with ggplot
tryCatch({
# Use per-field field_analysis data from RDS (already loaded in load_kpi_data chunk)
@ -510,6 +527,8 @@ tryCatch({
TARGET_CRS <- 4326 # WGS84 for web basemap compatibility (was 32736 UTM)
# Process polygons into points
# IMPORTANT: Calculate centroids in projected CRS (UTM 36S for southern Africa) to avoid
# st_centroid warnings about longitude/latitude data, then transform back to WGS84
points_processed <- field_boundaries_sf %>%
st_make_valid() %>%
mutate(
@ -525,8 +544,9 @@ tryCatch({
analysis_data %>% select(Field_id, Status_trigger),
by = c("field" = "Field_id")
) %>%
st_transform(crs = TARGET_CRS) %>%
st_transform(crs = 32736) %>% # UTM zone 36S (southern Africa)
st_centroid() %>%
st_transform(crs = TARGET_CRS) %>%
bind_cols(st_coordinates(.))
# Validate coordinates - check for NaN, Inf, or missing values
@ -553,30 +573,8 @@ tryCatch({
labels_vec[length(labels_vec)] <- ">30"
labels_vec[1] <- "0.1"
# Create dummy point to anchor hexbin grids for consistency
dummy_point <- data.frame(
field = NA,
sub_field = NA,
area_ac = 0,
Status_trigger = NA,
X = min(points_processed$X, na.rm = TRUE),
Y = min(points_processed$Y, na.rm = TRUE),
geometry = NA
)
# Convert dummy point to sf and add xy coordinates
dummy_point <- st_as_sf(dummy_point, coords = c("X", "Y"), crs = st_crs(points_ready))
dummy_point <- cbind(dummy_point, st_coordinates(dummy_point))
# Mark dummy point with anchor flag before binding
# Referenced: dummy_point, st_as_sf, st_coordinates, area_ac
dummy_point$anchor_dummy <- TRUE
# Add dummy point to ensure consistent hexbin grid anchoring
points_ready <- rbind(points_ready, dummy_point)
points_not_ready <- rbind(points_not_ready, dummy_point)
# Calculate data bounds for coordinate limits (prevents basemap scale conflicts)
# Use actual data bounds without dummy points to avoid column mismatch
x_limits <- c(
floor(min(points_processed$X, na.rm = TRUE) * 20) / 20, # Round down to avoid edge clipping
ceiling(max(points_processed$X, na.rm = TRUE) * 20) / 20 # Round up for padding
@ -653,22 +651,17 @@ tryCatch({
)
)
# Remove dummy point rows after grid anchoring to prevent dummy cells in plot
# Referenced: points_ready, points_not_ready, anchor_dummy flag filtering
points_ready <- points_ready %>% filter(!anchor_dummy, na.rm = TRUE)
points_not_ready <- points_not_ready %>% filter(!anchor_dummy, na.rm = TRUE)
}, error = function(e) {
warning("Error creating hexbin map:", e$message)
})
```
\newpage
## 1.2 Key Performance Indicators
```{r combined_kpi_table, echo=FALSE, results='asis'}
# Create summary KPI table from field_analysis_summary data
# This shows: Phases, Triggers, Area Change, and Total Farm acreage
# Create consolidated KPI table from field_analysis data
# Shows: Phases, Triggers, Area Change, Cloud Influence, and Total Farm
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
# Load field analysis data
@ -679,24 +672,46 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
!is.data.frame(summary_data$field_analysis_summary)) {
# Create summary by aggregating by Status_Alert and Phase categories
# This groups fields by their phase and status to show distribution
phase_summary <- field_analysis_df %>%
filter(!is.na(Phase)) %>%
group_by(Phase) %>%
summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>%
summarise(
Acreage = sum(Acreage, na.rm = TRUE),
Field_count = n_distinct(Field_id),
.groups = "drop"
) %>%
mutate(Category = Phase) %>%
select(Category, Acreage)
select(Category, Acreage, Field_count)
# Try to create Status trigger summary - use Status_Alert if available, otherwise use empty
# Create Status trigger summary - includes both active alerts and "No active triggers"
trigger_summary <- tryCatch({
field_analysis_df %>%
# Active alerts (fields with non-NA Status_Alert)
active_alerts <- field_analysis_df %>%
filter(!is.na(Status_Alert), Status_Alert != "") %>%
group_by(Status_Alert) %>%
summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>%
summarise(
Acreage = sum(Acreage, na.rm = TRUE),
Field_count = n_distinct(Field_id),
.groups = "drop"
) %>%
mutate(Category = Status_Alert) %>%
select(Category, Acreage)
select(Category, Acreage, Field_count)
# No active triggers (fields with NA Status_Alert)
no_alerts <- field_analysis_df %>%
filter(is.na(Status_Alert) | Status_Alert == "") %>%
summarise(
Acreage = sum(Acreage, na.rm = TRUE),
Field_count = n_distinct(Field_id),
.groups = "drop"
) %>%
mutate(Category = "No active triggers") %>%
select(Category, Acreage, Field_count)
# Combine active alerts and no-alert fields
bind_rows(active_alerts, no_alerts)
}, error = function(e) {
data.frame(Category = character(), Acreage = numeric())
data.frame(Category = character(), Acreage = numeric(), Field_count = numeric())
})
# Combine into summary
@ -709,25 +724,38 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
# Phase names and trigger names to extract from summary
phase_names <- c("Germination", "Tillering", "Grand Growth", "Maturation", "Unknown Phase")
trigger_names <- c("Harvest Ready", "Strong Recovery", "Growth On Track", "Stress Detected",
"Germination Complete", "Germination Started", "No Active Trigger",
"Ready for harvest-check", "Strong decline in crop health", "Harvested/bare")
# Trigger names now include both active alerts AND "No active triggers" (calculated dynamically above)
trigger_names <- c("harvest_ready", "harvested_bare", "stress_detected",
"germination_delayed", "growth_on_track", "No active triggers")
# Extract phase distribution - match on category names directly
if (!is.null(field_analysis_summary) && nrow(field_analysis_summary) > 0) {
# Phase rows with field count
phase_rows <- field_analysis_summary %>%
filter(Category %in% phase_names) %>%
select(Category, Acreage) %>%
select(Category, Acreage, Field_count) %>%
mutate(KPI_Group = "PHASE DISTRIBUTION", .before = 1)
# Extract status triggers - match on category names directly
# Trigger rows with field count
trigger_rows <- field_analysis_summary %>%
filter(Category %in% trigger_names) %>%
select(Category, Acreage) %>%
mutate(KPI_Group = "STATUS TRIGGERS", .before = 1)
select(Category, Acreage, Field_count) %>%
mutate(KPI_Group = "OPERATIONAL ALERTS", .before = 1)
# If no triggers found, add a placeholder row
if (nrow(trigger_rows) == 0) {
trigger_rows <- data.frame(
KPI_Group = "OPERATIONAL ALERTS",
Category = "No active triggers",
Acreage = 0,
Field_count = 0,
stringsAsFactors = FALSE
)
}
# Calculate area change from field_analysis data
total_acreage <- sum(field_analysis_df$Acreage, na.rm = TRUE)
total_fields <- n_distinct(field_analysis_df$Field_id)
# Parse Weekly_ci_change to determine improvement/decline
parse_ci_change <- function(change_str) {
@ -741,10 +769,20 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
field_analysis_df$ci_change_numeric <- sapply(field_analysis_df$Weekly_ci_change, parse_ci_change)
improving_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric > 0.2], na.rm = TRUE)
declining_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric < -0.2], na.rm = TRUE)
stable_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric >= -0.2 &
field_analysis_df$ci_change_numeric <= 0.2], na.rm = TRUE)
# Area change rows with field count
improving_df <- field_analysis_df %>%
filter(ci_change_numeric > 0.2)
stable_df <- field_analysis_df %>%
filter(ci_change_numeric >= -0.2 & ci_change_numeric <= 0.2)
declining_df <- field_analysis_df %>%
filter(ci_change_numeric < -0.2)
improving_acreage <- sum(improving_df$Acreage, na.rm = TRUE)
improving_field_count <- n_distinct(improving_df$Field_id)
stable_acreage <- sum(stable_df$Acreage, na.rm = TRUE)
stable_field_count <- n_distinct(stable_df$Field_id)
declining_acreage <- sum(declining_df$Acreage, na.rm = TRUE)
declining_field_count <- n_distinct(declining_df$Field_id)
improving_pct <- ifelse(total_acreage > 0, round(improving_acreage / total_acreage * 100, 1), 0)
declining_pct <- ifelse(total_acreage > 0, round(declining_acreage / total_acreage * 100, 1), 0)
@ -761,24 +799,54 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
KPI_Group = "AREA CHANGE",
Category = c("Improving", "Stable", "Declining"),
Acreage = c(round(improving_acreage, 2), round(stable_acreage, 2), round(declining_acreage, 2)),
Field_count = c(improving_field_count, stable_field_count, declining_field_count),
Percent = c(paste0(improving_pct, "%"), paste0(stable_pct, "%"), paste0(declining_pct, "%")),
stringsAsFactors = FALSE
)
# Cloud influence rows with field count - aggregate by Cloud_category
cloud_rows <- tryCatch({
field_analysis_df %>%
filter(!is.na(Cloud_category)) %>%
group_by(Cloud_category) %>%
summarise(
Acreage = sum(Acreage, na.rm = TRUE),
Field_count = n_distinct(Field_id),
.groups = "drop"
) %>%
mutate(
KPI_Group = "CLOUD INFLUENCE",
Category = Cloud_category,
Percent = paste0(round(Acreage / total_acreage * 100, 1), "%"),
Acreage = round(Acreage, 2)
) %>%
select(KPI_Group, Category, Acreage, Field_count, Percent)
}, error = function(e) {
data.frame(
KPI_Group = character(),
Category = character(),
Acreage = numeric(),
Field_count = numeric(),
Percent = character()
)
})
# Total farm row
total_row <- data.frame(
KPI_Group = "TOTAL FARM",
Category = "Total Acreage",
Acreage = round(total_acreage, 2),
Field_count = total_fields,
Percent = "100%",
stringsAsFactors = FALSE
)
# Combine all rows with percentages for all
# Combine all rows
combined_df <- bind_rows(
phase_pcts,
trigger_pcts,
area_change_rows,
cloud_rows,
total_row
)
@ -789,7 +857,7 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
KPI_display = if_else(row_number() == 1, KPI_Group, "")
) %>%
ungroup() %>%
select(KPI_display, Category, Acreage, Percent)
select(KPI_display, Category, Acreage, Percent, Field_count)
# Render as flextable with merged cells
ft <- flextable(combined_df) %>%
@ -797,7 +865,8 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
KPI_display = "KPI Category",
Category = "Item",
Acreage = "Acreage",
Percent = "Percent"
Percent = "Percentage of total fields",
Field_count = "# Fields"
) %>%
merge_v(j = "KPI_display") %>%
autofit()
@ -807,8 +876,9 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
phase_count <- nrow(phase_rows)
trigger_count <- nrow(trigger_rows)
area_count <- nrow(area_change_rows)
cloud_count <- nrow(cloud_rows)
# Add lines after phases, triggers, and area change groups (before totals)
# Add lines after phases, triggers, area change, and cloud groups (before totals)
if (phase_count > 0) {
ft <- ft %>% hline(i = phase_count, border = officer::fp_border(width = 1))
}
@ -818,6 +888,9 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
if (area_count > 0) {
ft <- ft %>% hline(i = phase_count + trigger_count + area_count, border = officer::fp_border(width = 1))
}
if (cloud_count > 0) {
ft <- ft %>% hline(i = phase_count + trigger_count + area_count + cloud_count, border = officer::fp_border(width = 1))
}
ft
} else {
@ -828,40 +901,6 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
}
```
## Cloud Coverage Summary
```{r cloud_coverage_summary, echo=FALSE}
# Cloud coverage summary, aggregated per Cloud_category.
# The underlying data ships inside the field_analysis RDS produced by Script 80.
has_field_analysis <- exists("summary_data") &&
  !is.null(summary_data) &&
  "field_analysis" %in% names(summary_data)
if (has_field_analysis) {
  analysis_data <- summary_data$field_analysis
  # One row per cloud category: field count plus total acreage (1 decimal).
  per_category <- analysis_data %>%
    filter(!is.na(Cloud_category)) %>%
    group_by(Cloud_category) %>%
    summarise(
      "Number of Fields" = n(),
      "Total Acreage" = round(sum(Acreage, na.rm = TRUE), 1),
      .groups = "drop"
    ) %>%
    rename("Cloud Category" = Cloud_category) %>%
    arrange(`Cloud Category`)
  if (nrow(per_category) > 0) {
    # Render the aggregated table.
    flextable(per_category) %>%
      autofit() %>%
      theme_vanilla()
  } else {
    cat("Cloud coverage data not available for summary.\n")
  }
} else {
  cat("Field analysis data not available for cloud coverage summary.\n")
}
```
```{r data, message=TRUE, warning=TRUE, include=FALSE}
# All data comes from the field analysis performed in 09_field_analysis_weekly.R
# The report renders KPI tables and field summaries from that data
@ -951,9 +990,9 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
```{r ci_fig, echo=FALSE, fig.align='right', out.width='40%', fig.cap="Chlorophyll Index Example"}
knitr::include_graphics("CI_graph_example.png")
```
<div align="center">
![Chlorophyll Index Example](CI_graph_example.png)
</div>
### Data File Structure and Columns
@ -1025,15 +1064,42 @@ Both algorithms are not always in sync, and can have contradictory results. Wide
## Report Metadata
```{r report_metadata, echo=FALSE}
# Calculate total area from field analysis data
total_area_acres <- 0
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
total_area_acres <- sum(summary_data$field_analysis$Acreage, na.rm = TRUE)
}
# Calculate total fields
total_fields_count <- 0
if (exists("AllPivots0")) {
total_fields_count <- nrow(AllPivots0 %>% filter(!is.na(field)) %>% group_by(field) %>% summarise())
} else if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
total_fields_count <- n_distinct(summary_data$field_analysis$Field_id)
}
metadata_info <- data.frame(
Metric = c("Report Generated", "Data Source", "Analysis Period", "Total Fields", "Next Update"),
Metric = c(
"Report Generated",
"Data Source",
"Analysis Period",
"Total Fields [number]",
"Total Area [acres]",
"Next Update",
"Service provided",
"Starting date service"
),
Value = c(
format(Sys.time(), "%Y-%m-%d %H:%M:%S"),
paste("Project", toupper(project_dir)),
paste("Week", current_week, "of", year),
ifelse(exists("AllPivots0"), nrow(AllPivots0 %>% filter(!is.na(field)) %>% group_by(field) %>% summarise()), "Unknown"),
"Next Wednesday"
)
ifelse(total_fields_count > 0, total_fields_count, "Unknown"),
ifelse(total_area_acres > 0, round(total_area_acres, 0), "Unknown"),
"Next Wednesday",
"Cane Supply Office - Weekly",
"23 dec 2025"
),
stringsAsFactors = FALSE
)
ft <- flextable(metadata_info) %>%
@ -1043,4 +1109,4 @@ ft <- flextable(metadata_info) %>%
ft
```
*This report was automatically generated by the SmartCane monitoring system. For questions or additional analysis, please contact the technical team.*
*This report was automatically generated by the SmartCane monitoring system. For questions or additional analysis, please contact the technical team at info@smartcane.ag.*

View file

@ -239,7 +239,7 @@
#
# OUTPUT:
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
# - Columns: field, sub_field, Date, FitData, DOY, value
# - Columns: field, sub_field, Date, FitData, DAH, value
#
# PARAMETERS:
# PROJECT: angata, chemba, xinavane, esa, simba
@ -438,8 +438,8 @@
# rmarkdown::render(
rmarkdown::render(
"r_app/90_CI_report_with_kpis_agronomic_support.Rmd",
params = list(data_dir = "aura", report_date = as.Date("2022-12-08")),
output_file = "SmartCane_Report_agronomic_support_aura_2022-12-08.docx",
params = list(data_dir = "aura", report_date = as.Date("2026-02-04")),
output_file = "SmartCane_Report_agronomic_support_aura_2026-02-04.docx",
output_dir = "laravel_app/storage/app/aura/reports"
)
#
@ -450,7 +450,7 @@ rmarkdown::render(
rmarkdown::render(
"r_app/91_CI_report_with_kpis_cane_supply.Rmd",
params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
output_file = "SmartCane_Report_basemap_test.docx",
output_file = "SmartCane_Report_cane_supply_angata_2026-02-04.docx",
output_dir = "laravel_app/storage/app/angata/reports"
)
#

View file

@ -33,8 +33,14 @@ suppressPackageStartupMessages({
})
# ==============================================================================
# SECTION 2: CLIENT TYPE MAPPING & CONFIGURATION
# SECTION 2: GLOBAL AGRONOMIC THRESHOLDS & CLIENT TYPE MAPPING
# ==============================================================================
# Maturity threshold for yield prediction: crop age in Days After Harvest (DAH)
# Only fields >= DAH_MATURITY_THRESHOLD days old receive yield forecasts
# ~240 days ≈ 8 months, typical sugarcane maturity window
DAH_MATURITY_THRESHOLD <- 240
# Maps project names to client types for pipeline control
# This determines which scripts run and what outputs they produce
@ -45,7 +51,8 @@ CLIENT_TYPE_MAP <- list(
"esa" = "agronomic_support",
"simba" = "agronomic_support",
"john" = "agronomic_support",
"huss" = "agronomic_support"
"huss" = "agronomic_support",
"aura" = "agronomic_support"
)
#' Get client type for a project
@ -162,13 +169,11 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") {
# TIER 5: MOSAICS (Script 40 output)
weekly_mosaic_dir <- here(laravel_storage_dir, "weekly_mosaic")
field_tiles_ci_dir <- here(weekly_mosaic_dir, "field_tiles_CI")
weekly_tile_max_dir <- here(laravel_storage_dir, "weekly_tile_max")
# TIER 6: KPI & REPORTING (Scripts 80/90/91 output)
reports_dir <- here(laravel_storage_dir, "reports")
kpi_reports_dir <- here(reports_dir, "kpis", "field_level")
kpi_field_stats_dir <- here(reports_dir, "kpis", "field_stats")
kpi_field_analysis_dir <- here(reports_dir, "kpis", "field_analysis")
kpi_reports_dir <- here(reports_dir, "kpis")
# TIER 7: SUPPORT (various scripts)
vrt_dir <- here(data_dir, "vrt") # Virtual Raster files created during CI extraction
@ -180,8 +185,8 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") {
merged_tif_folder, field_tiles_dir, field_tiles_ci_dir,
extracted_ci_base_dir, daily_ci_vals_dir, cumulative_ci_vals_dir, ci_for_python_dir,
growth_model_interpolated_dir,
weekly_mosaic_dir, field_tiles_ci_dir,
reports_dir, kpi_reports_dir, kpi_field_stats_dir, kpi_field_analysis_dir,
weekly_mosaic_dir, weekly_tile_max_dir,
reports_dir, kpi_reports_dir,
data_dir, vrt_dir, harvest_dir, log_dir
)
@ -215,13 +220,11 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") {
# TIER 5: Mosaics
weekly_mosaic_dir = weekly_mosaic_dir,
field_tiles_ci_dir = field_tiles_ci_dir,
weekly_tile_max_dir = weekly_tile_max_dir,
# TIER 6: KPI & reporting
reports_dir = reports_dir,
kpi_reports_dir = kpi_reports_dir,
kpi_field_stats_dir = kpi_field_stats_dir,
kpi_field_analysis_dir = kpi_field_analysis_dir,
# TIER 7: Support
data_dir = data_dir,
@ -511,7 +514,63 @@ setup_logging <- function(log_dir) {
# ==============================================================================
# Centralized helper functions for run_full_pipeline.R to avoid hardcoding paths
#' Detect mosaic mode from project structure
#'
#' Determines if project uses "tiled" (legacy) or "single-file" (per-field) mosaics
#'
#' @param project_dir Character. Project name
#' @return Character. "tiled" or "single-file"
detect_mosaic_mode <- function(project_dir) {
  # "single-file" (per-field) is the standard layout; report "tiled" only
  # when the legacy weekly_tile_max directory is present and non-empty.
  legacy_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max")
  has_legacy_tiles <- dir.exists(legacy_dir) && length(list.files(legacy_dir)) > 0
  if (has_legacy_tiles) "tiled" else "single-file"
}
#' Detect grid size from tile directory structure
#'
#' For per-field architecture, returns "unknown" (grid-based tiling is legacy)
#'
#' @param project_dir Character. Project name
#' @return Character. Grid size ("unknown" for per-field)
detect_grid_size <- function(project_dir) {
  # Grid-based tile organization is a legacy concept; the per-field layout
  # has no grid, so the answer is constant regardless of the project.
  "unknown"
}
#' Get project storage path
#'
#' @param project_dir Character. Project name
#' @param subdir Character. Optional subdirectory (default NULL)
#' @return Character. Full path
get_project_storage_path <- function(project_dir, subdir = NULL) {
  # Base Laravel storage location for this project; c() silently drops a
  # NULL subdir, so the optional component needs no explicit branch.
  parts <- c("laravel_app", "storage", "app", project_dir, subdir)
  do.call(file.path, as.list(parts))
}
#' Get mosaic directory
#'
#' @param project_dir Character. Project name
#' @param mosaic_mode Character. "tiled" or "single-file"
#' @return Character. Full path to mosaic directory
get_mosaic_dir <- function(project_dir, mosaic_mode = "auto") {
  # Resolve "auto" to the project's actual layout before picking a subdir.
  resolved_mode <- if (mosaic_mode == "auto") {
    detect_mosaic_mode(project_dir)
  } else {
    mosaic_mode
  }
  subdir <- if (resolved_mode == "tiled") "weekly_tile_max" else "weekly_mosaic"
  get_project_storage_path(project_dir, subdir)
}
#' Get KPI directory based on client type
#'
@ -552,6 +611,20 @@ check_harvest_output_exists <- function(project_dir, week_num, year_num) {
file.exists(path)
}
#' Get mosaic verification directory
#'
#' @param project_dir Character. Project name
#' @param mosaic_mode Character. "tiled" or "single-file"
#' @return Character. Full path to mosaic directory
get_mosaic_verification_dir <- function(project_dir, mosaic_mode) {
  # "tiled" projects keep mosaics under weekly_tile_max; any other mode
  # falls back to the per-field weekly_mosaic layout.
  subdir <- if (mosaic_mode == "tiled") "weekly_tile_max" else "weekly_mosaic"
  file.path("laravel_app", "storage", "app", project_dir, subdir)
}
#' Check if mosaic files exist for a specific week
#'

View file

@ -34,12 +34,13 @@ subchunkify <- function(g, fig_height=7, fig_width=5) {
#' @param pivot_spans Additional boundary data for the field
#' @param show_legend Whether to show the legend (default: FALSE)
#' @param legend_is_portrait Whether to show the legend in portrait orientation (default: FALSE)
#' @param legend_position Position for the legend when shown: "left", "right", "top", "bottom" (default: "bottom")
#' @param week Week number to display in the title
#' @param age Age of the crop in weeks
#' @param borders Whether to display field borders (default: FALSE)
#' @return A tmap object with the CI map
#'
create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week, age, borders = FALSE, colorblind = FALSE){
create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, legend_position = "bottom", week, age, borders = FALSE, colorblind = FALSE){
# Input validation
if (missing(pivot_raster) || is.null(pivot_raster)) {
stop("pivot_raster is required")
@ -64,25 +65,29 @@ create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend =
map <- tm_shape(pivot_raster, unit = "m")
# Add raster with continuous spectrum (fixed scale 8-1 for consistent comparison, reversed)
map <- map + tm_raster(col.scale = tm_scale_continuous(values = palette,
limits = c(1,8)),
col.legend = tm_legend(title = "CI",
orientation = if(legend_is_portrait) "portrait" else "landscape",
map <- map + tm_raster(
"CI",
col.scale = tm_scale_continuous(
values = palette,
limits = c(1, 8),
ticks = seq(1, 8, by = 1),
outliers.trunc = c(TRUE, TRUE)
),
col.legend = tm_legend(
title = "CI",
orientation = if (legend_is_portrait) "portrait" else "landscape",
show = show_legend,
position = if(show_legend) tm_pos_out("left", "center") else c("left", "bottom"),
position = if (show_legend) tm_pos_out(legend_position, "center") else c("left", "bottom"),
reverse = TRUE
))
)
)
# Add layout elements
map <- map + tm_title(text = paste0("Max CI week ", week,"\n", age, " weeks (", age * 7, " days) old"),
size = 0.7)
# Add layout configuration to prevent legend rescaling
map <- map + tm_layout(legend.position = c("left", "bottom"),
legend.outside = FALSE,
inner.margins = 0.05,
asp = 1) # Force 1:1 aspect ratio for consistent sizing
# Add bounds/view settings for fixed aspect ratio
map <- map + tm_view(asp = 1)
map <- map + tm_layout(
main.title = paste0("Max CI week ", week,"\n", age, " weeks (", age * 7, " days) old"),
main.title.size = 0.7,
#legend.height = 0.85, # Constrain vertical legend height to not exceed map
asp = 1 # Fixed aspect ratio
)
# Add borders if requested
if (borders) {
@ -104,13 +109,14 @@ create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend =
#' @param pivot_spans Additional boundary data for the field
#' @param show_legend Whether to show the legend (default: FALSE)
#' @param legend_is_portrait Whether to show the legend in portrait orientation (default: FALSE)
#' @param legend_position Position for the legend when shown: "left", "right", "top", "bottom" (default: "bottom")
#' @param week_1 First week number for comparison
#' @param week_2 Second week number for comparison
#' @param age Age of the crop in weeks
#' @param borders Whether to display field borders (default: TRUE)
#' @return A tmap object with the CI difference map
#'
create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week_1, week_2, age, borders = TRUE, colorblind = FALSE){
create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, legend_position = "bottom", week_1, week_2, age, borders = TRUE, colorblind = FALSE){
# Input validation
if (missing(pivot_raster) || is.null(pivot_raster)) {
stop("pivot_raster is required")
@ -135,26 +141,30 @@ create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_lege
map <- tm_shape(pivot_raster, unit = "m")
# Add raster with continuous spectrum (centered at 0 for difference maps, fixed scale, reversed)
map <- map + tm_raster(col.scale = tm_scale_continuous(values = palette,
map <- map + tm_raster(
"CI",
col.scale = tm_scale_continuous(
values = palette,
limits = c(-3, 3),
ticks = seq(-3, 3, by = 1),
midpoint = 0,
limits = c(-3, 3)),
col.legend = tm_legend(title = "CI diff.",
orientation = if(legend_is_portrait) "portrait" else "landscape",
outliers.trunc = c(TRUE, TRUE)
),
col.legend = tm_legend(
title = "CI diff.",
orientation = if (legend_is_portrait) "portrait" else "landscape",
show = show_legend,
position = if(show_legend) tm_pos_out("right", "center") else c("left", "bottom"),
position = if (show_legend) tm_pos_out(legend_position, "center") else c("left", "bottom"),
reverse = TRUE
))
)
)
# Add layout elements
map <- map + tm_title(text = paste0("CI change week ", week_1, " - week ", week_2, "\n", age, " weeks (", age * 7, " days) old"),
size = 0.7)
# Add layout configuration to prevent legend rescaling
map <- map + tm_layout(legend.position = c("right", "bottom"),
legend.outside = FALSE,
inner.margins = 0.05,
asp = 1) # Force 1:1 aspect ratio for consistent sizing
# Add bounds/view settings for fixed aspect ratio
map <- map + tm_view(asp = 1)
map <- map + tm_layout(
main.title = paste0("CI change week ", week_1, " - week ", week_2, "\n", age, " weeks (", age * 7, " days) old"),
main.title.size = 0.7,
#legend.height = 0.85, # Constrain vertical legend height to not exceed map
asp = 1 # Fixed aspect ratio
)
# Add borders if requested
if (borders) {
@ -269,18 +279,16 @@ ci_plot <- function(pivotName,
# Create historical maps only if data is available
# Build list with all available maps - order matches original: [m2, m1, current, diff_1w, diff_3w]
# Widths match original hardcoded: c(0.23, 0.18, 0.18, 0.18, 0.23)
maps_to_arrange <- list()
widths_to_use <- c()
field_heading_note <- ""
# Try to create 2-week ago map (legend on left)
if (!is.null(singlePivot_m2)) {
CImap_m2 <- create_CI_map(singlePivot_m2, AllPivots2, joined_spans2,
show_legend = TRUE, legend_is_portrait = TRUE,
legend_position = "left",
week = week_minus_2, age = age - 2, borders = borders, colorblind = colorblind_friendly)
maps_to_arrange <- c(maps_to_arrange, list(CImap_m2))
widths_to_use <- c(widths_to_use, 0.24)
}
# Try to create 1-week ago map
@ -289,12 +297,10 @@ ci_plot <- function(pivotName,
show_legend = FALSE, legend_is_portrait = FALSE,
week = week_minus_1, age = age - 1, borders = borders, colorblind = colorblind_friendly)
maps_to_arrange <- c(maps_to_arrange, list(CImap_m1))
widths_to_use <- c(widths_to_use, 0.17)
}
# Always add current week map (center position)
maps_to_arrange <- c(maps_to_arrange, list(CImap))
widths_to_use <- c(widths_to_use, 0.17)
# Try to create 1-week difference map
if (!is.null(abs_CI_last_week)) {
@ -302,21 +308,17 @@ ci_plot <- function(pivotName,
show_legend = FALSE, legend_is_portrait = FALSE,
week_1 = week, week_2 = week_minus_1, age = age, borders = borders, colorblind = colorblind_friendly)
maps_to_arrange <- c(maps_to_arrange, list(CI_max_abs_last_week))
widths_to_use <- c(widths_to_use, 0.17)
}
# Try to create 3-week difference map (legend on right)
if (!is.null(abs_CI_three_week)) {
CI_max_abs_three_week <- create_CI_diff_map(abs_CI_three_week, AllPivots2, joined_spans2,
show_legend = TRUE, legend_is_portrait = TRUE,
legend_position = "right",
week_1 = week, week_2 = week_minus_3, age = age, borders = borders, colorblind = colorblind_friendly)
maps_to_arrange <- c(maps_to_arrange, list(CI_max_abs_three_week))
widths_to_use <- c(widths_to_use, 0.24)
}
# Normalize widths to sum to 1
widths_to_use <- widths_to_use / sum(widths_to_use)
# Add note if historical data is limited
if (length(maps_to_arrange) == 1) {
field_heading_note <- " (Current week only - historical data not yet available)"
@ -324,8 +326,21 @@ ci_plot <- function(pivotName,
field_heading_note <- " (Limited historical data)"
}
# Arrange the maps with normalized widths
tst <- do.call(tmap_arrange, c(maps_to_arrange, list(nrow = 1, widths = widths_to_use)))
# Arrange the maps in a row with more width for first and last (for legends)
# Give maps with legends (1st and 5th) more space: 23%, middle maps get 18% each
widths <- if (length(maps_to_arrange) == 5) {
c(0.23, 0.18, 0.18, 0.18, 0.23)
} else if (length(maps_to_arrange) == 4) {
c(0.25, 0.25, 0.25, 0.25) # Equal if only 4 maps
} else if (length(maps_to_arrange) == 3) {
c(0.33, 0.33, 0.34) # Equal if only 3 maps
} else if (length(maps_to_arrange) == 2) {
c(0.5, 0.5) # Equal if only 2 maps
} else {
NULL # Single map or other cases
}
tst <- do.call(tmap_arrange, c(maps_to_arrange, list(nrow = 1, widths = widths)))
# Output heading and map to R Markdown
age_months <- round(age / 4.348, 1)
@ -342,10 +357,10 @@ ci_plot <- function(pivotName,
#' Creates a plot showing Chlorophyll Index data over time for a pivot field
#'
#' @param pivotName The name or ID of the pivot field to visualize
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DOY, cumulative_CI, value and season columns
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DAH, cumulative_CI, value and season columns
#' @param plot_type Type of plot to generate: "absolute", "cumulative", or "both"
#' @param facet_on Whether to facet the plot by season (TRUE) or overlay all seasons (FALSE)
#' @param x_unit Unit for x-axis: "days" for DOY or "weeks" for week number (default: "days")
#' @param x_unit Unit for x-axis: "days" for DAH or "weeks" for week number (default: "days")
#' @param colorblind_friendly Whether to use colorblind-friendly color schemes (default: FALSE)
#' @param show_benchmarks Whether to show historical benchmark lines (default: FALSE)
#' @param estate_name Name of the estate for benchmark calculation (required if show_benchmarks = TRUE)
@ -378,7 +393,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
# Process data
data_ci2 <- data_ci %>%
dplyr::mutate(CI_rate = cumulative_CI / DOY,
dplyr::mutate(CI_rate = cumulative_CI / DAH,
week = lubridate::week(Date)) %>%
dplyr::group_by(field) %>%
dplyr::mutate(mean_CIrate_rolling_10_days = zoo::rollapplyr(CI_rate, width = 10, FUN = mean, partial = TRUE),
@ -433,7 +448,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
# Determine x-axis variable based on x_unit parameter
x_var <- if (x_unit == "days") {
if (facet_on) "Date" else "DOY"
if (facet_on) "Date" else "DAH"
} else {
"week"
}
@ -443,14 +458,21 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
"weeks" = "Week Number")
# Calculate dynamic max values for breaks
max_doy <- max(plot_data$DOY, na.rm = TRUE) + 20
max_dah <- max(plot_data$DAH, na.rm = TRUE) + 20
max_week <- max(as.numeric(plot_data$week), na.rm = TRUE) + ceiling(20 / 7)
# Create plot with either facets by season or overlay by DOY/week
# Create plot with either facets by season or overlay by DAH/week
if (facet_on) {
g <- ggplot2::ggplot(data = plot_data) +
ggplot2::facet_wrap(~season, scales = "free_x") +
ggplot2::geom_line(ggplot2::aes_string(x = x_var, y = "ci_value", col = "sub_field", group = "sub_field")) +
ggplot2::geom_line(
ggplot2::aes(
x = .data[[x_var]],
y = .data[["ci_value"]],
col = .data[["sub_field"]],
group = .data[["sub_field"]]
)
) +
ggplot2::labs(title = paste("Plot of", y_label),
color = "Field Name",
y = y_label,
@ -463,7 +485,9 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
axis.title.x.top = ggplot2::element_text(size = 8),
legend.justification = c(1, 0), legend.position = c(1, 0),
legend.justification = c(1, 0),
legend.position = "inside",
legend.position.inside = c(1, 0),
legend.title = ggplot2::element_text(size = 8),
legend.text = ggplot2::element_text(size = 8)) +
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
@ -482,32 +506,46 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
benchmark_subset <- benchmark_data %>%
dplyr::filter(ci_type == ci_type_filter) %>%
dplyr::mutate(
benchmark_x = if (x_var == "DOY") {
DOY
benchmark_x = if (x_var == "DAH") {
DAH
} else if (x_var == "week") {
DOY / 7 # Approximate conversion
DAH / 7 # Approximate conversion
} else {
DOY # For Date, use DOY as is (may not align perfectly)
DAH # For Date, use DAH as is (may not align perfectly)
}
)
ggplot2::geom_smooth(
data = benchmark_subset,
ggplot2::aes_string(x = "benchmark_x", y = "benchmark_value", group = "factor(percentile)"),
color = "gray70", size = 0.5, se = FALSE, inherit.aes = FALSE
ggplot2::aes(
x = .data[["benchmark_x"]],
y = .data[["benchmark_value"]],
group = factor(.data[["percentile"]])
),
color = "gray70", linewidth = 0.5, se = FALSE, inherit.aes = FALSE
)
}
} +
# Plot older seasons with lighter lines
ggplot2::geom_line(
data = plot_data %>% dplyr::filter(!is_latest),
ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"),
size = 0.7, alpha = 0.4
ggplot2::aes(
x = .data[[x_var]],
y = .data[["ci_value"]],
col = .data[["season"]],
group = .data[["season"]]
),
linewidth = 0.7, alpha = 0.4
) +
# Plot latest season with thicker, more prominent line
ggplot2::geom_line(
data = plot_data %>% dplyr::filter(is_latest),
ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"),
size = 1.5, alpha = 1
ggplot2::aes(
x = .data[[x_var]],
y = .data[["ci_value"]],
col = .data[["season"]],
group = .data[["season"]]
),
linewidth = 1.5, alpha = 1
) +
ggplot2::labs(title = paste("Plot of", y_label, "for Field", pivotName, title_suffix),
color = "Season",
@ -515,8 +553,8 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
x = x_label) +
color_scale +
{
if (x_var == "DOY") {
ggplot2::scale_x_continuous(breaks = seq(0, max_doy, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
if (x_var == "DAH") {
ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
} else if (x_var == "week") {
ggplot2::scale_x_continuous(breaks = seq(0, max_week, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
}
@ -525,7 +563,9 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
axis.title.x.top = ggplot2::element_text(size = 8),
legend.justification = c(1, 0), legend.position = c(1, 0),
legend.justification = c(1, 0),
legend.position = "inside",
legend.position.inside = c(1, 0),
legend.title = ggplot2::element_text(size = 8),
legend.text = ggplot2::element_text(size = 8)) +
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
@ -561,7 +601,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
# Determine x-axis variable based on x_unit parameter
x_var <- if (x_unit == "days") {
if (facet_on) "Date" else "DOY"
if (facet_on) "Date" else "DAH"
} else {
"week"
}
@ -578,7 +618,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
}
# Calculate dynamic max values for breaks
max_doy_both <- max(plot_data_both$DOY, na.rm = TRUE) + 20
max_dah_both <- max(plot_data_both$DAH, na.rm = TRUE) + 20
max_week_both <- max(as.numeric(plot_data_both$week), na.rm = TRUE) + ceiling(20 / 7)
# Create the faceted plot
@ -588,12 +628,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
if (!is.null(benchmark_data)) {
benchmark_subset <- benchmark_data %>%
dplyr::mutate(
benchmark_x = if (x_var == "DOY") {
DOY
benchmark_x = if (x_var == "DAH") {
DAH
} else if (x_var == "week") {
DOY / 7
DAH / 7
} else {
DOY
DAH
},
ci_type_label = case_when(
ci_type == "value" ~ "10-Day Rolling Mean CI",
@ -603,8 +643,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
)
ggplot2::geom_smooth(
data = benchmark_subset,
ggplot2::aes_string(x = "benchmark_x", y = "benchmark_value", group = "factor(percentile)"),
color = "gray70", size = 0.5, se = FALSE, inherit.aes = FALSE
ggplot2::aes(
x = .data[["benchmark_x"]],
y = .data[["benchmark_value"]],
group = factor(.data[["percentile"]])
),
color = "gray70", linewidth = 0.5, se = FALSE, inherit.aes = FALSE
)
}
} +
@ -612,14 +656,24 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
# Plot older seasons with lighter lines
ggplot2::geom_line(
data = plot_data_both %>% dplyr::filter(!is_latest),
ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"),
size = 0.7, alpha = 0.4
ggplot2::aes(
x = .data[[x_var]],
y = .data[["ci_value"]],
col = .data[["season"]],
group = .data[["season"]]
),
linewidth = 0.7, alpha = 0.4
) +
# Plot latest season with thicker, more prominent line
ggplot2::geom_line(
data = plot_data_both %>% dplyr::filter(is_latest),
ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"),
size = 1.5, alpha = 1
ggplot2::aes(
x = .data[[x_var]],
y = .data[["ci_value"]],
col = .data[["season"]],
group = .data[["season"]]
),
linewidth = 1.5, alpha = 1
) +
ggplot2::labs(title = paste("CI Analysis for Field", pivotName),
color = "Season",
@ -627,8 +681,8 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
x = x_label) +
color_scale +
{
if (x_var == "DOY") {
ggplot2::scale_x_continuous(breaks = seq(0, max_doy_both, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
if (x_var == "DAH") {
ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
} else if (x_var == "week") {
ggplot2::scale_x_continuous(breaks = seq(0, max_week_both, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
} else if (x_var == "Date") {
@ -639,7 +693,9 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
axis.title.x.top = ggplot2::element_text(size = 8),
legend.justification = c(1, 0), legend.position = c(1, 0),
legend.justification = c(1, 0),
legend.position = "inside",
legend.position.inside = c(1, 0),
legend.title = ggplot2::element_text(size = 8),
legend.text = ggplot2::element_text(size = 8)) +
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
@ -659,9 +715,11 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
dummy_data[["season"]] <- factor("dummy", levels = levels(plot_data_both[["season"]]))
g_both <- g_both +
ggplot2::geom_point(data = dummy_data,
ggplot2::aes_string(x = x_var, y = "ci_value"),
alpha = 0, size = 0) # Invisible points to set scale
ggplot2::geom_point(
data = dummy_data,
ggplot2::aes(x = .data[[x_var]], y = .data[["ci_value"]]),
alpha = 0, size = 0
) # Invisible points to set scale
# Display the combined faceted plot
subchunkify(g_both, 2.8, 10)
@ -698,7 +756,9 @@ cum_ci_plot2 <- function(pivotName){
x = "Date", y = "CI Rate") +
theme_minimal() +
theme(axis.text.x = element_text(hjust = 0.5),
legend.justification = c(1, 0), legend.position = c(1, 0),
legend.justification = c(1, 0),
legend.position = "inside",
legend.position.inside = c(1, 0),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
annotate("text", x = midpoint_date, y = 2, label = "No data available", size = 6, hjust = 0.5)
@ -750,30 +810,9 @@ get_week_path <- function(mosaic_path, input_date, week_offset) {
target_week <- sprintf("%02d", lubridate::isoweek(target_date)) # Left-pad week number with a zero if needed
target_year <- lubridate::isoyear(target_date)
# Primary approach: Try single-file mosaic path first
# Load single-file mosaic for the given week
path_to_week <- here::here(mosaic_path, paste0("week_", target_week, "_", target_year, ".tif"))
# Smart fallback: If single-file doesn't exist AND path contains "weekly_mosaic", check for tiles
if (!file.exists(path_to_week) && grepl("weekly_mosaic", mosaic_path)) {
# Try to locate tile-based mosaics in weekly_tile_max instead
tile_mosaic_path <- sub("weekly_mosaic", "weekly_tile_max", mosaic_path)
# Look for any tile files matching the week pattern (e.g., week_XX_YYYY_00.tif, week_XX_YYYY_01.tif, etc.)
if (dir.exists(tile_mosaic_path)) {
tile_files <- list.files(tile_mosaic_path,
pattern = paste0("^week_", target_week, "_", target_year, "_(\\d{2})\\.tif$"),
full.names = TRUE)
if (length(tile_files) > 0) {
# Found tiles - return the first tile as primary, note that multiple tiles exist
safe_log(paste("Single-file mosaic not found for week", target_week, target_year,
"but found", length(tile_files), "tile files in weekly_tile_max. Using tile approach."), "INFO")
# Return first tile - caller should aggregate if needed
path_to_week <- tile_files[1] # Return first tile; downstream can handle multiple tiles
}
}
}
# Log the path calculation
safe_log(paste("Calculated path for week", target_week, "of year", target_year, ":", path_to_week), "INFO")
@ -788,11 +827,11 @@ get_week_path <- function(mosaic_path, input_date, week_offset) {
#' Computes historical percentile benchmarks for CI data per estate
#'
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DOY, cumulative_CI, value, season columns
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DAH, cumulative_CI, value, season columns
#' @param estate_name Name of the estate/client to filter data for
#' @param percentiles Vector of percentiles to compute (e.g., c(10, 50, 90))
#' @param min_seasons Minimum number of seasons required for reliable benchmarks (default: 3)
#' @return Data frame with DOY, percentile, ci_type, benchmark_value, or NULL if insufficient data
#' @return Data frame with DAH, percentile, ci_type, benchmark_value, or NULL if insufficient data
#'
compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c(10, 50, 90), min_seasons = 3) {
# Input validation
@ -821,7 +860,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
# Prepare data for both CI types
data_prepared <- data_filtered %>%
dplyr::ungroup() %>% # Ensure no existing groupings
dplyr::select(DOY, value, cumulative_CI, season) %>%
dplyr::select(DAH, value, cumulative_CI, season) %>%
tidyr::pivot_longer(
cols = c("value", "cumulative_CI"),
names_to = "ci_type",
@ -829,9 +868,9 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
) %>%
dplyr::filter(!is.na(ci_value)) # Remove NA values
# Compute percentiles for each DOY and ci_type
# Compute percentiles for each DAH and ci_type
benchmarks <- data_prepared %>%
dplyr::group_by(DOY, ci_type) %>%
dplyr::group_by(DAH, ci_type) %>%
dplyr::summarise(
p10 = tryCatch(quantile(ci_value, 0.1, na.rm = TRUE), error = function(e) NA_real_),
p50 = tryCatch(quantile(ci_value, 0.5, na.rm = TRUE), error = function(e) NA_real_),
@ -839,7 +878,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
n_observations = n(),
.groups = 'drop'
) %>%
dplyr::filter(n_observations >= min_seasons) %>% # Only include DOYs with sufficient data
dplyr::filter(n_observations >= min_seasons) %>% # Only include DAHs with sufficient data
tidyr::pivot_longer(
cols = c(p10, p50, p90),
names_to = "percentile",
@ -856,7 +895,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
# Rename columns for clarity
benchmarks <- benchmarks %>%
dplyr::select(DOY, ci_type, percentile, benchmark_value)
dplyr::select(DAH, ci_type, percentile, benchmark_value)
safe_log(paste("Computed CI benchmarks for estate", estate_name, "with", length(unique_seasons), "seasons and", nrow(benchmarks), "benchmark points"), "INFO")
@ -1043,7 +1082,7 @@ get_field_priority_level <- function(cv, morans_i) {
#'
#' @param field_name Name of the field to summarize
#' @param field_details_table Data frame with field-level KPI details
#' @param CI_quadrant Data frame containing CI quadrant data with Date, DOY, season columns
#' @param CI_quadrant Data frame containing CI quadrant data with Date, DAH, season columns
#' @param report_date Report date (used for filtering current season data)
#' @return Formatted text string with field KPI summary
#'
@ -1064,10 +1103,10 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
}
current_season <- current_season_data %>% pull(season)
# Get the most recent DOY from the current season
# Get the most recent DAH from the current season
field_age_data <- CI_quadrant %>%
filter(field == field_name, season == current_season) %>%
pull(DOY)
pull(DAH)
field_age <- if (length(field_age_data) > 0) max(field_age_data, na.rm = TRUE) else NA_real_
# Filter data for this specific field
@ -1082,7 +1121,7 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
# For categorical data, take the most common value or highest risk level
field_summary <- field_data %>%
summarise(
field_size = sum(`Field Size (ha)`, na.rm = TRUE),
field_size = sum(`Field Size (acres)`, na.rm = TRUE),
uniformity_levels = paste(unique(`Growth Uniformity`), collapse = "/"),
avg_yield_forecast = ifelse(is.na(`Yield Forecast (t/ha)`[1]), NA, mean(`Yield Forecast (t/ha)`, na.rm = TRUE)),
max_gap_score = max(`Gap Score`, na.rm = TRUE),
@ -1117,10 +1156,10 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
}
kpi_text <- paste0(
"Size: ", round(field_summary$field_size, 1), " ha | Growth Uniformity: ", field_summary$uniformity_levels,
"Size: ", round(field_summary$field_size * 0.404686, 1), " ha | Mean CI: ", round(field_summary$avg_mean_ci, 2),
" | Growth Uniformity: ", field_summary$uniformity_levels,
" | ", yield_text, " | Gap Score: ", round(field_summary$max_gap_score, 1),
" | Decline Risk: ", field_summary$highest_decline_risk, " | Weed Risk: ", field_summary$highest_weed_risk,
" | Mean CI: ", round(field_summary$avg_mean_ci, 2)
" | Decline Risk: ", field_summary$highest_decline_risk, " | Weed Risk: ", field_summary$highest_weed_risk
)
# Wrap in smaller text HTML tags for Word output
@ -1134,4 +1173,33 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
})
}
#' Normalize field_details_table column structure
#'
#' Standardizes column names and guarantees that every expected KPI column
#' is present: renames Field to Field_id when only the legacy name exists,
#' and injects any missing KPI columns as NA placeholders.
#'
#' @param field_details_table data.frame to normalize
#' @return data.frame with standardized column structure
normalize_field_details_columns <- function(field_details_table) {
  # Nothing to normalize for NULL or zero-row input; hand it back untouched
  if (is.null(field_details_table) || nrow(field_details_table) == 0) {
    return(field_details_table)
  }

  current_cols <- names(field_details_table)

  # Legacy tables carry "Field"; standardize to "Field_id" unless both exist
  # (in which case we leave the table alone to avoid a duplicate-name clash)
  if ("Field" %in% current_cols && !("Field_id" %in% current_cols)) {
    field_details_table <- dplyr::rename(field_details_table, Field_id = Field)
  }

  # Every KPI column downstream code reads must exist; absent ones become NA
  expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score",
                     "Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation",
                     "Decline_Severity", "Patchiness_Risk")
  for (missing_col in setdiff(expected_cols, names(field_details_table))) {
    field_details_table[[missing_col]] <- NA
  }

  field_details_table
}

Binary file not shown.

2035
renv.lock

File diff suppressed because it is too large Load diff

View file

@ -15,5 +15,12 @@
"vcs.ignore.cellar": true,
"vcs.ignore.library": true,
"vcs.ignore.local": true,
"vcs.manage.ignores": true
"vcs.manage.ignores": true,
"ignored.directories": [
"old_sh",
"r_app/old_scripts",
"r_app/experiments",
"python_app/experiments",
"webapps"
]
}