diff --git a/python_app/.gitignore b/python_app/.gitignore index 7c3d9f1..4fca4c8 100644 --- a/python_app/.gitignore +++ b/python_app/.gitignore @@ -39,5 +39,7 @@ dist/ *.bak *.swp *.swo -*.swp + +*.png + diff --git a/python_app/22_harvest_baseline_prediction.py b/python_app/22_harvest_baseline_prediction.py index 4184608..f20f937 100644 --- a/python_app/22_harvest_baseline_prediction.py +++ b/python_app/22_harvest_baseline_prediction.py @@ -111,7 +111,7 @@ def main(): # [3/4] Run model predictions with two-step detection print("\n[3/4] Running two-step harvest detection...") - print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DOY reset)") + print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DAH reset)") refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device, phase1_threshold=0.3, phase1_consecutive=2) diff --git a/python_app/harvest_date_pred_utils.py b/python_app/harvest_date_pred_utils.py index 012a9f2..d746e3a 100644 --- a/python_app/harvest_date_pred_utils.py +++ b/python_app/harvest_date_pred_utils.py @@ -144,7 +144,7 @@ def create_model(model_type: str, input_size: int, hidden_size: int = 128, # FEATURE ENGINEERING (from src/feature_engineering.py, simplified for inline) # ============================================================================ -def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> pd.DataFrame: +def compute_ci_features(ci_series: pd.Series, dah_series: pd.Series = None) -> pd.DataFrame: """Compute all CI-based features (state, velocity, acceleration, min/max/range/std/CV).""" features = pd.DataFrame(index=ci_series.index) @@ -177,9 +177,9 @@ def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> p ma = ci_series.rolling(window=window, min_periods=1).mean() features[f'{window}d_CV'] = features[f'{window}d_std'] / (ma + 1e-6) - # DOY normalized - if doy_series is not None: - features['DOY_normalized'] = doy_series / 
450.0 + # DAH normalized (Days After Harvest) + if dah_series is not None: + features['DAH_normalized'] = dah_series / 450.0 return features.fillna(0) @@ -193,8 +193,8 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column: data_df: DataFrame with Date and CI data (may be a window after a harvest) feature_names: List of feature names to extract ci_column: Name of CI column - season_anchor_day: Day in FULL sequence where this season started (for DOY reset) - DOY will be recalculated as: 1, 2, 3, ... from this point + season_anchor_day: Day in FULL sequence where this season started (for DAH reset) + DAH will be recalculated as: 1, 2, 3, ... from this point lookback_start: Starting index in original full data (for season reset calculation) Returns: @@ -203,23 +203,23 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column: # Compute all CI features ci_series = data_df[ci_column].astype(float) - # Compute DOY (age/days since season start) - NOT day-of-year! - # DOY is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365) + # Compute DAH (age/days since season start) - NOT day-of-year! + # DAH is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365) # It only resets to 1 after a harvest is detected (new season) - doy_series = None - if 'DOY_normalized' in feature_names: + dah_series = None + if 'DAH_normalized' in feature_names: if season_anchor_day is not None and lookback_start >= season_anchor_day: - # Season was reset after harvest. Recalculate DOY as simple counter from 1 - # This is a window starting at or after harvest, so DOY should be: 1, 2, 3, ... - doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index) - elif 'DOY' in data_df.columns: - # Use DOY directly from CSV - already calculated as continuous age counter - doy_series = pd.Series(data_df['DOY'].astype(float).values, index=data_df.index) + # Season was reset after harvest. 
Recalculate DAH as simple counter from 1 + # This is a window starting at or after harvest, so DAH should be: 1, 2, 3, ... + dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index) + elif 'DAH' in data_df.columns: + # Use DAH directly from CSV - already calculated as continuous age counter + dah_series = pd.Series(data_df['DAH'].astype(float).values, index=data_df.index) else: # Fallback: create continuous age counter (1, 2, 3, ...) - doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index) + dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index) - all_features = compute_ci_features(ci_series, doy_series) + all_features = compute_ci_features(ci_series, dah_series) # Select requested features requested = [f for f in feature_names if f in all_features.columns] @@ -303,9 +303,9 @@ def load_harvest_data(data_file: Path) -> pd.DataFrame: def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device, threshold=0.45, consecutive_days=2): """ - Phase 1: Growing window detection with DOY season reset. + Phase 1: Growing window detection with DAH season reset. - For each detected harvest, reset DOY counter for the next season. + For each detected harvest, reset DAH counter for the next season. This allows the model to detect multiple consecutive harvests in multi-year data. """ harvest_dates = [] diff --git a/python_app/model_config.json b/python_app/model_config.json index 3be1268..c0dcbd3 100644 --- a/python_app/model_config.json +++ b/python_app/model_config.json @@ -44,7 +44,7 @@ "7d_std", "14d_std", "21d_std", - "DOY_normalized" + "DAH_normalized" ], "model": { "type": "LSTM", diff --git a/r_app/.gitignore b/r_app/.gitignore index ec29223..3efde43 100644 --- a/r_app/.gitignore +++ b/r_app/.gitignore @@ -9,6 +9,12 @@ renv *.swp *.save +# Ignore ALL PNG files by default (generated outputs, analysis plots, etc.) 
+*.png + +# EXCEPTIONS: Explicitly track intentional PNG assets +# Uncomment or add lines below for PNG files that should be committed to git +!CI_graph_example.png # Ignore files related to Rproj .Rproj.user/ .Rhistory diff --git a/r_app/00_common_utils.R b/r_app/00_common_utils.R index 49a7b58..e7e9a44 100644 --- a/r_app/00_common_utils.R +++ b/r_app/00_common_utils.R @@ -48,10 +48,30 @@ # #' safe_log("Check input file", "WARNING") # #' safe_log("Failed to load data", "ERROR") # #' -# safe_log <- function(message, level = "INFO") { -# prefix <- sprintf("[%s]", level) -# cat(sprintf("%s %s\n", prefix, message)) -# } +safe_log <- function(message, level = "INFO") { + # Build the full log message with timestamp + timestamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S") + log_msg <- sprintf("[%s] [%s] %s", timestamp, level, message) + + # Only output to console if NOT rendering with knitr + if (!isTRUE(getOption('knitr.in.progress'))) { + cat(log_msg, "\n") + } + + # Always write to log file if available + if (exists("LOG_FILE", envir = .GlobalEnv)) { + log_file <- get("LOG_FILE", envir = .GlobalEnv) + if (!is.null(log_file) && nzchar(log_file)) { + tryCatch({ + cat(log_msg, "\n", file = log_file, append = TRUE) + }, error = function(e) { + # Silently fail if log file can't be written + }) + } + } + + invisible(log_msg) +} # #' SmartCane Debug Logging (Conditional) # #' diff --git a/r_app/21_convert_ci_rds_to_csv.R b/r_app/21_convert_ci_rds_to_csv.R index 78af1bb..282e87f 100644 --- a/r_app/21_convert_ci_rds_to_csv.R +++ b/r_app/21_convert_ci_rds_to_csv.R @@ -14,7 +14,7 @@ # OUTPUT DATA: # - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/ # - Format: CSV (long format) -# - Columns: field, sub_field, Date, FitData, DOY, value +# - Columns: field, sub_field, Date, FitData, DAH, value # # USAGE: # Rscript 21_convert_ci_rds_to_csv.R [project] @@ -38,7 +38,7 @@ # NOTES: # - Data source: Uses interpolated CI data from Script 30 (growth model 
output) # - Handles both wide format and long format inputs from growth model -# - DOY (Day of Year): Calculated from date for seasonal analysis +# - DAH (Days After Harvest): Calculated from date; represents crop age in days # - Python integration: CSV format compatible with pandas/scikit-learn workflows # - Used by: Python harvest detection models (harvest_date_prediction.py) # - Exports complete growth curves with interpolated values for ML training @@ -82,13 +82,13 @@ wide_to_long_ci_data <- function(ci_data_wide) { filter(!is.na(FitData)) } -#' Create daily interpolated sequences with DOY for each field +#' Create daily interpolated sequences with DAH for each field #' #' For each field/sub_field combination, creates complete daily sequences from first to last date, #' fills in measurements, and interpolates missing dates. #' #' @param ci_data_long Long format tibble: field, sub_field, Date, FitData -#' @return Tibble with: field, sub_field, Date, FitData, DOY, value +#' @return Tibble with: field, sub_field, Date, FitData, DAH, value create_interpolated_daily_sequences <- function(ci_data_long) { ci_data_long %>% group_by(field, sub_field) %>% @@ -106,7 +106,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) { Date = date_seq, value = NA_real_, FitData = NA_real_, - DOY = seq_along(date_seq) # Continuous day counter: 1, 2, 3, ... + DAH = seq_along(date_seq) # Continuous day counter: 1, 2, 3, ... 
) # Fill in actual measurement values @@ -124,7 +124,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) { }) ) %>% unnest(data) %>% - select(field, sub_field, Date, FitData, DOY, value) %>% + select(field, sub_field, Date, FitData, DAH, value) %>% arrange(field, Date) } diff --git a/r_app/30_growth_model_utils.R b/r_app/30_growth_model_utils.R index 647b811..cca107e 100644 --- a/r_app/30_growth_model_utils.R +++ b/r_app/30_growth_model_utils.R @@ -208,7 +208,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season, # Add additional columns CI <- CI %>% dplyr::mutate( - DOY = seq(1, n(), 1), + DAH = seq(1, n(), 1), model = paste0("Data", season, " : ", field_name), season = season, subField = field_name diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index c0f5a2b..f73a002 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -141,6 +141,10 @@ suppressPackageStartupMessages({ library(writexl) # For writing Excel outputs (KPI summary tables) library(progress) # For progress bars during field processing + # ML models (for yield prediction KPI) + library(caret) # For training Random Forest with cross-validation + library(CAST) # For Forward Feature Selection in caret models + # ML/Analysis (optional - only for harvest model inference) tryCatch({ library(torch) # For PyTorch model inference (harvest readiness prediction) @@ -316,6 +320,9 @@ main <- function() { # Use centralized paths from setup object (no need for file.path calls) weekly_mosaic <- setup$weekly_mosaic_dir daily_vals_dir <- setup$daily_ci_vals_dir + reports_dir <- setup$kpi_reports_dir + data_dir <- setup$data_dir + tryCatch({ source(here("r_app", "30_growth_model_utils.R")) @@ -332,8 +339,8 @@ main <- function() { message("WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)") message(strrep("=", 70)) - # Prepare inputs for KPI calculation (already created by setup_project_directories) - reports_dir_kpi <- 
setup$kpi_reports_dir + # Prepare outputs and inputs for KPI calculation (already created by setup_project_directories) + reports_dir_kpi <- file.path(setup$reports_dir, "kpis") cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir # Load field boundaries for workflow (use data_dir from setup) @@ -356,18 +363,15 @@ main <- function() { stop("ERROR loading field boundaries: ", e$message) }) - # Load harvesting data - if (!exists("harvesting_data")) { - warning("harvesting_data not loaded. TCH KPI will use placeholder values.") - harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric()) - } + # Load harvesting data for yield prediction (using common helper function) + harvesting_data <- load_harvest_data(setup$data_dir) # Extract current week/year from end_date current_week <- as.numeric(format(end_date, "%V")) current_year <- as.numeric(format(end_date, "%G")) # Call with correct signature - kpi_results <- calculate_all_kpis( + kpi_results <- calculate_all_field_analysis_agronomic_support( field_boundaries_sf = field_boundaries_sf, current_week = current_week, current_year = current_year, @@ -376,8 +380,13 @@ main <- function() { ci_rds_path = NULL, harvesting_data = harvesting_data, output_dir = reports_dir_kpi, + data_dir = setup$data_dir, project_dir = project_dir ) + + # Extract results + field_analysis_df <- kpi_results$field_analysis_df + export_paths <- kpi_results$export_paths cat("\n=== KPI CALCULATION COMPLETE ===\n") cat("Summary tables saved for Script 90 integration\n") @@ -389,599 +398,57 @@ main <- function() { message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)") message(strrep("=", 70)) - # Set reports_dir for CANE_SUPPLY workflow (used by export functions) - reports_dir <- setup$kpi_reports_dir - data_dir <- setup$data_dir - - # Continue with existing per-field analysis code below - - message("\n", strrep("-", 70)) - message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ") - message(strrep("-", 70)) - 
- weeks <- calculate_week_numbers(end_date) - current_week <- weeks$current_week - current_year <- weeks$current_year - previous_week <- weeks$previous_week - previous_year <- weeks$previous_year - - message(paste("Week:", current_week, "/ Year (ISO 8601):", current_year)) - # Find per-field weekly mosaics - message("Finding per-field weekly mosaics...") + # Define variables needed for workflow functions + data_dir <- setup$data_dir - if (!dir.exists(weekly_mosaic)) { - stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic, - "\nScript 40 (mosaic creation) must be run first.")) - } - - field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE) - field_dirs <- field_dirs[field_dirs != ""] - - if (length(field_dirs) == 0) { - stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic, - "\nScript 40 must create weekly_mosaic/{FIELD}/ structure.")) - } - - # Verify we have mosaics for this week - single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year) - per_field_files <- c() - for (field in field_dirs) { - field_mosaic_dir <- file.path(weekly_mosaic, field) - files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE) - if (length(files) > 0) { - per_field_files <- c(per_field_files, files) - } - } - - if (length(per_field_files) == 0) { - stop(paste("ERROR: No mosaics found for week", current_week, "year", current_year, - "\nExpected pattern:", single_file_pattern, - "\nChecked:", weekly_mosaic)) - } - - message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics")) - - mosaic_mode <- "per-field" - mosaic_dir <- weekly_mosaic - - - - # Load field boundaries + # Load field boundaries for workflow (use data_dir from setup) + message("\nLoading field boundaries for KPI calculation...") tryCatch({ - boundaries_result <- load_field_boundaries(data_dir) - + boundaries_result <- load_field_boundaries(setup$data_dir) + if (is.list(boundaries_result) && 
"field_boundaries_sf" %in% names(boundaries_result)) { field_boundaries_sf <- boundaries_result$field_boundaries_sf } else { field_boundaries_sf <- boundaries_result } - + if (nrow(field_boundaries_sf) == 0) { stop("No fields loaded from boundaries") } - - message(paste(" Loaded", nrow(field_boundaries_sf), "fields")) + + message(paste(" ✓ Loaded", nrow(field_boundaries_sf), "fields")) }, error = function(e) { stop("ERROR loading field boundaries: ", e$message) }) + + # Load harvesting data for yield prediction (using common helper function) + harvesting_data <- load_harvest_data(setup$data_dir) + + # Extract current week/year from end_date + current_week <- as.numeric(format(end_date, "%V")) + current_year <- as.numeric(format(end_date, "%G")) + - message("Loading historical field data for trend calculations...") - # Load up to 8 weeks (max of 4-week and 8-week trend requirements) - # Function gracefully handles missing weeks and loads whatever exists - num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG) # Always 8 - message(paste(" Attempting to load up to", num_weeks_to_load, "weeks of historical data...")) - - # Only auto-generate on first call (not in recursive calls from within load_historical_field_data) - allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv) - - historical_data <- load_historical_field_data(project_dir, current_week, current_year, reports_dir, - num_weeks = num_weeks_to_load, - auto_generate = allow_auto_gen, - field_boundaries_sf = field_boundaries_sf, - daily_vals_dir = daily_vals_dir) - - # Load harvest.xlsx for planting dates (season_start) - message("\nLoading harvest data from harvest.xlsx for planting dates...") - harvest_file_path <- file.path(data_dir, "harvest.xlsx") - - harvesting_data <- tryCatch({ - if (file.exists(harvest_file_path)) { - harvest_raw <- readxl::read_excel(harvest_file_path) - harvest_raw$season_start <- as.Date(harvest_raw$season_start) - harvest_raw$season_end <- 
as.Date(harvest_raw$season_end) - message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows")) - harvest_raw - } else { - message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path)) - NULL - } - }, error = function(e) { - message(paste(" ERROR loading harvest.xlsx:", e$message)) - NULL - }) - - planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf) - - # Validate planting_dates - if (is.null(planting_dates) || nrow(planting_dates) == 0) { - message("WARNING: No planting dates available. Using NA for all fields.") - planting_dates <- data.frame( - field_id = field_boundaries_sf$field, - planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)), - stringsAsFactors = FALSE - ) - } - # Build per-field configuration - message("\nPreparing mosaic configuration for statistics calculation...") - message(" ✓ Using per-field mosaic architecture (1 TIFF per field)") - - # Per-field mode: each field has its own TIFF in weekly_mosaic/{FIELD}/week_*.tif - field_grid <- list( - mosaic_dir = mosaic_dir, - mode = "per-field" - ) - - message("\nUsing modular RDS-based approach for weekly statistics...") - - # Load/calculate CURRENT week stats (from tiles or RDS cache) - message("\n1. 
Loading/calculating CURRENT week statistics (week", current_week, ")...") - current_stats <- load_or_calculate_weekly_stats( - week_num = current_week, - year = current_year, + # Call the main orchestrator function from kpi_calculation_utils.R + workflow_results <- calculate_field_analysis_cane_supply( + setup = setup, + client_config = client_config, + end_date = end_date, project_dir = project_dir, + weekly_mosaic = weekly_mosaic, + daily_vals_dir = daily_vals_dir, field_boundaries_sf = field_boundaries_sf, - mosaic_dir = field_grid$mosaic_dir, - reports_dir = reports_dir, - report_date = end_date + data_dir = data_dir ) - - message(paste(" ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week")) - - # Load/calculate PREVIOUS week stats (from RDS cache or tiles) - message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...") - - # Calculate report date for previous week (7 days before current) - prev_report_date <- end_date - 7 - - prev_stats <- load_or_calculate_weekly_stats( - week_num = previous_week, - year = previous_year, - project_dir = project_dir, - field_boundaries_sf = field_boundaries_sf, - mosaic_dir = field_grid$mosaic_dir, - reports_dir = reports_dir, - report_date = prev_report_date - ) - - message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week")) - message(paste(" Columns in prev_stats:", paste(names(prev_stats), collapse = ", "))) - message(paste(" CV column non-NA values in prev_stats:", sum(!is.na(prev_stats$CV)))) - - # Apply trend calculations (requires both weeks) - message("\n3. 
Calculating trend columns...") - current_stats <- calculate_kpi_trends(current_stats, prev_stats, - project_dir = project_dir, - reports_dir = reports_dir, - current_week = current_week, - year = current_year) - - message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed")) - - # Load weekly harvest probabilities from script 31 (if available) - # Note: Script 31 saves to reports/kpis/field_stats/ (not field_level) - message("\n4. Loading harvest probabilities from script 31...") - harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats") - harvest_prob_file <- file.path(harvest_prob_dir, - sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year)) - message(paste(" Looking for:", harvest_prob_file)) - - imminent_prob_data <- tryCatch({ - if (file.exists(harvest_prob_file)) { - prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE, - col_types = readr::cols(.default = readr::col_character())) - message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields")) - prob_df %>% - select(field, imminent_prob, detected_prob) %>% - rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob) - } else { - message(paste(" INFO: Harvest probabilities not available (script 31 not run)")) - NULL - } - }, error = function(e) { - message(paste(" WARNING: Could not load harvest probabilities:", e$message)) - NULL - }) - - # ============================================================================ - # CALCULATE GAP FILLING KPI (2σ method from kpi_utils.R) - # ============================================================================ - - message("\nCalculating gap filling scores (2σ method)...") - - # Process per-field mosaics - message(paste(" Using per-field mosaics for", length(per_field_files), "fields")) - - field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field) - - 
process_gap_for_field <- function(field_file) { - field_id <- basename(dirname(field_file)) - field_bounds <- field_boundaries_by_id[[field_id]] - - if (is.null(field_bounds) || nrow(field_bounds) == 0) { - return(data.frame(Field_id = field_id, gap_score = NA_real_)) - } - - tryCatch({ - field_raster <- terra::rast(field_file) - ci_band_name <- "CI" - if (!(ci_band_name %in% names(field_raster))) { - return(data.frame(Field_id = field_id, gap_score = NA_real_)) - } - field_ci_band <- field_raster[[ci_band_name]] - names(field_ci_band) <- "CI" - - gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds) - - if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) { - return(data.frame(Field_id = field_id, gap_score = NA_real_)) - } - - gap_scores <- gap_result$field_results - gap_scores$Field_id <- gap_scores$field - gap_scores <- gap_scores[, c("Field_id", "gap_score")] - - stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE)) - }, error = function(e) { - message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message)) - data.frame(Field_id = field_id, gap_score = NA_real_) - }) - } - - # Process fields sequentially with progress bar - message(" Processing gap scores for ", length(per_field_files), " fields...") - pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50) - results_list <- lapply(seq_along(per_field_files), function(idx) { - result <- process_gap_for_field(per_field_files[[idx]]) - utils::setTxtProgressBar(pb, idx) - result - }) - close(pb) - - gap_scores_df <- dplyr::bind_rows(results_list) - - if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) { - gap_scores_df <- gap_scores_df %>% - dplyr::group_by(Field_id) %>% - dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop") - - message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields")) - message(paste(" Gap 
score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%")) - } else { - message(" WARNING: No gap scores calculated from per-field mosaics") - gap_scores_df <- NULL - } - - # ============================================================================ - # Build final output dataframe with all 22 columns (including Gap_score) - # ============================================================================ - - message("\nBuilding final field analysis output...") - - # Pre-calculate acreages with geometry validation - # This avoids geometry errors during field_analysis construction - acreage_lookup <- tryCatch({ - lookup_df <- field_boundaries_sf %>% - sf::st_drop_geometry() %>% - as.data.frame() %>% - mutate( - geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) { - tryCatch({ - sf::st_is_valid(field_boundaries_sf[idx, ]) - }, error = function(e) FALSE) - }), - area_ha = 0 - ) + # Extract results + field_analysis_df <- workflow_results$field_analysis_df + farm_kpi_results <- workflow_results$farm_kpi_results + export_paths <- workflow_results$export_paths - # Calculate area for valid geometries - for (idx in which(lookup_df$geometry_valid)) { - tryCatch({ - area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ])) - lookup_df$area_ha[idx] <- area_m2 / 10000 - }, error = function(e) { - lookup_df$area_ha[idx] <<- NA_real_ - }) - } - - # Convert hectares to acres - lookup_df %>% - mutate(acreage = area_ha / 0.404686) %>% - select(field, acreage) - }, error = function(e) { - message(paste("Warning: Could not calculate acreages from geometries -", e$message)) - data.frame(field = character(0), acreage = numeric(0)) - }) - - field_analysis_df <- current_stats %>% - mutate( - # Column 2: Farm_Section (user fills) - Farm_Section = NA_character_, - # Column 3: Field_name (from GeoJSON - already have Field_id, can look up) - Field_name = Field_id, - # Column 4: Acreage (calculate 
from geometry) - Acreage = { - acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)] - if_else(is.na(acreages_vec), 0, acreages_vec) - }, - # Columns 5-6: Already in current_stats (Mean_CI, Weekly_ci_change) - # Column 7: Four_week_trend (from current_stats) - # Column 8: Last_harvest_or_planting_date (from harvest.xlsx - season_start) - Last_harvest_or_planting_date = { - planting_dates$planting_date[match(Field_id, planting_dates$field_id)] - }, - # Column 9: Age_week (calculated from report date and planting date) - Age_week = { - sapply(seq_len(nrow(current_stats)), function(idx) { - planting_dt <- Last_harvest_or_planting_date[idx] - if (is.na(planting_dt)) { - return(NA_real_) - } - round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 0) - }) - }, - # Column 10: Phase (recalculate based on updated Age_week) - Phase = { - sapply(Age_week, function(age) { - if (is.na(age)) return(NA_character_) - if (age >= 0 & age < 4) return("Germination") - if (age >= 4 & age < 17) return("Tillering") - if (age >= 17 & age < 39) return("Grand Growth") - if (age >= 39) return("Maturation") - NA_character_ - }) - }, - # Column 11: nmr_of_weeks_analysed (already in current_stats from calculate_kpi_trends) - # Column 12: Germination_progress (calculated here from CI values) - # Bin Pct_pixels_CI_gte_2 into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100% - Germination_progress = sapply(Pct_pixels_CI_gte_2, function(pct) { - if (is.na(pct)) return(NA_character_) - if (pct >= 95) return("95-100%") - else if (pct >= 90) return("90-95%") - else if (pct >= 80) return("80-90%") - else if (pct >= 70) return("70-80%") - else if (pct >= 60) return("60-70%") - else if (pct >= 50) return("50-60%") - else if (pct >= 40) return("40-50%") - else if (pct >= 30) return("30-40%") - else if (pct >= 20) return("20-30%") - else if (pct >= 10) return("10-20%") - else return("0-10%") - }), - # Column 13: Imminent_prob (from script 31 or NA if not 
available) - Imminent_prob = { - if (!is.null(imminent_prob_data)) { - imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)] - } else { - rep(NA_real_, nrow(current_stats)) - } - }, - # Column 14: Status_Alert (based on harvest probability + crop health status) - # Priority order: Ready for harvest-check → Strong decline → Harvested/bare → NA - Status_Alert = { - sapply(seq_len(nrow(current_stats)), function(idx) { - imminent_prob <- Imminent_prob[idx] - age_w <- Age_week[idx] - weekly_ci_chg <- Weekly_ci_change[idx] - mean_ci_val <- Mean_CI[idx] - - # Priority 1: Ready for harvest-check (imminent + mature cane ≥12 months) - if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_w) && age_w >= 52) { - return("Ready for harvest-check") - } - - # Priority 2: Strong decline in crop health (drop ≥2 points but still >1.5) - if (!is.na(weekly_ci_chg) && weekly_ci_chg <= -2.0 && !is.na(mean_ci_val) && mean_ci_val > 1.5) { - return("Strong decline in crop health") - } - - # Priority 3: Harvested/bare (Mean CI < 1.5) - if (!is.na(mean_ci_val) && mean_ci_val < 1.5) { - return("Harvested/bare") - } - - # Fallback: no alert - NA_character_ - }) - }, - # Columns 15-16: CI-based columns already in current_stats (CI_range, CI_Percentiles) - # Column 17: Already in current_stats (CV) - # Column 18: Already in current_stats (CV_Trend_Short_Term) - # Column 19: CV_Trend_Long_Term (from current_stats - raw slope value) - # Column 19b: CV_Trend_Long_Term_Category (categorical interpretation of slope) - # 3 classes: More uniform (slope < -0.01), Stable uniformity (-0.01 to 0.01), Less uniform (slope > 0.01) - CV_Trend_Long_Term_Category = { - sapply(current_stats$CV_Trend_Long_Term, function(slope) { - if (is.na(slope)) { - return(NA_character_) - } else if (slope < -0.01) { - return("More uniform") - } else if (slope > 0.01) { - return("Less uniform") - } else { - return("Stable uniformity") - } - }) - }, - # Columns 20-21: Already in 
current_stats (Cloud_pct_clear, Cloud_category) - # Bin Cloud_pct_clear into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100% - Cloud_pct_clear = sapply(Cloud_pct_clear, function(pct) { - if (is.na(pct)) return(NA_character_) - if (pct >= 95) return("95-100%") - else if (pct >= 90) return("90-95%") - else if (pct >= 80) return("80-90%") - else if (pct >= 70) return("70-80%") - else if (pct >= 60) return("60-70%") - else if (pct >= 50) return("50-60%") - else if (pct >= 40) return("40-50%") - else if (pct >= 30) return("30-40%") - else if (pct >= 20) return("20-30%") - else if (pct >= 10) return("10-20%") - else return("0-10%") - }), - # Column 22: Gap_score (2σ below median - from kpi_utils.R) - Gap_score = { - if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) { - # Debug: Print first few gap scores - message(sprintf(" Joining %d gap scores to field_analysis (first 3: %s)", - nrow(gap_scores_df), - paste(head(gap_scores_df$gap_score, 3), collapse=", "))) - message(sprintf(" First 3 Field_ids in gap_scores_df: %s", - paste(head(gap_scores_df$Field_id, 3), collapse=", "))) - message(sprintf(" First 3 Field_ids in current_stats: %s", - paste(head(current_stats$Field_id, 3), collapse=", "))) - - gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)] - } else { - rep(NA_real_, nrow(current_stats)) - } - } - ) %>% - select( - all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert", - "Last_harvest_or_planting_date", "Age_week", "Phase", - "Germination_progress", - "Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles", - "CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category", - "Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score")) - ) - - message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns (including Gap_score)")) - - export_paths <- export_field_analysis_excel( - field_analysis_df, - NULL, - project_dir, - 
current_week, - current_year, - reports_dir - ) - - cat("\n--- Per-field Results (first 10) ---\n") - available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category") - available_cols <- available_cols[available_cols %in% names(field_analysis_df)] - if (length(available_cols) > 0) { - print(head(field_analysis_df[, available_cols], 10)) - } - - # ========== FARM-LEVEL KPI AGGREGATION ========== - # Aggregate the per-field analysis into farm-level summary statistics - - cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n") - - # Filter to only fields that have actual data (non-NA CI and valid acreage) - field_data <- field_analysis_df %>% - filter(!is.na(Mean_CI) & !is.na(Acreage)) %>% - filter(Acreage > 0) - - if (nrow(field_data) > 0) { - - if (nrow(field_data) > 0) { - # Create summary statistics - farm_summary <- list() - - # 1. PHASE DISTRIBUTION - phase_dist <- field_data %>% - group_by(Phase) %>% - summarise( - num_fields = n(), - acreage = sum(Acreage, na.rm = TRUE), - .groups = 'drop' - ) %>% - rename(Category = Phase) - - farm_summary$phase_distribution <- phase_dist - - # 2. STATUS ALERT DISTRIBUTION - status_dist <- field_data %>% - group_by(Status_Alert) %>% - summarise( - num_fields = n(), - acreage = sum(Acreage, na.rm = TRUE), - .groups = 'drop' - ) %>% - rename(Category = Status_Alert) - - farm_summary$status_distribution <- status_dist - - # 3. CLOUD COVERAGE DISTRIBUTION - cloud_dist <- field_data %>% - group_by(Cloud_category) %>% - summarise( - num_fields = n(), - acreage = sum(Acreage, na.rm = TRUE), - .groups = 'drop' - ) %>% - rename(Category = Cloud_category) - - farm_summary$cloud_distribution <- cloud_dist - - # 4. 
OVERALL STATISTICS - farm_summary$overall_stats <- data.frame( - total_fields = nrow(field_data), - total_acreage = sum(field_data$Acreage, na.rm = TRUE), - mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2), - median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2), - mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4), - week = current_week, - year = current_year, - date = as.character(end_date) - ) - - # Print summaries - cat("\n--- PHASE DISTRIBUTION ---\n") - print(phase_dist) - - cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n") - print(status_dist) - - cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n") - print(cloud_dist) - - cat("\n--- OVERALL FARM STATISTICS ---\n") - print(farm_summary$overall_stats) - - farm_kpi_results <- farm_summary - } else { - farm_kpi_results <- NULL - } - } else { - farm_kpi_results <- NULL - } - - # ========== FINAL SUMMARY ========== - - cat("\n", strrep("=", 70), "\n") - cat("80_CALCULATE_KPIs.R - COMPLETION SUMMARY\n") - cat(strrep("=", 70), "\n") - cat("Per-field analysis fields analyzed:", nrow(field_analysis_df), "\n") - cat("Excel export:", export_paths$excel, "\n") - cat("RDS export:", export_paths$rds, "\n") - cat("CSV export:", export_paths$csv, "\n") - - if (!is.null(farm_kpi_results)) { - cat("\nFarm-level KPIs: CALCULATED\n") - } else { - cat("\nFarm-level KPIs: SKIPPED (no valid tile data extracted)\n") - } - - cat("\n✓ Consolidated KPI calculation complete!\n") - cat(" - Per-field data exported\n") - cat(" - Farm-level KPIs calculated\n") - cat(" - All outputs in:", reports_dir, "\n\n") - - } else { - # Unknown client type - log warning and exit - warning(sprintf("Unknown client type: %s - no workflow matched", client_type)) + } else { + # Unknown client type - log warning and exit + warning(sprintf("Unknown client type: %s - no workflow matched", client_type)) cat("\n⚠️ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "") cat("Expected: 'agronomic_support' (aura) or 
'cane_supply' (angata, etc.)\n") cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n") diff --git a/r_app/80_utils_agronomic_support.R b/r_app/80_utils_agronomic_support.R index 421b35f..9408b2b 100644 --- a/r_app/80_utils_agronomic_support.R +++ b/r_app/80_utils_agronomic_support.R @@ -1,8 +1,8 @@ # 80_UTILS_AGRONOMIC_SUPPORT.R # ============================================================================ -# SPECIFIC KPI UTILITIES (SCRIPT 80 - CLIENT TYPE: agronomic_support) +# AURA-SPECIFIC KPI UTILITIES (SCRIPT 80 - CLIENT TYPE: agronomic_support) # -# Contains all 6 KPI calculation functions and helpers: +# Contains all 6 AURA KPI calculation functions and helpers: # - Field uniformity KPI (CV-based, spatial autocorrelation) # - Area change KPI (week-over-week CI changes) # - TCH forecasted KPI (tonnage projections from harvest data) @@ -12,7 +12,7 @@ # - KPI reporting (summary tables, field details, text interpretation) # - KPI export (Excel, RDS, data export) # -# Orchestrator: calculate_all_kpis() +# Orchestrator: calculate_all_field_analysis_agronomic_support() # Dependencies: 00_common_utils.R (safe_log), sourced from common # Used by: 80_calculate_kpis.R (when client_type == "agronomic_support") # ============================================================================ @@ -41,31 +41,8 @@ library(spdep) # These are now sourced from common utils and shared by all client types. 
# ============================================================================ -#' Prepare harvest predictions and ensure proper alignment with field data -prepare_predictions <- function(harvest_model, field_data, scenario = "optimistic") { - if (is.null(harvest_model) || is.null(field_data)) { - return(NULL) - } - - tryCatch({ - scenario_factor <- switch(scenario, - "pessimistic" = 0.85, - "realistic" = 1.0, - "optimistic" = 1.15, - 1.0) - - predictions <- field_data %>% - mutate(tch_forecasted = field_data$mean_ci * scenario_factor) - - return(predictions) - }, error = function(e) { - message(paste("Error preparing predictions:", e$message)) - return(NULL) - }) -} - # ============================================================================ -# KPI CALCULATION FUNCTIONS (6 KPIS) +# AURA KPI CALCULATION FUNCTIONS (6 KPIS) # ============================================================================ #' KPI 1: Calculate field uniformity based on CV and spatial autocorrelation @@ -75,40 +52,82 @@ prepare_predictions <- function(harvest_model, field_data, scenario = "optimisti #' #' @param ci_pixels_by_field List of CI pixel arrays for each field #' @param field_boundaries_sf SF object with field geometries -#' @param ci_raster Raster object with CI values (for spatial autocorrelation) +#' @param ci_band Raster band with CI values #' #' @return Data frame with field_idx, cv_value, morans_i, uniformity_score, interpretation -calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_sf, ci_raster = NULL) { - results_list <- list() +calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_sf, ci_band = NULL, + mosaic_dir = NULL, week_file = NULL) { + result <- data.frame( + field_idx = integer(), + cv_value = numeric(), + morans_i = numeric(), + uniformity_score = numeric(), + uniformity_category = character(), + interpretation = character(), + stringsAsFactors = FALSE + ) + + # Determine if we're using per-field structure 
+ is_per_field <- !is.null(mosaic_dir) && !is.null(week_file) for (field_idx in seq_len(nrow(field_boundaries_sf))) { ci_pixels <- ci_pixels_by_field[[field_idx]] if (is.null(ci_pixels) || length(ci_pixels) == 0) { - results_list[[length(results_list) + 1]] <- list( + result <- rbind(result, data.frame( field_idx = field_idx, cv_value = NA_real_, morans_i = NA_real_, uniformity_score = NA_real_, - interpretation = "No data" - ) + uniformity_category = "No data", + interpretation = "No data", + stringsAsFactors = FALSE + )) next } cv_val <- calculate_cv(ci_pixels) + # Calculate Moran's I morans_i <- NA_real_ - if (!is.null(ci_raster)) { - morans_result <- calculate_spatial_autocorrelation(ci_raster, field_boundaries_sf[field_idx, ]) - if (is.list(morans_result)) { - morans_i <- morans_result$morans_i - } else { - morans_i <- morans_result + if (is_per_field) { + # Load individual field raster for per-field structure + field_name <- field_boundaries_sf$field[field_idx] + field_mosaic_path <- file.path(mosaic_dir, field_name, week_file) + + if (file.exists(field_mosaic_path)) { + tryCatch({ + field_raster <- terra::rast(field_mosaic_path)[["CI"]] + single_field <- field_boundaries_sf[field_idx, ] + morans_result <- calculate_spatial_autocorrelation(field_raster, single_field) + + if (is.list(morans_result)) { + morans_i <- morans_result$morans_i + } else { + morans_i <- morans_result + } + }, error = function(e) { + message(paste(" Warning: Spatial autocorrelation failed for field", field_name, ":", e$message)) + }) } + } else if (!is.null(ci_band) && inherits(ci_band, "SpatRaster")) { + # Use single raster for single-file structure + tryCatch({ + single_field <- field_boundaries_sf[field_idx, ] + morans_result <- calculate_spatial_autocorrelation(ci_band, single_field) + + if (is.list(morans_result)) { + morans_i <- morans_result$morans_i + } else { + morans_i <- morans_result + } + }, error = function(e) { + message(paste(" Warning: Spatial autocorrelation failed 
for field", field_idx, ":", e$message)) + }) } # Normalize CV (0-1 scale, invert so lower CV = higher score) - cv_normalized <- min(cv_val / 0.3, 1) # 0.3 = threshold for CV + cv_normalized <- min(cv_val / 0.3, 1) cv_score <- 1 - cv_normalized # Normalize Moran's I (-1 to 1 scale, shift to 0-1) @@ -123,30 +142,34 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_ # Interpretation if (is.na(cv_val)) { interpretation <- "No data" + uniformity_category <- "No data" } else if (cv_val < 0.08) { interpretation <- "Excellent uniformity" + uniformity_category <- "Excellent" } else if (cv_val < 0.15) { interpretation <- "Good uniformity" + uniformity_category <- "Good" } else if (cv_val < 0.25) { interpretation <- "Acceptable uniformity" + uniformity_category <- "Acceptable" } else if (cv_val < 0.4) { interpretation <- "Poor uniformity" + uniformity_category <- "Poor" } else { interpretation <- "Very poor uniformity" + uniformity_category <- "Very poor" } - results_list[[length(results_list) + 1]] <- list( + result <- rbind(result, data.frame( field_idx = field_idx, cv_value = cv_val, morans_i = morans_i, uniformity_score = round(uniformity_score, 3), - interpretation = interpretation - ) - } - - # Convert accumulated list to data frame in a single operation - result <- do.call(rbind, lapply(results_list, as.data.frame)) - + uniformity_category = uniformity_category, + interpretation = interpretation, + stringsAsFactors = FALSE + )) + } return(result) } @@ -158,70 +181,167 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_ #' @param previous_stats Previous week field statistics #' #' @return Data frame with field-level CI changes -calculate_area_change_kpi <- function(current_stats, previous_stats) { - result <- calculate_change_percentages(current_stats, previous_stats) +calculate_area_change_kpi <- function(current_stats, previous_stats, field_boundaries_sf = NULL) { - # Add interpretation - 
result$interpretation <- NA_character_ - - for (i in seq_len(nrow(result))) { - change <- result$mean_ci_pct_change[i] - - if (is.na(change)) { - result$interpretation[i] <- "No previous data" - } else if (change > 15) { - result$interpretation[i] <- "Rapid growth" - } else if (change > 5) { - result$interpretation[i] <- "Positive growth" - } else if (change > -5) { - result$interpretation[i] <- "Stable" - } else if (change > -15) { - result$interpretation[i] <- "Declining" - } else { - result$interpretation[i] <- "Rapid decline" - } + # Initialize field index vector + field_idx_vec <- seq_len(nrow(current_stats)) + if (!is.null(field_boundaries_sf) && "Field_id" %in% names(current_stats)) { + field_idx_vec <- match(current_stats$Field_id, field_boundaries_sf$field) } + # Initialize result data frame + result <- data.frame( + field_idx = field_idx_vec, + mean_ci_abs_change = NA_real_, + interpretation = NA_character_, + stringsAsFactors = FALSE + ) + + # Handle case where previous stats is NULL or empty + if (is.null(previous_stats) || nrow(previous_stats) == 0) { + result$interpretation <- "No previous data" + return(result) + } + + # Match fields between current and previous stats + # Handle both naming conventions (Field_id vs field_idx) + if ("Field_id" %in% names(current_stats)) { + current_field_col <- "Field_id" + prev_field_col <- "Field_id" + ci_col <- "Mean_CI" + } else { + current_field_col <- "field_idx" + prev_field_col <- "field_idx" + ci_col <- "mean_ci" + } + + # Create lookup for previous stats + prev_lookup <- setNames( + previous_stats[[ci_col]], + previous_stats[[prev_field_col]] + ) + + # Calculate percentage change for each field + for (i in seq_len(nrow(current_stats))) { + current_field_id <- current_stats[[current_field_col]][i] + current_ci <- current_stats[[ci_col]][i] + + # Find matching previous CI value + prev_ci <- prev_lookup[[as.character(current_field_id)]] + + if (!is.null(prev_ci) && !is.na(prev_ci) && !is.na(current_ci)) { + # 
Calculate absolute change (CI units) + abs_change <- current_ci - prev_ci + result$mean_ci_abs_change[i] <- round(abs_change, 2) + + # Add interpretation + if (abs_change > 0.5) { + result$interpretation[i] <- "Rapid growth" + } else if (abs_change > 0.2) { + result$interpretation[i] <- "Positive growth" + } else if (abs_change >= -0.2) { + result$interpretation[i] <- "Stable" + } else if (abs_change >= -0.5) { + result$interpretation[i] <- "Declining" + } else { + result$interpretation[i] <- "Rapid decline" + } + } else { + result$interpretation[i] <- "No previous data" + } + } return(result) } #' KPI 3: Calculate TCH forecasted (tonnes of cane per hectare) #' -#' Projects final harvest tonnage based on CI growth trajectory +#' Projects final harvest tonnage based on historical yield data and CI growth trajectory. +#' Uses a Random Forest model trained on harvest data to predict yields for mature fields. +#' Delegates to calculate_yield_prediction_kpi() in 80_utils_common.R. #' -#' @param field_statistics Current field statistics -#' @param harvesting_data Historical harvest data (with yield observations) -#' @param field_boundaries_sf Field geometries +#' @param field_statistics Current field statistics (dataframe with Mean_CI or mean_ci column) +#' @param harvesting_data Historical harvest data frame (with tonnage_ha column) +#' @param field_boundaries_sf SF object with field geometries +#' @param cumulative_CI_vals_dir Directory with combined CI RDS files (optional) +#' @param data_dir Project data directory (from setup_project_directories or parameters_project.R) +#' Used to build cumulative_CI_vals_dir path if not provided directly (optional) +#' @param project_dir Deprecated: only used if data_dir not provided (optional) #' -#' @return Data frame with field-level TCH forecasts -calculate_tch_forecasted_kpi <- function(field_statistics, harvesting_data = NULL, field_boundaries_sf = NULL) { - result <- data.frame( - field_idx = field_statistics$field_idx, - 
mean_ci = field_statistics$mean_ci, - tch_forecasted = NA_real_, - tch_lower_bound = NA_real_, - tch_upper_bound = NA_real_, - confidence = NA_character_, - stringsAsFactors = FALSE - ) +#' @return Data frame with field-level yield forecasts ready for orchestrator +#' Columns: field_idx, tch_forecasted (yields in t/ha) +calculate_tch_forecasted_kpi <- function(field_statistics, harvesting_data = NULL, + field_boundaries_sf = NULL, + cumulative_CI_vals_dir = NULL, + data_dir = NULL, + project_dir = NULL) { - # Base TCH model: TCH = 50 + (CI * 10) - # This is a simplified model; production use should include more variables + # Use common utils yield prediction function (handles all ML logic) + # This replaces the previous linear model (TCH = 50 + CI*10) with proper ML prediction - for (i in seq_len(nrow(result))) { - if (is.na(result$mean_ci[i])) { - result$confidence[i] <- "No data" - next - } - - if (is.na(result$mean_ci[i])) { - result$tch_forecasted[i] <- NA_real_ - result$tch_lower_bound[i] <- NA_real_ - result$tch_upper_bound[i] <- NA_real_ - result$confidence[i] <- "No data" + # Validate required parameters + if (is.null(field_boundaries_sf)) { + safe_log("field_boundaries_sf is NULL in calculate_tch_forecasted_kpi", "WARNING") + return(data.frame( + field_idx = integer(), + tch_forecasted = numeric(), + stringsAsFactors = FALSE + )) + } + + # Determine cumulative CI directory + if (is.null(cumulative_CI_vals_dir)) { + # Priority 1: Use provided data_dir parameter + if (!is.null(data_dir)) { + cumulative_CI_vals_dir <- file.path(data_dir, "extracted_ci", "cumulative_vals") + } else if (exists("data_dir", envir = .GlobalEnv)) { + # Priority 2: Fallback to global data_dir from parameters_project.R + cumulative_CI_vals_dir <- file.path(get("data_dir", envir = .GlobalEnv), "extracted_ci", "cumulative_vals") + } else { + # Priority 3: Last resort - log warning and fail gracefully + safe_log("Missing project data directory configuration: provide data_dir parameter 
or ensure parameters_project.R has set data_dir globally", "WARNING") + safe_log("No training data available for yield prediction", "WARNING") + return(data.frame( + field_idx = integer(), + tch_forecasted = numeric(), + stringsAsFactors = FALSE + )) } } + # Call the shared yield prediction function from common utils + yield_result <- calculate_yield_prediction_kpi(field_boundaries_sf, harvesting_data, cumulative_CI_vals_dir) + + # Extract field-level results from the list + field_results <- yield_result$field_results + + # Convert to format expected by orchestrator + # If no predictions, return empty data frame + if (is.null(field_results) || nrow(field_results) == 0) { + return(data.frame( + field_idx = integer(), + tch_forecasted = numeric(), + stringsAsFactors = FALSE + )) + } + + # Map field names to field_idx using field_boundaries_sf + result <- field_results %>% + mutate( + field_idx = match(field, field_boundaries_sf$field), + tch_forecasted = yield_forecast_t_ha + ) %>% + filter(!is.na(field_idx)) %>% + select(field_idx, tch_forecasted) + + # Ensure result has proper structure even if empty + if (nrow(result) == 0) { + return(data.frame( + field_idx = integer(), + tch_forecasted = numeric(), + stringsAsFactors = FALSE + )) + } + return(result) } @@ -283,295 +403,300 @@ calculate_growth_decline_kpi <- function(ci_values_list) { return(result) } -#' KPI 5: Calculate weed presence indicator + #' -#' Detects field fragmentation/patchiness (potential weed/pest pressure) +#' Combines two complementary metrics for comprehensive heterogeneity assessment: +#' - Gini Coefficient: Distribution inequality of CI values (0=uniform, 1=unequal) +#' - Moran's I: Spatial autocorrelation (-1 to +1, indicates clustering vs dispersal) #' #' @param ci_pixels_by_field List of CI pixel arrays for each field +#' @param field_boundaries_sf SF object with field geometries +#' @param mosaic_dir Directory path to per-field mosaic files (for Moran's I) +#' @param week_file Week file 
pattern (for Moran's I calculation) +#' @param mean_ci_values Optional vector of mean CI values per field #' -#' @return Data frame with fragmentation indicators -calculate_weed_presence_kpi <- function(ci_pixels_by_field) { +#' @return Data frame with gini_coefficient, morans_i, patchiness_risk, patchiness_interpretation +calculate_patchiness_kpi <- function(ci_pixels_by_field, field_boundaries_sf = NULL, + mosaic_dir = NULL, week_file = NULL, mean_ci_values = NULL) { + + n_fields <- length(ci_pixels_by_field) + result <- data.frame( - field_idx = seq_len(length(ci_pixels_by_field)), - cv_value = NA_real_, - low_ci_percent = NA_real_, - fragmentation_index = NA_real_, - weed_pressure_risk = NA_character_, + field_idx = seq_len(n_fields), + gini_coefficient = NA_real_, + morans_i = NA_real_, + patchiness_risk = NA_character_, + patchiness_interpretation = NA_character_, stringsAsFactors = FALSE ) - for (field_idx in seq_len(length(ci_pixels_by_field))) { - ci_pixels <- ci_pixels_by_field[[field_idx]] + # Determine if per-field structure available + is_per_field <- !is.null(mosaic_dir) && !is.null(week_file) && !is.null(field_boundaries_sf) + + for (i in seq_len(n_fields)) { + ci_pixels <- ci_pixels_by_field[[i]] if (is.null(ci_pixels) || length(ci_pixels) == 0) { - result$weed_pressure_risk[field_idx] <- "No data" + result$patchiness_risk[i] <- "No data" + result$patchiness_interpretation[i] <- "No data" next } ci_pixels <- ci_pixels[!is.na(ci_pixels)] if (length(ci_pixels) == 0) { - result$weed_pressure_risk[field_idx] <- "No data" + result$patchiness_risk[i] <- "No data" + result$patchiness_interpretation[i] <- "No data" next } - cv_val <- calculate_cv(ci_pixels) - low_ci_pct <- sum(ci_pixels < 1.5) / length(ci_pixels) * 100 - fragmentation <- cv_val * low_ci_pct / 100 - - result$cv_value[field_idx] <- cv_val - result$low_ci_percent[field_idx] <- round(low_ci_pct, 2) - result$fragmentation_index[field_idx] <- round(fragmentation, 3) - - if (is.na(fragmentation)) 
{ - result$weed_pressure_risk[field_idx] <- "No data" - } else if (fragmentation > 0.15) { - result$weed_pressure_risk[field_idx] <- "High" - } else if (fragmentation > 0.08) { - result$weed_pressure_risk[field_idx] <- "Medium" - } else if (fragmentation > 0.04) { - result$weed_pressure_risk[field_idx] <- "Low" - } else { - result$weed_pressure_risk[field_idx] <- "Minimal" + # ========================================= + # METRIC 1: Calculate Gini Coefficient + # ========================================= + gini <- NA_real_ + if (length(ci_pixels) > 1) { + ci_sorted <- sort(ci_pixels) + n <- length(ci_sorted) + numerator <- 2 * sum(seq_len(n) * ci_sorted) + denominator <- n * sum(ci_sorted) + gini <- (numerator / denominator) - (n + 1) / n + gini <- max(0, min(1, gini)) # Clamp to 0-1 } + result$gini_coefficient[i] <- gini + + # ========================================= + # METRIC 2: Calculate Moran's I (spatial clustering) + # ========================================= + morans_i <- NA_real_ + if (is_per_field) { + field_name <- field_boundaries_sf$field[i] + field_mosaic_path <- file.path(mosaic_dir, field_name, week_file) + + if (file.exists(field_mosaic_path)) { + tryCatch({ + field_raster <- terra::rast(field_mosaic_path)[["CI"]] + single_field <- field_boundaries_sf[i, ] + morans_result <- calculate_spatial_autocorrelation(field_raster, single_field) + + if (is.list(morans_result)) { + morans_i <- morans_result$morans_i + } else { + morans_i <- morans_result + } + }, error = function(e) { + safe_log(paste("Warning: Moran's I failed for field", field_name, ":", e$message), "WARNING") + }) + } + } + result$morans_i[i] <- morans_i + + # ========================================= + # RISK DETERMINATION: Gini + Moran's I combination + # ========================================= + # Logic: + # - High Gini (>0.3) + High Moran's I (>0.85) = High patchiness (localized clusters) + # - High Gini + Low Moran's I = Medium patchiness (scattered heterogeneity) + # - Low Gini 
(<0.15) = Minimal patchiness (uniform) + # - Moderate Gini = Low to Medium patchiness + + if (is.na(gini)) { + result$patchiness_risk[i] <- "No data" + } else if (gini < 0.15) { + result$patchiness_risk[i] <- "Minimal" + } else if (gini < 0.30) { + # Low-to-moderate Gini + if (!is.na(morans_i) && morans_i > 0.85) { + result$patchiness_risk[i] <- "Medium" # Some clustering + } else { + result$patchiness_risk[i] <- "Low" + } + } else if (gini < 0.50) { + # High Gini + if (!is.na(morans_i) && morans_i > 0.85) { + result$patchiness_risk[i] <- "High" # Localized problem clusters + } else { + result$patchiness_risk[i] <- "Medium" # Scattered issues + } + } else { + # Very high Gini (>0.5) + result$patchiness_risk[i] <- "High" + } + + # ========================================= + # INTERPRETATION: Combined Gini + Moran's I narrative + # ========================================= + result$patchiness_interpretation[i] <- dplyr::case_when( + is.na(gini) ~ "No data", + gini < 0.15 & (is.na(morans_i) | morans_i < 0.75) ~ + "Excellent uniformity - minimal patchiness", + gini < 0.30 & (is.na(morans_i) | morans_i < 0.75) ~ + "Good uniformity - low patchiness", + gini < 0.30 & !is.na(morans_i) & morans_i > 0.85 ~ + "Moderate uniformity with localized clustering", + gini < 0.50 & (is.na(morans_i) | morans_i < 0.75) ~ + "Poor uniformity - scattered heterogeneity", + gini < 0.50 & !is.na(morans_i) & morans_i > 0.85 ~ + "Poor uniformity with clustered problem areas", + gini >= 0.50 ~ + "Severe heterogeneity - requires field investigation", + TRUE ~ "Mixed heterogeneity" + ) } return(result) } -# #' Calculate Gap Filling Score KPI (placeholder) -# #' @param ci_raster Current week CI raster -# #' @param field_boundaries Field boundaries -# #' @return Data frame with field-level gap filling scores -# calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { -# # Handle both sf and SpatVector inputs -# if (!inherits(field_boundaries, "SpatVector")) { -# field_boundaries_vect <- 
terra::vect(field_boundaries) -# } else { -# field_boundaries_vect <- field_boundaries -# } -# results_list <- list() - -# # Ensure field_boundaries_vect is valid and matches field_boundaries dimensions -# n_fields_vect <- length(field_boundaries_vect) -# n_fields_sf <- nrow(field_boundaries) - -# if (n_fields_sf != n_fields_vect) { -# warning(paste("Field boundary mismatch: nrow(field_boundaries)=", n_fields_sf, "vs length(field_boundaries_vect)=", n_fields_vect, ". Using actual SpatVector length.")) -# } - -# for (i in seq_len(n_fields_vect)) { -# field_vect <- field_boundaries_vect[i] - -# # Extract CI values using helper function -# ci_values <- extract_ci_values(ci_raster, field_vect) -# valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)] - -# if (length(valid_values) > 1) { -# # Calculate % of valid (non-NA) values = gap filling success -# total_pixels <- length(ci_values) -# valid_pixels <- length(valid_values) -# gap_filling_success <- (valid_pixels / total_pixels) * 100 -# na_percent <- ((total_pixels - valid_pixels) / total_pixels) * 100 - -# results_list[[length(results_list) + 1]] <- list( -# field_idx = i, -# gap_filling_success = round(gap_filling_success, 2), -# na_percent_pre_interpolation = round(na_percent, 2), -# mean_ci = round(mean(valid_values), 2) -# ) -# } else { -# # Not enough valid data -# results_list[[length(results_list) + 1]] <- list( -# field_idx = i, -# gap_filling_success = NA_real_, -# na_percent_pre_interpolation = NA_real_, -# mean_ci = NA_real_ -# ) -# } -# } - - # Convert accumulated list to data frame in a single operation - field_results <- do.call(rbind, lapply(results_list, as.data.frame)) - - return(field_results) -} # ============================================================================ # KPI ORCHESTRATOR AND REPORTING # ============================================================================ -#' Create summary tables for all 6 KPIs (AGGREGATED farm-level summaries) +#' Create summary tables 
for all 6 KPIs #' -#' @param all_kpis List containing results from all 6 KPI functions (per-field data) +#' @param all_kpis List containing results from all 6 KPI functions #' -#' @return List of summary data frames ready for reporting (farm-level aggregates) +#' @return List of summary data frames ready for reporting create_summary_tables <- function(all_kpis) { - - # ========================================== - # 1. UNIFORMITY SUMMARY (count by interpretation) - # ========================================== - uniformity_summary <- all_kpis$uniformity %>% - group_by(interpretation) %>% - summarise( - field_count = n(), - avg_cv = mean(cv_value, na.rm = TRUE), - avg_morans_i = mean(morans_i, na.rm = TRUE), - .groups = 'drop' - ) %>% - rename( - Status = interpretation, - `Field Count` = field_count, - `Avg CV` = avg_cv, - `Avg Moran's I` = avg_morans_i - ) - - # ========================================== - # 2. AREA CHANGE SUMMARY (improving/stable/declining counts) - # ========================================== - area_change_summary <- all_kpis$area_change %>% - group_by(interpretation) %>% - summarise( - field_count = n(), - avg_ci_change = mean(mean_ci_pct_change, na.rm = TRUE), - .groups = 'drop' - ) %>% - rename( - Status = interpretation, - `Field Count` = field_count, - `Avg CI Change %` = avg_ci_change - ) - - # ========================================== - # 3. TCH FORECAST SUMMARY (yield statistics) - # ========================================== - tch_summary <- all_kpis$tch_forecasted %>% - summarise( - avg_tch = mean(tch_forecasted, na.rm = TRUE), - min_tch = min(tch_forecasted, na.rm = TRUE), - max_tch = max(tch_forecasted, na.rm = TRUE), - avg_ci = mean(mean_ci, na.rm = TRUE), - fields_with_data = sum(!is.na(tch_forecasted)) - ) %>% - rename( - `Avg Forecast (t/ha)` = avg_tch, - `Min (t/ha)` = min_tch, - `Max (t/ha)` = max_tch, - `Avg CI` = avg_ci, - `Fields` = fields_with_data - ) - - # ========================================== - # 4. 
GROWTH DECLINE SUMMARY (trend interpretation) - # ========================================== - growth_summary <- all_kpis$growth_decline %>% - group_by(trend_interpretation) %>% - summarise( - field_count = n(), - avg_trend = mean(four_week_trend, na.rm = TRUE), - .groups = 'drop' - ) %>% - rename( - Trend = trend_interpretation, - `Field Count` = field_count, - `Avg 4-Week Trend` = avg_trend - ) - - # ========================================== - # 5. WEED PRESSURE SUMMARY (risk level counts) - # ========================================== - weed_summary <- all_kpis$weed_presence %>% - group_by(weed_pressure_risk) %>% - summarise( - field_count = n(), - avg_fragmentation = mean(fragmentation_index, na.rm = TRUE), - .groups = 'drop' - ) %>% - rename( - `Risk Level` = weed_pressure_risk, - `Field Count` = field_count, - `Avg Fragmentation` = avg_fragmentation - ) - - # ========================================== - # 6. GAP FILLING SUMMARY - # ========================================== - gap_summary <- if (!is.null(all_kpis$gap_filling) && is.data.frame(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) { - all_kpis$gap_filling %>% - summarise( - avg_gap_filling = mean(gap_filling_success, na.rm = TRUE), - avg_na_percent = mean(na_percent_pre_interpolation, na.rm = TRUE), - fields_with_data = n() - ) %>% - rename( - `Avg Gap Filling Success %` = avg_gap_filling, - `Avg NA % Pre-Interpolation` = avg_na_percent, - `Fields Analyzed` = fields_with_data - ) - } else { - data.frame(`Avg Gap Filling Success %` = NA_real_, `Avg NA % Pre-Interpolation` = NA_real_, `Fields Analyzed` = 0, check.names = FALSE) - } - - # Return as list (each element is a farm-level summary table) kpi_summary <- list( - uniformity = uniformity_summary, - area_change = area_change_summary, - tch_forecast = tch_summary, - growth_decline = growth_summary, - weed_pressure = weed_summary, - gap_filling = gap_summary + uniformity = all_kpis$uniformity %>% + select(field_idx, cv_value, 
uniformity_category, interpretation), + + area_change = all_kpis$area_change %>% + select(field_idx, mean_ci_abs_change, interpretation), + + tch_forecast = all_kpis$tch_forecasted %>% + select(field_idx, tch_forecasted), + + growth_decline = all_kpis$growth_decline %>% + select(field_idx, four_week_trend, trend_interpretation, decline_severity), + + patchiness = all_kpis$patchiness %>% + select(field_idx, gini_coefficient, morans_i, patchiness_interpretation, patchiness_risk), + + gap_filling = if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) { + all_kpis$gap_filling %>% + select(field_idx, gap_score, gap_level) + } else { + NULL + } ) - return(kpi_summary) } -#' Create detailed field-by-field KPI report +#' Create detailed field-by-field KPI report (ALL KPIs in one row) #' -#' @param field_df Data frame with field identifiers and acreage -#' @param all_kpis List with all KPI results #' @param field_boundaries_sf SF object with field boundaries +#' @param all_kpis List with all KPI results +#' @param current_week Current week number +#' @param current_year Current year #' -#' @return Data frame with one row per field, all KPI columns (renamed for reporting compatibility) -create_field_detail_table <- function(field_df, all_kpis, field_boundaries_sf) { - result <- field_df %>% - left_join( - all_kpis$uniformity %>% select(field_idx, cv_value, uniformity_interpretation = interpretation), - by = c("field_idx") - ) %>% - left_join( - all_kpis$area_change %>% select(field_idx, mean_ci_pct_change), - by = c("field_idx") - ) %>% - left_join( - all_kpis$tch_forecasted %>% select(field_idx, tch_forecasted, mean_ci), - by = c("field_idx") - ) %>% - left_join( - all_kpis$growth_decline %>% select(field_idx, decline_severity), - by = c("field_idx") - ) %>% - left_join( - all_kpis$weed_presence %>% select(field_idx, weed_pressure_risk), - by = c("field_idx") - ) %>% - # Rename columns to match reporting script expectations - rename( - Field = field_name, - 
`Growth Uniformity` = uniformity_interpretation, - `Yield Forecast (t/ha)` = tch_forecasted, - `Decline Risk` = decline_severity, - `Weed Risk` = weed_pressure_risk, - `CI Change %` = mean_ci_pct_change, - `Mean CI` = mean_ci, - `CV Value` = cv_value - ) %>% - # Add placeholder columns expected by reporting script (will be populated from other sources) +#' @return Data frame with one row per field, all KPI columns +create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_week, current_year, current_stats = NULL) { + + # Start with field identifiers AND field_idx for joining + result <- field_boundaries_sf %>% + sf::st_drop_geometry() %>% mutate( - `Field Size (ha)` = NA_real_, - `Gap Score` = NA_real_ + field_idx = row_number(), + Field_id = field, + Field_name = field, + Week = current_week, + Year = current_year ) %>% - select(field_idx, Field, `Field Size (ha)`, `Growth Uniformity`, `Yield Forecast (t/ha)`, - `Gap Score`, `Decline Risk`, `Weed Risk`, `CI Change %`, `Mean CI`, `CV Value`) + select(field_idx, Field_id, Field_name, Week, Year) + + # ============================================ + # GROUP 0: MEAN CI (from field statistics) + # ============================================ + if (!is.null(current_stats)) { + result <- result %>% + left_join( + current_stats %>% + select(Field_id, Mean_CI), + by = "Field_id" + ) + } else { + result$Mean_CI <- NA_real_ + } + + # ============================================ + # GROUP 1: FIELD UNIFORMITY (KPI 1) + # ============================================ + result <- result %>% + left_join( + all_kpis$uniformity %>% + select(field_idx, CV = cv_value, + Uniformity_Category = uniformity_category, + Uniformity_Interpretation = interpretation), + by = "field_idx" + ) + + # ============================================ + # GROUP 2: GROWTH & TREND ANALYSIS (KPI 2 + KPI 4) + # ============================================ + # KPI 2: Area Change + result <- result %>% + left_join( + all_kpis$area_change %>% + 
select(field_idx, Weekly_CI_Change = mean_ci_abs_change, + Area_Change_Interpretation = interpretation), + by = "field_idx" + ) + + # KPI 4: Growth Decline + result <- result %>% + left_join( + all_kpis$growth_decline %>% + select(field_idx, Four_Week_Trend = four_week_trend, + Trend_Interpretation = trend_interpretation, + Decline_Severity = decline_severity), + by = "field_idx" + ) + + # ============================================ + # GROUP 3: FIELD HETEROGENEITY/PATCHINESS (KPI 5) + # ============================================ + # KPI 5: Field Patchiness (Gini + Moran's I combination) + result <- result %>% + left_join( + all_kpis$patchiness %>% + select(field_idx, Gini_Coefficient = gini_coefficient, + Morans_I = morans_i, + Patchiness_Interpretation = patchiness_interpretation, + Patchiness_Risk = patchiness_risk), + by = "field_idx" + ) + + # ============================================ + # GROUP 4: YIELD FORECAST (KPI 3) + # ============================================ + result <- result %>% + left_join( + all_kpis$tch_forecasted %>% + select(field_idx, TCH_Forecasted = tch_forecasted), + by = "field_idx" + ) + + # ============================================ + # GROUP 5: DATA QUALITY / GAP FILLING (KPI 6) + # ============================================ + # Add gap filling if available + if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) { + result <- result %>% + left_join( + all_kpis$gap_filling %>% + select(field_idx, Gap_Score = gap_score, Gap_Level = gap_level), + by = "field_idx" + ) + } + + # Remove field_idx from final output + result <- result %>% + select(-field_idx) + + # Round numeric columns + result <- result %>% + mutate(across(where(is.numeric), ~ round(., 2))) return(result) } @@ -583,7 +708,7 @@ create_field_detail_table <- function(field_df, all_kpis, field_boundaries_sf) { #' @return Character string with formatted KPI summary text create_field_kpi_text <- function(all_kpis) { text_parts <- c( - "## KPI ANALYSIS 
SUMMARY\n", + "## AURA KPI ANALYSIS SUMMARY\n", "### Field Uniformity\n", paste(all_kpis$uniformity$interpretation, collapse = "; "), "\n", "### Growth Trends\n", @@ -595,115 +720,37 @@ create_field_kpi_text <- function(all_kpis) { return(paste(text_parts, collapse = "")) } -#' Export detailed KPI data to Excel/RDS +#' Export detailed KPI data to Excel/RDS #' -#' @param all_kpis List with all KPI results (per-field data) -#' @param kpi_summary List with summary tables (farm-level aggregates) -#' @param project_dir Project name (for filename) +#' @param field_detail_df Data frame with all KPI columns (one row per field) +#' @param kpi_summary List with summary tables (optional, for metadata) #' @param output_dir Directory for output files #' @param week Week number #' @param year Year -#' @param field_boundaries_sf SF object with field boundaries (optional, for field_details_table) -#' +#' @param project_dir Project name #' @return List of output file paths -export_kpi_data <- function(all_kpis, kpi_summary, project_dir, output_dir, week, year, field_boundaries_sf = NULL) { - # Ensure output directory exists - if (!dir.exists(output_dir)) { - dir.create(output_dir, recursive = TRUE) - } +export_kpi_data <- function(field_detail_df, kpi_summary, output_dir, week, year, project_dir) { - # Create unified field details table if field_boundaries_sf is provided - field_details_table <- NULL - if (!is.null(field_boundaries_sf)) { - tryCatch({ - # Create a basic field_df from the boundaries - # Robust field name extraction with multiple fallbacks - field_name <- NA_character_ - - # Check for 'name' column in the data.frame - if ("name" %in% names(field_boundaries_sf)) { - field_name <- field_boundaries_sf$name - } else if ("properties" %in% names(field_boundaries_sf)) { - # Extract from properties column (may be a list-column) - props <- field_boundaries_sf$properties - if (is.list(props) && length(props) > 0 && "name" %in% names(props[[1]])) { - field_name <- sapply(props, 
function(x) ifelse(is.null(x$name), NA_character_, x$name)) - } else if (!is.list(props)) { - # Try direct access if properties is a simple column - field_name <- props - } - } - - # Ensure field_name is a character vector of appropriate length - if (length(field_name) != nrow(field_boundaries_sf)) { - field_name <- rep(NA_character_, nrow(field_boundaries_sf)) - } - - # Replace only NA elements with fallback names, keeping valid names intact - na_indices <- which(is.na(field_name)) - if (length(na_indices) > 0) { - field_name[na_indices] <- paste0("Field_", na_indices) - } - - field_df <- data.frame( - field_idx = 1:nrow(field_boundaries_sf), - field_name = field_name, - stringsAsFactors = FALSE - ) - - field_details_table <- create_field_detail_table(field_df, all_kpis, field_boundaries_sf) - message(paste("✓ Field details table created with", nrow(field_details_table), "fields")) - }, error = function(e) { - message(paste("WARNING: Could not create field_details_table:", e$message)) - }) - } - - # Export all KPI tables to a single Excel file - use project_dir" - excel_file <- file.path(output_dir, paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", week, year), ".xlsx")) - - sheets <- list( - "Uniformity" = as.data.frame(kpi_summary$uniformity), - "Area_Change" = as.data.frame(kpi_summary$area_change), - "TCH_Forecast" = as.data.frame(kpi_summary$tch_forecast), - "Growth_Decline" = as.data.frame(kpi_summary$growth_decline), - "Weed_Pressure" = as.data.frame(kpi_summary$weed_pressure), - "Gap_Filling" = as.data.frame(kpi_summary$gap_filling) + # Use the common export function from 80_utils_common.R + export_paths <- export_field_analysis_excel( + field_df = field_detail_df, + summary_df = NULL, # No separate summary sheet for agronomic support + project_dir = project_dir, + current_week = week, + year = year, + reports_dir = output_dir ) - write_xlsx(sheets, excel_file) - message(paste("✓ KPI data exported to:", excel_file)) - - # Export to RDS for 
programmatic access (CRITICAL: Both per-field AND summary tables) - # The reporting script expects: summary_tables (list of 6 summary tables) - # We also provide: all_kpis (per-field data) and field_details (unified field view) - rds_file <- file.path(output_dir, paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", week, year), ".rds")) - - # Create the export structure that reporting scripts expect - export_data <- list( - summary_tables = kpi_summary, # Farm-level aggregates (6 KPI summaries) - all_kpis = all_kpis, # Per-field data (6 KPI per-field tables) - field_details = field_details_table # Unified field-level detail table - ) - - saveRDS(export_data, rds_file) - message(paste("✓ KPI RDS exported to:", rds_file)) - message(" Structure: list($summary_tables, $all_kpis, $field_details)") - - # Return including field_details for orchestrator to capture - return(list( - excel = excel_file, - rds = rds_file, - field_details = field_details_table - )) + return(export_paths) } # ============================================================================ # ORCHESTRATOR FUNCTION # ============================================================================ -#' Calculate all 6 KPIs +#' Calculate all 6 AURA KPIs #' -#' Main entry point for KPI calculation. +#' Main entry point for AURA KPI calculation. #' This function orchestrates the 6 KPI calculations and returns all results. #' #' @param field_boundaries_sf SF object with field geometries @@ -714,7 +761,6 @@ export_kpi_data <- function(all_kpis, kpi_summary, project_dir, output_dir, week #' @param ci_rds_path Path to combined CI RDS file #' @param harvesting_data Data frame with harvest data (optional) #' @param output_dir Directory for KPI exports -#' @param project_dir Project name (for filename in exports) #' #' @return List with results from all 6 KPI functions #' @@ -722,11 +768,11 @@ export_kpi_data <- function(all_kpis, kpi_summary, project_dir, output_dir, week #' This function: #' 1. 
Loads current week mosaic and extracts field statistics #' 2. (Optionally) loads previous week mosaic for comparison metrics -#' 3. Calculates all 6 KPIs +#' 3. Calculates all 6 AURA KPIs #' 4. Creates summary tables #' 5. Exports results to Excel/RDS #' -calculate_all_kpis <- function( +calculate_all_field_analysis_agronomic_support <- function( field_boundaries_sf, current_week, current_year, @@ -735,66 +781,311 @@ calculate_all_kpis <- function( ci_rds_path = NULL, harvesting_data = NULL, output_dir = NULL, + data_dir = NULL, project_dir = NULL ) { - message("\n============ KPI CALCULATION (6 KPIs) ============") + message("\n============ AURA KPI CALCULATION (6 KPIs) ============") - # Load current week mosaic - message("Loading current week mosaic...") - current_mosaic <- load_weekly_ci_mosaic(current_week, current_year, current_mosaic_dir) + # DETECT STRUCTURE FIRST - before any use of is_per_field + week_file <- sprintf("week_%02d_%d.tif", current_week, current_year) - if (is.null(current_mosaic)) { - stop("Could not load current week mosaic") + # Safely identify immediate child directories (not including root) + # Use list.files + dir.exists filter instead of list.dirs for robustness + all_entries <- list.files(current_mosaic_dir, full.names = FALSE) + field_dirs <- all_entries[sapply( + file.path(current_mosaic_dir, all_entries), + dir.exists + )] + + is_per_field <- length(field_dirs) > 0 && + file.exists(file.path(current_mosaic_dir, field_dirs[1], week_file)) + + if (is_per_field) { + message("Detected per-field mosaic structure...") + message("Using field-by-field extraction (similar to cane supply workflow)...") + + # Use the same extraction method as cane supply + current_stats <- calculate_field_statistics( + field_boundaries_sf, + current_week, + current_year, + current_mosaic_dir, + report_date = Sys.Date() + ) + + # Extract CI pixels for each field from their individual mosaics + ci_pixels_by_field <- list() + for (i in 
seq_len(nrow(field_boundaries_sf))) { + field_name <- field_boundaries_sf$field[i] + field_mosaic_path <- file.path(current_mosaic_dir, field_name, week_file) + + if (file.exists(field_mosaic_path)) { + tryCatch({ + field_raster <- terra::rast(field_mosaic_path) + ci_band <- field_raster[["CI"]] + field_vect <- terra::vect(field_boundaries_sf[i, ]) + ci_pixels_by_field[[i]] <- extract_ci_values(ci_band, field_vect) + }, error = function(e) { + message(paste(" Warning: Could not extract CI for field", field_name, ":", e$message)) + ci_pixels_by_field[[i]] <- NULL + }) + } else { + ci_pixels_by_field[[i]] <- NULL + } + } + + # For uniformity calculations that need a reference raster, load first available + current_mosaic <- NULL + for (field_name in field_dirs) { + field_mosaic_path <- file.path(current_mosaic_dir, field_name, week_file) + if (file.exists(field_mosaic_path)) { + tryCatch({ + current_mosaic <- terra::rast(field_mosaic_path)[["CI"]] + break + }, error = function(e) { + next + }) + } + } + + } else { + # Single-file mosaic (original behavior) + message("Loading current week mosaic...") + current_mosaic <- load_weekly_ci_mosaic(current_week, current_year, current_mosaic_dir) + + if (is.null(current_mosaic)) { + stop("Could not load current week mosaic") + } + + message("Extracting field statistics from current mosaic...") + current_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf) + + # Extract CI pixels for each field individually + ci_pixels_by_field <- list() + for (i in seq_len(nrow(field_boundaries_sf))) { + field_vect <- terra::vect(field_boundaries_sf[i, ]) + ci_pixels_by_field[[i]] <- extract_ci_values(current_mosaic, field_vect) + } } - # Extract field statistics - message("Extracting field statistics from current mosaic...") - current_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf) - ci_pixels_by_field <- extract_ci_values(current_mosaic, field_boundaries_sf) - # Load previous week 
mosaic (if available) previous_stats <- NULL - if (!is.null(previous_mosaic_dir)) { + if (!is.null(previous_mosaic_dir) || is_per_field) { target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1) message(paste("Loading previous week mosaic (week", target_prev$week, target_prev$year, ")...")) - previous_mosaic <- load_weekly_ci_mosaic(target_prev$week, target_prev$year, previous_mosaic_dir) - if (!is.null(previous_mosaic)) { - previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf) - } else { - message("Previous week mosaic not available - skipping area change KPI") + if (is_per_field) { + # Try loading previous week from the same directory structure + prev_week_file <- sprintf("week_%02d_%d.tif", target_prev$week, target_prev$year) + prev_field_exists <- any(sapply(field_dirs, function(field) { + file.exists(file.path(current_mosaic_dir, field, prev_week_file)) + })) + + if (prev_field_exists) { + message(" Found previous week per-field mosaics, calculating statistics...") + previous_stats <- calculate_field_statistics( + field_boundaries_sf, + target_prev$week, + target_prev$year, + current_mosaic_dir, + report_date = Sys.Date() - 7 + ) + } else { + message(" Previous week mosaic not available - skipping area change KPI") + } + } else if (!is.null(previous_mosaic_dir)) { + previous_mosaic <- load_weekly_ci_mosaic(target_prev$week, target_prev$year, previous_mosaic_dir) + + if (!is.null(previous_mosaic)) { + previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf) + } else { + message(" Previous week mosaic not available - skipping area change KPI") + } } } # Calculate 6 KPIs message("\nCalculating KPI 1: Field Uniformity...") - uniformity_kpi <- calculate_field_uniformity_kpi(ci_pixels_by_field, field_boundaries_sf, current_mosaic) + if (is_per_field) { + uniformity_kpi <- calculate_field_uniformity_kpi( + ci_pixels_by_field, + field_boundaries_sf, + ci_band = NULL, 
+ mosaic_dir = current_mosaic_dir, + week_file = week_file + ) + } else { + uniformity_kpi <- calculate_field_uniformity_kpi( + ci_pixels_by_field, + field_boundaries_sf, + current_mosaic + ) + } message("Calculating KPI 2: Area Change...") if (!is.null(previous_stats)) { - area_change_kpi <- calculate_area_change_kpi(current_stats, previous_stats) + area_change_kpi <- calculate_area_change_kpi(current_stats, previous_stats, field_boundaries_sf) } else { area_change_kpi <- data.frame( field_idx = seq_len(nrow(field_boundaries_sf)), - mean_ci_pct_change = NA_real_, + mean_ci_abs_change = NA_real_, interpretation = rep("No previous data", nrow(field_boundaries_sf)) ) } message("Calculating KPI 3: TCH Forecasted...") - tch_kpi <- calculate_tch_forecasted_kpi(current_stats, harvesting_data, field_boundaries_sf) + tch_kpi <- calculate_tch_forecasted_kpi(current_stats, harvesting_data, field_boundaries_sf, + data_dir = data_dir, project_dir = project_dir) message("Calculating KPI 4: Growth Decline...") - growth_decline_kpi <- calculate_growth_decline_kpi( - ci_pixels_by_field # Would need historical data for real trend + + # Load historical field statistics to build weekly mean CI time series per field + # (growth_decline_kpi expects temporal series, not spatial pixel arrays) + weekly_mean_ci_by_field <- list() + + # Build list of weekly mean CI values for each field (4-week lookback) + for (field_idx in seq_len(nrow(field_boundaries_sf))) { + weekly_ci_values <- c() + } + + # Try to load historical data for trend calculation + if (!is.null(output_dir) && !is.null(project_dir)) { + tryCatch({ + historical_data <- load_historical_field_data( + project_dir = project_dir, + current_week = current_week, + current_year = current_year, + reports_dir = output_dir, + num_weeks = 4, + auto_generate = FALSE, + field_boundaries_sf = field_boundaries_sf + ) + + if (!is.null(historical_data) && length(historical_data) > 0) { + message(" Building weekly mean CI time series from 
historical data...") + + # Initialize list with empty vectors for each field + for (field_idx in seq_len(nrow(field_boundaries_sf))) { + weekly_mean_ci_by_field[[field_idx]] <- c() + } + + # Extract Mean_CI from each historical week (reverse order to go chronologically) + for (hist_idx in rev(seq_along(historical_data))) { + hist_week <- historical_data[[hist_idx]] + hist_data <- hist_week$data + + # Extract Mean_CI column if available + if ("Mean_CI" %in% names(hist_data)) { + # Match fields between historical data and field_boundaries + for (field_idx in seq_len(nrow(field_boundaries_sf))) { + field_name <- field_boundaries_sf$field[field_idx] + + # Find matching row in historical data by field name/ID + field_row <- which( + (hist_data$Field_id == field_name | hist_data$Field_name == field_name) & + !is.na(hist_data$Mean_CI) + ) + + if (length(field_row) > 0) { + mean_ci_val <- as.numeric(hist_data$Mean_CI[field_row[1]]) + if (!is.na(mean_ci_val)) { + weekly_mean_ci_by_field[[field_idx]] <- c(weekly_mean_ci_by_field[[field_idx]], mean_ci_val) + } + } + } + } + } + + message(paste(" ✓ Loaded weekly Mean_CI for", sum(sapply(weekly_mean_ci_by_field, length) > 0), "fields")) + } + }, error = function(e) { + message(paste(" Note: Could not load historical field data for trend analysis:", e$message)) + }) + } + + # If no historical data available, create empty vectors (will result in "Insufficient data") + if (length(weekly_mean_ci_by_field) == 0 || all(sapply(weekly_mean_ci_by_field, length) == 0)) { + message(" Warning: No historical weekly CI data available - using current week only") + for (field_idx in seq_len(nrow(field_boundaries_sf))) { + # Use current week mean CI as single-point series (insufficient for trend) + if (!is.null(current_stats) && nrow(current_stats) > 0) { + field_name <- field_boundaries_sf$field[field_idx] + matching_row <- which( + (current_stats$Field_id == field_name | current_stats$Field_name == field_name) & + 
!is.na(current_stats$Mean_CI) + ) + if (length(matching_row) > 0) { + weekly_mean_ci_by_field[[field_idx]] <- c(as.numeric(current_stats$Mean_CI[matching_row[1]])) + } else { + weekly_mean_ci_by_field[[field_idx]] <- NA_real_ + } + } else { + weekly_mean_ci_by_field[[field_idx]] <- NA_real_ + } + } + } + + # Calculate growth decline using weekly time series (not spatial pixel arrays) + growth_decline_kpi <- calculate_growth_decline_kpi(weekly_mean_ci_by_field) + + message("Calculating KPI 5: Field Patchiness...") + # Calculate patchiness using both Gini coefficient and Moran's I spatial clustering + patchiness_kpi <- calculate_patchiness_kpi( + ci_pixels_by_field, + field_boundaries_sf = field_boundaries_sf, + mosaic_dir = current_mosaic_dir, + week_file = week_file, + mean_ci_values = current_stats$Mean_CI ) - message("Calculating KPI 5: Weed Presence...") - weed_kpi <- calculate_weed_presence_kpi(ci_pixels_by_field) - message("Calculating KPI 6: Gap Filling...") - gap_filling_kpi <- calculate_gap_filling_kpi(current_mosaic, field_boundaries_sf) + # Build list of per-field files for this week + per_field_files <- c() + for (field_name in field_dirs) { + field_mosaic_path <- file.path(current_mosaic_dir, field_name, week_file) + if (file.exists(field_mosaic_path)) { + per_field_files <- c(per_field_files, field_mosaic_path) + } + } + + if (length(per_field_files) > 0) { + # Use the common wrapper function (same as cane supply) + gap_scores_result <- calculate_gap_scores(per_field_files, field_boundaries_sf) + + # Guard against NULL or empty result from calculate_gap_scores + if (is.null(gap_scores_result) || nrow(gap_scores_result) == 0) { + message(" Warning: calculate_gap_scores returned NULL/empty - creating fallback") + gap_scores_result <- data.frame( + Field_id = field_boundaries_sf$field, + gap_score = NA_real_, + stringsAsFactors = FALSE + ) + } + + # Convert to the format expected by orchestrator + gap_filling_kpi <- gap_scores_result %>% + 
mutate(field_idx = match(Field_id, field_boundaries_sf$field)) %>% + select(field_idx, gap_score) %>% + mutate( + gap_level = dplyr::case_when( + gap_score < 10 ~ "Minimal", + gap_score < 25 ~ "Moderate", + TRUE ~ "Significant" + ), + mean_ci = NA_real_, + outlier_threshold = NA_real_ + ) + } else { + # Fallback: no per-field files + gap_filling_kpi <- data.frame( + field_idx = seq_len(nrow(field_boundaries_sf)), + gap_score = NA_real_, + gap_level = NA_character_, + mean_ci = NA_real_, + outlier_threshold = NA_real_ + ) + } # Compile results all_kpis <- list( @@ -802,26 +1093,62 @@ calculate_all_kpis <- function( area_change = area_change_kpi, tch_forecasted = tch_kpi, growth_decline = growth_decline_kpi, - weed_presence = weed_kpi, + patchiness = patchiness_kpi, gap_filling = gap_filling_kpi ) + # Deduplicate KPI dataframes to ensure one row per field_idx + # (sometimes joins or calculations can create duplicate rows) + message("Deduplicating KPI results (keeping first occurrence per field)...") + all_kpis$uniformity <- all_kpis$uniformity %>% + distinct(field_idx, .keep_all = TRUE) + all_kpis$area_change <- all_kpis$area_change %>% + distinct(field_idx, .keep_all = TRUE) + all_kpis$tch_forecasted <- all_kpis$tch_forecasted %>% + distinct(field_idx, .keep_all = TRUE) + all_kpis$growth_decline <- all_kpis$growth_decline %>% + distinct(field_idx, .keep_all = TRUE) + all_kpis$patchiness <- all_kpis$patchiness %>% + distinct(field_idx, .keep_all = TRUE) + all_kpis$gap_filling <- all_kpis$gap_filling %>% + distinct(field_idx, .keep_all = TRUE) + + # Build single-sheet field detail table with all KPIs + message("\nBuilding comprehensive field detail table...") + field_detail_df <- create_field_detail_table( + field_boundaries_sf = field_boundaries_sf, + all_kpis = all_kpis, + current_week = current_week, + current_year = current_year, + current_stats = current_stats + ) + # Create summary tables + message("\nCreating summary tables...") + kpi_summary <- 
create_summary_tables(all_kpis) - # Export - pass project_dir for proper filename and field_boundaries_sf for field details table - if (is.null(project_dir)) { - project_dir <- "AURA" # Fallback if not provided - } - export_result <- export_kpi_data(all_kpis, kpi_summary, project_dir, output_dir, current_week, current_year, field_boundaries_sf) + # Export + message("\nExporting KPI data (single-sheet format)...") + export_paths <- export_kpi_data( + field_detail_df = field_detail_df, + kpi_summary = kpi_summary, + output_dir = output_dir, + week = current_week, + year = current_year, + project_dir = project_dir + ) - message(paste("\n✓", project_dir, "KPI calculation complete. Week", current_week, current_year, "\n")) + message(paste("\n✓ AURA KPI calculation complete. Week", current_week, current_year)) - # Return combined structure (for integration with 80_calculate_kpis.R) - # Capture field_details from export_result to propagate it out return(list( - all_kpis = all_kpis, - summary_tables = kpi_summary, - field_details = export_result$field_details # Propagate field_details from export_kpi_data + field_analysis_df = field_detail_df, + kpis = all_kpis, + summary_tables = kpi_summary, + export_paths = export_paths, + metadata = list( + week = current_week, + year = current_year, + project = project_dir + ) )) } diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R index df6e319..7c75958 100644 --- a/r_app/80_utils_cane_supply.R +++ b/r_app/80_utils_cane_supply.R @@ -27,173 +27,685 @@ library(tidyr) library(readxl) library(writexl) +# ============================================================================ +# ALERT THRESHOLDS & CONFIGURATION CONSTANTS +# ============================================================================ + +# CI change thresholds for alert categorization and status determination +# These values are project-standard and should be consistent across all workflows +CI_CHANGE_RAPID_GROWTH_THRESHOLD <- 0.5 # Weekly CI 
change for positive growth alert +CI_CHANGE_POSITIVE_GROWTH_THRESHOLD <- 0.2 # Weekly CI change for acceptable growth +CI_CHANGE_STABLE_THRESHOLD <- -0.2 # Weekly CI change for stable status (between -0.2 and +0.2) +CI_CHANGE_STRESS_TREND_THRESHOLD <- -0.3 # 4-week trend threshold for stress detection +CI_CHANGE_RAPID_DECLINE_THRESHOLD <- -0.5 # Weekly CI change for rapid decline alert +# Deprecated aliases (for backward compatibility if needed): +CI_CHANGE_DECLINE_THRESHOLD <- CI_CHANGE_RAPID_DECLINE_THRESHOLD # Weekly CI change threshold for decline alerts +CI_CHANGE_INCREASE_THRESHOLD <- CI_CHANGE_RAPID_GROWTH_THRESHOLD # Weekly CI change threshold for increase alerts + # ============================================================================ # ANGATA-SPECIFIC HELPER FUNCTIONS (Placeholder Section) # ============================================================================ -#' Placeholder: ANGATA harvest readiness assessment +#' Calculate acreage for each field from geometry +#' @param field_boundaries_sf sf object with field geometries +#' @return data.frame with field and acreage columns +calculate_field_acreages <- function(field_boundaries_sf) { + tryCatch({ + # Project to equal-area CRS (EPSG:6933) for accurate area calculations + field_boundaries_proj <- sf::st_transform(field_boundaries_sf, "EPSG:6933") + + lookup_df <- field_boundaries_proj %>% + sf::st_drop_geometry() %>% + as.data.frame() %>% + mutate( + geometry_valid = sapply(seq_len(nrow(field_boundaries_proj)), function(idx) { + tryCatch({ + sf::st_is_valid(field_boundaries_proj[idx, ]) + }, error = function(e) FALSE) + }), + area_ha = 0 + ) + + # Calculate area for valid geometries + valid_indices <- which(lookup_df$geometry_valid) + areas_ha <- vapply(valid_indices, function(idx) { + tryCatch({ + area_m2 <- as.numeric(sf::st_area(field_boundaries_proj[idx, ])) + area_m2 / 10000 + }, error = function(e) NA_real_) + }, numeric(1)) + lookup_df$area_ha[valid_indices] <- areas_ha + + # Convert 
hectares to acres + lookup_df %>% + mutate(acreage = area_ha / 0.404686) %>% + # Aggregate by field to handle multi-row fields (e.g., sub_fields) + group_by(field) %>% + summarise(acreage = sum(acreage, na.rm = TRUE), .groups = "drop") %>% + select(field, acreage) + }, error = function(e) { + message(paste("Warning: Could not calculate acreages from geometries -", e$message)) + data.frame(field = character(0), acreage = numeric(0)) + }) +} + +#' Calculate age in weeks from planting date #' -#' Future implementation will integrate ANGATA-specific harvest readiness criteria: -#' - Maturation phase detection (CI threshold-based) -#' - Field age tracking (days since planting) -#' - Weather-based ripeness indicators (if available) -#' - Historical yield correlations -#' -#' @param field_ci CI values for the field -#' @param field_age_days Days since planting -#' -#' @return Character string with harvest readiness assessment -assess_harvest_readiness <- function(field_ci, field_age_days = NULL) { - # Placeholder implementation - # Real version would check: - # - Mean CI > 3.5 (maturation threshold) - # - Age > 350 days - # - Weekly growth rate < threshold (mature plateau) - - if (is.null(field_ci) || all(is.na(field_ci))) { - return("No data available") +#' @param planting_date Date of planting +#' @param reference_date Date to calculate age relative to (typically end_date) +#' @return Numeric age in weeks (rounded to nearest week) +calculate_age_week <- function(planting_date, reference_date) { + if (is.na(planting_date)) { + return(NA_real_) } - - mean_ci <- mean(field_ci, na.rm = TRUE) - - if (mean_ci > 3.5) { - return("Ready for harvest") - } else if (mean_ci > 2.5) { - return("Approaching harvest readiness") + round(as.numeric(difftime(reference_date, planting_date, units = "weeks")), 0) +} + +#' Assign crop phase based on age in weeks +#' +#' Determines crop phase from age in weeks using canonical PHASE_DEFINITIONS +#' from 80_utils_common.R for consistency across 
all workflows. +#' +#' @param age_week Numeric age in weeks +#' @return Character phase name (from PHASE_DEFINITIONS or "Unknown") +#' +#' @details +#' Uses the shared PHASE_DEFINITIONS to ensure identical phase boundaries +#' across all scripts. This wrapper delegates to get_phase_by_age() which +#' is the authoritative phase lookup function. +#' +#' Phase boundaries (from PHASE_DEFINITIONS): +#' - Germination: 0-6 weeks +#' - Tillering: 4-16 weeks +#' - Grand Growth: 17-39 weeks +#' - Maturation: 39+ weeks +calculate_phase <- function(age_week) { + # Delegate to canonical get_phase_by_age() from 80_utils_common.R + # This ensures all phase boundaries are consistent across workflows + get_phase_by_age(age_week) +} + +#' Bin percentage into 10% intervals with special handling for 90-100% +#' +#' @param pct Numeric percentage value (0-100) +#' @return Character bin label +bin_percentage <- function(pct) { + if (is.na(pct)) return(NA_character_) + if (pct >= 95) return("95-100%") + else if (pct >= 90) return("90-95%") + else if (pct >= 80) return("80-90%") + else if (pct >= 70) return("70-80%") + else if (pct >= 60) return("60-70%") + else if (pct >= 50) return("50-60%") + else if (pct >= 40) return("40-50%") + else if (pct >= 30) return("30-40%") + else if (pct >= 20) return("20-30%") + else if (pct >= 10) return("10-20%") + else return("0-10%") +} + +#' Calculate germination progress from CI threshold percentage +#' +#' @param pct_pixels_ci_gte_2 Percentage of pixels with CI >= 2 +#' @return Character bin label +calculate_germination_progress <- function(pct_pixels_ci_gte_2) { + bin_percentage(pct_pixels_ci_gte_2) +} + +#' Categorize CV trend (long-term slope) into qualitative labels +#' +#' @param cv_slope Numeric slope from CV trend analysis +#' @return Character category: "More uniform", "Stable uniformity", or "Less uniform" +categorize_cv_trend_long_term <- function(cv_slope) { + if (is.na(cv_slope)) { + return(NA_character_) + } else if (cv_slope < -0.01) { + 
return("More uniform") + } else if (cv_slope > 0.01) { + return("Less uniform") } else { - return("Not ready - continue monitoring") + return("Stable uniformity") } } -#' Placeholder: ANGATA supply chain status flags +#' Determine status alert for CANE_SUPPLY client (harvest/milling workflow) +#' +#' Alerts focus on: harvest readiness, crop health monitoring, exception detection +#' Uses WEEKLY trends (Four_week_trend) not daily noise for consistency +#' Designed for harvest/milling clients who manage expectation, not daily operations #' -#' Future implementation will add supply chain-specific status indicators: -#' - Harvest scheduling readiness -#' - Equipment availability impact -#' - Transportation/logistics flags -#' - Quality parameter flags +#' Priority order: +#' 1. harvest_ready → Schedule harvest operations +#' 2. harvested_bare → Record completion / detect unexpected harvest +#' 3. stress_detected → Monitor crop health (consistent decline) +#' 4. germination_delayed → Early warning for young fields +#' 5. growth_on_track → Positive signal (no action needed) +#' 6. 
NA → Normal growth (no alert) #' -#' @param field_analysis Data frame with field analysis results -#' -#' @return Data frame with supply chain status columns -assess_supply_chain_status <- function(field_analysis) { - # Placeholder: return field analysis as-is - # Real version would add columns for: - # - schedule_ready (bool) - # - harvest_window_days (numeric) - # - transportation_priority (char) - # - quality_flags (char) +#' @param imminent_prob Numeric harvest probability (0-1) +#' @param age_week Numeric age in weeks since planting/harvest +#' @param mean_ci Numeric mean Chlorophyll Index +#' @param four_week_trend Numeric 4-week trend in CI (slope of growth) +#' @param weekly_ci_change Numeric week-over-week CI change +#' @param cv Numeric coefficient of variation (field uniformity) +#' @return Character status alert code or NA +calculate_status_alert <- function(imminent_prob, age_week, mean_ci, + four_week_trend, weekly_ci_change, cv) { - return(field_analysis) + # Priority 1: HARVEST READY - highest business priority + # Field is mature (≥12 months) AND harvest model predicts imminent harvest + if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_week) && age_week >= 52) { + return("harvest_ready") + } + + # Priority 2: HARVESTED/BARE - indicator of completion (or unexpected harvest) + # Mean CI dropped below vegetative threshold + if (!is.na(mean_ci) && mean_ci < 1.5) { + return("harvested_bare") + } + + # Priority 3: STRESS DETECTED - consistent health decline (weekly trend) + # Uses Four_week_trend (smooth trend) NOT daily fluctuation to avoid noise + # Crop declining but not yet bare → opportunity to investigate + # References: CI_CHANGE_STRESS_TREND_THRESHOLD for 4-week trend detection + if (!is.na(four_week_trend) && four_week_trend < CI_CHANGE_STRESS_TREND_THRESHOLD && + !is.na(mean_ci) && mean_ci > 1.5) { + return("stress_detected") + } + + # Priority 4: GERMINATION DELAYED - early warning for young fields + # Age 4-8 weeks is typical 
germination window; low CI = slow start + if (!is.na(age_week) && age_week >= 4 && age_week <= 8 && + !is.na(mean_ci) && mean_ci < 1.5) { + return("germination_delayed") + } + + # Priority 5: GROWTH ON TRACK - positive operational status + # Field is healthy, uniform, and growing steadily (no action needed) + # Conditions: good uniformity (CV < 0.15) AND stable/positive weekly trend + # References: CI_CHANGE_STABLE_THRESHOLD (±0.2 = stable, no change) + if (!is.na(cv) && cv < 0.15 && + !is.na(four_week_trend) && four_week_trend >= CI_CHANGE_STABLE_THRESHOLD && + !is.na(weekly_ci_change) && weekly_ci_change >= CI_CHANGE_STABLE_THRESHOLD) { + return("growth_on_track") + } + + # Default: NORMAL GROWTH (no specific alert) + # Field is growing but may have minor variability; continues normal monitoring + NA_character_ } +#' Calculate yield prediction for CANE_SUPPLY workflows (Wrapper) +#' +#' This function wraps the shared yield prediction model from 80_utils_common.R +#' to provide CANE_SUPPLY clients (e.g., ANGATA) with ML-based yield forecasting. 
+#'
+#' Uses Random Forest with Forward Feature Selection trained on:
+#' - Cumulative Chlorophyll Index (CI) from growth model
+#' - Days After Harvest (DAH) / crop age
+#' - CI-per-day (growth velocity)
+#'
+#' Classifies mature fields (DAH >= DAH_MATURITY_THRESHOLD, ~8 months) into yield quartiles:
+#' - Top 25%: High-yield fields
+#' - Average: Mid-range yield fields
+#' - Lowest 25%: Lower-yield fields
+#'
+#' @param field_boundaries_sf SF object with field geometries
+#' @param harvesting_data Data frame with harvest history (must have tonnage_ha column)
+#' @param cumulative_CI_vals_dir Directory with combined CI RDS files
+#'
+#' @return List with:
+#' - summary: Data frame with field_groups, count, and yield quartile predictions
+#' - field_results: Data frame with field-level forecasts (yield_forecast_t_ha in t/ha)
+#'
+#' @details
+#' **Data Requirements:**
+#' - harvesting_data must include tonnage_ha column (yield in t/ha) for training
+#' - cumulative_CI_vals_dir must contain "All_pivots_Cumulative_CI_quadrant_year_v2.rds"
+#' - If either is missing, returns graceful fallback with NA values (not fake numbers)
+#'
+#' **Integration:**
+#' This can be called from calculate_all_field_kpis() in cane_supply workflow to add
+#' a new "Yield_Forecast" column to the 22-column KPI output.
+#' +#' **Example:** +#' ```r +#' yield_result <- calculate_yield_prediction_kpi_cane_supply( +#' field_boundaries_sf, +#' harvesting_data, +#' file.path(data_dir, "combined_CI") +#' ) +#' # yield_result$summary has quartiles +#' # yield_result$field_results has per-field forecasts +#' ``` +calculate_yield_prediction_kpi_cane_supply <- function(field_boundaries_sf, + harvesting_data, + cumulative_CI_vals_dir) { + + # Call the shared yield prediction function from 80_utils_common.R + result <- calculate_yield_prediction_kpi(field_boundaries_sf, harvesting_data, cumulative_CI_vals_dir) + + return(result) +} + + +#' Build complete per-field KPI dataframe with all 22 columns +#' @param current_stats data.frame with current week statistics from load_or_calculate_weekly_stats +#' @param planting_dates data.frame with field_id and planting_date columns +#' @param imminent_prob_data data.frame with Field_id and Imminent_prob_actual columns (or NULL) +#' @param gap_scores_df data.frame with Field_id and gap_score columns (or NULL) +#' @param field_boundaries_sf sf object with field geometries +#' @param end_date Date object for current report date +#' @return data.frame with all 22 KPI columns +calculate_all_field_kpis <- function(current_stats, + planting_dates, + imminent_prob_data, + gap_scores_df, + field_boundaries_sf, + end_date) { + + message("\nBuilding final field analysis output...") + + # Pre-calculate acreages + acreage_lookup <- calculate_field_acreages(field_boundaries_sf) + + field_analysis_df <- current_stats %>% + mutate( + # Column 2: Farm_Section (user fills manually) + Farm_Section = NA_character_, + + # Column 3: Field_name (from GeoJSON) + Field_name = Field_id, + + # Column 4: Acreage (from geometry) + Acreage = { + acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)] + if_else(is.na(acreages_vec), 0, acreages_vec) + }, + + # Column 8: Last_harvest_or_planting_date (from harvest.xlsx) + Last_harvest_or_planting_date = { + 
planting_dates$planting_date[match(Field_id, planting_dates$field_id)] + }, + + # Column 9: Age_week (calculated) + Age_week = { + sapply(seq_len(nrow(current_stats)), function(idx) { + calculate_age_week(Last_harvest_or_planting_date[idx], end_date) + }) + }, + + # Column 10: Phase (based on Age_week) + Phase = sapply(Age_week, calculate_phase), + + # Column 12: Germination_progress (binned Pct_pixels_CI_gte_2) + Germination_progress = sapply(Pct_pixels_CI_gte_2, calculate_germination_progress), + + # Column 13: Imminent_prob (from script 31 or NA) + Imminent_prob = { + if (!is.null(imminent_prob_data)) { + as.numeric(imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)]) + } else { + rep(NA_real_, nrow(current_stats)) + } + }, + + # Column 14: Status_Alert (multi-priority logic for harvest/milling workflow) + Status_Alert = { + sapply(seq_len(nrow(current_stats)), function(idx) { + calculate_status_alert( + imminent_prob = Imminent_prob[idx], + age_week = Age_week[idx], + mean_ci = Mean_CI[idx], + four_week_trend = Four_week_trend[idx], + weekly_ci_change = Weekly_ci_change[idx], + cv = CV[idx] + ) + }) + }, + + # Column 19b: CV_Trend_Long_Term_Category (categorical slope) + CV_Trend_Long_Term_Category = sapply(current_stats$CV_Trend_Long_Term, categorize_cv_trend_long_term), + + # Column 21: Cloud_pct_clear (binned into intervals) + Cloud_pct_clear = sapply(Cloud_pct_clear, bin_percentage), + + # Column 22: Gap_score (2σ method) + Gap_score = { + if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) { + gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)] + } else { + rep(NA_real_, nrow(current_stats)) + } + } + ) %>% + select( + all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert", + "Last_harvest_or_planting_date", "Age_week", "Phase", + "Germination_progress", + "Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles", + "CV", "CV_Trend_Short_Term", 
"CV_Trend_Long_Term", "CV_Trend_Long_Term_Category", + "Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score")) + ) + + message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns")) + + return(field_analysis_df) +} + +#' Aggregate per-field data into farm-level KPI summary +#' +#' @param field_analysis_df data.frame with per-field KPI data +#' @param current_week Numeric current week number +#' @param current_year Numeric current year +#' @param end_date Date object for current report date +#' @return List with phase_distribution, status_distribution, cloud_distribution, overall_stats +calculate_farm_level_kpis <- function(field_analysis_df, current_week, current_year, end_date) { + + cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n") + + # Filter to only fields with actual data + field_data <- field_analysis_df %>% + filter(!is.na(Mean_CI) & !is.na(Acreage)) %>% + filter(Acreage > 0) + + if (nrow(field_data) == 0) { + message("No valid field data for farm-level aggregation") + return(NULL) + } + + farm_summary <- list() + + # 1. PHASE DISTRIBUTION + phase_dist <- field_data %>% + group_by(Phase) %>% + summarise( + num_fields = n(), + acreage = sum(Acreage, na.rm = TRUE), + .groups = 'drop' + ) %>% + rename(Category = Phase) + + farm_summary$phase_distribution <- phase_dist + + # 2. STATUS ALERT DISTRIBUTION + status_dist <- field_data %>% + group_by(Status_Alert) %>% + summarise( + num_fields = n(), + acreage = sum(Acreage, na.rm = TRUE), + .groups = 'drop' + ) %>% + rename(Category = Status_Alert) + + farm_summary$status_distribution <- status_dist + + # 3. CLOUD COVERAGE DISTRIBUTION + cloud_dist <- field_data %>% + group_by(Cloud_category) %>% + summarise( + num_fields = n(), + acreage = sum(Acreage, na.rm = TRUE), + .groups = 'drop' + ) %>% + rename(Category = Cloud_category) + + farm_summary$cloud_distribution <- cloud_dist + + # 4. 
OVERALL STATISTICS + farm_summary$overall_stats <- data.frame( + total_fields = nrow(field_data), + total_acreage = sum(field_data$Acreage, na.rm = TRUE), + mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2), + median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2), + mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4), + week = current_week, + year = current_year, + date = as.character(end_date) + ) + + # Print summaries + cat("\n--- PHASE DISTRIBUTION ---\n") + print(phase_dist) + + cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n") + print(status_dist) + + cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n") + print(cloud_dist) + + cat("\n--- OVERALL FARM STATISTICS ---\n") + print(farm_summary$overall_stats) + + return(farm_summary) +} + + # ============================================================================ # ORCHESTRATOR FOR CANE_SUPPLY WORKFLOWS # ============================================================================ -#' Orchestrate ANGATA weekly field analysis and reporting +#' Main orchestrator for CANE_SUPPLY per-field KPI workflow #' -#' Main entry point for CANE_SUPPLY (ANGATA, etc.) workflows. -#' Currently uses common utilities; future versions will add client-specific logic. +#' This function coordinates all KPI calculations for the per-field analysis workflow. +#' It loads historical data, calculates current/previous week statistics, computes +#' all 22 KPI columns, and aggregates farm-level summaries. #' -#' @param field_boundaries_sf SF object with field geometries -#' @param current_week ISO week number (1-53) -#' @param current_year ISO week year -#' @param mosaic_dir Directory containing weekly mosaics -#' @param field_boundaries_path Path to field GeoJSON -#' @param harvesting_data Data frame with harvest data (optional) -#' @param output_dir Directory for exports -#' @param data_dir Base data directory -#' -#' @return List with field analysis results -#' -#' @details -#' This function: -#' 1. 
Loads weekly mosaic and extracts field statistics -#' 2. Calculates field statistics (using common utilities) -#' 3. Prepares field analysis summary -#' 4. Exports to Excel/CSV/RDS -#' 5. (Future) Applies ANGATA-specific assessments -#' -calculate_field_analysis_cane_supply <- function( - field_boundaries_sf, - current_week, - current_year, - mosaic_dir, - field_boundaries_path = NULL, - harvesting_data = NULL, - output_dir = file.path(PROJECT_DIR, "output"), - data_dir = NULL -) { +#' @param setup List with directory paths (kpi_reports_dir, data_dir, etc.) +#' @param client_config List with workflow configuration (script_91_compatible, outputs) +#' @param end_date Date object for current report date +#' @param project_dir Character project identifier +#' @param weekly_mosaic Character path to weekly mosaic directory +#' @param daily_vals_dir Character path to daily values directory +#' @param field_boundaries_sf sf object with field geometries +#' @param data_dir Character path to data directory +#' @return List with field_analysis_df, farm_kpi_results, export_paths +calculate_field_analysis_cane_supply <- function(setup, + client_config, + end_date, + project_dir, + weekly_mosaic, + daily_vals_dir, + field_boundaries_sf, + data_dir) { - message("\n============ CANE SUPPLY FIELD ANALYSIS (ANGATA, etc.) 
============") + message("\n", strrep("=", 70)) + message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)") + message(strrep("=", 70)) - # Load current week mosaic - message("Loading current week mosaic...") - current_mosaic <- load_weekly_ci_mosaic(mosaic_dir, current_week, current_year) + reports_dir <- file.path(setup$reports_dir, "kpis") - if (is.null(current_mosaic)) { - warning(paste("Could not load current week mosaic for week", current_week, current_year)) - return(NULL) + # ========== PHASE 1: WEEKLY ANALYSIS SETUP ========== + message("\n", strrep("-", 70)) + message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ") + message(strrep("-", 70)) + + weeks <- calculate_week_numbers(end_date) + current_week <- weeks$current_week + current_year <- weeks$current_year + previous_week <- weeks$previous_week + previous_year <- weeks$previous_year + + message(paste("Week:", current_week, "/ Year (ISO 8601):", current_year)) + + # Find per-field weekly mosaics + message("Finding per-field weekly mosaics...") + + if (!dir.exists(weekly_mosaic)) { + stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic, + "\nScript 40 (mosaic creation) must be run first.")) } - # Extract field statistics - message("Extracting field statistics from current mosaic...") - field_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf) + field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE) + field_dirs <- field_dirs[field_dirs != ""] - # Load previous week stats for comparison - message("Loading historical data for trends...") - target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1) - previous_stats <- NULL - - previous_mosaic <- load_weekly_ci_mosaic(mosaic_dir, target_prev$week, target_prev$year) - if (!is.null(previous_mosaic)) { - previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf) + if (length(field_dirs) == 0) { + stop(paste("ERROR: No field 
subdirectories found in:", weekly_mosaic, + "\nScript 40 must create weekly_mosaic/{FIELD}/ structure.")) } - # Calculate 4-week historical trend - message("Calculating field trends...") - ci_rds_path <- file.path(data_dir, "combined_CI", "combined_CI_data.rds") + # Verify we have mosaics for this week + single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year) + per_field_files <- c() + for (field in field_dirs) { + field_mosaic_dir <- file.path(weekly_mosaic, field) + files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE) + if (length(files) > 0) { + per_field_files <- c(per_field_files, files) + } + } - field_analysis <- calculate_field_statistics( - field_stats = field_stats, - previous_stats = previous_stats, + if (length(per_field_files) == 0) { + stop(paste("ERROR: No mosaics found for week", current_week, "year", current_year, + "\nExpected pattern:", single_file_pattern, + "\nChecked:", weekly_mosaic)) + } + + message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics")) + + # ========== PHASE 2: LOAD HISTORICAL DATA ========== + message("\nLoading historical field data for trend calculations...") + num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG) + message(paste(" Attempting to load up to", num_weeks_to_load, "weeks of historical data...")) + + allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv) + + historical_data <- load_historical_field_data( + project_dir, current_week, current_year, reports_dir, + num_weeks = num_weeks_to_load, + auto_generate = allow_auto_gen, + field_boundaries_sf = field_boundaries_sf, + daily_vals_dir = daily_vals_dir + ) + + # ========== PHASE 3: LOAD PLANTING DATES ========== + message("\nLoading harvest data from harvest.xlsx for planting dates...") + # Use load_harvest_data() from 80_utils_common.R for consistency with 80_calculate_kpis.R + harvesting_data <- load_harvest_data(data_dir) + + planting_dates 
<- extract_planting_dates(harvesting_data, field_boundaries_sf) + + if (is.null(planting_dates) || nrow(planting_dates) == 0) { + message("WARNING: No planting dates available. Using NA for all fields.") + planting_dates <- data.frame( + field_id = field_boundaries_sf$field, + planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)), + stringsAsFactors = FALSE + ) + } + + # ========== PHASE 4: CALCULATE WEEKLY STATISTICS ========== + message("\nUsing modular RDS-based approach for weekly statistics...") + + # Current week + message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...") + current_stats <- load_or_calculate_weekly_stats( week_num = current_week, year = current_year, - ci_rds_path = ci_rds_path, + project_dir = project_dir, field_boundaries_sf = field_boundaries_sf, - harvesting_data = harvesting_data + mosaic_dir = weekly_mosaic, + reports_dir = reports_dir, + report_date = end_date + ) + message(paste(" ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week")) + + # Previous week + message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...") + prev_report_date <- end_date - 7 + + prev_stats <- load_or_calculate_weekly_stats( + week_num = previous_week, + year = previous_year, + project_dir = project_dir, + field_boundaries_sf = field_boundaries_sf, + mosaic_dir = weekly_mosaic, + reports_dir = reports_dir, + report_date = prev_report_date + ) + message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week")) + + # ========== PHASE 5: CALCULATE TRENDS ========== + message("\n3. 
Calculating trend columns...") + current_stats <- calculate_kpi_trends( + current_stats, prev_stats, + project_dir = project_dir, + reports_dir = reports_dir, + current_week = current_week, + year = current_year + ) + message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed")) + + # ========== PHASE 6: LOAD HARVEST PROBABILITIES ========== + message("\n4. Loading harvest probabilities from script 31...") + # Use get_harvest_output_path() to safely build path (stored in kpi_reports_dir) + harvest_prob_file <- get_harvest_output_path(project_dir, current_week, current_year) + message(paste(" Looking for:", harvest_prob_file)) + + imminent_prob_data <- tryCatch({ + if (file.exists(harvest_prob_file)) { + prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE, + col_types = readr::cols(.default = readr::col_character())) + message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields")) + prob_df %>% + select(field, imminent_prob, detected_prob) %>% + rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob) + } else { + message(paste(" INFO: Harvest probabilities not available (script 31 not run)")) + NULL + } + }, error = function(e) { + message(paste(" WARNING: Could not load harvest probabilities:", e$message)) + NULL + }) + + # ========== PHASE 7: CALCULATE GAP SCORES ========== + gap_scores_df <- calculate_gap_scores(per_field_files, field_boundaries_sf) + + # ========== PHASE 8: BUILD FINAL PER-FIELD DATAFRAME ========== + field_analysis_df <- calculate_all_field_kpis( + current_stats = current_stats, + planting_dates = planting_dates, + imminent_prob_data = imminent_prob_data, + gap_scores_df = gap_scores_df, + field_boundaries_sf = field_boundaries_sf, + end_date = end_date ) - if (is.null(field_analysis)) { - message("Could not generate field analysis") - return(NULL) - } - - # Generate summary - message("Generating summary 
statistics...") - summary_df <- generate_field_analysis_summary(field_analysis) - - # Export - message("Exporting field analysis...") + # ========== PHASE 9: EXPORT PER-FIELD RESULTS ========== export_paths <- export_field_analysis_excel( - field_analysis, - summary_df, - PROJECT_DIR, + field_analysis_df, + NULL, + project_dir, current_week, current_year, - output_dir + reports_dir ) - message(paste("\n✓ CANE_SUPPLY field analysis complete. Week", current_week, current_year, "\n")) + # cat("\n--- Per-field Results (first 10) ---\n") + # available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category") + # available_cols <- available_cols[available_cols %in% names(field_analysis_df)] + # if (length(available_cols) > 0) { + # print(head(field_analysis_df[, available_cols], 10)) + # } - result <- list( - field_analysis = field_analysis, - summary = summary_df, - exports = export_paths - ) + # ========== PHASE 10: CALCULATE FARM-LEVEL KPIS ========== + # farm_kpi_results <- calculate_farm_level_kpis( + # field_analysis_df, + # current_week, + # current_year, + # end_date + # ) - return(result) + # For now, farm-level KPIs are not implemented in CANE_SUPPLY workflow + farm_kpi_results <- NULL + + # ========== RETURN RESULTS ========== + return(list( + field_analysis_df = field_analysis_df, + farm_kpi_results = farm_kpi_results, + export_paths = export_paths + )) } # ============================================================================ diff --git a/r_app/80_utils_common.R b/r_app/80_utils_common.R index 3c0f75f..071b025 100644 --- a/r_app/80_utils_common.R +++ b/r_app/80_utils_common.R @@ -8,10 +8,34 @@ # - Field statistics extraction # - Week/year calculations for consistent date handling # - Excel/CSV/RDS export utilities +# - Yield prediction using ML models (Random Forest with Feature Selection) # # Used by: 80_calculate_kpis.R, all client-specific utils files +# +# NOTE: Libraries required by yield 
prediction (caret, CAST, here) are loaded +# in the main script 80_calculate_kpis.R, not here. This keeps dependencies +# centralized in the orchestrator script. # ============================================================================ +# ============================================================================ +# LOAD PROJECT CONFIGURATION (Guard against re-sourcing) +# ============================================================================ +# Ensure parameters_project.R has been sourced to provide global configuration +# (PROJECT, data_dir, field_boundaries_path, etc.). Use a sentinel to avoid double-sourcing. +if (!exists("PROJECT", envir = .GlobalEnv)) { + tryCatch({ + source(here::here("r_app", "parameters_project.R")) + }, error = function(e) { + # Fallback: try relative path if here() doesn't work + tryCatch({ + source("parameters_project.R") + }, error = function(e2) { + warning(paste("Could not source parameters_project.R:", e2$message, + "- using defaults or expecting caller to set PROJECT/data_dir")) + }) + }) +} + # ============================================================================ # CONSTANTS (from 80_calculate_kpis.R) # ============================================================================ @@ -355,12 +379,11 @@ calculate_cv_trend_long_term <- function(cv_values) { } #' Calculate Gap Filling Score KPI (2σ method) -#' @param ci_raster Current week CI raster -#' @param field_boundaries Field boundaries -#' @return Data frame with field-level gap filling scores +#' @param ci_raster Current week CI raster (single band) +#' @param field_boundaries Field boundaries (sf or SpatVector) +#' @return List with summary data frame and field-level results data frame calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { - safe_log("Calculating Gap Filling Score KPI (placeholder)") - + # Handle both sf and SpatVector inputs if (!inherits(field_boundaries, "SpatVector")) { field_boundaries_vect <- 
terra::vect(field_boundaries) @@ -368,19 +391,11 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { field_boundaries_vect <- field_boundaries } - # Ensure field_boundaries_vect is valid and matches field_boundaries dimensions - n_fields_vect <- length(field_boundaries_vect) - n_fields_sf <- nrow(field_boundaries) - - if (n_fields_sf != n_fields_vect) { - warning(paste("Field boundary mismatch: nrow(field_boundaries)=", n_fields_sf, "vs length(field_boundaries_vect)=", n_fields_vect, ". Using actual SpatVector length.")) - } - field_results <- data.frame() for (i in seq_len(nrow(field_boundaries))) { - field_name <- field_boundaries$field[i] - sub_field_name <- field_boundaries$sub_field[i] + field_name <- if ("field" %in% names(field_boundaries)) field_boundaries$field[i] else NA_character_ + sub_field_name <- if ("sub_field" %in% names(field_boundaries)) field_boundaries$sub_field[i] else NA_character_ field_vect <- field_boundaries_vect[i] # Extract CI values using helper function @@ -391,16 +406,17 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { # Gap score using 2σ below median to detect outliers median_ci <- median(valid_values) sd_ci <- sd(valid_values) - outlier_threshold <- median_ci - (2 * sd_ci) + outlier_threshold <- median_ci - (1 * sd_ci) low_ci_pixels <- sum(valid_values < outlier_threshold) total_pixels <- length(valid_values) - gap_score <- (low_ci_pixels / total_pixels) * 100 + gap_score <- round((low_ci_pixels / total_pixels) * 100, 2) # Classify gap severity gap_level <- dplyr::case_when( gap_score < 10 ~ "Minimal", gap_score < 25 ~ "Moderate", - TRUE ~ "Significant" + gap_score >= 25 ~ "Significant", + TRUE ~ NA_character_ ) field_results <- rbind(field_results, data.frame( @@ -412,7 +428,6 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { outlier_threshold = outlier_threshold )) } else { - # Not enough valid data, fill with NA row field_results <- rbind(field_results, 
data.frame( field = field_name, sub_field = sub_field_name, @@ -423,9 +438,99 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { )) } } + + # Summarize results + gap_summary <- field_results %>% + dplyr::group_by(gap_level) %>% + dplyr::summarise(field_count = n(), .groups = 'drop') %>% + dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1)) + + return(list(summary = gap_summary, field_results = field_results)) } - +#' Calculate gap filling scores for all per-field mosaics (wrapper) +#' +#' This wrapper handles per-field mosaic structure by iterating over +#' individual field files and calling the basic KPI function +#' +#' @param per_field_files Character vector of paths to per-field mosaic TIFFs +#' @param field_boundaries_sf sf object with field geometries +#' @return data.frame with Field_id and gap_score columns +calculate_gap_scores <- function(per_field_files, field_boundaries_sf) { + message("\nCalculating gap filling scores (2σ method)...") + message(paste(" Using per-field mosaics for", length(per_field_files), "fields")) + + field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field) + + process_gap_for_field <- function(field_file) { + field_id <- basename(dirname(field_file)) + field_bounds <- field_boundaries_by_id[[field_id]] + + if (is.null(field_bounds) || nrow(field_bounds) == 0) { + return(data.frame(Field_id = field_id, gap_score = NA_real_)) + } + + tryCatch({ + field_raster <- terra::rast(field_file) + ci_band_name <- "CI" + if (!(ci_band_name %in% names(field_raster))) { + return(data.frame(Field_id = field_id, gap_score = NA_real_)) + } + field_ci_band <- field_raster[[ci_band_name]] + names(field_ci_band) <- "CI" + + gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds) + + if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) { + return(data.frame(Field_id = field_id, gap_score = NA_real_)) + } + + gap_scores <- 
gap_result$field_results + gap_scores$Field_id <- gap_scores$field + gap_scores <- gap_scores[, c("Field_id", "gap_score")] + + stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE)) + }, error = function(e) { + message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message)) + data.frame(Field_id = field_id, gap_score = NA_real_) + }) + } + + # Process fields sequentially with progress bar + message(" Processing gap scores for ", length(per_field_files), " fields...") + pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50) + + results_list <- lapply(seq_along(per_field_files), function(idx) { + result <- process_gap_for_field(per_field_files[[idx]]) + utils::setTxtProgressBar(pb, idx) + result + }) + close(pb) + + gap_scores_df <- dplyr::bind_rows(results_list) + + if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) { + gap_scores_df <- gap_scores_df %>% + dplyr::group_by(Field_id) %>% + dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop") + + message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields")) + + # Guard against all-NA values which would produce Inf/-Inf warnings + if (any(is.finite(gap_scores_df$gap_score))) { + min_score <- round(min(gap_scores_df$gap_score, na.rm = TRUE), 2) + max_score <- round(max(gap_scores_df$gap_score, na.rm = TRUE), 2) + message(paste(" Gap score range:", min_score, "-", max_score, "%")) + } else { + message(" Gap score range: All values are NA (no valid gap scores)") + } + } else { + message(" WARNING: No gap scores calculated from per-field mosaics") + gap_scores_df <- NULL + } + + return(gap_scores_df) +} # ============================================================================ # HELPER FUNCTIONS @@ -517,6 +622,88 @@ extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) }) } +# ============================================================================ 
+# DATA LOADING HELPERS +# ============================================================================ + +#' Load and validate harvest data from harvest.xlsx +#' +#' Encapsulates harvest data loading with validation, type coercion, and error handling. +#' Returns a data frame with required columns (field, year, tonnage_ha) or an empty +#' data frame with proper structure if loading fails. +#' +#' @param data_dir Path to data directory containing harvest.xlsx +#' +#' @return data.frame with columns: field (character), year (numeric), tonnage_ha (numeric) +#' - On success: data frame with N rows of harvest records +#' - On failure: empty data frame with correct structure (0 rows, 3 columns) +#' +#' @details +#' **File Location**: Expected at `file.path(data_dir, "harvest.xlsx")` +#' +#' **Validation**: +#' - Checks file existence before reading +#' - Validates required columns: field, year, tonnage_ha +#' - Coerces year and tonnage_ha to numeric +#' - Logs status messages for debugging +#' +#' **Error Handling**: +#' - Missing file: Returns empty DF (logs NOTE) +#' - Missing columns: Returns empty DF (logs WARNING) +#' - Read errors: Returns empty DF (logs WARNING) +#' - Always returns valid data frame structure (won't return NULL) +#' +#' **Usage**: +#' ```r +#' harvesting_data <- load_harvest_data(setup$data_dir) +#' # harvesting_data is guaranteed to be a data.frame with 3 columns +#' # even if harvest.xlsx is unavailable or invalid +#' ``` +load_harvest_data <- function(data_dir) { + harvesting_data <- NULL + harvest_file <- file.path(data_dir, "harvest.xlsx") + + if (file.exists(harvest_file)) { + tryCatch({ + harvesting_data <- readxl::read_excel(harvest_file) + + # Ensure required columns are present + required_cols <- c("field", "year", "tonnage_ha") + if (all(required_cols %in% names(harvesting_data))) { + # Convert to data frame and ensure column types + harvesting_data <- as.data.frame(harvesting_data) + # CRITICAL: Coerce field to character to preserve 
leading zeros (e.g., "01", "02") + harvesting_data$field <- as.character(harvesting_data$field) + harvesting_data$year <- as.numeric(harvesting_data$year) + harvesting_data$tonnage_ha <- as.numeric(harvesting_data$tonnage_ha) + + message(paste(" ✓ Loaded harvest data:", nrow(harvesting_data), "records from harvest.xlsx")) + return(harvesting_data) + } else { + message(paste(" WARNING: harvest.xlsx missing required columns. Expected: field, year, tonnage_ha")) + harvesting_data <- NULL + } + }, error = function(e) { + message(paste(" WARNING: Could not read harvest.xlsx:", e$message)) + }) + } else { + message(paste(" NOTE: harvest.xlsx not found at", harvest_file)) + } + + # Fallback: create empty data frame if loading failed + if (is.null(harvesting_data)) { + message(" WARNING: No harvest data available. TCH yield prediction will use graceful fallback (NA values)") + harvesting_data <- data.frame( + field = character(), # Explicitly character to preserve leading zeros when data is added + year = numeric(), + tonnage_ha = numeric(), + stringsAsFactors = FALSE + ) + } + + return(harvesting_data) +} + # ============================================================================ # FIELD STATISTICS EXTRACTION # ============================================================================ @@ -678,13 +865,13 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre NULL } - output_subdir <- file.path(reports_dir, "field_analysis") - if (!dir.exists(output_subdir)) { - dir.create(output_subdir, recursive = TRUE) + output_dir <- file.path(reports_dir) + if (!dir.exists(output_dir)) { + dir.create(output_dir, recursive = TRUE) } excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".xlsx") - excel_path <- file.path(output_subdir, excel_filename) + excel_path <- file.path(output_dir, excel_filename) excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE) # Build sheets list 
dynamically @@ -709,14 +896,14 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre ) ) - rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds") - rds_path <- file.path(reports_dir, rds_filename) + rds_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".rds") + rds_path <- file.path(output_dir, rds_filename) saveRDS(kpi_data, rds_path) message(paste("✓ Field analysis RDS exported to:", rds_path)) csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".csv") - csv_path <- file.path(output_subdir, csv_filename) + csv_path <- file.path(output_dir, csv_filename) write_csv(field_df_rounded, csv_path) message(paste("✓ Field analysis CSV exported to:", csv_path)) @@ -1304,7 +1491,7 @@ prepare_predictions <- function(predictions, newdata) { dplyr::mutate( sub_field = newdata$sub_field, field = newdata$field, - Age_days = newdata$DOY, + Age_days = newdata$DAH, total_CI = round(newdata$cumulative_CI, 0), predicted_Tcha = round(predicted_Tcha, 0), season = newdata$season @@ -1313,3 +1500,258 @@ prepare_predictions <- function(predictions, newdata) { dplyr::left_join(., newdata, by = c("field", "sub_field", "season")) ) } + +# ============================================================================ +# YIELD PREDICTION KPI (SHARED ML-BASED MODEL FOR ALL CLIENT TYPES) +# ============================================================================ + +#' Helper function for graceful fallback when training data unavailable +#' +#' @param field_boundaries Field boundaries (sf or SpatVector) +#' @return List with summary and field_results (both with NA values) +create_fallback_result <- function(field_boundaries) { + # Convert to SpatVector if needed (for terra::project) + if (!inherits(field_boundaries, "SpatVector")) { + field_boundaries <- terra::vect(field_boundaries) + } + 
field_boundaries_projected <- terra::project(field_boundaries, "EPSG:6933") # Equal Earth projection + field_areas <- terra::expanse(field_boundaries_projected) / 10000 # Convert m² to hectares + total_area <- sum(field_areas) + + summary_result <- data.frame( + field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"), + count = c(0, 0, 0, nrow(field_boundaries)), + value = c(NA_real_, NA_real_, NA_real_, round(total_area, 1)), + stringsAsFactors = FALSE + ) + + field_results <- data.frame( + field = character(0), + sub_field = character(0), + Age_days = numeric(0), + yield_forecast_t_ha = numeric(0), + season = numeric(0), + stringsAsFactors = FALSE + ) + + return(list(summary = summary_result, field_results = field_results)) +} + +#' Calculate yield prediction KPI using Random Forest with Feature Selection +#' +#' Trains a Random Forest model on historical harvest data with cumulative CI, +#' days after harvest (DAH), and CI-per-day as predictors. Uses CAST::ffs() for +#' Forward Feature Selection. Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD). 
+#' +#' @param field_boundaries Field boundaries (sf or SpatVector) +#' @param harvesting_data Data frame with harvest data including tonnage_ha column +#' @param cumulative_CI_vals_dir Directory containing "All_pivots_Cumulative_CI_quadrant_year_v2.rds" +#' +#' @return List with: +#' - summary: Data frame with field_groups, count, value (quartiles and total area) +#' - field_results: Data frame with field-level yield forecasts (yield_forecast_t_ha) +#' +#' @details +#' **Training Data Requirements:** +#' - cumulative_CI_vals_dir must contain "All_pivots_Cumulative_CI_quadrant_year_v2.rds" +#' - harvesting_data must have tonnage_ha column with numeric yield values +#' - Training stops gracefully if either is missing (returns NA values, not fake numbers) +#' +#' **Model Specifications:** +#' - Algorithm: Random Forest (caret + CAST) +#' - Feature Selection: Forward Feature Selection (CAST::ffs) +#' - Cross-validation: 5-fold CV +#' - Predictors: cumulative_CI, DAH, CI_per_day +#' - Mature field threshold: DAH >= DAH_MATURITY_THRESHOLD (8 months, ~240 days) +#' - Output: Field-level yield forecasts grouped by quartile +#' +#' **Error Handling:** +#' - Missing tonnage_ha: Returns graceful fallback with NA (not zero) values +#' - No training data: Logs WARNING, returns empty field_results +#' - RDS file missing: Returns graceful fallback +#' - Prediction errors: Wrapped in tryCatch, returns fallback on failure +calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cumulative_CI_vals_dir) { + safe_log("Calculating yield prediction KPI using Random Forest with Feature Selection") + + tryCatch({ + # Check if tonnage_ha column is present and has valid data + if (is.null(harvesting_data) || + !("tonnage_ha" %in% names(harvesting_data)) || + all(is.na(harvesting_data$tonnage_ha))) { + safe_log("No harvest data available: lacking tonnage_ha column or all values are NA", "WARNING") + return(create_fallback_result(field_boundaries)) + } + + # Check if CI 
quadrant RDS file exists + ci_quadrant_path <- file.path(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds") + if (!file.exists(ci_quadrant_path)) { + safe_log(paste("CI quadrant file not found at:", ci_quadrant_path), "WARNING") + return(create_fallback_result(field_boundaries)) + } + + # Load CI quadrant data and fill missing field/sub_field values + CI_quadrant <- readRDS(ci_quadrant_path) %>% + dplyr::group_by(model) %>% + tidyr::fill(field, sub_field, .direction = "downup") %>% + dplyr::ungroup() + + # Rename year column to season for consistency + harvesting_data_renamed <- harvesting_data %>% dplyr::rename(season = year) + + # Join CI and yield data + CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed, + by = c("field", "sub_field", "season")) %>% + dplyr::group_by(sub_field, season) %>% + dplyr::slice(which.max(DAH)) %>% + dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DAH, season, sub_area) %>% + dplyr::mutate(CI_per_day = ifelse(DAH > 0, cumulative_CI / DAH, NA_real_)) + + + # Define predictors and response variables + predictors <- c("cumulative_CI", "DAH", "CI_per_day") + response <- "tonnage_ha" + + # Prepare training dataset (fields with harvest data) + CI_and_yield_test <- CI_and_yield %>% + as.data.frame() %>% + dplyr::filter(!is.na(tonnage_ha)) + + # Check if we have minimum training data + if (nrow(CI_and_yield_test) == 0) { + safe_log("No training data available: no fields with tonnage_ha observations", "WARNING") + return(create_fallback_result(field_boundaries)) + } + + # Pre-clean training data: remove rows with any NAs in predictors or response + # This is required because CAST::ffs doesn't support na.rm parameter + CI_and_yield_test <- CI_and_yield_test %>% + dplyr::filter(!dplyr::if_any(dplyr::all_of(c(predictors, response)), is.na)) + + if (nrow(CI_and_yield_test) == 0) { + safe_log("No complete training data after removing NAs in predictors/response", "WARNING") + 
return(create_fallback_result(field_boundaries)) + } + + # Prepare prediction dataset (fields without harvest data, mature fields only) + prediction_yields <- CI_and_yield %>% + as.data.frame() %>% + dplyr::filter(is.na(tonnage_ha) & DAH >= DAH_MATURITY_THRESHOLD) # Mature fields only + + # Configure model training parameters + ctrl <- caret::trainControl( + method = "cv", + savePredictions = TRUE, + allowParallel = TRUE, + number = 5, + verboseIter = FALSE + ) + + # Train the model with forward feature selection + set.seed(202) # For reproducibility + safe_log("Training Random Forest model with Forward Feature Selection...") + model_ffs_rf <- CAST::ffs( + CI_and_yield_test[, predictors], + CI_and_yield_test[, response], + method = "rf", + trControl = ctrl, + importance = TRUE, + withinSE = TRUE, + tuneLength = 5 + ) + + # Predict yields on validation data (same as training data for RMSE calculation) + pred_ffs_rf <- prepare_predictions( + stats::predict(model_ffs_rf, newdata = CI_and_yield_test), + CI_and_yield_test + ) + + # Extract cross-validated RMSE from the model object (more honest than in-sample RMSE) + # The CAST::ffs model stores CV results in $results data frame + # We extract the RMSE from the best model (lowest RMSE across folds) + if (!is.null(model_ffs_rf$results) && "RMSE" %in% names(model_ffs_rf$results)) { + # Get minimum RMSE from cross-validation results (best model from feature selection) + rmse_value <- min(model_ffs_rf$results$RMSE, na.rm = TRUE) + safe_log(paste("Yield prediction RMSE (cross-validated):", round(rmse_value, 2), "t/ha")) + } else { + # Fallback: compute in-sample RMSE if CV results unavailable, but label it clearly + rmse_value <- sqrt(mean((pred_ffs_rf$predicted_Tcha - CI_and_yield_test$tonnage_ha)^2, na.rm = TRUE)) + safe_log(paste("Yield prediction RMSE (in-sample/training):", round(rmse_value, 2), "t/ha")) + } + + # Predict yields for current season (mature fields >= DAH_MATURITY_THRESHOLD days) + if 
(nrow(prediction_yields) > 0) { + pred_rf_current_season <- prepare_predictions( + stats::predict(model_ffs_rf, newdata = prediction_yields), + prediction_yields + ) %>% + dplyr::filter(Age_days >= DAH_MATURITY_THRESHOLD) %>% + dplyr::select(c("field", "Age_days", "predicted_Tcha", "season")) + } else { + pred_rf_current_season <- data.frame() + } + + # Calculate summary statistics for KPI + if (nrow(pred_rf_current_season) > 0) { + safe_log(paste("Number of fields with yield predictions:", nrow(pred_rf_current_season))) + safe_log(paste("Predicted yield range:", + round(min(pred_rf_current_season$predicted_Tcha, na.rm = TRUE), 1), + "-", + round(max(pred_rf_current_season$predicted_Tcha, na.rm = TRUE), 1), + "t/ha")) + + # Calculate quartiles for grouping + yield_quartiles <- quantile(pred_rf_current_season$predicted_Tcha, + probs = c(0.25, 0.5, 0.75), na.rm = TRUE) + + # Count fields in each quartile + top_25_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[3], na.rm = TRUE) + average_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[1] & + pred_rf_current_season$predicted_Tcha < yield_quartiles[3], na.rm = TRUE) + lowest_25_count <- sum(pred_rf_current_season$predicted_Tcha < yield_quartiles[1], na.rm = TRUE) + + # Calculate total area + if (!inherits(field_boundaries, "SpatVector")) { + field_boundaries_vect <- terra::vect(field_boundaries) + } else { + field_boundaries_vect <- field_boundaries + } + + # Handle both sf and SpatVector inputs for area calculation + if (inherits(field_boundaries, "sf")) { + field_boundaries_projected <- sf::st_transform(field_boundaries, "EPSG:6933") + field_areas <- sf::st_area(field_boundaries_projected) / 10000 # m² to hectares + } else { + field_boundaries_projected <- terra::project(field_boundaries_vect, "EPSG:6933") + field_areas <- terra::expanse(field_boundaries_projected) / 10000 + } + total_area <- sum(as.numeric(field_areas)) + + safe_log(paste("Total area:", round(total_area, 
1), "hectares")) + + # Build summary result + result <- data.frame( + field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"), + count = c(top_25_count, average_count, lowest_25_count, nrow(field_boundaries)), + value = c(round(yield_quartiles[3], 1), round(yield_quartiles[2], 1), + round(yield_quartiles[1], 1), round(total_area, 1)), + stringsAsFactors = FALSE + ) + + # Prepare field-level results + field_level_results <- pred_rf_current_season %>% + dplyr::select(field, Age_days, predicted_Tcha, season) %>% + dplyr::rename(yield_forecast_t_ha = predicted_Tcha) + + safe_log("✓ Yield prediction complete") + return(list(summary = result, field_results = field_level_results)) + } else { + safe_log(paste("No fields meet maturity threshold (DAH >=", DAH_MATURITY_THRESHOLD, ") for prediction"), "WARNING") + return(list(summary = create_fallback_result(field_boundaries)$summary, + field_results = data.frame())) + } + + }, error = function(e) { + safe_log(paste("Error in yield prediction:", e$message), "ERROR") + return(create_fallback_result(field_boundaries)) + }) +} diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 2d59d0d..958b2be 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -2,8 +2,8 @@ params: ref: "word-styles-reference-var1.docx" output_file: "CI_report.docx" - report_date: !r Sys.Date() - data_dir: "angata" + report_date: "2026-02-04" #!r Sys.Date() + data_dir: "aura" mail_day: "Wednesday" borders: FALSE ci_plot_type: "both" @@ -61,6 +61,10 @@ suppressPackageStartupMessages({ library(officer) # For Word document manipulation (custom formatting, headers, footers) }) +# Configure tmap for static plotting (required for legend.outside to work) +tmap_mode("plot") # CRITICAL: Must be "plot" mode for legends outside to render properly +tmap_options(component.autoscale = FALSE) + # Load custom 
utility functions tryCatch({ source("report_utils.R") @@ -107,15 +111,15 @@ safe_log(paste("weekly_CI_mosaic path:", weekly_CI_mosaic)) # NO workspace-wide fallback that might load wrong project # Build expected KPI file path strictly from project_dir -kpi_data_dir <- paths$kpi_reports_dir # Should be: laravel_app/storage/app/{project}/reports/kpis/field_level +kpi_data_dir <- paths$kpi_reports_dir # file.path(paths$reports_dir, "kpis") # Should be: laravel_app/storage/app/{project}/reports/kpis # Calculate week from report_date current_week <- as.numeric(format(as.Date(report_date), "%V")) current_year <- as.numeric(format(as.Date(report_date), "%G")) # The ACTUAL filename format from 80_calculate_kpis.R output (after fix) -# Format: {project_dir}_kpi_summary_tables_week{WW}_{YYYY}.rds -kpi_rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", +# Format: {project_dir}_field_analysis_week{WW}_{YYYY}.rds +kpi_rds_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, current_year), ".rds") kpi_rds_path <- file.path(kpi_data_dir, kpi_rds_filename) @@ -139,30 +143,174 @@ if (dir.exists(kpi_data_dir)) { } ) - # Handle new RDS structure (list with $summary_tables, $all_kpis, $field_details) + # Handle RDS structure from 80_utils_agronomic_support.R + # Expected: list(field_analysis = dataframe, kpis = list, summary_tables = list, ...) 
+ # OR just a dataframe (for backward compatibility) + if (!is.null(loaded_data)) { - if (is.list(loaded_data) && "summary_tables" %in% names(loaded_data)) { - # New structure: extract summary_tables from the list - summary_tables <- loaded_data$summary_tables - if (!is.null(loaded_data$field_details)) { - field_details_table <- loaded_data$field_details + # Try to extract field_analysis from different possible structures + if (is.data.frame(loaded_data)) { + # Direct dataframe (simplest case) + field_details_table <- loaded_data + safe_log("✓ Loaded field_analysis dataframe directly") + } else if (is.list(loaded_data)) { + # List structure - try different key names + if ("field_analysis_df" %in% names(loaded_data)) { + field_details_table <- loaded_data$field_analysis_df + safe_log("✓ Loaded field_analysis_df from list") + } else if ("field_analysis" %in% names(loaded_data)) { + field_details_table <- loaded_data$field_analysis + safe_log("✓ Loaded field_analysis from list") + } else if ("kpis" %in% names(loaded_data)) { + # Might be the full output from orchestrator - create combined table + safe_log("✓ Found kpis list in loaded data") + # For now, skip - we need the combined field table + } + + # Also check if summary_tables already exists in the RDS + if ("summary_tables" %in% names(loaded_data)) { + summary_tables <- loaded_data$summary_tables + safe_log("✓ Loaded pre-computed summary_tables from RDS") } - safe_log("✓ Loaded KPI data (new structure with summary_tables)") - kpi_files_exist <- TRUE - } else if (is.list(loaded_data) && length(loaded_data) > 0) { - # Legacy structure: directly use as summary_tables - summary_tables <- loaded_data - safe_log("✓ Loaded KPI tables (legacy structure)") - kpi_files_exist <- TRUE } - if (kpi_files_exist) { - safe_log(paste("✓ Available KPI tables:", paste(names(summary_tables), collapse=", "))) + # If we successfully loaded field_details_table, transform it into summary_tables + if (!is.null(field_details_table) && 
nrow(field_details_table) > 0) { + safe_log(paste("✓ Loaded field_details_table with", nrow(field_details_table), "fields")) + safe_log(paste(" Columns:", paste(names(field_details_table), collapse=", "))) + + # NORMALIZATION: Normalize column structure (Field→Field_id rename + expected_cols injection) + field_details_table <- normalize_field_details_columns(field_details_table) + + # Normalize other common column name variations + column_mappings <- list( + c("CV Value", "CV"), + c("Mean CI", "Mean_CI"), + c("Yield Forecast (t/ha)", "TCH_Forecasted"), + c("Gap Score", "Gap_Score"), + c("Growth Uniformity", "Growth_Uniformity"), + c("Decline Risk", "Decline_Risk"), + c("Patchiness Risk", "Patchiness_Risk"), + c("Moran's I", "Morans_I") + ) + + for (mapping in column_mappings) { + old_name <- mapping[1] + new_name <- mapping[2] + if (old_name != new_name && old_name %in% names(field_details_table) && !new_name %in% names(field_details_table)) { + field_details_table <- field_details_table %>% + dplyr::rename(!!new_name := old_name) + safe_log(paste(" ✓ Normalized:", old_name, "→", new_name)) + } + } + + + # Only create summary_tables if not already loaded from RDS + if (is.null(summary_tables)) { + summary_tables <- list() + + # 1. Uniformity summary - GROUP BY Uniformity_Category and COUNT + if ("Uniformity_Category" %in% names(field_details_table)) { + summary_tables$uniformity <- field_details_table %>% + group_by(interpretation = Uniformity_Category) %>% + summarise(field_count = n(), .groups = 'drop') + safe_log(" ✓ Created uniformity summary") + } + + # 2. Area change summary - GROUP BY Area_Change_Interpretation and COUNT + if ("Area_Change_Interpretation" %in% names(field_details_table)) { + summary_tables$area_change <- field_details_table %>% + group_by(interpretation = Area_Change_Interpretation) %>% + summarise(field_count = n(), .groups = 'drop') + safe_log(" ✓ Created area_change summary") + } + + # 3. 
Growth decline summary - GROUP BY Trend_Interpretation and COUNT + if ("Trend_Interpretation" %in% names(field_details_table)) { + summary_tables$growth_decline <- field_details_table %>% + group_by(trend_interpretation = Trend_Interpretation) %>% + summarise(field_count = n(), .groups = 'drop') + safe_log(" ✓ Created growth_decline summary") + } + + # 4. Patchiness summary - GROUP BY Patchiness_Risk + Gini ranges + if ("Patchiness_Risk" %in% names(field_details_table) && "Gini_Coefficient" %in% names(field_details_table)) { + summary_tables$patchiness <- field_details_table %>% + mutate( + gini_category = case_when( + Gini_Coefficient < 0.2 ~ "Uniform (Gini<0.2)", + Gini_Coefficient < 0.4 ~ "Moderate (Gini 0.2-0.4)", + TRUE ~ "High (Gini≥0.4)" + ) + ) %>% + group_by(gini_category, patchiness_risk = Patchiness_Risk) %>% + summarise(field_count = n(), .groups = 'drop') + safe_log(" ✓ Created patchiness summary") + } + + # 5. TCH forecast summary - show actual value ranges (quartiles) instead of counts + if ("TCH_Forecasted" %in% names(field_details_table)) { + tch_values <- field_details_table %>% + filter(!is.na(TCH_Forecasted)) %>% + pull(TCH_Forecasted) + + if (length(tch_values) > 0) { + # Defensive check: if all TCH values are identical, handle as special case + if (length(unique(tch_values)) == 1) { + # All values are identical + identical_value <- tch_values[1] + summary_tables$tch_forecast <- tibble::tibble( + tch_category = "All equal", + range = paste0(round(identical_value, 1), " t/ha"), + field_count = length(tch_values) + ) + safe_log(" ✓ Created tch_forecast summary (all TCH values identical)") + } else { + # Multiple distinct values - use three-quartile approach + q25 <- quantile(tch_values, 0.25, na.rm = TRUE) + q50 <- quantile(tch_values, 0.50, na.rm = TRUE) + q75 <- quantile(tch_values, 0.75, na.rm = TRUE) + min_val <- min(tch_values, na.rm = TRUE) + max_val <- max(tch_values, na.rm = TRUE) + + summary_tables$tch_forecast <- tibble::tibble( + 
tch_category = c("Top 25%", "Middle 50%", "Bottom 25%"), + range = c( + paste0(round(q75, 1), "-", round(max_val, 1), " t/ha"), + paste0(round(q25, 1), "-", round(q75, 1), " t/ha"), + paste0(round(min_val, 1), "-", round(q25, 1), " t/ha") + ), + field_count = c( + nrow(field_details_table %>% filter(TCH_Forecasted >= q75, !is.na(TCH_Forecasted))), + nrow(field_details_table %>% filter(TCH_Forecasted >= q25 & TCH_Forecasted < q75, !is.na(TCH_Forecasted))), + nrow(field_details_table %>% filter(TCH_Forecasted < q25, !is.na(TCH_Forecasted))) + ) + ) + safe_log(" ✓ Created tch_forecast summary with value ranges") + } + } + } + + # 6. Gaps summary - GROUP BY Gap_Level and COUNT + if ("Gap_Level" %in% names(field_details_table)) { + summary_tables$gap_filling <- field_details_table %>% + group_by(gap_level = Gap_Level) %>% + summarise(field_count = n(), .groups = 'drop') + safe_log(" ✓ Created gap_filling summary") + } + + safe_log(paste("✓ Created", length(summary_tables), "summary tables from field_details")) + } + + kpi_files_exist <- TRUE + + } else { + safe_log("ERROR: Could not extract field_analysis dataframe from RDS", "ERROR") } } + } else { - safe_log(paste("KPI file not found in:", kpi_rds_path), "WARNING") - safe_log(paste("Expected file:", kpi_rds_filename), "WARNING") + safe_log(paste("KPI file not found:", kpi_rds_path), "WARNING") safe_log(paste("Files in directory:", paste(list.files(kpi_data_dir, pattern="\\.rds$"), collapse=", ")), "WARNING") } } else { @@ -172,7 +320,23 @@ if (dir.exists(kpi_data_dir)) { if (!kpi_files_exist) { safe_log(paste("Skipping KPI sections - no data for", project_dir, "on", report_date), "WARNING") summary_tables <- NULL + field_details_table <- NULL } + +# DEBUG: Log what was actually loaded +if (exists("summary_tables") && !is.null(summary_tables)) { + safe_log(paste("✓ summary_tables available with", length(summary_tables), "KPIs")) + for (kpi_name in names(summary_tables)) { + kpi_df <- summary_tables[[kpi_name]] + if 
(!is.null(kpi_df) && is.data.frame(kpi_df)) { + safe_log(paste(" -", kpi_name, ":", nrow(kpi_df), "rows")) + } + } +} else { + safe_log("WARNING: summary_tables is NULL or does not exist", "WARNING") +} + +# summary_tables # Uncomment for debugging ``` ```{r calculate_dates_and_weeks, message=FALSE, warning=FALSE, include=FALSE} @@ -203,13 +367,13 @@ prev_week_1_date <- report_date_obj - 7 prev_week_2_date <- report_date_obj - 14 prev_week_3_date <- report_date_obj - 21 -week_minus_1 <- lubridate::isoweek(prev_week_1_date) +week_minus_1 <- sprintf("%02d", lubridate::isoweek(prev_week_1_date)) week_minus_1_year <- lubridate::isoyear(prev_week_1_date) -week_minus_2 <- lubridate::isoweek(prev_week_2_date) +week_minus_2 <- sprintf("%02d", lubridate::isoweek(prev_week_2_date)) week_minus_2_year <- lubridate::isoyear(prev_week_2_date) -week_minus_3 <- lubridate::isoweek(prev_week_3_date) +week_minus_3 <- sprintf("%02d", lubridate::isoweek(prev_week_3_date)) week_minus_3_year <- lubridate::isoyear(prev_week_3_date) # Format current week with leading zeros @@ -290,6 +454,19 @@ if (!is.null(CI_quadrant) && nrow(CI_quadrant) > 0) { } ``` + +::: {custom-style="Cover_title" style="text-align:center; margin-top:120px;"} +Satellite Based Field Reporting +::: + + + +::: {custom-style="Cover_subtitle" style="text-align:center; margin-top:18px;"} +Chlorophyll Index (CI) Monitoring Report — `r toupper(params$data_dir)` Farm (Week `r { rd <- params$report_date; rd <- if (inherits(rd, "Date")) rd else suppressWarnings(as.Date(rd)); if (is.na(rd)) rd <- Sys.Date(); if (!is.null(params$week)) params$week else format(rd, '%V') }`, `r { rd <- params$report_date; rd <- if (inherits(rd, "Date")) rd else suppressWarnings(as.Date(rd)); if (is.na(rd)) rd <- Sys.Date(); format(rd, '%Y') }`) +::: + +\newpage + ## Report Summary **Farm Location:** `r toupper(project_dir)` Estate @@ -307,66 +484,75 @@ if (!is.null(CI_quadrant) && nrow(CI_quadrant) > 0) { ## Key Insights ```{r key_insights, 
echo=FALSE, results='asis'} -# Calculate key insights from aggregated KPI summary data +# Calculate key insights from KPI data if (exists("summary_tables") && !is.null(summary_tables) && length(summary_tables) > 0) { - # Extract aggregated KPI summaries (farm-level, not per-field) - uniformity_summary <- summary_tables$uniformity # Has: Status, Field Count, Avg CV, Avg Moran's I - area_change_summary <- summary_tables$area_change # Has: Status, Field Count, Avg CI Change % - growth_summary <- summary_tables$growth_decline # Has: Trend, Field Count, Avg 4-Week Trend - weed_summary <- summary_tables$weed_pressure # Has: Risk Level, Field Count, Avg Fragmentation + # Aggregate per-field KPI data into summaries on-the-fly - # Total fields analyzed (from uniformity summary) - total_fields <- sum(uniformity_summary$`Field Count`, na.rm = TRUE) - - # Uniformity insights - if (!is.null(uniformity_summary) && nrow(uniformity_summary) > 0) { + # 1. Uniformity insights - group by interpretation + if (!is.null(summary_tables$uniformity) && nrow(summary_tables$uniformity) > 0) { cat("**Field Uniformity:**\n") - for (i in 1:nrow(uniformity_summary)) { - status <- uniformity_summary$Status[i] - count <- uniformity_summary$`Field Count`[i] - if (count > 0) { + uniformity_counts <- summary_tables$uniformity %>% + dplyr::select(interpretation, count = field_count) + + for (i in seq_len(nrow(uniformity_counts))) { + status <- uniformity_counts$interpretation[i] + count <- uniformity_counts$count[i] + if (!is.na(status) && !is.na(count) && count > 0) { cat("- ", count, " field(s) with ", status, "\n", sep="") } } } - # Area change insights - if (!is.null(area_change_summary) && nrow(area_change_summary) > 0) { + # 2. 
Area change insights - group by interpretation + if (!is.null(summary_tables$area_change) && nrow(summary_tables$area_change) > 0) { cat("\n**Area Change Status:**\n") - for (i in 1:nrow(area_change_summary)) { - status <- area_change_summary$Status[i] - count <- area_change_summary$`Field Count`[i] - if (count > 0 && !is.na(status)) { + area_counts <- summary_tables$area_change %>% + dplyr::select(interpretation, count = field_count) + + for (i in seq_len(nrow(area_counts))) { + status <- area_counts$interpretation[i] + count <- area_counts$count[i] + if (!is.na(status) && !is.na(count) && count > 0) { cat("- ", count, " field(s) ", status, "\n", sep="") } } } - # Growth trend insights - if (!is.null(growth_summary) && nrow(growth_summary) > 0) { + # 3. Growth trend insights - group by trend_interpretation + if (!is.null(summary_tables$growth_decline) && nrow(summary_tables$growth_decline) > 0) { cat("\n**Growth Trends (4-Week):**\n") - for (i in 1:nrow(growth_summary)) { - trend <- growth_summary$Trend[i] - count <- growth_summary$`Field Count`[i] - if (count > 0 && !is.na(trend)) { + growth_counts <- summary_tables$growth_decline %>% + dplyr::select(trend_interpretation, count = field_count) + + for (i in seq_len(nrow(growth_counts))) { + trend <- growth_counts$trend_interpretation[i] + count <- growth_counts$count[i] + if (!is.na(trend) && !is.na(count) && count > 0) { cat("- ", count, " field(s) with ", trend, "\n", sep="") } } } - # Weed pressure insights - if (!is.null(weed_summary) && nrow(weed_summary) > 0) { - cat("\n**Weed/Pest Pressure Risk:**\n") - for (i in 1:nrow(weed_summary)) { - risk <- weed_summary$`Risk Level`[i] - count <- weed_summary$`Field Count`[i] - if (count > 0 && !is.na(risk)) { + # 4. 
Patchiness insights - group by patchiness_risk + if (!is.null(summary_tables$patchiness) && nrow(summary_tables$patchiness) > 0) { + cat("\n**Field Patchiness Risk:**\n") + patchiness_counts <- summary_tables$patchiness %>% + dplyr::select(patchiness_risk, count = field_count) + + for (i in seq_len(nrow(patchiness_counts))) { + risk <- patchiness_counts$patchiness_risk[i] + count <- patchiness_counts$count[i] + if (!is.na(risk) && !is.na(count) && count > 0) { cat("- ", count, " field(s) at ", risk, " risk\n", sep="") } } } + # 5. Total fields analyzed + total_fields <- sum(summary_tables$uniformity$field_count, na.rm = TRUE) + cat("\n**Total Fields Analyzed:** ", total_fields, "\n", sep="") + } else { cat("KPI data not available for ", project_dir, " on this date.\n", sep="") } @@ -385,30 +571,37 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table tryCatch({ # KPI metadata for display kpi_display_order <- list( - uniformity = list(display = "Field Uniformity", level_col = "Status", count_col = "Field Count"), - area_change = list(display = "Area Change", level_col = "Status", count_col = "Field Count"), - tch_forecast = list(display = "TCH Forecasted", level_col = NULL, count_col = "Fields"), - growth_decline = list(display = "Growth Decline", level_col = "Trend", count_col = "Field Count"), - weed_pressure = list(display = "Weed Presence", level_col = "Risk Level", count_col = "Field Count"), - gap_filling = list(display = "Gap Filling", level_col = NULL, count_col = NULL) + uniformity = list(display = "Field Uniformity", level_col = "interpretation", count_col = "field_count"), + area_change = list(display = "Area Change", level_col = "interpretation", count_col = "field_count"), + growth_decline = list(display = "Growth Decline (4-Week Trend)", level_col = "trend_interpretation", count_col = "field_count"), + patchiness = list(display = "Field Patchiness", level_col = "gini_category", count_col = "field_count", detail_col = 
"patchiness_risk"), + tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", detail_col = "range", count_col = "field_count"), + gap_filling = list(display = "Gaps", level_col = "gap_level", count_col = "field_count") ) - standardize_kpi <- function(df, level_col, count_col) { + standardize_kpi <- function(df, level_col, count_col, detail_col = NULL) { if (is.null(level_col) || !(level_col %in% names(df)) || is.null(count_col) || !(count_col %in% names(df))) { return(NULL) } total <- sum(df[[count_col]], na.rm = TRUE) total <- ifelse(total == 0, NA_real_, total) + # If detail_col is specified, use it as the Level instead + if (!is.null(detail_col) && detail_col %in% names(df)) { + display_level <- df[[detail_col]] + } else { + display_level <- df[[level_col]] + } + df %>% dplyr::transmute( - Level = as.character(.data[[level_col]]), + Level = as.character(display_level), Count = as.integer(round(as.numeric(.data[[count_col]]))), - Percent = dplyr::if_else( - is.na(total), - NA_real_, + Percent = if (is.na(total)) { + NA_real_ + } else { round(Count / total * 100, 1) - ) + } ) } @@ -421,17 +614,9 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table kpi_df <- summary_tables[[kpi_key]] if (is.null(kpi_df) || !is.data.frame(kpi_df) || nrow(kpi_df) == 0) next - kpi_rows <- standardize_kpi(kpi_df, config$level_col, config$count_col) - if (is.null(kpi_rows) && kpi_key %in% c("tch_forecast", "gap_filling")) { - numeric_cols <- names(kpi_df)[vapply(kpi_df, is.numeric, logical(1))] - if (length(numeric_cols) > 0) { - kpi_rows <- tibble::tibble( - Level = numeric_cols, - Count = round(as.numeric(kpi_df[1, numeric_cols]), 2), - Percent = NA_real_ - ) - } - } + # Pass detail_col if it exists in config + detail_col <- if (!is.null(config$detail_col)) config$detail_col else NULL + kpi_rows <- standardize_kpi(kpi_df, config$level_col, config$count_col, detail_col) if (!is.null(kpi_rows)) { kpi_rows$KPI <- config$display @@ 
-443,21 +628,22 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table if (nrow(combined_df) > 0) { combined_df <- combined_df %>% + dplyr::mutate(KPI_group = KPI) %>% dplyr::group_by(KPI) %>% dplyr::mutate( KPI_display = if_else(dplyr::row_number() == 1, KPI, "") ) %>% - dplyr::ungroup() %>% + dplyr::ungroup() + + kpi_group_sizes <- rle(combined_df$KPI_group)$lengths + + display_df <- combined_df %>% dplyr::select(KPI = KPI_display, Level, Count, Percent) - ft <- flextable(combined_df) %>% + ft <- flextable(display_df) %>% merge_v(j = "KPI") %>% autofit() - kpi_group_sizes <- combined_df %>% - dplyr::group_by(KPI) %>% - dplyr::tally() %>% - dplyr::pull(n) cum_rows <- cumsum(kpi_group_sizes) for (i in seq_along(cum_rows)) { if (i < length(cum_rows)) { @@ -466,7 +652,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table } } - print(ft) + ft } else { cat("No valid KPI summary tables found for display.\n") } @@ -481,6 +667,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table } ``` +\newpage ## Field Alerts ```{r field_alerts_table, echo=FALSE, results='asis'} @@ -491,8 +678,8 @@ generate_field_alerts <- function(field_details_table) { } # Check for required columns - required_cols <- c("Field", "Field Size (ha)", "Growth Uniformity", "Yield Forecast (t/ha)", - "Gap Score", "Decline Risk", "Weed Risk", "Mean CI", "CV Value", "Moran's I") + required_cols <- c("Field", "Field Size (acres)", "Growth Uniformity", "Yield Forecast (t/ha)", + "Gap Score", "Decline Risk", "Patchiness Risk", "Mean CI", "CV Value", "Moran's I") missing_cols <- setdiff(required_cols, colnames(field_details_table)) if (length(missing_cols) > 0) { @@ -511,7 +698,7 @@ generate_field_alerts <- function(field_details_table) { # Aggregate data for the field field_summary <- field_data %>% summarise( - field_size = sum(`Field Size (ha)`, na.rm = TRUE), + field_size = sum(`Field Size (acres)`, na.rm = TRUE), 
uniformity_levels = paste(unique(`Growth Uniformity`), collapse = "/"), avg_yield_forecast = mean(`Yield Forecast (t/ha)`, na.rm = TRUE), max_gap_score = max(`Gap Score`, na.rm = TRUE), @@ -522,10 +709,11 @@ generate_field_alerts <- function(field_details_table) { any(`Decline Risk` == "Low") ~ "Low", TRUE ~ "Unknown" ), - highest_weed_risk = case_when( - any(`Weed Risk` == "High") ~ "High", - any(`Weed Risk` == "Moderate") ~ "Moderate", - any(`Weed Risk` == "Low") ~ "Low", + highest_patchiness_risk = case_when( + any(`Patchiness Risk` == "High") ~ "High", + any(`Patchiness Risk` == "Medium") ~ "Medium", + any(`Patchiness Risk` == "Low") ~ "Low", + any(`Patchiness Risk` == "Minimal") ~ "Minimal", TRUE ~ "Unknown" ), avg_mean_ci = mean(`Mean CI`, na.rm = TRUE), @@ -551,12 +739,12 @@ generate_field_alerts <- function(field_details_table) { } # Priority 3: No alert (no stress) - # Keep other alerts for decline risk, weed risk, gap score + # Keep other alerts for decline risk, patchiness risk, gap score if (field_summary$highest_decline_risk %in% c("High", "Very-high")) { field_alerts <- c(field_alerts, "Growth decline observed") } - if (field_summary$highest_weed_risk == "High") { - field_alerts <- c(field_alerts, "Increased weed presence") + if (field_summary$highest_patchiness_risk == "High") { + field_alerts <- c(field_alerts, "High patchiness detected - recommend scouting") } if (field_summary$max_gap_score > 20) { field_alerts <- c(field_alerts, "Gaps present - recommend review") @@ -585,7 +773,43 @@ generate_field_alerts <- function(field_details_table) { # Generate and display alerts table if (exists("field_details_table") && !is.null(field_details_table) && nrow(field_details_table) > 0) { - alerts_data <- generate_field_alerts(field_details_table) + # Adapter: Map normalized column names back to legacy names for generate_field_alerts() + # (generates from the normalized schema created by normalize_field_details_columns + column_mappings) + 
field_details_for_alerts <- field_details_table + + # Rename normalized columns back to legacy names (only if they exist) + if ("Field_id" %in% names(field_details_for_alerts)) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(Field = Field_id) + } + if ("Mean_CI" %in% names(field_details_for_alerts)) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Mean CI` = Mean_CI) + } + if ("CV" %in% names(field_details_for_alerts) && !("CV Value" %in% names(field_details_for_alerts))) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`CV Value` = CV) + } + if ("TCH_Forecasted" %in% names(field_details_for_alerts)) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Yield Forecast (t/ha)` = TCH_Forecasted) + } + if ("Gap_Score" %in% names(field_details_for_alerts)) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Gap Score` = Gap_Score) + } + if ("Growth_Uniformity" %in% names(field_details_for_alerts)) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Growth Uniformity` = Growth_Uniformity) + } + if ("Decline_Risk" %in% names(field_details_for_alerts)) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Decline Risk` = Decline_Risk) + } + if ("Decline_Severity" %in% names(field_details_for_alerts) && !("Decline Risk" %in% names(field_details_for_alerts))) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Decline Risk` = Decline_Severity) + } + if ("Patchiness_Risk" %in% names(field_details_for_alerts)) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Patchiness Risk` = Patchiness_Risk) + } + if ("Morans_I" %in% names(field_details_for_alerts)) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Moran's I` = Morans_I) + } + + alerts_data <- generate_field_alerts(field_details_for_alerts) if (!is.null(alerts_data) && 
nrow(alerts_data) > 0) { ft <- flextable(alerts_data) %>% # set_caption("Field Alerts Summary") %>% @@ -653,23 +877,24 @@ if (!exists("field_details_table") || is.null(field_details_table)) { # Try to calculate field sizes (area) from geometry if available field_sizes <- if (!is.null(sf::st_geometry(AllPivots0)) && !all(sf::st_is_empty(sf::st_geometry(AllPivots0)))) { - sf::st_area(sf::st_geometry(AllPivots0)) / 10000 # Convert m² to hectares + sf::st_area(sf::st_geometry(AllPivots0)) / 4046.86 # Convert m² to acres (1 acre = 4046.86 m²) } else { rep(NA_real_, length(field_names)) } # Create minimal field details table with actual data we have + NAs for missing KPI columns + # IMPORTANT: Use column names that match downstream code expectations (no spaces, match exact names) field_details_table <- tibble::tibble( - Field = field_names, - `Field Size (ha)` = as.numeric(field_sizes), - `Growth Uniformity` = NA_character_, - `Yield Forecast (t/ha)` = NA_real_, - `Gap Score` = NA_real_, - `Decline Risk` = NA_character_, - `Weed Risk` = NA_character_, - `Mean CI` = NA_real_, - `CV Value` = NA_real_, - `Moran's I` = NA_real_ + Field_id = field_names, + Acreage = as.numeric(field_sizes), + Growth_Uniformity = NA_character_, + TCH_Forecasted = NA_real_, + Gap_Score = NA_real_, + Decline_Risk = NA_character_, + Patchiness_Risk = NA_character_, + Mean_CI = NA_real_, + CV = NA_real_, + Morans_I = NA_real_ ) safe_log(paste("Created field_details_table from geometries for", nrow(field_details_table), "fields")) } @@ -679,8 +904,6 @@ if (!exists("field_details_table") || is.null(field_details_table)) { } ``` -## Farm-Level Overview Maps - ```{r aggregate_farm_level_rasters, message=FALSE, warning=FALSE, include=FALSE} # Aggregate per-field weekly mosaics into single farm-level rasters for visualization # This creates on-the-fly mosaics for current week and historical weeks without saving intermediate files @@ -716,8 +939,8 @@ tryCatch({ # Aggregate mosaics for three weeks: 
current, week-1, week-3 farm_mosaic_current <- aggregate_mosaics_safe(current_week, current_iso_year, "current week") - farm_mosaic_minus_1 <- aggregate_mosaics_safe(week_minus_1, week_minus_1_year, "week-1") - farm_mosaic_minus_3 <- aggregate_mosaics_safe(week_minus_3, week_minus_3_year, "week-3") + farm_mosaic_minus_1 <- aggregate_mosaics_safe(as.numeric(week_minus_1), week_minus_1_year, "week-1") + farm_mosaic_minus_3 <- aggregate_mosaics_safe(as.numeric(week_minus_3), week_minus_3_year, "week-3") # Extract CI band (5th band, or named "CI") from each aggregated mosaic farm_ci_current <- NULL @@ -826,9 +1049,9 @@ tryCatch({ }) ``` -### Chlorophyll Index (CI) Overview Map - Current Week +\newpage -```{r render_farm_ci_map, echo=FALSE, fig.height=5.5, fig.width=6.5, dpi=150, dev='png', message=FALSE, warning=FALSE} +```{r render_farm_ci_map, echo=FALSE, message=FALSE, warning=FALSE, fig.height=6.8, fig.width=8.5, dpi=150, dev='png'} # Create farm-level chlorophyll index map with OpenStreetMap basemap tryCatch({ if (!is.null(farm_ci_current_ll)) { @@ -902,13 +1125,13 @@ tryCatch({ map <- map + # Add scale bar and theme ggspatial::annotation_scale( - location = "br", + location = "tr", width_hint = 0.25 ) + ggplot2::theme_void() + ggplot2::theme( - legend.position = "bottom", - legend.direction = "horizontal", + legend.position = "right", + legend.direction = "vertical", legend.title = ggplot2::element_text(size = 10), legend.text = ggplot2::element_text(size = 9), plot.title = ggplot2::element_text(hjust = 0.5, size = 12, face = "bold"), @@ -934,9 +1157,7 @@ tryCatch({ }) ``` -### Weekly Chlorophyll Index Difference Map - -```{r render_farm_ci_diff_map, echo=FALSE, fig.height=5.5, fig.width=6.5, dpi=150, dev='png', message=FALSE, warning=FALSE} +```{r render_farm_ci_diff_map, echo=FALSE, message=FALSE, warning=FALSE, fig.height=6.8, fig.width=8.5, dpi=150, dev='png'} # Create farm-level CI difference map (week-over-week change) tryCatch({ if 
(!is.null(farm_ci_diff_week_ll)) { @@ -1011,13 +1232,13 @@ tryCatch({ map <- map + # Add scale bar and theme ggspatial::annotation_scale( - location = "br", + location = "tr", width_hint = 0.25 ) + ggplot2::theme_void() + ggplot2::theme( - legend.position = "bottom", - legend.direction = "horizontal", + legend.position = "right", + legend.direction = "vertical", legend.title = ggplot2::element_text(size = 10), legend.text = ggplot2::element_text(size = 9), plot.title = ggplot2::element_text(hjust = 0.5, size = 12, face = "bold"), @@ -1043,34 +1264,33 @@ tryCatch({ }) ``` -\newpage - # Section 2: Field-by-Field Analysis ## Overview of Field-Level Insights This section provides detailed, field-specific analyses including chlorophyll index maps, trend graphs, and performance metrics. Each field is analyzed individually to support targeted interventions. **Key Elements per Field:** -- Current and historical CI maps -- Week-over-week change visualizations -- Cumulative growth trends -- Field-specific KPI summaries +- Current and historical CI maps +- Week-over-week change visualizations +- Cumulative growth trends +- Field-specific KPI summaries *Navigate to the following pages for individual field reports.* \newpage -```{r generate_field_visualizations, eval=TRUE, fig.height=3.8, fig.width=6.5, dpi=150, dev='png', message=TRUE, echo=FALSE, warning=TRUE, include=TRUE, results='asis'} +```{r generate_field_visualizations, echo=FALSE, fig.height=3.8, fig.width=10, dev='png', dpi=150, results='asis'} # Generate detailed visualizations for each field using purrr::walk + tryCatch({ - # Prepare merged field list and week/year info - AllPivots_merged <- AllPivots0 %>% + # Prepare merged field list and week/year info + AllPivots_merged <- AllPivots0 %>% dplyr::filter(!is.na(field), !is.na(sub_field)) %>% dplyr::group_by(field) %>% dplyr::summarise(.groups = 'drop') - # Helper to get week/year from a date - get_week_year <- function(date) { + # Helper to get week/year from a 
date + get_week_year <- function(date) { list( week = as.numeric(format(date, "%V")), year = as.numeric(format(date, "%G")) @@ -1089,9 +1309,7 @@ tryCatch({ # Helper function to safely load per-field mosaic if it exists load_per_field_mosaic <- function(base_dir, field_name, week, year) { path <- file.path(base_dir, field_name, paste0("week_", sprintf("%02d", week), "_", year, ".tif")) - cat(paste(" [DEBUG] Field:", field_name, "trying path:", path, "\n")) if (file.exists(path)) { - cat(paste(" ✓ File found\n")) tryCatch({ rast_obj <- terra::rast(path) # Extract CI band if present, otherwise first band @@ -1104,22 +1322,13 @@ tryCatch({ message(paste("Warning: Could not load", path, ":", e$message)) return(NULL) }) - } else { - cat(paste(" ✗ File NOT found\n")) } return(NULL) } # Iterate through fields using purrr::walk - is_first_field <- TRUE purrr::walk(AllPivots_merged$field, function(field_name) { tryCatch({ - # Add page break before each field (except first) - if (!is_first_field) { - cat("\\newpage\n\n") - } - is_first_field <<- FALSE - message(paste("Processing field:", field_name)) # Load per-field rasters for all 4 weeks @@ -1163,7 +1372,7 @@ tryCatch({ borders = borders, colorblind_friendly = colorblind_friendly ) - cat("\n\n") + #cat("\n\n") } else { message(paste("Warning: No raster data found for field", field_name)) } @@ -1172,8 +1381,8 @@ tryCatch({ ci_quadrant_data <- if (project_dir == "esa" && field_name == "00F25") { CI_quadrant %>% dplyr::filter(field == "00F25") %>% - dplyr::arrange(DOY) %>% - dplyr::group_by(DOY) %>% + dplyr::arrange(DAH) %>% + dplyr::group_by(DAH) %>% dplyr::slice(1) %>% dplyr::ungroup() } else { @@ -1194,20 +1403,51 @@ tryCatch({ benchmark_percentiles = c(10, 50, 90), benchmark_data = benchmarks ) - cat("\n\n") + #cat("\n") } # Add field-specific KPI summary if available - # NOTE: generate_field_kpi_summary function not yet implemented - # Skipping field-level KPI text for now; KPI tables are available in Section 1 - if 
(FALSE) { # Disabled pending function implementation - # if (exists("field_details_table") && !is.null(field_details_table) && nrow(field_details_table) > 0) { - # kpi_summary <- generate_field_kpi_summary(field_name, field_details_table, CI_quadrant) - # if (!is.null(kpi_summary)) { - # cat(kpi_summary) - # cat("\n\n") - # } - # } + if (exists("field_details_table") && !is.null(field_details_table) && nrow(field_details_table) > 0) { + field_kpi <- field_details_table %>% + dplyr::filter(Field_id == field_name) + + if (nrow(field_kpi) > 0) { + # Format KPIs as compact single line (no interpretations, just values) + kpi_parts <- c( + sprintf("**CV:** %.2f", field_kpi$CV), + sprintf("**Mean CI:** %.2f", field_kpi$Mean_CI) + ) + + # Add Weekly_CI_Change if available (note: capital C and I) + if (!is.null(field_kpi$Weekly_CI_Change) && !is.na(field_kpi$Weekly_CI_Change)) { + change_sign <- ifelse(field_kpi$Weekly_CI_Change >= 0, "+", "") + kpi_parts <- c(kpi_parts, sprintf("**Δ CI:** %s%.2f", change_sign, field_kpi$Weekly_CI_Change)) + } + + # Compact trend display with symbols + trend_compact <- case_when( + grepl("Strong growth", field_kpi$Trend_Interpretation, ignore.case = TRUE) ~ "↑↑", + grepl("Growth|Increasing", field_kpi$Trend_Interpretation, ignore.case = TRUE) ~ "↑", + grepl("Stable|No growth", field_kpi$Trend_Interpretation, ignore.case = TRUE) ~ "→", + grepl("Slight decline", field_kpi$Trend_Interpretation, ignore.case = TRUE) ~ "↓", + grepl("Strong decline|Severe", field_kpi$Trend_Interpretation, ignore.case = TRUE) ~ "↓↓", + TRUE ~ field_kpi$Trend_Interpretation + ) + kpi_parts <- c(kpi_parts, sprintf("**Trend:** %s", trend_compact)) + + if (!is.na(field_kpi$TCH_Forecasted) && field_kpi$TCH_Forecasted > 0) { + kpi_parts <- c(kpi_parts, sprintf("**Yield:** %.1f t/ha", field_kpi$TCH_Forecasted)) + } + + kpi_parts <- c( + kpi_parts, + sprintf("**Gap:** %.0f", field_kpi$Gap_Score), + sprintf("**Patchiness:** %s", field_kpi$Patchiness_Risk), + 
sprintf("**Decline:** %s", field_kpi$Decline_Severity) + ) + + cat(paste(kpi_parts, collapse = " | "), "\n\n") # Double newline for markdown paragraph break + } } }, error = function(e) { @@ -1257,89 +1497,102 @@ tryCatch({ }) ``` -## KPI Summary by Field - -## Detailed Field Performance Summary +\newpage +## Detailed Field Performance Summary by Field The following table provides a comprehensive overview of all monitored fields with their key performance metrics from the KPI analysis. ```{r detailed_field_table, echo=FALSE, results='asis'} # Detailed field performance table -report_date_obj <- as.Date(report_date) -# Initialize empty dataframe for field_ages if CI_quadrant is unavailable -field_ages <- data.frame(Field = character(), Age_days = numeric()) - -# Try to get field ages from CI quadrant if available -if (!is.null(CI_quadrant) && nrow(CI_quadrant) > 0) { - tryCatch({ - # Identify the current season for each field based on report_date - current_seasons <- CI_quadrant %>% - filter(Date <= report_date_obj) %>% - group_by(field, season) %>% - summarise( - season_start = min(Date), - season_end = max(Date), - .groups = 'drop' - ) %>% - group_by(field) %>% - filter(season == max(season)) %>% - select(field, season) - - # Get current field ages (most recent DOY for each field in their CURRENT SEASON only) - field_ages <- CI_quadrant %>% - inner_join(current_seasons, by = c("field", "season")) %>% - group_by(field) %>% - filter(DOY == max(DOY)) %>% - select(field, DOY) %>% - rename(Field = field, Age_days = DOY) - }, error = function(e) { - safe_log(paste("Error extracting field ages:", e$message), "WARNING") - }) +if (!exists("field_details_table") || is.null(field_details_table) || nrow(field_details_table) == 0) { + safe_log("No field details available for table", "WARNING") + cat("No field-level KPI data available for this report period.\n") + } else { - safe_log("CI quadrant data unavailable - field ages will not be included in detailed table", "WARNING") 
-} - -# Clean up the field details table - remove sub field column and round numeric values -# Check if field_details_table was loaded successfully -if (!exists("field_details_table") || is.null(field_details_table)) { - # Initialize empty tibble with expected columns - field_details_clean <- tibble( - Field = character(), - `Field Size (ha)` = numeric(), - `Growth Uniformity` = character(), - `Yield Forecast (t/ha)` = numeric(), - `Gap Score` = numeric(), - `Decline Risk` = character(), - `Weed Risk` = character(), - `Mean CI` = numeric(), - `CV Value` = numeric() - ) -} else { - field_details_clean <- field_details_table %>% - left_join(field_ages, by = "Field") %>% + # Calculate field sizes from boundaries (convert to acres) + field_sizes_source <- if (exists("AllPivots_merged") && inherits(AllPivots_merged, "sf")) AllPivots_merged else AllPivots0 + field_sizes_df <- field_sizes_source %>% mutate( - `Yield Forecast (t/ha)` = ifelse(is.na(Age_days) | Age_days < 240, NA_real_, `Yield Forecast (t/ha)`) + field_size_acres = as.numeric(sf::st_area(geometry) / 4046.86) # m² to acres ) %>% - select(Field, `Field Size (ha)`, `Growth Uniformity`, `Yield Forecast (t/ha)`, `Gap Score`, `Decline Risk`, `Weed Risk`, `Mean CI`, `CV Value`) %>% # Reorder columns as requested + sf::st_drop_geometry() %>% + select(field, field_size_acres) + + # Get field ages from CI quadrant if available + field_ages_df <- if (!is.null(CI_quadrant) && nrow(CI_quadrant) > 0) { + tryCatch({ + # Get current season and age for each field + CI_quadrant %>% + filter(Date <= as.Date(report_date)) %>% + group_by(field, season) %>% + summarise(last_date = max(Date), last_dah = max(DAH), .groups = 'drop') %>% + group_by(field) %>% + filter(season == max(season)) %>% + select(field, Age_days = last_dah) + }, error = function(e) { + data.frame(field = character(), Age_days = numeric()) + }) + } else { + data.frame(field = character(), Age_days = numeric()) + } + + # Join field sizes and ages to KPI data, 
simplified column selection + # DEFENSIVE: Normalize field_details_table column structure before joining + # Uses shared normalization function to ensure Field_id exists and all expected columns are present + field_details_table <- normalize_field_details_columns(field_details_table) + + field_details_clean <- field_details_table %>% + left_join(field_sizes_df, by = c("Field_id" = "field")) %>% + left_join(field_ages_df, by = c("Field_id" = "field")) %>% mutate( - `Mean CI` = round(`Mean CI`, 2), # Round to 2 decimal places - `CV Value` = round(`CV Value`, 2), # Round to 2 decimal places - `Gap Score` = round(`Gap Score`, 0) # Round to nearest integer + # Only show yield forecast for fields >= 240 days old + TCH_Forecasted = if_else(is.na(Age_days) | Age_days < 240, NA_real_, TCH_Forecasted), + # Round numeric columns + field_size_acres = round(field_size_acres, 1), + Mean_CI = round(Mean_CI, 2), + CV = round(CV, 2), + Gap_Score = round(Gap_Score, 2), + TCH_Forecasted = round(TCH_Forecasted, 1) ) + + # Add Weekly_CI_Change if it exists in the data (note: capital C and I) + if ("Weekly_CI_Change" %in% names(field_details_clean)) { + field_details_clean <- field_details_clean %>% + mutate(Weekly_CI_Change = round(Weekly_CI_Change, 2)) %>% + select( + Field = Field_id, + `Field Size (acres)` = field_size_acres, + `Mean CI` = Mean_CI, + `Weekly CI Change` = Weekly_CI_Change, + `Yield Forecast (t/ha)` = TCH_Forecasted, + `Gap Score %` = Gap_Score, + `Decline Risk` = Decline_Severity, + `Patchiness Risk` = Patchiness_Risk, + `CV Value` = CV + ) + } else { + field_details_clean <- field_details_clean %>% + select( + Field = Field_id, + `Field Size (acres)` = field_size_acres, + `Mean CI` = Mean_CI, + `Yield Forecast (t/ha)` = TCH_Forecasted, + `Gap Score %` = Gap_Score, + `Decline Risk` = Decline_Severity, + `Patchiness Risk` = Patchiness_Risk, + `CV Value` = CV + ) + } + + # Display the cleaned field table with flextable (fit to page width) + ft <- 
flextable(field_details_clean) %>% + set_caption("Detailed Field Performance Summary") %>% + theme_booktabs() %>% + set_table_properties(width = 1, layout = "autofit") # Fit to 100% page width with auto-adjust + + knit_print(ft) } - - -# Display the cleaned field table with flextable -# Set column widths to fit page (approximately 6.5 inches for 1-inch margins) -# Scale widths proportionally: original total = 8.0 inches, scale to 6.2 inches -col_widths <- c(0.97, 0.73, 0.80, 0.80, 0.65, 0.73, 0.65, 0.56, 0.48) # inches for each column - -ft <- flextable(field_details_clean) %>% - set_caption("Detailed Field Performance Summary") %>% - width(width = col_widths) - -ft ``` \newpage @@ -1377,31 +1630,89 @@ The Chlorophyll Index (CI) is a vegetation index that measures the relative amou CI values typically range from 0 (bare soil or severely stressed vegetation) to 7+ (very healthy, dense vegetation). For sugarcane, values between 3-7 generally indicate good crop health, depending on the growth stage. +