From 7eeed342f37d993b179eecd0e5520dd1d54c65d6 Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Thu, 12 Feb 2026 13:47:35 +0100 Subject: [PATCH 01/30] =?UTF-8?q?Update=20the=20cane=20supply=20main=20and?= =?UTF-8?q?=20util=20script=20(with=20the=202=CF=83=20gap=20score)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- r_app/80_calculate_kpis.R | 614 ++------------------------- r_app/80_utils_cane_supply.R | 797 +++++++++++++++++++++++++++++------ 2 files changed, 709 insertions(+), 702 deletions(-) diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index c0f5a2b..a825434 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -316,6 +316,9 @@ main <- function() { # Use centralized paths from setup object (no need for file.path calls) weekly_mosaic <- setup$weekly_mosaic_dir daily_vals_dir <- setup$daily_ci_vals_dir + reports_dir <- setup$kpi_reports_dir + data_dir <- setup$data_dir + tryCatch({ source(here("r_app", "30_growth_model_utils.R")) @@ -389,599 +392,62 @@ main <- function() { message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)") message(strrep("=", 70)) - # Set reports_dir for CANE_SUPPLY workflow (used by export functions) - reports_dir <- setup$kpi_reports_dir - data_dir <- setup$data_dir - - # Continue with existing per-field analysis code below - - message("\n", strrep("-", 70)) - message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ") - message(strrep("-", 70)) - - weeks <- calculate_week_numbers(end_date) - current_week <- weeks$current_week - current_year <- weeks$current_year - previous_week <- weeks$previous_week - previous_year <- weeks$previous_year - - message(paste("Week:", current_week, "/ Year (ISO 8601):", current_year)) - # Find per-field weekly mosaics - message("Finding per-field weekly mosaics...") + # Define variables needed for workflow functions + reports_dir <- setup$kpi_reports_dir + data_dir <- setup$data_dir + - if (!dir.exists(weekly_mosaic)) { - stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic, - "\nScript 40 (mosaic creation) must be run first.")) - } - - field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE) - field_dirs <- field_dirs[field_dirs != ""] - - if (length(field_dirs) == 0) { - stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic, - "\nScript 40 must create weekly_mosaic/{FIELD}/ structure.")) - } - - # Verify we have mosaics for this week - single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year) - per_field_files <- c() - for (field in field_dirs) { - field_mosaic_dir <- file.path(weekly_mosaic, field) - files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE) - if (length(files) > 0) { - per_field_files <- c(per_field_files, files) - } - } - - if (length(per_field_files) == 0) { - stop(paste("ERROR: No mosaics found for week", current_week, "year", current_year, - "\nExpected pattern:", single_file_pattern, - "\nChecked:", weekly_mosaic)) - } - - message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics")) - - mosaic_mode <- "per-field" - mosaic_dir <- weekly_mosaic - - - - # Load field boundaries + # Load field boundaries for workflow (use data_dir from setup) + message("\nLoading field boundaries for KPI calculation...") tryCatch({ - boundaries_result <- load_field_boundaries(data_dir) - + boundaries_result <- load_field_boundaries(setup$data_dir) + if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) { field_boundaries_sf <- boundaries_result$field_boundaries_sf } else { field_boundaries_sf <- boundaries_result } - + if (nrow(field_boundaries_sf) == 0) { stop("No fields loaded from boundaries") } - - message(paste(" Loaded", nrow(field_boundaries_sf), "fields")) + + message(paste(" ✓ Loaded", nrow(field_boundaries_sf), "fields")) }, error = function(e) { stop("ERROR loading field boundaries: ", e$message) }) + + # Load harvesting data + if (!exists("harvesting_data")) { + warning("harvesting_data not loaded. TCH KPI will use placeholder values.") + harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric()) + } + + # Extract current week/year from end_date + current_week <- as.numeric(format(end_date, "%V")) + current_year <- as.numeric(format(end_date, "%G")) + - message("Loading historical field data for trend calculations...") - # Load up to 8 weeks (max of 4-week and 8-week trend requirements) - # Function gracefully handles missing weeks and loads whatever exists - num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG) # Always 8 - message(paste(" Attempting to load up to", num_weeks_to_load, "weeks of historical data...")) - - # Only auto-generate on first call (not in recursive calls from within load_historical_field_data) - allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv) - - historical_data <- load_historical_field_data(project_dir, current_week, current_year, reports_dir, - num_weeks = num_weeks_to_load, - auto_generate = allow_auto_gen, - field_boundaries_sf = field_boundaries_sf, - daily_vals_dir = daily_vals_dir) - - # Load harvest.xlsx for planting dates (season_start) - message("\nLoading harvest data from harvest.xlsx for planting dates...") - harvest_file_path <- file.path(data_dir, "harvest.xlsx") - - harvesting_data <- tryCatch({ - if (file.exists(harvest_file_path)) { - harvest_raw <- readxl::read_excel(harvest_file_path) - harvest_raw$season_start <- as.Date(harvest_raw$season_start) - harvest_raw$season_end <- as.Date(harvest_raw$season_end) - message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows")) - harvest_raw - } else { - message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path)) - NULL - } - }, error = function(e) { - message(paste(" ERROR loading harvest.xlsx:", e$message)) - NULL - }) - - planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf) - - # Validate planting_dates - if (is.null(planting_dates) || nrow(planting_dates) == 0) { - message("WARNING: No planting dates available. Using NA for all fields.") - planting_dates <- data.frame( - field_id = field_boundaries_sf$field, - planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)), - stringsAsFactors = FALSE - ) - } - # Build per-field configuration - message("\nPreparing mosaic configuration for statistics calculation...") - message(" ✓ Using per-field mosaic architecture (1 TIFF per field)") - - # Per-field mode: each field has its own TIFF in weekly_mosaic/{FIELD}/week_*.tif - field_grid <- list( - mosaic_dir = mosaic_dir, - mode = "per-field" - ) - - message("\nUsing modular RDS-based approach for weekly statistics...") - - # Load/calculate CURRENT week stats (from tiles or RDS cache) - message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...") - current_stats <- load_or_calculate_weekly_stats( - week_num = current_week, - year = current_year, + # Call the main orchestrator function from kpi_calculation_utils.R + workflow_results <- calculate_field_analysis_cane_supply( + setup = setup, + client_config = client_config, + end_date = end_date, project_dir = project_dir, + weekly_mosaic = weekly_mosaic, + daily_vals_dir = daily_vals_dir, field_boundaries_sf = field_boundaries_sf, - mosaic_dir = field_grid$mosaic_dir, - reports_dir = reports_dir, - report_date = end_date + data_dir = data_dir ) - - message(paste(" ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week")) - - # Load/calculate PREVIOUS week stats (from RDS cache or tiles) - message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...") - - # Calculate report date for previous week (7 days before current) - prev_report_date <- end_date - 7 - - prev_stats <- load_or_calculate_weekly_stats( - week_num = previous_week, - year = previous_year, - project_dir = project_dir, - field_boundaries_sf = field_boundaries_sf, - mosaic_dir = field_grid$mosaic_dir, - reports_dir = reports_dir, - report_date = prev_report_date - ) - - message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week")) - message(paste(" Columns in prev_stats:", paste(names(prev_stats), collapse = ", "))) - message(paste(" CV column non-NA values in prev_stats:", sum(!is.na(prev_stats$CV)))) - - # Apply trend calculations (requires both weeks) - message("\n3. Calculating trend columns...") - current_stats <- calculate_kpi_trends(current_stats, prev_stats, - project_dir = project_dir, - reports_dir = reports_dir, - current_week = current_week, - year = current_year) - - message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed")) - - # Load weekly harvest probabilities from script 31 (if available) - # Note: Script 31 saves to reports/kpis/field_stats/ (not field_level) - message("\n4. Loading harvest probabilities from script 31...") - harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats") - harvest_prob_file <- file.path(harvest_prob_dir, - sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year)) - message(paste(" Looking for:", harvest_prob_file)) - - imminent_prob_data <- tryCatch({ - if (file.exists(harvest_prob_file)) { - prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE, - col_types = readr::cols(.default = readr::col_character())) - message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields")) - prob_df %>% - select(field, imminent_prob, detected_prob) %>% - rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob) - } else { - message(paste(" INFO: Harvest probabilities not available (script 31 not run)")) - NULL - } - }, error = function(e) { - message(paste(" WARNING: Could not load harvest probabilities:", e$message)) - NULL - }) - - # ============================================================================ - # CALCULATE GAP FILLING KPI (2σ method from kpi_utils.R) - # ============================================================================ - - message("\nCalculating gap filling scores (2σ method)...") - - # Process per-field mosaics - message(paste(" Using per-field mosaics for", length(per_field_files), "fields")) - - field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field) - - process_gap_for_field <- function(field_file) { - field_id <- basename(dirname(field_file)) - field_bounds <- field_boundaries_by_id[[field_id]] - - if (is.null(field_bounds) || nrow(field_bounds) == 0) { - return(data.frame(Field_id = field_id, gap_score = NA_real_)) - } - - tryCatch({ - field_raster <- terra::rast(field_file) - ci_band_name <- "CI" - if (!(ci_band_name %in% names(field_raster))) { - return(data.frame(Field_id = field_id, gap_score = NA_real_)) - } - field_ci_band <- field_raster[[ci_band_name]] - names(field_ci_band) <- "CI" - - gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds) - - if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) { - return(data.frame(Field_id = field_id, gap_score = NA_real_)) - } - - gap_scores <- gap_result$field_results - gap_scores$Field_id <- gap_scores$field - gap_scores <- gap_scores[, c("Field_id", "gap_score")] - - stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE)) - }, error = function(e) { - message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message)) - data.frame(Field_id = field_id, gap_score = NA_real_) - }) - } - - # Process fields sequentially with progress bar - message(" Processing gap scores for ", length(per_field_files), " fields...") - pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50) - results_list <- lapply(seq_along(per_field_files), function(idx) { - result <- process_gap_for_field(per_field_files[[idx]]) - utils::setTxtProgressBar(pb, idx) - result - }) - close(pb) - - gap_scores_df <- dplyr::bind_rows(results_list) - - if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) { - gap_scores_df <- gap_scores_df %>% - dplyr::group_by(Field_id) %>% - dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop") - - message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields")) - message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%")) - } else { - message(" WARNING: No gap scores calculated from per-field mosaics") - gap_scores_df <- NULL - } - - # ============================================================================ - # Build final output dataframe with all 22 columns (including Gap_score) - # ============================================================================ - - message("\nBuilding final field analysis output...") - - # Pre-calculate acreages with geometry validation - # This avoids geometry errors during field_analysis construction - acreage_lookup <- tryCatch({ - lookup_df <- field_boundaries_sf %>% - sf::st_drop_geometry() %>% - as.data.frame() %>% - mutate( - geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) { - tryCatch({ - sf::st_is_valid(field_boundaries_sf[idx, ]) - }, error = function(e) FALSE) - }), - area_ha = 0 - ) + # Extract results + field_analysis_df <- workflow_results$field_analysis_df + farm_kpi_results <- workflow_results$farm_kpi_results + export_paths <- workflow_results$export_paths - # Calculate area for valid geometries - for (idx in which(lookup_df$geometry_valid)) { - tryCatch({ - area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ])) - lookup_df$area_ha[idx] <- area_m2 / 10000 - }, error = function(e) { - lookup_df$area_ha[idx] <<- NA_real_ - }) - } - - # Convert hectares to acres - lookup_df %>% - mutate(acreage = area_ha / 0.404686) %>% - select(field, acreage) - }, error = function(e) { - message(paste("Warning: Could not calculate acreages from geometries -", e$message)) - data.frame(field = character(0), acreage = numeric(0)) - }) - - field_analysis_df <- current_stats %>% - mutate( - # Column 2: Farm_Section (user fills) - Farm_Section = NA_character_, - # Column 3: Field_name (from GeoJSON - already have Field_id, can look up) - Field_name = Field_id, - # Column 4: Acreage (calculate from geometry) - Acreage = { - acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)] - if_else(is.na(acreages_vec), 0, acreages_vec) - }, - # Columns 5-6: Already in current_stats (Mean_CI, Weekly_ci_change) - # Column 7: Four_week_trend (from current_stats) - # Column 8: Last_harvest_or_planting_date (from harvest.xlsx - season_start) - Last_harvest_or_planting_date = { - planting_dates$planting_date[match(Field_id, planting_dates$field_id)] - }, - # Column 9: Age_week (calculated from report date and planting date) - Age_week = { - sapply(seq_len(nrow(current_stats)), function(idx) { - planting_dt <- Last_harvest_or_planting_date[idx] - if (is.na(planting_dt)) { - return(NA_real_) - } - round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 0) - }) - }, - # Column 10: Phase (recalculate based on updated Age_week) - Phase = { - sapply(Age_week, function(age) { - if (is.na(age)) return(NA_character_) - if (age >= 0 & age < 4) return("Germination") - if (age >= 4 & age < 17) return("Tillering") - if (age >= 17 & age < 39) return("Grand Growth") - if (age >= 39) return("Maturation") - NA_character_ - }) - }, - # Column 11: nmr_of_weeks_analysed (already in current_stats from calculate_kpi_trends) - # Column 12: Germination_progress (calculated here from CI values) - # Bin Pct_pixels_CI_gte_2 into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100% - Germination_progress = sapply(Pct_pixels_CI_gte_2, function(pct) { - if (is.na(pct)) return(NA_character_) - if (pct >= 95) return("95-100%") - else if (pct >= 90) return("90-95%") - else if (pct >= 80) return("80-90%") - else if (pct >= 70) return("70-80%") - else if (pct >= 60) return("60-70%") - else if (pct >= 50) return("50-60%") - else if (pct >= 40) return("40-50%") - else if (pct >= 30) return("30-40%") - else if (pct >= 20) return("20-30%") - else if (pct >= 10) return("10-20%") - else return("0-10%") - }), - # Column 13: Imminent_prob (from script 31 or NA if not available) - Imminent_prob = { - if (!is.null(imminent_prob_data)) { - imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)] - } else { - rep(NA_real_, nrow(current_stats)) - } - }, - # Column 14: Status_Alert (based on harvest probability + crop health status) - # Priority order: Ready for harvest-check → Strong decline → Harvested/bare → NA - Status_Alert = { - sapply(seq_len(nrow(current_stats)), function(idx) { - imminent_prob <- Imminent_prob[idx] - age_w <- Age_week[idx] - weekly_ci_chg <- Weekly_ci_change[idx] - mean_ci_val <- Mean_CI[idx] - - # Priority 1: Ready for harvest-check (imminent + mature cane ≥12 months) - if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_w) && age_w >= 52) { - return("Ready for harvest-check") - } - - # Priority 2: Strong decline in crop health (drop ≥2 points but still >1.5) - if (!is.na(weekly_ci_chg) && weekly_ci_chg <= -2.0 && !is.na(mean_ci_val) && mean_ci_val > 1.5) { - return("Strong decline in crop health") - } - - # Priority 3: Harvested/bare (Mean CI < 1.5) - if (!is.na(mean_ci_val) && mean_ci_val < 1.5) { - return("Harvested/bare") - } - - # Fallback: no alert - NA_character_ - }) - }, - # Columns 15-16: CI-based columns already in current_stats (CI_range, CI_Percentiles) - # Column 17: Already in current_stats (CV) - # Column 18: Already in current_stats (CV_Trend_Short_Term) - # Column 19: CV_Trend_Long_Term (from current_stats - raw slope value) - # Column 19b: CV_Trend_Long_Term_Category (categorical interpretation of slope) - # 3 classes: More uniform (slope < -0.01), Stable uniformity (-0.01 to 0.01), Less uniform (slope > 0.01) - CV_Trend_Long_Term_Category = { - sapply(current_stats$CV_Trend_Long_Term, function(slope) { - if (is.na(slope)) { - return(NA_character_) - } else if (slope < -0.01) { - return("More uniform") - } else if (slope > 0.01) { - return("Less uniform") - } else { - return("Stable uniformity") - } - }) - }, - # Columns 20-21: Already in current_stats (Cloud_pct_clear, Cloud_category) - # Bin Cloud_pct_clear into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100% - Cloud_pct_clear = sapply(Cloud_pct_clear, function(pct) { - if (is.na(pct)) return(NA_character_) - if (pct >= 95) return("95-100%") - else if (pct >= 90) return("90-95%") - else if (pct >= 80) return("80-90%") - else if (pct >= 70) return("70-80%") - else if (pct >= 60) return("60-70%") - else if (pct >= 50) return("50-60%") - else if (pct >= 40) return("40-50%") - else if (pct >= 30) return("30-40%") - else if (pct >= 20) return("20-30%") - else if (pct >= 10) return("10-20%") - else return("0-10%") - }), - # Column 22: Gap_score (2σ below median - from kpi_utils.R) - Gap_score = { - if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) { - # Debug: Print first few gap scores - message(sprintf(" Joining %d gap scores to field_analysis (first 3: %s)", - nrow(gap_scores_df), - paste(head(gap_scores_df$gap_score, 3), collapse=", "))) - message(sprintf(" First 3 Field_ids in gap_scores_df: %s", - paste(head(gap_scores_df$Field_id, 3), collapse=", "))) - message(sprintf(" First 3 Field_ids in current_stats: %s", - paste(head(current_stats$Field_id, 3), collapse=", "))) - - gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)] - } else { - rep(NA_real_, nrow(current_stats)) - } - } - ) %>% - select( - all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert", - "Last_harvest_or_planting_date", "Age_week", "Phase", - "Germination_progress", - "Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles", - "CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category", - "Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score")) - ) - - message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns (including Gap_score)")) - - export_paths <- export_field_analysis_excel( - field_analysis_df, - NULL, - project_dir, - current_week, - current_year, - reports_dir - ) - - cat("\n--- Per-field Results (first 10) ---\n") - available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category") - available_cols <- available_cols[available_cols %in% names(field_analysis_df)] - if (length(available_cols) > 0) { - print(head(field_analysis_df[, available_cols], 10)) - } - - # ========== FARM-LEVEL KPI AGGREGATION ========== - # Aggregate the per-field analysis into farm-level summary statistics - - cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n") - - # Filter to only fields that have actual data (non-NA CI and valid acreage) - field_data <- field_analysis_df %>% - filter(!is.na(Mean_CI) & !is.na(Acreage)) %>% - filter(Acreage > 0) - - if (nrow(field_data) > 0) { - - if (nrow(field_data) > 0) { - # Create summary statistics - farm_summary <- list() - - # 1. PHASE DISTRIBUTION - phase_dist <- field_data %>% - group_by(Phase) %>% - summarise( - num_fields = n(), - acreage = sum(Acreage, na.rm = TRUE), - .groups = 'drop' - ) %>% - rename(Category = Phase) - - farm_summary$phase_distribution <- phase_dist - - # 2. STATUS ALERT DISTRIBUTION - status_dist <- field_data %>% - group_by(Status_Alert) %>% - summarise( - num_fields = n(), - acreage = sum(Acreage, na.rm = TRUE), - .groups = 'drop' - ) %>% - rename(Category = Status_Alert) - - farm_summary$status_distribution <- status_dist - - # 3. CLOUD COVERAGE DISTRIBUTION - cloud_dist <- field_data %>% - group_by(Cloud_category) %>% - summarise( - num_fields = n(), - acreage = sum(Acreage, na.rm = TRUE), - .groups = 'drop' - ) %>% - rename(Category = Cloud_category) - - farm_summary$cloud_distribution <- cloud_dist - - # 4. OVERALL STATISTICS - farm_summary$overall_stats <- data.frame( - total_fields = nrow(field_data), - total_acreage = sum(field_data$Acreage, na.rm = TRUE), - mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2), - median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2), - mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4), - week = current_week, - year = current_year, - date = as.character(end_date) - ) - - # Print summaries - cat("\n--- PHASE DISTRIBUTION ---\n") - print(phase_dist) - - cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n") - print(status_dist) - - cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n") - print(cloud_dist) - - cat("\n--- OVERALL FARM STATISTICS ---\n") - print(farm_summary$overall_stats) - - farm_kpi_results <- farm_summary - } else { - farm_kpi_results <- NULL - } - } else { - farm_kpi_results <- NULL - } - - # ========== FINAL SUMMARY ========== - - cat("\n", strrep("=", 70), "\n") - cat("80_CALCULATE_KPIs.R - COMPLETION SUMMARY\n") - cat(strrep("=", 70), "\n") - cat("Per-field analysis fields analyzed:", nrow(field_analysis_df), "\n") - cat("Excel export:", export_paths$excel, "\n") - cat("RDS export:", export_paths$rds, "\n") - cat("CSV export:", export_paths$csv, "\n") - - if (!is.null(farm_kpi_results)) { - cat("\nFarm-level KPIs: CALCULATED\n") - } else { - cat("\nFarm-level KPIs: SKIPPED (no valid tile data extracted)\n") - } - - cat("\n✓ Consolidated KPI calculation complete!\n") - cat(" - Per-field data exported\n") - cat(" - Farm-level KPIs calculated\n") - cat(" - All outputs in:", reports_dir, "\n\n") - - } else { - # Unknown client type - log warning and exit - warning(sprintf("Unknown client type: %s - no workflow matched", client_type)) + } else { + # Unknown client type - log warning and exit + warning(sprintf("Unknown client type: %s - no workflow matched", client_type)) cat("\n⚠️ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "") cat("Expected: 'agronomic_support' (aura) or 'cane_supply' (angata, etc.)\n") cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n") diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R index df6e319..bd06aa8 100644 --- a/r_app/80_utils_cane_supply.R +++ b/r_app/80_utils_cane_supply.R @@ -31,169 +31,710 @@ library(writexl) # ANGATA-SPECIFIC HELPER FUNCTIONS (Placeholder Section) # ============================================================================ -#' Placeholder: ANGATA harvest readiness assessment +#' Calculate acreage for each field from geometry +#' @param field_boundaries_sf sf object with field geometries +#' @return data.frame with field and acreage columns +calculate_field_acreages <- function(field_boundaries_sf) { + tryCatch({ + lookup_df <- field_boundaries_sf %>% + sf::st_drop_geometry() %>% + as.data.frame() %>% + mutate( + geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) { + tryCatch({ + sf::st_is_valid(field_boundaries_sf[idx, ]) + }, error = function(e) FALSE) + }), + area_ha = 0 + ) + + # Calculate area for valid geometries + for (idx in which(lookup_df$geometry_valid)) { + tryCatch({ + area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ])) + lookup_df$area_ha[idx] <- area_m2 / 10000 + }, error = function(e) { + lookup_df$area_ha[idx] <<- NA_real_ + }) + } + + # Convert hectares to acres + lookup_df %>% + mutate(acreage = area_ha / 0.404686) %>% + select(field, acreage) + }, error = function(e) { + message(paste("Warning: Could not calculate acreages from geometries -", e$message)) + data.frame(field = character(0), acreage = numeric(0)) + }) +} + +#' Calculate age in weeks from planting date #' -#' Future implementation will integrate ANGATA-specific harvest readiness criteria: -#' - Maturation phase detection (CI threshold-based) -#' - Field age tracking (days since planting) -#' - Weather-based ripeness indicators (if available) -#' - Historical yield correlations -#' -#' @param field_ci CI values for the field -#' @param field_age_days Days since planting -#' -#' @return Character string with harvest readiness assessment -assess_harvest_readiness <- function(field_ci, field_age_days = NULL) { - # Placeholder implementation - # Real version would check: - # - Mean CI > 3.5 (maturation threshold) - # - Age > 350 days - # - Weekly growth rate < threshold (mature plateau) - - if (is.null(field_ci) || all(is.na(field_ci))) { - return("No data available") +#' @param planting_date Date of planting +#' @param reference_date Date to calculate age relative to (typically end_date) +#' @return Numeric age in weeks (rounded to nearest week) +calculate_age_week <- function(planting_date, reference_date) { + if (is.na(planting_date)) { + return(NA_real_) } - - mean_ci <- mean(field_ci, na.rm = TRUE) - - if (mean_ci > 3.5) { - return("Ready for harvest") - } else if (mean_ci > 2.5) { - return("Approaching harvest readiness") + round(as.numeric(difftime(reference_date, planting_date, units = "weeks")), 0) +} + +#' Assign crop phase based on age in weeks +#' +#' @param age_week Numeric age in weeks +#' @return Character phase name +calculate_phase <- function(age_week) { + if (is.na(age_week)) return(NA_character_) + if (age_week >= 0 & age_week < 4) return("Germination") + if (age_week >= 4 & age_week < 17) return("Tillering") + if (age_week >= 17 & age_week < 39) return("Grand Growth") + if (age_week >= 39) return("Maturation") + NA_character_ +} + +#' Bin percentage into 10% intervals with special handling for 90-100% +#' +#' @param pct Numeric percentage value (0-100) +#' @return Character bin label +bin_percentage <- function(pct) { + if (is.na(pct)) return(NA_character_) + if (pct >= 95) return("95-100%") + else if (pct >= 90) return("90-95%") + else if (pct >= 80) return("80-90%") + else if (pct >= 70) return("70-80%") + else if (pct >= 60) return("60-70%") + else if (pct >= 50) return("50-60%") + else if (pct >= 40) return("40-50%") + else if (pct >= 30) return("30-40%") + else if (pct >= 20) return("20-30%") + else if (pct >= 10) return("10-20%") + else return("0-10%") +} + +#' Calculate germination progress from CI threshold percentage +#' +#' @param pct_pixels_ci_gte_2 Percentage of pixels with CI >= 2 +#' @return Character bin label +calculate_germination_progress <- function(pct_pixels_ci_gte_2) { + bin_percentage(pct_pixels_ci_gte_2) +} + +#' Categorize CV trend (long-term slope) into qualitative labels +#' +#' @param cv_slope Numeric slope from CV trend analysis +#' @return Character category: "More uniform", "Stable uniformity", or "Less uniform" +categorize_cv_trend_long_term <- function(cv_slope) { + if (is.na(cv_slope)) { + return(NA_character_) + } else if (cv_slope < -0.01) { + return("More uniform") + } else if (cv_slope > 0.01) { + return("Less uniform") } else { - return("Not ready - continue monitoring") + return("Stable uniformity") } } -#' Placeholder: ANGATA supply chain status flags -#' -#' Future implementation will add supply chain-specific status indicators: -#' - Harvest scheduling readiness -#' - Equipment availability impact -#' - Transportation/logistics flags -#' - Quality parameter flags -#' -#' @param field_analysis Data frame with field analysis results -#' -#' @return Data frame with supply chain status columns -assess_supply_chain_status <- function(field_analysis) { - # Placeholder: return field analysis as-is - # Real version would add columns for: - # - schedule_ready (bool) - # - harvest_window_days (numeric) - # - transportation_priority (char) - # - quality_flags (char) +#' Determine status alert based on harvest probability and crop health +#' Priority order: +#' 1. Ready for harvest-check (imminent + mature ≥12 months) +#' 2. Strong decline in crop health (drop ≥2 points but still >1.5) +#' 3. Harvested/bare (Mean CI < 1.5) +#' @param imminent_prob Numeric harvest probability +#' @param age_week Numeric age in weeks +#' @param weekly_ci_change Numeric weekly CI change +#' @param mean_ci Numeric mean CI value +#' @return Character status alert or NA +calculate_status_alert <- function(imminent_prob, age_week, weekly_ci_change, mean_ci) { + # Priority 1: Ready for harvest-check + if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_week) && age_week >= 52) { + return("Ready for harvest-check") + } - return(field_analysis) + # Priority 2: Strong decline + if (!is.na(weekly_ci_change) && weekly_ci_change <= -2.0 && !is.na(mean_ci) && mean_ci > 1.5) { + return("Strong decline in crop health") + } + + # Priority 3: Harvested/bare + if (!is.na(mean_ci) && mean_ci < 1.5) { + return("Harvested/bare") + } + + # Fallback: no alert + NA_character_ } +#' Calculate Gap Filling Score KPI (2σ method) +#' @param ci_raster Current week CI raster +#' @param field_boundaries Field boundaries +#' @return Data frame with field-level gap filling scores +calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { + safe_log("Calculating Gap Filling Score KPI (placeholder)") + + # Handle both sf and SpatVector inputs + if (!inherits(field_boundaries, "SpatVector")) { + field_boundaries_vect <- terra::vect(field_boundaries) + } else { + field_boundaries_vect <- field_boundaries + } + + # Ensure field_boundaries_vect is valid and matches field_boundaries dimensions + n_fields_vect <- length(field_boundaries_vect) + n_fields_sf <- nrow(field_boundaries) + + if (n_fields_sf != n_fields_vect) { + warning(paste("Field boundary mismatch: nrow(field_boundaries)=", n_fields_sf, "vs length(field_boundaries_vect)=", n_fields_vect, ". Using actual SpatVector length.")) + } + + field_results <- data.frame() + + for (i in seq_len(nrow(field_boundaries))) { + field_name <- field_boundaries$field[i] + sub_field_name <- field_boundaries$sub_field[i] + field_vect <- field_boundaries_vect[i] + + # Extract CI values using helper function + ci_values <- extract_ci_values(ci_raster, field_vect) + valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)] + + if (length(valid_values) > 1) { + # Gap score using 2σ below median to detect outliers + median_ci <- median(valid_values) + sd_ci <- sd(valid_values) + outlier_threshold <- median_ci - (2 * sd_ci) + low_ci_pixels <- sum(valid_values < outlier_threshold) + total_pixels <- length(valid_values) + gap_score <- (low_ci_pixels / total_pixels) * 100 + + # Classify gap severity + gap_level <- dplyr::case_when( + gap_score < 10 ~ "Minimal", + gap_score < 25 ~ "Moderate", + TRUE ~ "Significant" + ) + + field_results <- rbind(field_results, data.frame( + field = field_name, + sub_field = sub_field_name, + gap_level = gap_level, + gap_score = gap_score, + mean_ci = mean(valid_values), + outlier_threshold = outlier_threshold + )) + } else { + # Not enough valid data, fill with NA row + field_results <- rbind(field_results, data.frame( + field = field_name, + sub_field = sub_field_name, + gap_level = NA_character_, + gap_score = NA_real_, + mean_ci = NA_real_, + outlier_threshold = NA_real_ + )) + } + } + return(list(field_results = field_results)) +} + + +#' Calculate gap filling scores for all per-field mosaics +#' This is a wrapper function that processes multiple per-field mosaic files +#' and calculates gap scores for each field. +#' @param per_field_files Character vector of paths to per-field mosaic TIFFs +#' @param field_boundaries_sf sf object with field geometries +#' @return data.frame with Field_id and gap_score columns +calculate_gap_scores <- function(per_field_files, field_boundaries_sf) { + message("\nCalculating gap filling scores (2σ method)...") + message(paste(" Using per-field mosaics for", length(per_field_files), "fields")) + + field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field) + + process_gap_for_field <- function(field_file) { + field_id <- basename(dirname(field_file)) + field_bounds <- field_boundaries_by_id[[field_id]] + + if (is.null(field_bounds) || nrow(field_bounds) == 0) { + return(data.frame(Field_id = field_id, gap_score = NA_real_)) + } + + tryCatch({ + field_raster <- terra::rast(field_file) + ci_band_name <- "CI" + if (!(ci_band_name %in% names(field_raster))) { + return(data.frame(Field_id = field_id, gap_score = NA_real_)) + } + field_ci_band <- field_raster[[ci_band_name]] + names(field_ci_band) <- "CI" + + gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds) + + if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) { + return(data.frame(Field_id = field_id, gap_score = NA_real_)) + } + + gap_scores <- gap_result$field_results + gap_scores$Field_id <- gap_scores$field + gap_scores <- gap_scores[, c("Field_id", "gap_score")] + + stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE)) + }, error = function(e) { + message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message)) + data.frame(Field_id = field_id, gap_score = NA_real_) + }) + } + + # Process fields sequentially with progress bar + message(" Processing gap scores for ", length(per_field_files), " fields...") + pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50) + + results_list <- lapply(seq_along(per_field_files), function(idx) { + result <- process_gap_for_field(per_field_files[[idx]]) + utils::setTxtProgressBar(pb, idx) + result + }) + close(pb) + + gap_scores_df <- dplyr::bind_rows(results_list) + + if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) { + gap_scores_df <- gap_scores_df %>% + dplyr::group_by(Field_id) %>% + dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop") + + message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields")) + message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", + round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%")) + } else { + message(" WARNING: No gap scores calculated from per-field mosaics") + gap_scores_df <- NULL + } + + return(gap_scores_df) +} + +#' Build complete per-field KPI dataframe with all 22 columns +#' @param current_stats data.frame with current week statistics from load_or_calculate_weekly_stats +#' @param planting_dates data.frame with field_id and planting_date columns +#' @param imminent_prob_data data.frame with Field_id and Imminent_prob_actual columns (or NULL) +#' @param gap_scores_df data.frame with Field_id and gap_score columns (or NULL) +#' @param field_boundaries_sf sf object with field geometries +#' @param end_date Date object for current report date +#' @return data.frame with all 22 KPI columns +calculate_all_field_kpis <- function(current_stats, + planting_dates, + imminent_prob_data, + gap_scores_df, + field_boundaries_sf, + end_date) { + + message("\nBuilding final field analysis output...") + + # Pre-calculate acreages + acreage_lookup <- calculate_field_acreages(field_boundaries_sf) + + field_analysis_df <- current_stats %>% + mutate( + # Column 2: Farm_Section (user fills manually) + Farm_Section = NA_character_, + + # Column 3: Field_name (from GeoJSON) + Field_name = Field_id, + + # Column 4: Acreage (from geometry) + Acreage = { + acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)] + if_else(is.na(acreages_vec), 0, acreages_vec) + }, + + # Column 8: Last_harvest_or_planting_date (from harvest.xlsx) + Last_harvest_or_planting_date = { + planting_dates$planting_date[match(Field_id, planting_dates$field_id)] + }, + + # Column 9: Age_week (calculated) + Age_week = { + sapply(seq_len(nrow(current_stats)), function(idx) { + calculate_age_week(Last_harvest_or_planting_date[idx], end_date) + }) + }, + + # Column 10: Phase (based on Age_week) + Phase = sapply(Age_week, calculate_phase), + + # Column 12: Germination_progress (binned Pct_pixels_CI_gte_2) + Germination_progress = sapply(Pct_pixels_CI_gte_2, calculate_germination_progress), + + # Column 13: Imminent_prob (from script 31 or NA) + Imminent_prob = { + if (!is.null(imminent_prob_data)) { + imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)] + } else { + rep(NA_real_, nrow(current_stats)) + } + }, + + # Column 14: Status_Alert (multi-priority logic) + Status_Alert = { + sapply(seq_len(nrow(current_stats)), function(idx) { + calculate_status_alert( + Imminent_prob[idx], + Age_week[idx], + Weekly_ci_change[idx], + Mean_CI[idx] + ) + }) + }, + + # Column 19b: CV_Trend_Long_Term_Category (categorical slope) + CV_Trend_Long_Term_Category = sapply(current_stats$CV_Trend_Long_Term, categorize_cv_trend_long_term), + + # Column 21: Cloud_pct_clear (binned into intervals) + Cloud_pct_clear = sapply(Cloud_pct_clear, bin_percentage), + + # Column 22: Gap_score (2σ method) + Gap_score = { + if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) { + gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)] + } else { + rep(NA_real_, nrow(current_stats)) + } + } + ) %>% + select( + all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert", + "Last_harvest_or_planting_date", "Age_week", "Phase", + "Germination_progress", + "Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles", + "CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category", + "Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score")) + ) + + message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns")) + + return(field_analysis_df) +} + +#' Aggregate per-field data into farm-level KPI summary +#' +#' @param field_analysis_df data.frame with per-field KPI data +#' @param current_week Numeric current week number +#' @param current_year Numeric current year +#' @param end_date Date object for current report date +#' @return List with phase_distribution, status_distribution, cloud_distribution, overall_stats +calculate_farm_level_kpis <- function(field_analysis_df, current_week, current_year, end_date) { + + cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n") + + # Filter to only fields with actual data + field_data <- field_analysis_df %>% + filter(!is.na(Mean_CI) & !is.na(Acreage)) %>% + filter(Acreage > 0) + + if (nrow(field_data) == 0) { + message("No valid field data for farm-level aggregation") + return(NULL) + } + + farm_summary <- list() + + # 1. PHASE DISTRIBUTION + phase_dist <- field_data %>% + group_by(Phase) %>% + summarise( + num_fields = n(), + acreage = sum(Acreage, na.rm = TRUE), + .groups = 'drop' + ) %>% + rename(Category = Phase) + + farm_summary$phase_distribution <- phase_dist + + # 2. STATUS ALERT DISTRIBUTION + status_dist <- field_data %>% + group_by(Status_Alert) %>% + summarise( + num_fields = n(), + acreage = sum(Acreage, na.rm = TRUE), + .groups = 'drop' + ) %>% + rename(Category = Status_Alert) + + farm_summary$status_distribution <- status_dist + + # 3. CLOUD COVERAGE DISTRIBUTION + cloud_dist <- field_data %>% + group_by(Cloud_category) %>% + summarise( + num_fields = n(), + acreage = sum(Acreage, na.rm = TRUE), + .groups = 'drop' + ) %>% + rename(Category = Cloud_category) + + farm_summary$cloud_distribution <- cloud_dist + + # 4. OVERALL STATISTICS + farm_summary$overall_stats <- data.frame( + total_fields = nrow(field_data), + total_acreage = sum(field_data$Acreage, na.rm = TRUE), + mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2), + median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2), + mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4), + week = current_week, + year = current_year, + date = as.character(end_date) + ) + + # Print summaries + cat("\n--- PHASE DISTRIBUTION ---\n") + print(phase_dist) + + cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n") + print(status_dist) + + cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n") + print(cloud_dist) + + cat("\n--- OVERALL FARM STATISTICS ---\n") + print(farm_summary$overall_stats) + + return(farm_summary) +} + + # ============================================================================ # ORCHESTRATOR FOR CANE_SUPPLY WORKFLOWS # ============================================================================ -#' Orchestrate ANGATA weekly field analysis and reporting +#' Main orchestrator for CANE_SUPPLY per-field KPI workflow #' -#' Main entry point for CANE_SUPPLY (ANGATA, etc.) workflows. -#' Currently uses common utilities; future versions will add client-specific logic. +#' This function coordinates all KPI calculations for the per-field analysis workflow. +#' It loads historical data, calculates current/previous week statistics, computes +#' all 22 KPI columns, and aggregates farm-level summaries. #' -#' @param field_boundaries_sf SF object with field geometries -#' @param current_week ISO week number (1-53) -#' @param current_year ISO week year -#' @param mosaic_dir Directory containing weekly mosaics -#' @param field_boundaries_path Path to field GeoJSON -#' @param harvesting_data Data frame with harvest data (optional) -#' @param output_dir Directory for exports -#' @param data_dir Base data directory -#' -#' @return List with field analysis results -#' -#' @details -#' This function: -#' 1. Loads weekly mosaic and extracts field statistics -#' 2. Calculates field statistics (using common utilities) -#' 3. Prepares field analysis summary -#' 4. Exports to Excel/CSV/RDS -#' 5. (Future) Applies ANGATA-specific assessments -#' -calculate_field_analysis_cane_supply <- function( - field_boundaries_sf, - current_week, - current_year, - mosaic_dir, - field_boundaries_path = NULL, - harvesting_data = NULL, - output_dir = file.path(PROJECT_DIR, "output"), - data_dir = NULL -) { +#' @param setup List with directory paths (kpi_reports_dir, data_dir, etc.) +#' @param client_config List with workflow configuration (script_91_compatible, outputs) +#' @param end_date Date object for current report date +#' @param project_dir Character project identifier +#' @param weekly_mosaic Character path to weekly mosaic directory +#' @param daily_vals_dir Character path to daily values directory +#' @param field_boundaries_sf sf object with field geometries +#' @param data_dir Character path to data directory +#' @return List with field_analysis_df, farm_kpi_results, export_paths +calculate_field_analysis_cane_supply <- function(setup, + client_config, + end_date, + project_dir, + weekly_mosaic, + daily_vals_dir, + field_boundaries_sf, + data_dir) { - message("\n============ CANE SUPPLY FIELD ANALYSIS (ANGATA, etc.) ============") + message("\n", strrep("=", 70)) + message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)") + message(strrep("=", 70)) - # Load current week mosaic - message("Loading current week mosaic...") - current_mosaic <- load_weekly_ci_mosaic(mosaic_dir, current_week, current_year) + reports_dir <- setup$kpi_reports_dir - if (is.null(current_mosaic)) { - warning(paste("Could not load current week mosaic for week", current_week, current_year)) - return(NULL) + # ========== PHASE 1: WEEKLY ANALYSIS SETUP ========== + message("\n", strrep("-", 70)) + message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ") + message(strrep("-", 70)) + + weeks <- calculate_week_numbers(end_date) + current_week <- weeks$current_week + current_year <- weeks$current_year + previous_week <- weeks$previous_week + previous_year <- weeks$previous_year + + message(paste("Week:", current_week, "/ Year (ISO 8601):", current_year)) + + # Find per-field weekly mosaics + message("Finding per-field weekly mosaics...") + + if (!dir.exists(weekly_mosaic)) { + stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic, + "\nScript 40 (mosaic creation) must be run first.")) } - # Extract field statistics - message("Extracting field statistics from current mosaic...") - field_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf) + field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE) + field_dirs <- field_dirs[field_dirs != ""] - # Load previous week stats for comparison - message("Loading historical data for trends...") - target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1) - previous_stats <- NULL - - previous_mosaic <- load_weekly_ci_mosaic(mosaic_dir, target_prev$week, target_prev$year) - if (!is.null(previous_mosaic)) { - previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf) + if (length(field_dirs) == 0) { + stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic, + "\nScript 40 must create weekly_mosaic/{FIELD}/ structure.")) } - # Calculate 4-week historical trend - message("Calculating field trends...") - ci_rds_path <- file.path(data_dir, "combined_CI", "combined_CI_data.rds") + # Verify we have mosaics for this week + single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year) + per_field_files <- c() + for (field in field_dirs) { + field_mosaic_dir <- file.path(weekly_mosaic, field) + files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE) + if (length(files) > 0) { + per_field_files <- c(per_field_files, files) + } + } - field_analysis <- calculate_field_statistics( - field_stats = field_stats, - previous_stats = previous_stats, + if (length(per_field_files) == 0) { + stop(paste("ERROR: No mosaics found for week", current_week, "year", current_year, + "\nExpected pattern:", single_file_pattern, + "\nChecked:", weekly_mosaic)) + } + + message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics")) + + # ========== PHASE 2: LOAD HISTORICAL DATA ========== + message("\nLoading historical field data for trend calculations...") + num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG) + message(paste(" Attempting to load up to", num_weeks_to_load, "weeks of historical data...")) + + allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv) + + historical_data <- load_historical_field_data( + project_dir, current_week, current_year, reports_dir, + num_weeks = num_weeks_to_load, + auto_generate = allow_auto_gen, + field_boundaries_sf = field_boundaries_sf, + daily_vals_dir = daily_vals_dir + ) + + # ========== PHASE 3: LOAD PLANTING DATES ========== + message("\nLoading harvest data from harvest.xlsx for planting dates...") + harvest_file_path <- file.path(data_dir, "harvest.xlsx") + + harvesting_data <- tryCatch({ + if (file.exists(harvest_file_path)) { + harvest_raw <- readxl::read_excel(harvest_file_path) + harvest_raw$season_start <- as.Date(harvest_raw$season_start) + harvest_raw$season_end <- as.Date(harvest_raw$season_end) + message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows")) + harvest_raw + } else { + message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path)) + NULL + } + }, error = function(e) { + message(paste(" ERROR loading harvest.xlsx:", e$message)) + NULL + }) + + planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf) + + if (is.null(planting_dates) || nrow(planting_dates) == 0) { + message("WARNING: No planting dates available. Using NA for all fields.") + planting_dates <- data.frame( + field_id = field_boundaries_sf$field, + planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)), + stringsAsFactors = FALSE + ) + } + + # ========== PHASE 4: CALCULATE WEEKLY STATISTICS ========== + message("\nUsing modular RDS-based approach for weekly statistics...") + + # Current week + message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...") + current_stats <- load_or_calculate_weekly_stats( week_num = current_week, year = current_year, - ci_rds_path = ci_rds_path, + project_dir = project_dir, field_boundaries_sf = field_boundaries_sf, - harvesting_data = harvesting_data + mosaic_dir = weekly_mosaic, + reports_dir = reports_dir, + report_date = end_date + ) + message(paste(" ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week")) + + # Previous week + message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...") + prev_report_date <- end_date - 7 + + prev_stats <- load_or_calculate_weekly_stats( + week_num = previous_week, + year = previous_year, + project_dir = project_dir, + field_boundaries_sf = field_boundaries_sf, + mosaic_dir = weekly_mosaic, + reports_dir = reports_dir, + report_date = prev_report_date + ) + message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week")) + + # ========== PHASE 5: CALCULATE TRENDS ========== + message("\n3. Calculating trend columns...") + current_stats <- calculate_kpi_trends( + current_stats, prev_stats, + project_dir = project_dir, + reports_dir = reports_dir, + current_week = current_week, + year = current_year + ) + message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed")) + + # ========== PHASE 6: LOAD HARVEST PROBABILITIES ========== + message("\n4. Loading harvest probabilities from script 31...") + harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats") + harvest_prob_file <- file.path(harvest_prob_dir, + sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year)) + message(paste(" Looking for:", harvest_prob_file)) + + imminent_prob_data <- tryCatch({ + if (file.exists(harvest_prob_file)) { + prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE, + col_types = readr::cols(.default = readr::col_character())) + message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields")) + prob_df %>% + select(field, imminent_prob, detected_prob) %>% + rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob) + } else { + message(paste(" INFO: Harvest probabilities not available (script 31 not run)")) + NULL + } + }, error = function(e) { + message(paste(" WARNING: Could not load harvest probabilities:", e$message)) + NULL + }) + + # ========== PHASE 7: CALCULATE GAP SCORES ========== + gap_scores_df <- calculate_gap_scores(per_field_files, field_boundaries_sf) + + # ========== PHASE 8: BUILD FINAL PER-FIELD DATAFRAME ========== + field_analysis_df <- calculate_all_field_kpis( + current_stats = current_stats, + planting_dates = planting_dates, + imminent_prob_data = imminent_prob_data, + gap_scores_df = gap_scores_df, + field_boundaries_sf = field_boundaries_sf, + end_date = end_date ) - if (is.null(field_analysis)) { - message("Could not generate field analysis") - return(NULL) - } - - # Generate summary - message("Generating summary statistics...") - summary_df <- generate_field_analysis_summary(field_analysis) - - # Export - message("Exporting field analysis...") + # ========== PHASE 9: EXPORT PER-FIELD RESULTS ========== export_paths <- export_field_analysis_excel( - field_analysis, - summary_df, - PROJECT_DIR, + field_analysis_df, + NULL, + project_dir, current_week, current_year, - output_dir + reports_dir ) - message(paste("\n✓ CANE_SUPPLY field analysis complete. Week", current_week, current_year, "\n")) + # cat("\n--- Per-field Results (first 10) ---\n") + # available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category") + # available_cols <- available_cols[available_cols %in% names(field_analysis_df)] + # if (length(available_cols) > 0) { + # print(head(field_analysis_df[, available_cols], 10)) + # } - result <- list( - field_analysis = field_analysis, - summary = summary_df, - exports = export_paths - ) - - return(result) + # # ========== PHASE 10: CALCULATE FARM-LEVEL KPIS ========== + # farm_kpi_results <- calculate_farm_level_kpis( + # field_analysis_df, + # current_week, + # current_year, + # end_date + # ) } # ============================================================================ From 13015f6ec0c4dccb4b66eb9457869bef5605b4cc Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Thu, 12 Feb 2026 13:50:57 +0100 Subject: [PATCH 02/30] Fix the output path so excel, rds and csv are saved in the same folder --- r_app/80_utils_common.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r_app/80_utils_common.R b/r_app/80_utils_common.R index 3c0f75f..1a6b989 100644 --- a/r_app/80_utils_common.R +++ b/r_app/80_utils_common.R @@ -709,8 +709,8 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre ) ) - rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds") - rds_path <- file.path(reports_dir, rds_filename) + rds_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".rds") + rds_path <- file.path(output_subdir, rds_filename) saveRDS(kpi_data, rds_path) message(paste("✓ Field analysis RDS exported to:", rds_path)) From 750db99a41249acc5bc47c2fd3b375f25384d358 Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Thu, 12 Feb 2026 15:43:58 +0100 Subject: [PATCH 03/30] fixed the KPIs calculation for agronomic utils, fixed the path for saving the excel and rds, updated the main file --- r_app/80_calculate_kpis.R | 2 +- r_app/80_utils_agronomic_support.R | 509 +++++++++++------------------ 2 files changed, 195 insertions(+), 316 deletions(-) diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index a825434..65496ce 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -370,7 +370,7 @@ main <- function() { current_year <- as.numeric(format(end_date, "%G")) # Call with correct signature - kpi_results <- calculate_all_kpis( + kpi_results <- calculate_all_field_analysis_agronomic_support( field_boundaries_sf = field_boundaries_sf, current_week = current_week, current_year = current_year, diff --git a/r_app/80_utils_agronomic_support.R b/r_app/80_utils_agronomic_support.R index 421b35f..d649775 100644 --- a/r_app/80_utils_agronomic_support.R +++ b/r_app/80_utils_agronomic_support.R @@ -1,8 +1,8 @@ # 80_UTILS_AGRONOMIC_SUPPORT.R # ============================================================================ -# SPECIFIC KPI UTILITIES (SCRIPT 80 - CLIENT TYPE: agronomic_support) +# AURA-SPECIFIC KPI UTILITIES (SCRIPT 80 - CLIENT TYPE: agronomic_support) # -# Contains all 6 KPI calculation functions and helpers: +# Contains all 6 AURA KPI calculation functions and helpers: # - Field uniformity KPI (CV-based, spatial autocorrelation) # - Area change KPI (week-over-week CI changes) # - TCH forecasted KPI (tonnage projections from harvest data) @@ -12,7 +12,7 @@ # - KPI reporting (summary tables, field details, text interpretation) # - KPI export (Excel, RDS, data export) # -# Orchestrator: calculate_all_kpis() +# Orchestrator: calculate_all_field_analysis_agronomic_support() # Dependencies: 00_common_utils.R (safe_log), sourced from common # Used by: 80_calculate_kpis.R (when client_type == "agronomic_support") # ============================================================================ @@ -24,6 +24,8 @@ library(tidyr) library(readxl) library(writexl) library(spdep) +library(caret) +library(CAST) # ============================================================================ # SHARED HELPER FUNCTIONS (NOW IN 80_UTILS_COMMON.R) @@ -65,7 +67,7 @@ prepare_predictions <- function(harvest_model, field_data, scenario = "optimisti } # ============================================================================ -# KPI CALCULATION FUNCTIONS (6 KPIS) +# AURA KPI CALCULATION FUNCTIONS (6 KPIS) # ============================================================================ #' KPI 1: Calculate field uniformity based on CV and spatial autocorrelation @@ -75,36 +77,52 @@ prepare_predictions <- function(harvest_model, field_data, scenario = "optimisti #' #' @param ci_pixels_by_field List of CI pixel arrays for each field #' @param field_boundaries_sf SF object with field geometries -#' @param ci_raster Raster object with CI values (for spatial autocorrelation) +#' @param ci_band Raster band with CI values #' #' @return Data frame with field_idx, cv_value, morans_i, uniformity_score, interpretation -calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_sf, ci_raster = NULL) { - results_list <- list() +calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_sf, ci_band = NULL) { + result <- data.frame( + field_idx = integer(), + cv_value = numeric(), + morans_i = numeric(), + uniformity_score = numeric(), + interpretation = character(), + stringsAsFactors = FALSE + ) for (field_idx in seq_len(nrow(field_boundaries_sf))) { ci_pixels <- ci_pixels_by_field[[field_idx]] if (is.null(ci_pixels) || length(ci_pixels) == 0) { - results_list[[length(results_list) + 1]] <- list( + result <- rbind(result, data.frame( field_idx = field_idx, cv_value = NA_real_, morans_i = NA_real_, uniformity_score = NA_real_, - interpretation = "No data" - ) + interpretation = "No data", + stringsAsFactors = FALSE + )) next } cv_val <- calculate_cv(ci_pixels) morans_i <- NA_real_ - if (!is.null(ci_raster)) { - morans_result <- calculate_spatial_autocorrelation(ci_raster, field_boundaries_sf[field_idx, ]) - if (is.list(morans_result)) { - morans_i <- morans_result$morans_i - } else { - morans_i <- morans_result - } + if (!is.null(ci_band) && inherits(ci_band, "SpatRaster")) { + tryCatch({ + # Get single field geometry + single_field <- field_boundaries_sf[field_idx, ] + morans_result <- calculate_spatial_autocorrelation(ci_band, single_field) + + if (is.list(morans_result)) { + morans_i <- morans_result$morans_i + } else { + morans_i <- morans_result + } + }, error = function(e) { + message(paste(" Warning: Spatial autocorrelation failed for field", field_idx, ":", e$message)) + morans_i <<- NA_real_ + }) } # Normalize CV (0-1 scale, invert so lower CV = higher score) @@ -135,18 +153,15 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_ interpretation <- "Very poor uniformity" } - results_list[[length(results_list) + 1]] <- list( + result <- rbind(result, data.frame( field_idx = field_idx, cv_value = cv_val, morans_i = morans_i, uniformity_score = round(uniformity_score, 3), - interpretation = interpretation - ) - } - - # Convert accumulated list to data frame in a single operation - result <- do.call(rbind, lapply(results_list, as.data.frame)) - + interpretation = interpretation, + stringsAsFactors = FALSE + )) + } return(result) } @@ -214,12 +229,19 @@ calculate_tch_forecasted_kpi <- function(field_statistics, harvesting_data = NUL next } - if (is.na(result$mean_ci[i])) { - result$tch_forecasted[i] <- NA_real_ - result$tch_lower_bound[i] <- NA_real_ - result$tch_upper_bound[i] <- NA_real_ - result$confidence[i] <- "No data" - } + ci_val <- result$mean_ci[i] + + # Simple linear model + tch_est <- 50 + (ci_val * 10) + + # Confidence interval based on CI range + tch_lower <- tch_est * 0.85 + tch_upper <- tch_est * 1.15 + + result$tch_forecasted[i] <- round(tch_est, 2) + result$tch_lower_bound[i] <- round(tch_lower, 2) + result$tch_upper_bound[i] <- round(tch_upper, 2) + result$confidence[i] <- "Medium" } return(result) @@ -338,190 +360,107 @@ calculate_weed_presence_kpi <- function(ci_pixels_by_field) { return(result) } -# #' Calculate Gap Filling Score KPI (placeholder) -# #' @param ci_raster Current week CI raster -# #' @param field_boundaries Field boundaries -# #' @return Data frame with field-level gap filling scores -# calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { -# # Handle both sf and SpatVector inputs -# if (!inherits(field_boundaries, "SpatVector")) { -# field_boundaries_vect <- terra::vect(field_boundaries) -# } else { -# field_boundaries_vect <- field_boundaries -# } +#' Calculate Gap Filling Score KPI (placeholder) +#' @param ci_raster Current week CI raster +#' @param field_boundaries Field boundaries +#' @return List with summary data frame and field-level results data frame +calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { + # Handle both sf and SpatVector inputs + if (!inherits(field_boundaries, "SpatVector")) { + field_boundaries_vect <- terra::vect(field_boundaries) + } else { + field_boundaries_vect <- field_boundaries + } -# results_list <- list() + field_results <- data.frame() -# # Ensure field_boundaries_vect is valid and matches field_boundaries dimensions -# n_fields_vect <- length(field_boundaries_vect) -# n_fields_sf <- nrow(field_boundaries) - -# if (n_fields_sf != n_fields_vect) { -# warning(paste("Field boundary mismatch: nrow(field_boundaries)=", n_fields_sf, "vs length(field_boundaries_vect)=", n_fields_vect, ". Using actual SpatVector length.")) -# } + for (i in seq_len(nrow(field_boundaries))) { + field_name <- if ("field" %in% names(field_boundaries)) field_boundaries$field[i] else NA_character_ + sub_field_name <- if ("sub_field" %in% names(field_boundaries)) field_boundaries$sub_field[i] else NA_character_ + field_vect <- field_boundaries_vect[i] -# for (i in seq_len(n_fields_vect)) { -# field_vect <- field_boundaries_vect[i] + # Extract CI values using helper function + ci_values <- extract_ci_values(ci_raster, field_vect) + valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)] -# # Extract CI values using helper function -# ci_values <- extract_ci_values(ci_raster, field_vect) -# valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)] + if (length(valid_values) > 1) { + # Gap score using 2σ below median to detect outliers + median_ci <- median(valid_values) + sd_ci <- sd(valid_values) + outlier_threshold <- median_ci - (2 * sd_ci) + low_ci_pixels <- sum(valid_values < outlier_threshold) + total_pixels <- length(valid_values) + gap_score <- round((low_ci_pixels / total_pixels) * 100, 2) -# if (length(valid_values) > 1) { -# # Calculate % of valid (non-NA) values = gap filling success -# total_pixels <- length(ci_values) -# valid_pixels <- length(valid_values) -# gap_filling_success <- (valid_pixels / total_pixels) * 100 -# na_percent <- ((total_pixels - valid_pixels) / total_pixels) * 100 + # Classify gap severity + gap_level <- dplyr::case_when( + gap_score < 10 ~ "Minimal", + gap_score < 25 ~ "Moderate", + TRUE ~ "Significant" + ) -# results_list[[length(results_list) + 1]] <- list( -# field_idx = i, -# gap_filling_success = round(gap_filling_success, 2), -# na_percent_pre_interpolation = round(na_percent, 2), -# mean_ci = round(mean(valid_values), 2) -# ) -# } else { -# # Not enough valid data -# results_list[[length(results_list) + 1]] <- list( -# field_idx = i, -# gap_filling_success = NA_real_, -# na_percent_pre_interpolation = NA_real_, -# mean_ci = NA_real_ -# ) -# } -# } + field_results <- rbind(field_results, data.frame( + field = field_name, + sub_field = sub_field_name, + gap_level = gap_level, + gap_score = gap_score, + mean_ci = mean(valid_values), + outlier_threshold = outlier_threshold + )) + } else { + # Not enough valid data, fill with NA row + field_results <- rbind(field_results, data.frame( + field = field_name, + sub_field = sub_field_name, + gap_level = NA_character_, + gap_score = NA_real_, + mean_ci = NA_real_, + outlier_threshold = NA_real_ + )) + } + } + # Summarize results + gap_summary <- field_results %>% + dplyr::group_by(gap_level) %>% + dplyr::summarise(field_count = n(), .groups = 'drop') %>% + dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1)) - # Convert accumulated list to data frame in a single operation - field_results <- do.call(rbind, lapply(results_list, as.data.frame)) - - return(field_results) + return(list(summary = gap_summary, field_results = field_results)) } # ============================================================================ # KPI ORCHESTRATOR AND REPORTING # ============================================================================ -#' Create summary tables for all 6 KPIs (AGGREGATED farm-level summaries) +#' Create summary tables for all 6 KPIs #' -#' @param all_kpis List containing results from all 6 KPI functions (per-field data) +#' @param all_kpis List containing results from all 6 KPI functions #' -#' @return List of summary data frames ready for reporting (farm-level aggregates) +#' @return List of summary data frames ready for reporting create_summary_tables <- function(all_kpis) { - - # ========================================== - # 1. UNIFORMITY SUMMARY (count by interpretation) - # ========================================== - uniformity_summary <- all_kpis$uniformity %>% - group_by(interpretation) %>% - summarise( - field_count = n(), - avg_cv = mean(cv_value, na.rm = TRUE), - avg_morans_i = mean(morans_i, na.rm = TRUE), - .groups = 'drop' - ) %>% - rename( - Status = interpretation, - `Field Count` = field_count, - `Avg CV` = avg_cv, - `Avg Moran's I` = avg_morans_i - ) - - # ========================================== - # 2. AREA CHANGE SUMMARY (improving/stable/declining counts) - # ========================================== - area_change_summary <- all_kpis$area_change %>% - group_by(interpretation) %>% - summarise( - field_count = n(), - avg_ci_change = mean(mean_ci_pct_change, na.rm = TRUE), - .groups = 'drop' - ) %>% - rename( - Status = interpretation, - `Field Count` = field_count, - `Avg CI Change %` = avg_ci_change - ) - - # ========================================== - # 3. TCH FORECAST SUMMARY (yield statistics) - # ========================================== - tch_summary <- all_kpis$tch_forecasted %>% - summarise( - avg_tch = mean(tch_forecasted, na.rm = TRUE), - min_tch = min(tch_forecasted, na.rm = TRUE), - max_tch = max(tch_forecasted, na.rm = TRUE), - avg_ci = mean(mean_ci, na.rm = TRUE), - fields_with_data = sum(!is.na(tch_forecasted)) - ) %>% - rename( - `Avg Forecast (t/ha)` = avg_tch, - `Min (t/ha)` = min_tch, - `Max (t/ha)` = max_tch, - `Avg CI` = avg_ci, - `Fields` = fields_with_data - ) - - # ========================================== - # 4. GROWTH DECLINE SUMMARY (trend interpretation) - # ========================================== - growth_summary <- all_kpis$growth_decline %>% - group_by(trend_interpretation) %>% - summarise( - field_count = n(), - avg_trend = mean(four_week_trend, na.rm = TRUE), - .groups = 'drop' - ) %>% - rename( - Trend = trend_interpretation, - `Field Count` = field_count, - `Avg 4-Week Trend` = avg_trend - ) - - # ========================================== - # 5. WEED PRESSURE SUMMARY (risk level counts) - # ========================================== - weed_summary <- all_kpis$weed_presence %>% - group_by(weed_pressure_risk) %>% - summarise( - field_count = n(), - avg_fragmentation = mean(fragmentation_index, na.rm = TRUE), - .groups = 'drop' - ) %>% - rename( - `Risk Level` = weed_pressure_risk, - `Field Count` = field_count, - `Avg Fragmentation` = avg_fragmentation - ) - - # ========================================== - # 6. GAP FILLING SUMMARY - # ========================================== - gap_summary <- if (!is.null(all_kpis$gap_filling) && is.data.frame(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) { - all_kpis$gap_filling %>% - summarise( - avg_gap_filling = mean(gap_filling_success, na.rm = TRUE), - avg_na_percent = mean(na_percent_pre_interpolation, na.rm = TRUE), - fields_with_data = n() - ) %>% - rename( - `Avg Gap Filling Success %` = avg_gap_filling, - `Avg NA % Pre-Interpolation` = avg_na_percent, - `Fields Analyzed` = fields_with_data - ) - } else { - data.frame(`Avg Gap Filling Success %` = NA_real_, `Avg NA % Pre-Interpolation` = NA_real_, `Fields Analyzed` = 0, check.names = FALSE) - } - - # Return as list (each element is a farm-level summary table) kpi_summary <- list( - uniformity = uniformity_summary, - area_change = area_change_summary, - tch_forecast = tch_summary, - growth_decline = growth_summary, - weed_pressure = weed_summary, - gap_filling = gap_summary + uniformity = all_kpis$uniformity %>% + select(field_idx, cv_value, morans_i, uniformity_score, interpretation), + + area_change = all_kpis$area_change %>% + select(field_idx, mean_ci_pct_change, interpretation), + + tch_forecast = all_kpis$tch_forecasted %>% + select(field_idx, mean_ci, tch_forecasted, tch_lower_bound, tch_upper_bound, confidence), + + growth_decline = all_kpis$growth_decline %>% + select(field_idx, four_week_trend, trend_interpretation, decline_severity), + + weed_pressure = all_kpis$weed_presence %>% + select(field_idx, fragmentation_index, weed_pressure_risk), + + gap_filling = if (!is.null(all_kpis$gap_filling)) { + all_kpis$gap_filling %>% + select(field_idx, gap_score, gap_level, mean_ci) + } else { + NULL + } ) - return(kpi_summary) } @@ -531,7 +470,7 @@ create_summary_tables <- function(all_kpis) { #' @param all_kpis List with all KPI results #' @param field_boundaries_sf SF object with field boundaries #' -#' @return Data frame with one row per field, all KPI columns (renamed for reporting compatibility) +#' @return Data frame with one row per field, all KPI columns create_field_detail_table <- function(field_df, all_kpis, field_boundaries_sf) { result <- field_df %>% left_join( @@ -543,7 +482,7 @@ create_field_detail_table <- function(field_df, all_kpis, field_boundaries_sf) { by = c("field_idx") ) %>% left_join( - all_kpis$tch_forecasted %>% select(field_idx, tch_forecasted, mean_ci), + all_kpis$tch_forecasted %>% select(field_idx, tch_forecasted), by = c("field_idx") ) %>% left_join( @@ -553,26 +492,7 @@ create_field_detail_table <- function(field_df, all_kpis, field_boundaries_sf) { left_join( all_kpis$weed_presence %>% select(field_idx, weed_pressure_risk), by = c("field_idx") - ) %>% - # Rename columns to match reporting script expectations - rename( - Field = field_name, - `Growth Uniformity` = uniformity_interpretation, - `Yield Forecast (t/ha)` = tch_forecasted, - `Decline Risk` = decline_severity, - `Weed Risk` = weed_pressure_risk, - `CI Change %` = mean_ci_pct_change, - `Mean CI` = mean_ci, - `CV Value` = cv_value - ) %>% - # Add placeholder columns expected by reporting script (will be populated from other sources) - mutate( - `Field Size (ha)` = NA_real_, - `Gap Score` = NA_real_ - ) %>% - select(field_idx, Field, `Field Size (ha)`, `Growth Uniformity`, `Yield Forecast (t/ha)`, - `Gap Score`, `Decline Risk`, `Weed Risk`, `CI Change %`, `Mean CI`, `CV Value`) - + ) return(result) } @@ -583,7 +503,7 @@ create_field_detail_table <- function(field_df, all_kpis, field_boundaries_sf) { #' @return Character string with formatted KPI summary text create_field_kpi_text <- function(all_kpis) { text_parts <- c( - "## KPI ANALYSIS SUMMARY\n", + "## AURA KPI ANALYSIS SUMMARY\n", "### Field Uniformity\n", paste(all_kpis$uniformity$interpretation, collapse = "; "), "\n", "### Growth Trends\n", @@ -597,69 +517,21 @@ create_field_kpi_text <- function(all_kpis) { #' Export detailed KPI data to Excel/RDS #' -#' @param all_kpis List with all KPI results (per-field data) -#' @param kpi_summary List with summary tables (farm-level aggregates) -#' @param project_dir Project name (for filename) +#' @param all_kpis List with all KPI results +#' @param kpi_summary List with summary tables #' @param output_dir Directory for output files #' @param week Week number #' @param year Year -#' @param field_boundaries_sf SF object with field boundaries (optional, for field_details_table) -#' #' @return List of output file paths -export_kpi_data <- function(all_kpis, kpi_summary, project_dir, output_dir, week, year, field_boundaries_sf = NULL) { +export_kpi_data <- function(all_kpis, kpi_summary, output_dir, week, year, project_dir) { # Ensure output directory exists if (!dir.exists(output_dir)) { dir.create(output_dir, recursive = TRUE) } - # Create unified field details table if field_boundaries_sf is provided - field_details_table <- NULL - if (!is.null(field_boundaries_sf)) { - tryCatch({ - # Create a basic field_df from the boundaries - # Robust field name extraction with multiple fallbacks - field_name <- NA_character_ - - # Check for 'name' column in the data.frame - if ("name" %in% names(field_boundaries_sf)) { - field_name <- field_boundaries_sf$name - } else if ("properties" %in% names(field_boundaries_sf)) { - # Extract from properties column (may be a list-column) - props <- field_boundaries_sf$properties - if (is.list(props) && length(props) > 0 && "name" %in% names(props[[1]])) { - field_name <- sapply(props, function(x) ifelse(is.null(x$name), NA_character_, x$name)) - } else if (!is.list(props)) { - # Try direct access if properties is a simple column - field_name <- props - } - } - - # Ensure field_name is a character vector of appropriate length - if (length(field_name) != nrow(field_boundaries_sf)) { - field_name <- rep(NA_character_, nrow(field_boundaries_sf)) - } - - # Replace only NA elements with fallback names, keeping valid names intact - na_indices <- which(is.na(field_name)) - if (length(na_indices) > 0) { - field_name[na_indices] <- paste0("Field_", na_indices) - } - - field_df <- data.frame( - field_idx = 1:nrow(field_boundaries_sf), - field_name = field_name, - stringsAsFactors = FALSE - ) - - field_details_table <- create_field_detail_table(field_df, all_kpis, field_boundaries_sf) - message(paste("✓ Field details table created with", nrow(field_details_table), "fields")) - }, error = function(e) { - message(paste("WARNING: Could not create field_details_table:", e$message)) - }) - } - - # Export all KPI tables to a single Excel file - use project_dir" - excel_file <- file.path(output_dir, paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", week, year), ".xlsx")) + # Export all KPI tables to a single Excel file + excel_file <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", week, year), ".xlsx") + excel_path <- file.path(output_dir, excel_file) sheets <- list( "Uniformity" = as.data.frame(kpi_summary$uniformity), @@ -670,40 +542,38 @@ export_kpi_data <- function(all_kpis, kpi_summary, project_dir, output_dir, week "Gap_Filling" = as.data.frame(kpi_summary$gap_filling) ) - write_xlsx(sheets, excel_file) - message(paste("✓ KPI data exported to:", excel_file)) + write_xlsx(sheets, excel_path) + message(paste("✓ AURA KPI data exported to:", excel_path)) - # Export to RDS for programmatic access (CRITICAL: Both per-field AND summary tables) - # The reporting script expects: summary_tables (list of 6 summary tables) - # We also provide: all_kpis (per-field data) and field_details (unified field view) - rds_file <- file.path(output_dir, paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", week, year), ".rds")) - - # Create the export structure that reporting scripts expect - export_data <- list( - summary_tables = kpi_summary, # Farm-level aggregates (6 KPI summaries) - all_kpis = all_kpis, # Per-field data (6 KPI per-field tables) - field_details = field_details_table # Unified field-level detail table + # Also export to RDS for programmatic access + rds_file <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", week, year), ".rds") + rds_path <- file.path(output_dir, rds_file) + + # Save complete structure including metadata + kpi_export_data <- list( + kpis = all_kpis, + summary_tables = kpi_summary, + metadata = list( + week = week, + year = year, + project = project_dir, + created_at = Sys.time() + ) ) - saveRDS(export_data, rds_file) - message(paste("✓ KPI RDS exported to:", rds_file)) - message(" Structure: list($summary_tables, $all_kpis, $field_details)") + saveRDS(kpi_export_data, rds_path) + message(paste("✓ AURA KPI RDS exported to:", rds_path)) - # Return including field_details for orchestrator to capture - return(list( - excel = excel_file, - rds = rds_file, - field_details = field_details_table - )) + return(list(excel = excel_path, rds = rds_path)) } # ============================================================================ # ORCHESTRATOR FUNCTION # ============================================================================ -#' Calculate all 6 KPIs +#' Calculate all 6 AURA KPIs #' -#' Main entry point for KPI calculation. +#' Main entry point for AURA KPI calculation. #' This function orchestrates the 6 KPI calculations and returns all results. #' #' @param field_boundaries_sf SF object with field geometries @@ -714,7 +584,6 @@ export_kpi_data <- function(all_kpis, kpi_summary, project_dir, output_dir, week #' @param ci_rds_path Path to combined CI RDS file #' @param harvesting_data Data frame with harvest data (optional) #' @param output_dir Directory for KPI exports -#' @param project_dir Project name (for filename in exports) #' #' @return List with results from all 6 KPI functions #' @@ -722,11 +591,11 @@ export_kpi_data <- function(all_kpis, kpi_summary, project_dir, output_dir, week #' This function: #' 1. Loads current week mosaic and extracts field statistics #' 2. (Optionally) loads previous week mosaic for comparison metrics -#' 3. Calculates all 6 KPIs +#' 3. Calculates all 6 AURA KPIs #' 4. Creates summary tables #' 5. Exports results to Excel/RDS #' -calculate_all_kpis <- function( +calculate_all_field_analysis_agronomic_support <- function( field_boundaries_sf, current_week, current_year, @@ -738,7 +607,7 @@ calculate_all_kpis <- function( project_dir = NULL ) { - message("\n============ KPI CALCULATION (6 KPIs) ============") + message("\n============ AURA KPI CALCULATION (6 KPIs) ============") # Load current week mosaic message("Loading current week mosaic...") @@ -751,7 +620,12 @@ calculate_all_kpis <- function( # Extract field statistics message("Extracting field statistics from current mosaic...") current_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf) - ci_pixels_by_field <- extract_ci_values(current_mosaic, field_boundaries_sf) + #Extract CI pixels for each field individually + ci_pixels_by_field <- list() + for (i in seq_len(nrow(field_boundaries_sf))) { + field_vect <- terra::vect(field_boundaries_sf[i, ]) + ci_pixels_by_field[[i]] <- extract_ci_values(current_mosaic, field_vect) + } # Load previous week mosaic (if available) previous_stats <- NULL @@ -787,14 +661,19 @@ calculate_all_kpis <- function( message("Calculating KPI 4: Growth Decline...") growth_decline_kpi <- calculate_growth_decline_kpi( - ci_pixels_by_field # Would need historical data for real trend + list(ci_pixels_by_field) # Would need historical data for real trend ) message("Calculating KPI 5: Weed Presence...") weed_kpi <- calculate_weed_presence_kpi(ci_pixels_by_field) message("Calculating KPI 6: Gap Filling...") - gap_filling_kpi <- calculate_gap_filling_kpi(current_mosaic, field_boundaries_sf) + gap_filling_result <- calculate_gap_filling_kpi(current_mosaic, field_boundaries_sf) + + # Add field_idx to gap filling results + gap_filling_kpi <- gap_filling_result$field_results %>% + mutate(field_idx = row_number()) %>% + select(field_idx, gap_score, gap_level, mean_ci, outlier_threshold) # Compile results all_kpis <- list( @@ -807,21 +686,21 @@ calculate_all_kpis <- function( ) # Create summary tables + message("\nCreating summary tables...") kpi_summary <- create_summary_tables(all_kpis) - # Export - pass project_dir for proper filename and field_boundaries_sf for field details table - if (is.null(project_dir)) { - project_dir <- "AURA" # Fallback if not provided - } - export_result <- export_kpi_data(all_kpis, kpi_summary, project_dir, output_dir, current_week, current_year, field_boundaries_sf) + # Export + message("\nExporting KPI data...") + export_paths <- export_kpi_data(all_kpis, kpi_summary, output_dir, current_week, current_year, project_dir) - message(paste("\n✓", project_dir, "KPI calculation complete. Week", current_week, current_year, "\n")) + message(paste("\n✓ AURA KPI calculation complete. Week", current_week, current_year)) - # Return combined structure (for integration with 80_calculate_kpis.R) - # Capture field_details from export_result to propagate it out return(list( - all_kpis = all_kpis, + kpis = all_kpis, summary_tables = kpi_summary, - field_details = export_result$field_details # Propagate field_details from export_kpi_data - )) + metadata = list( + week = current_week, + year = current_year, + project = project_dir, + export_paths = export_paths) )) } From be29e4fcb385ae5be14cb63009134044f8eab39a Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Thu, 12 Feb 2026 15:44:41 +0100 Subject: [PATCH 04/30] round to 2 decimals the gap score --- r_app/80_utils_cane_supply.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R index bd06aa8..b8ef588 100644 --- a/r_app/80_utils_cane_supply.R +++ b/r_app/80_utils_cane_supply.R @@ -206,7 +206,7 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { outlier_threshold <- median_ci - (2 * sd_ci) low_ci_pixels <- sum(valid_values < outlier_threshold) total_pixels <- length(valid_values) - gap_score <- (low_ci_pixels / total_pixels) * 100 + gap_score <- round((low_ci_pixels / total_pixels) * 100, 2) # Classify gap severity gap_level <- dplyr::case_when( From 3106871a8182ddf544c22a5583204d88084ac264 Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Thu, 12 Feb 2026 16:55:34 +0100 Subject: [PATCH 05/30] Remove the creation of weekly_tile_max_mosaic folder --- r_app/parameters_project.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/r_app/parameters_project.R b/r_app/parameters_project.R index 6e657a5..adeadc9 100644 --- a/r_app/parameters_project.R +++ b/r_app/parameters_project.R @@ -163,7 +163,6 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") { # TIER 5: MOSAICS (Script 40 output) weekly_mosaic_dir <- here(laravel_storage_dir, "weekly_mosaic") - weekly_tile_max_dir <- here(laravel_storage_dir, "weekly_tile_max") # TIER 6: KPI & REPORTING (Scripts 80/90/91 output) reports_dir <- here(laravel_storage_dir, "reports") @@ -181,7 +180,7 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") { merged_tif_folder, field_tiles_dir, field_tiles_ci_dir, daily_tiles_split_dir, extracted_ci_base_dir, daily_ci_vals_dir, cumulative_ci_vals_dir, ci_for_python_dir, growth_model_interpolated_dir, - weekly_mosaic_dir, weekly_tile_max_dir, + weekly_mosaic_dir, reports_dir, kpi_reports_dir, kpi_field_stats_dir, kpi_field_analysis_dir, data_dir, vrt_dir, harvest_dir, log_dir ) @@ -217,7 +216,6 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") { # TIER 5: Mosaics weekly_mosaic_dir = weekly_mosaic_dir, - weekly_tile_max_dir = weekly_tile_max_dir, # TIER 6: KPI & reporting reports_dir = reports_dir, From eb1b7772e5f963f1a6b58334c033c8792720a15f Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Thu, 12 Feb 2026 17:16:41 +0100 Subject: [PATCH 06/30] changed the named of the generated excel and rmd back to "kpi_summary_tables_week", as the 90 requests --- r_app/80_utils_agronomic_support.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r_app/80_utils_agronomic_support.R b/r_app/80_utils_agronomic_support.R index d649775..7bf6ebd 100644 --- a/r_app/80_utils_agronomic_support.R +++ b/r_app/80_utils_agronomic_support.R @@ -530,7 +530,7 @@ export_kpi_data <- function(all_kpis, kpi_summary, output_dir, week, year, proje } # Export all KPI tables to a single Excel file - excel_file <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", week, year), ".xlsx") + excel_file <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", week, year), ".xlsx") excel_path <- file.path(output_dir, excel_file) sheets <- list( @@ -546,7 +546,7 @@ export_kpi_data <- function(all_kpis, kpi_summary, output_dir, week, year, proje message(paste("✓ AURA KPI data exported to:", excel_path)) # Also export to RDS for programmatic access - rds_file <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", week, year), ".rds") + rds_file <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", week, year), ".rds") rds_path <- file.path(output_dir, rds_file) # Save complete structure including metadata From 684050d459cb8544002ef00becf1c589a64ac7d6 Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Thu, 12 Feb 2026 17:27:21 +0100 Subject: [PATCH 07/30] fixed the "key insights" calculation --- ..._CI_report_with_kpis_agronomic_support.Rmd | 79 +++++++++++-------- 1 file changed, 46 insertions(+), 33 deletions(-) diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 2d59d0d..33451e9 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -307,66 +307,79 @@ if (!is.null(CI_quadrant) && nrow(CI_quadrant) > 0) { ## Key Insights ```{r key_insights, echo=FALSE, results='asis'} -# Calculate key insights from aggregated KPI summary data +# Calculate key insights from KPI data if (exists("summary_tables") && !is.null(summary_tables) && length(summary_tables) > 0) { - # Extract aggregated KPI summaries (farm-level, not per-field) - uniformity_summary <- summary_tables$uniformity # Has: Status, Field Count, Avg CV, Avg Moran's I - area_change_summary <- summary_tables$area_change # Has: Status, Field Count, Avg CI Change % - growth_summary <- summary_tables$growth_decline # Has: Trend, Field Count, Avg 4-Week Trend - weed_summary <- summary_tables$weed_pressure # Has: Risk Level, Field Count, Avg Fragmentation + # Aggregate per-field KPI data into summaries on-the-fly - # Total fields analyzed (from uniformity summary) - total_fields <- sum(uniformity_summary$`Field Count`, na.rm = TRUE) - - # Uniformity insights - if (!is.null(uniformity_summary) && nrow(uniformity_summary) > 0) { + # 1. Uniformity insights - group by interpretation + if (!is.null(summary_tables$uniformity) && nrow(summary_tables$uniformity) > 0) { cat("**Field Uniformity:**\n") - for (i in 1:nrow(uniformity_summary)) { - status <- uniformity_summary$Status[i] - count <- uniformity_summary$`Field Count`[i] - if (count > 0) { + uniformity_counts <- summary_tables$uniformity %>% + group_by(interpretation) %>% + summarise(count = n(), .groups = 'drop') + + for (i in 1:nrow(uniformity_counts)) { + status <- uniformity_counts$interpretation[i] + count <- uniformity_counts$count[i] + if (!is.na(status) && !is.na(count) && count > 0) { cat("- ", count, " field(s) with ", status, "\n", sep="") } } } - # Area change insights - if (!is.null(area_change_summary) && nrow(area_change_summary) > 0) { + # 2. Area change insights - group by interpretation + if (!is.null(summary_tables$area_change) && nrow(summary_tables$area_change) > 0) { cat("\n**Area Change Status:**\n") - for (i in 1:nrow(area_change_summary)) { - status <- area_change_summary$Status[i] - count <- area_change_summary$`Field Count`[i] - if (count > 0 && !is.na(status)) { + area_counts <- summary_tables$area_change %>% + group_by(interpretation) %>% + summarise(count = n(), .groups = 'drop') + + for (i in 1:nrow(area_counts)) { + status <- area_counts$interpretation[i] + count <- area_counts$count[i] + if (!is.na(status) && !is.na(count) && count > 0) { cat("- ", count, " field(s) ", status, "\n", sep="") } } } - # Growth trend insights - if (!is.null(growth_summary) && nrow(growth_summary) > 0) { + # 3. Growth trend insights - group by trend_interpretation + if (!is.null(summary_tables$growth_decline) && nrow(summary_tables$growth_decline) > 0) { cat("\n**Growth Trends (4-Week):**\n") - for (i in 1:nrow(growth_summary)) { - trend <- growth_summary$Trend[i] - count <- growth_summary$`Field Count`[i] - if (count > 0 && !is.na(trend)) { + growth_counts <- summary_tables$growth_decline %>% + group_by(trend_interpretation) %>% + summarise(count = n(), .groups = 'drop') + + for (i in 1:nrow(growth_counts)) { + trend <- growth_counts$trend_interpretation[i] + count <- growth_counts$count[i] + if (!is.na(trend) && !is.na(count) && count > 0) { cat("- ", count, " field(s) with ", trend, "\n", sep="") } } } - # Weed pressure insights - if (!is.null(weed_summary) && nrow(weed_summary) > 0) { + # 4. Weed pressure insights - group by weed_pressure_risk + if (!is.null(summary_tables$weed_pressure) && nrow(summary_tables$weed_pressure) > 0) { cat("\n**Weed/Pest Pressure Risk:**\n") - for (i in 1:nrow(weed_summary)) { - risk <- weed_summary$`Risk Level`[i] - count <- weed_summary$`Field Count`[i] - if (count > 0 && !is.na(risk)) { + weed_counts <- summary_tables$weed_pressure %>% + group_by(weed_pressure_risk) %>% + summarise(count = n(), .groups = 'drop') + + for (i in 1:nrow(weed_counts)) { + risk <- weed_counts$weed_pressure_risk[i] + count <- weed_counts$count[i] + if (!is.na(risk) && !is.na(count) && count > 0) { cat("- ", count, " field(s) at ", risk, " risk\n", sep="") } } } + # 5. Total fields analyzed + total_fields <- nrow(summary_tables$uniformity) + cat("\n**Total Fields Analyzed:** ", total_fields, "\n", sep="") + } else { cat("KPI data not available for ", project_dir, " on this date.\n", sep="") } From a9840171cbc539d7d048e096d6ae7c34e5ace819 Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Mon, 16 Feb 2026 10:21:55 +0100 Subject: [PATCH 08/30] Enhance logging functionality and update report generation scripts - Improved safe_log function to include timestamps and conditional logging - Added diagnostic messages for field visualization processing - Updated CI map rendering parameters for consistency - Refined raster mapping functions in report_utils for clarity - Added .png files to .gitignore --- python_app/.gitignore | 3 ++ r_app/00_common_utils.R | 28 ++++++++++++--- ..._CI_report_with_kpis_agronomic_support.Rmd | 34 ++++++++++++++++--- r_app/report_utils.R | 10 +++--- 4 files changed, 62 insertions(+), 13 deletions(-) diff --git a/python_app/.gitignore b/python_app/.gitignore index 7c3d9f1..5199317 100644 --- a/python_app/.gitignore +++ b/python_app/.gitignore @@ -41,3 +41,6 @@ dist/ *.swo *.swp +*.png + + diff --git a/r_app/00_common_utils.R b/r_app/00_common_utils.R index 49a7b58..e7e9a44 100644 --- a/r_app/00_common_utils.R +++ b/r_app/00_common_utils.R @@ -48,10 +48,30 @@ # #' safe_log("Check input file", "WARNING") # #' safe_log("Failed to load data", "ERROR") # #' -# safe_log <- function(message, level = "INFO") { -# prefix <- sprintf("[%s]", level) -# cat(sprintf("%s %s\n", prefix, message)) -# } +safe_log <- function(message, level = "INFO") { + # Build the full log message with timestamp + timestamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S") + log_msg <- sprintf("[%s] [%s] %s", timestamp, level, message) + + # Only output to console if NOT rendering with knitr + if (!isTRUE(getOption('knitr.in.progress'))) { + cat(log_msg, "\n") + } + + # Always write to log file if available + if (exists("LOG_FILE", envir = .GlobalEnv)) { + log_file <- get("LOG_FILE", envir = .GlobalEnv) + if (!is.null(log_file) && nzchar(log_file)) { + tryCatch({ + cat(log_msg, "\n", file = log_file, append = TRUE) + }, error = function(e) { + # Silently fail if log file can't be written + }) + } + } + + invisible(log_msg) +} # #' SmartCane Debug Logging (Conditional) # #' diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 33451e9..c2d7d3d 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -841,7 +841,7 @@ tryCatch({ ### Chlorophyll Index (CI) Overview Map - Current Week -```{r render_farm_ci_map, echo=FALSE, fig.height=5.5, fig.width=6.5, dpi=150, dev='png', message=FALSE, warning=FALSE} +```{r render_farm_ci_map, echo=FALSE, message=FALSE, warning=FALSE, fig.height=5.5, fig.width=6.5, dpi=150, dev='png', message=FALSE, warning=FALSE} # Create farm-level chlorophyll index map with OpenStreetMap basemap tryCatch({ if (!is.null(farm_ci_current_ll)) { @@ -1075,15 +1075,39 @@ This section provides detailed, field-specific analyses including chlorophyll in ```{r generate_field_visualizations, eval=TRUE, fig.height=3.8, fig.width=6.5, dpi=150, dev='png', message=TRUE, echo=FALSE, warning=TRUE, include=TRUE, results='asis'} # Generate detailed visualizations for each field using purrr::walk +# DIAGNOSTIC MODE - Remove this after debugging +cat("\n## DIAGNOSTIC: Starting field visualization processing\n\n") + tryCatch({ - # Prepare merged field list and week/year info - AllPivots_merged <- AllPivots0 %>% + # Check prerequisites + cat("- Fields to process:", nrow(AllPivots_merged), "\n") + cat("- Field names:", paste(AllPivots_merged$field, collapse = ", "), "\n") + cat("- Weekly mosaic directory:", weekly_CI_mosaic, "\n") + cat("- CI quadrant data available:", !is.null(CI_quadrant), "\n") + cat("- Harvesting data available:", !is.null(harvesting_data), "\n\n") + + # Check if ci_plot function exists + if (!exists("ci_plot")) { + cat("**ERROR: ci_plot() function not found!**\n\n") + stop("ci_plot function missing") + } + + if (!exists("cum_ci_plot")) { + cat("**ERROR: cum_ci_plot() function not found!**\n\n") + stop("cum_ci_plot function missing") + } + + cat("- ci_plot() function:", "FOUND", "\n") + cat("- cum_ci_plot() function:", "FOUND", "\n\n") + + # Prepare merged field list and week/year info + AllPivots_merged <- AllPivots0 %>% dplyr::filter(!is.na(field), !is.na(sub_field)) %>% dplyr::group_by(field) %>% dplyr::summarise(.groups = 'drop') - # Helper to get week/year from a date - get_week_year <- function(date) { + # Helper to get week/year from a date + get_week_year <- function(date) { list( week = as.numeric(format(date, "%V")), year = as.numeric(format(date, "%G")) diff --git a/r_app/report_utils.R b/r_app/report_utils.R index e7e7888..855d78c 100644 --- a/r_app/report_utils.R +++ b/r_app/report_utils.R @@ -64,9 +64,10 @@ create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = map <- tm_shape(pivot_raster, unit = "m") # Add raster with continuous spectrum (fixed scale 8-1 for consistent comparison, reversed) - map <- map + tm_raster(col.scale = tm_scale_continuous(values = palette, + map <- map + tm_raster("CI", + col_scale = tm_scale_continuous(values = palette, limits = c(1,8)), - col.legend = tm_legend(title = "CI", + col_legend = tm_legend(title = "CI", orientation = if(legend_is_portrait) "portrait" else "landscape", show = show_legend, position = if(show_legend) tm_pos_out("left", "center") else c("left", "bottom"), @@ -135,10 +136,11 @@ create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_lege map <- tm_shape(pivot_raster, unit = "m") # Add raster with continuous spectrum (centered at 0 for difference maps, fixed scale, reversed) - map <- map + tm_raster(col.scale = tm_scale_continuous(values = palette, + map <- map + tm_raster("CI", + col_scale = tm_scale_continuous(values = palette, midpoint = 0, limits = c(-3, 3)), - col.legend = tm_legend(title = "CI diff.", + col_legend = tm_legend(title = "CI diff.", orientation = if(legend_is_portrait) "portrait" else "landscape", show = show_legend, position = if(show_legend) tm_pos_out("right", "center") else c("left", "bottom"), From 951eb487b8432f5b51b970c6c6022ae61b9871b7 Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Mon, 16 Feb 2026 11:04:31 +0100 Subject: [PATCH 09/30] Refactor KPI export function to excel file that matches cane supply strycture --- r_app/80_calculate_kpis.R | 4 + r_app/80_utils_agronomic_support.R | 165 ++++++++++++++++++----------- 2 files changed, 105 insertions(+), 64 deletions(-) diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index 65496ce..1b481c9 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -381,6 +381,10 @@ main <- function() { output_dir = reports_dir_kpi, project_dir = project_dir ) + + # Extract results + field_analysis_df <- kpi_results$field_analysis_df + export_paths <- kpi_results$export_paths cat("\n=== KPI CALCULATION COMPLETE ===\n") cat("Summary tables saved for Script 90 integration\n") diff --git a/r_app/80_utils_agronomic_support.R b/r_app/80_utils_agronomic_support.R index 7bf6ebd..aecc837 100644 --- a/r_app/80_utils_agronomic_support.R +++ b/r_app/80_utils_agronomic_support.R @@ -464,35 +464,81 @@ create_summary_tables <- function(all_kpis) { return(kpi_summary) } -#' Create detailed field-by-field KPI report +#' Create detailed field-by-field KPI report (ALL KPIs in one row) #' -#' @param field_df Data frame with field identifiers and acreage -#' @param all_kpis List with all KPI results #' @param field_boundaries_sf SF object with field boundaries +#' @param all_kpis List with all KPI results +#' @param current_week Current week number +#' @param current_year Current year #' #' @return Data frame with one row per field, all KPI columns -create_field_detail_table <- function(field_df, all_kpis, field_boundaries_sf) { - result <- field_df %>% +create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_week, current_year) { + + # Start with field identifiers AND field_idx for joining + result <- field_boundaries_sf %>% + sf::st_drop_geometry() %>% + mutate( + field_idx = row_number(), # ADD THIS: match the integer index used in KPI functions + Field_id = field, + Field_name = field, + Week = current_week, + Year = current_year + ) %>% + select(field_idx, Field_id, Field_name, Week, Year) # Include field_idx first + + # Join all KPI results (now field_idx matches on both sides) + result <- result %>% left_join( - all_kpis$uniformity %>% select(field_idx, cv_value, uniformity_interpretation = interpretation), - by = c("field_idx") + all_kpis$uniformity %>% + select(field_idx, CV = cv_value, Uniformity_Score = uniformity_score, + Morans_I = morans_i, Uniformity_Interpretation = interpretation), + by = "field_idx" ) %>% left_join( - all_kpis$area_change %>% select(field_idx, mean_ci_pct_change), - by = c("field_idx") + all_kpis$area_change %>% + select(field_idx, Weekly_CI_Change = mean_ci_pct_change, + Area_Change_Interpretation = interpretation), + by = "field_idx" ) %>% left_join( - all_kpis$tch_forecasted %>% select(field_idx, tch_forecasted), - by = c("field_idx") + all_kpis$tch_forecasted %>% + select(field_idx, Mean_CI = mean_ci, TCH_Forecasted = tch_forecasted, + TCH_Lower = tch_lower_bound, TCH_Upper = tch_upper_bound, + TCH_Confidence = confidence), + by = "field_idx" ) %>% left_join( - all_kpis$growth_decline %>% select(field_idx, decline_severity), - by = c("field_idx") + all_kpis$growth_decline %>% + select(field_idx, Four_Week_Trend = four_week_trend, + Trend_Interpretation = trend_interpretation, + Decline_Severity = decline_severity), + by = "field_idx" ) %>% left_join( - all_kpis$weed_presence %>% select(field_idx, weed_pressure_risk), - by = c("field_idx") - ) + all_kpis$weed_presence %>% + select(field_idx, Fragmentation_Index = fragmentation_index, + Weed_Pressure_Risk = weed_pressure_risk), + by = "field_idx" + ) + + # Add gap filling if available + if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) { + result <- result %>% + left_join( + all_kpis$gap_filling %>% + select(field_idx, Gap_Score = gap_score, Gap_Level = gap_level), + by = "field_idx" + ) + } + + # Remove field_idx from final output (it was only needed for joining) + result <- result %>% + select(-field_idx) + + # Round numeric columns + result <- result %>% + mutate(across(where(is.numeric), ~ round(., 2))) + return(result) } @@ -515,56 +561,28 @@ create_field_kpi_text <- function(all_kpis) { return(paste(text_parts, collapse = "")) } -#' Export detailed KPI data to Excel/RDS +#' Export detailed KPI data to Excel/RDS #' -#' @param all_kpis List with all KPI results -#' @param kpi_summary List with summary tables +#' @param field_detail_df Data frame with all KPI columns (one row per field) +#' @param kpi_summary List with summary tables (optional, for metadata) #' @param output_dir Directory for output files #' @param week Week number #' @param year Year +#' @param project_dir Project name #' @return List of output file paths -export_kpi_data <- function(all_kpis, kpi_summary, output_dir, week, year, project_dir) { - # Ensure output directory exists - if (!dir.exists(output_dir)) { - dir.create(output_dir, recursive = TRUE) - } +export_kpi_data <- function(field_detail_df, kpi_summary, output_dir, week, year, project_dir) { - # Export all KPI tables to a single Excel file - excel_file <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", week, year), ".xlsx") - excel_path <- file.path(output_dir, excel_file) - - sheets <- list( - "Uniformity" = as.data.frame(kpi_summary$uniformity), - "Area_Change" = as.data.frame(kpi_summary$area_change), - "TCH_Forecast" = as.data.frame(kpi_summary$tch_forecast), - "Growth_Decline" = as.data.frame(kpi_summary$growth_decline), - "Weed_Pressure" = as.data.frame(kpi_summary$weed_pressure), - "Gap_Filling" = as.data.frame(kpi_summary$gap_filling) + # Use the common export function from 80_utils_common.R + export_paths <- export_field_analysis_excel( + field_df = field_detail_df, + summary_df = NULL, # No separate summary sheet for agronomic support + project_dir = project_dir, + current_week = week, + year = year, + reports_dir = output_dir ) - write_xlsx(sheets, excel_path) - message(paste("✓ AURA KPI data exported to:", excel_path)) - - # Also export to RDS for programmatic access - rds_file <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", week, year), ".rds") - rds_path <- file.path(output_dir, rds_file) - - # Save complete structure including metadata - kpi_export_data <- list( - kpis = all_kpis, - summary_tables = kpi_summary, - metadata = list( - week = week, - year = year, - project = project_dir, - created_at = Sys.time() - ) - ) - - saveRDS(kpi_export_data, rds_path) - message(paste("✓ AURA KPI RDS exported to:", rds_path)) - - return(list(excel = excel_path, rds = rds_path)) + return(export_paths) } # ============================================================================ @@ -685,22 +703,41 @@ calculate_all_field_analysis_agronomic_support <- function( gap_filling = gap_filling_kpi ) + # Built single-sheet field detail table with all KPIs + message("\nBuilding comprehensive field detail table...") + field_detail_df <- create_field_detail_table( + field_boundaries_sf = field_boundaries_sf, + all_kpis = all_kpis, + current_week = current_week, + current_year = current_year + ) + # Create summary tables message("\nCreating summary tables...") kpi_summary <- create_summary_tables(all_kpis) # Export - message("\nExporting KPI data...") - export_paths <- export_kpi_data(all_kpis, kpi_summary, output_dir, current_week, current_year, project_dir) + message("\nExporting KPI data (single-sheet format)...") + export_paths <- export_kpi_data( + field_detail_df = field_detail_df, + kpi_summary = kpi_summary, + output_dir = output_dir, + week = current_week, + year = current_year, + project_dir = project_dir + ) message(paste("\n✓ AURA KPI calculation complete. Week", current_week, current_year)) return(list( - kpis = all_kpis, - summary_tables = kpi_summary, + field_analysis_df = field_detail_df, + kpis = all_kpis, + summary_tables = kpi_summary, + export_paths = export_paths, metadata = list( week = current_week, year = current_year, - project = project_dir, - export_paths = export_paths) )) + project = project_dir + ) + )) } From 0503a93bcd83c26177fe0fb1fdd7739c5ea4be3b Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Mon, 16 Feb 2026 11:51:10 +0100 Subject: [PATCH 10/30] fixed the path that the excel is being saved to --- r_app/80_calculate_kpis.R | 4 ++-- r_app/80_utils_common.R | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index 1b481c9..a35c808 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -335,8 +335,8 @@ main <- function() { message("WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)") message(strrep("=", 70)) - # Prepare inputs for KPI calculation (already created by setup_project_directories) - reports_dir_kpi <- setup$kpi_reports_dir + # Prepare outputs and inputs for KPI calculation (already created by setup_project_directories) + reports_dir_kpi <- file.path(setup$reports_dir, "kpis") cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir # Load field boundaries for workflow (use data_dir from setup) diff --git a/r_app/80_utils_common.R b/r_app/80_utils_common.R index 1a6b989..8a166eb 100644 --- a/r_app/80_utils_common.R +++ b/r_app/80_utils_common.R @@ -678,13 +678,13 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre NULL } - output_subdir <- file.path(reports_dir, "field_analysis") - if (!dir.exists(output_subdir)) { - dir.create(output_subdir, recursive = TRUE) + output_dir <- file.path(reports_dir) + if (!dir.exists(output_dir)) { + dir.create(output_dir, recursive = TRUE) } excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".xlsx") - excel_path <- file.path(output_subdir, excel_filename) + excel_path <- file.path(output_dir, excel_filename) excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE) # Build sheets list dynamically @@ -710,13 +710,13 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre ) rds_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".rds") - rds_path <- file.path(output_subdir, rds_filename) + rds_path <- file.path(output_dir, rds_filename) saveRDS(kpi_data, rds_path) message(paste("✓ Field analysis RDS exported to:", rds_path)) csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".csv") - csv_path <- file.path(output_subdir, csv_filename) + csv_path <- file.path(output_dir, csv_filename) write_csv(field_df_rounded, csv_path) message(paste("✓ Field analysis CSV exported to:", csv_path)) From cea3f9a0e5fe4da9bf832a973b69b68dbc4b5682 Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Mon, 16 Feb 2026 12:05:40 +0100 Subject: [PATCH 11/30] changed the path for cane supply too --- r_app/80_calculate_kpis.R | 2 +- r_app/80_utils_cane_supply.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index a35c808..7619347 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -397,7 +397,7 @@ main <- function() { message(strrep("=", 70)) # Define variables needed for workflow functions - reports_dir <- setup$kpi_reports_dir + reports_dir <- file.path(setup$reports_dir, "kpis") data_dir <- setup$data_dir diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R index b8ef588..bceaa5d 100644 --- a/r_app/80_utils_cane_supply.R +++ b/r_app/80_utils_cane_supply.R @@ -536,7 +536,7 @@ calculate_field_analysis_cane_supply <- function(setup, message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)") message(strrep("=", 70)) - reports_dir <- setup$kpi_reports_dir + reports_dir <- file.path(setup$reports_dir, "kpis") # ========== PHASE 1: WEEKLY ANALYSIS SETUP ========== message("\n", strrep("-", 70)) From 3f0832630841a03d794aceee3409a2d73f7ece87 Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Mon, 16 Feb 2026 12:12:16 +0100 Subject: [PATCH 12/30] removed redudant variable definition --- r_app/80_calculate_kpis.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index 7619347..208a373 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -397,11 +397,9 @@ main <- function() { message(strrep("=", 70)) # Define variables needed for workflow functions - reports_dir <- file.path(setup$reports_dir, "kpis") data_dir <- setup$data_dir - - # Load field boundaries for workflow (use data_dir from setup) + # Load field boundaries for workflow (use data_dir from setup) message("\nLoading field boundaries for KPI calculation...") tryCatch({ boundaries_result <- load_field_boundaries(setup$data_dir) From 8e2d796b415ce1642ace379bfe2bc7d9ec57ddf5 Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Mon, 16 Feb 2026 14:45:00 +0100 Subject: [PATCH 13/30] Added aura in client type map --- r_app/parameters_project.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/r_app/parameters_project.R b/r_app/parameters_project.R index adeadc9..f0b5802 100644 --- a/r_app/parameters_project.R +++ b/r_app/parameters_project.R @@ -45,7 +45,8 @@ CLIENT_TYPE_MAP <- list( "esa" = "agronomic_support", "simba" = "agronomic_support", "john" = "agronomic_support", - "huss" = "agronomic_support" + "huss" = "agronomic_support", + "aura" = "agronomic_support" ) #' Get client type for a project From 35e474cf5c21d873cf786cac9384cb3c5c0dfa57 Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Mon, 16 Feb 2026 14:46:16 +0100 Subject: [PATCH 14/30] Add gap score calculation to common utils --- r_app/80_utils_common.R | 109 +++++++++++++++++++++++++++++++++------- 1 file changed, 91 insertions(+), 18 deletions(-) diff --git a/r_app/80_utils_common.R b/r_app/80_utils_common.R index 8a166eb..78962e9 100644 --- a/r_app/80_utils_common.R +++ b/r_app/80_utils_common.R @@ -355,12 +355,11 @@ calculate_cv_trend_long_term <- function(cv_values) { } #' Calculate Gap Filling Score KPI (2σ method) -#' @param ci_raster Current week CI raster -#' @param field_boundaries Field boundaries -#' @return Data frame with field-level gap filling scores +#' @param ci_raster Current week CI raster (single band) +#' @param field_boundaries Field boundaries (sf or SpatVector) +#' @return List with summary data frame and field-level results data frame calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { - safe_log("Calculating Gap Filling Score KPI (placeholder)") - + # Handle both sf and SpatVector inputs if (!inherits(field_boundaries, "SpatVector")) { field_boundaries_vect <- terra::vect(field_boundaries) @@ -368,19 +367,11 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { field_boundaries_vect <- field_boundaries } - # Ensure field_boundaries_vect is valid and matches field_boundaries dimensions - n_fields_vect <- length(field_boundaries_vect) - n_fields_sf <- nrow(field_boundaries) - - if (n_fields_sf != n_fields_vect) { - warning(paste("Field boundary mismatch: nrow(field_boundaries)=", n_fields_sf, "vs length(field_boundaries_vect)=", n_fields_vect, ". Using actual SpatVector length.")) - } - field_results <- data.frame() for (i in seq_len(nrow(field_boundaries))) { - field_name <- field_boundaries$field[i] - sub_field_name <- field_boundaries$sub_field[i] + field_name <- if ("field" %in% names(field_boundaries)) field_boundaries$field[i] else NA_character_ + sub_field_name <- if ("sub_field" %in% names(field_boundaries)) field_boundaries$sub_field[i] else NA_character_ field_vect <- field_boundaries_vect[i] # Extract CI values using helper function @@ -394,7 +385,7 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { outlier_threshold <- median_ci - (2 * sd_ci) low_ci_pixels <- sum(valid_values < outlier_threshold) total_pixels <- length(valid_values) - gap_score <- (low_ci_pixels / total_pixels) * 100 + gap_score <- round((low_ci_pixels / total_pixels) * 100, 2) # Classify gap severity gap_level <- dplyr::case_when( @@ -412,7 +403,6 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { outlier_threshold = outlier_threshold )) } else { - # Not enough valid data, fill with NA row field_results <- rbind(field_results, data.frame( field = field_name, sub_field = sub_field_name, @@ -423,9 +413,92 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { )) } } + + # Summarize results + gap_summary <- field_results %>% + dplyr::group_by(gap_level) %>% + dplyr::summarise(field_count = n(), .groups = 'drop') %>% + dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1)) + + return(list(summary = gap_summary, field_results = field_results)) } - +#' Calculate gap filling scores for all per-field mosaics (wrapper) +#' +#' This wrapper handles per-field mosaic structure by iterating over +#' individual field files and calling the basic KPI function +#' +#' @param per_field_files Character vector of paths to per-field mosaic TIFFs +#' @param field_boundaries_sf sf object with field geometries +#' @return data.frame with Field_id and gap_score columns +calculate_gap_scores <- function(per_field_files, field_boundaries_sf) { + message("\nCalculating gap filling scores (2σ method)...") + message(paste(" Using per-field mosaics for", length(per_field_files), "fields")) + + field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field) + + process_gap_for_field <- function(field_file) { + field_id <- basename(dirname(field_file)) + field_bounds <- field_boundaries_by_id[[field_id]] + + if (is.null(field_bounds) || nrow(field_bounds) == 0) { + return(data.frame(Field_id = field_id, gap_score = NA_real_)) + } + + tryCatch({ + field_raster <- terra::rast(field_file) + ci_band_name <- "CI" + if (!(ci_band_name %in% names(field_raster))) { + return(data.frame(Field_id = field_id, gap_score = NA_real_)) + } + field_ci_band <- field_raster[[ci_band_name]] + names(field_ci_band) <- "CI" + + gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds) + + if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) { + return(data.frame(Field_id = field_id, gap_score = NA_real_)) + } + + gap_scores <- gap_result$field_results + gap_scores$Field_id <- gap_scores$field + gap_scores <- gap_scores[, c("Field_id", "gap_score")] + + stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE)) + }, error = function(e) { + message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message)) + data.frame(Field_id = field_id, gap_score = NA_real_) + }) + } + + # Process fields sequentially with progress bar + message(" Processing gap scores for ", length(per_field_files), " fields...") + pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50) + + results_list <- lapply(seq_along(per_field_files), function(idx) { + result <- process_gap_for_field(per_field_files[[idx]]) + utils::setTxtProgressBar(pb, idx) + result + }) + close(pb) + + gap_scores_df <- dplyr::bind_rows(results_list) + + if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) { + gap_scores_df <- gap_scores_df %>% + dplyr::group_by(Field_id) %>% + dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop") + + message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields")) + message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", + round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%")) + } else { + message(" WARNING: No gap scores calculated from per-field mosaics") + gap_scores_df <- NULL + } + + return(gap_scores_df) +} # ============================================================================ # HELPER FUNCTIONS From 5f2dca0a92c0c9ef313d31ae9ed082f391163e35 Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Mon, 16 Feb 2026 15:05:52 +0100 Subject: [PATCH 15/30] remove gap score calculation --> moved to common --- r_app/80_utils_cane_supply.R | 244 +++++++++++++++++------------------ 1 file changed, 122 insertions(+), 122 deletions(-) diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R index bceaa5d..6c93221 100644 --- a/r_app/80_utils_cane_supply.R +++ b/r_app/80_utils_cane_supply.R @@ -166,153 +166,153 @@ calculate_status_alert <- function(imminent_prob, age_week, weekly_ci_change, me NA_character_ } -#' Calculate Gap Filling Score KPI (2σ method) -#' @param ci_raster Current week CI raster -#' @param field_boundaries Field boundaries -#' @return Data frame with field-level gap filling scores -calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { - safe_log("Calculating Gap Filling Score KPI (placeholder)") +# #' Calculate Gap Filling Score KPI (2σ method) +# #' @param ci_raster Current week CI raster +# #' @param field_boundaries Field boundaries +# #' @return Data frame with field-level gap filling scores +# calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { +# safe_log("Calculating Gap Filling Score KPI (placeholder)") - # Handle both sf and SpatVector inputs - if (!inherits(field_boundaries, "SpatVector")) { - field_boundaries_vect <- terra::vect(field_boundaries) - } else { - field_boundaries_vect <- field_boundaries - } +# # Handle both sf and SpatVector inputs +# if (!inherits(field_boundaries, "SpatVector")) { +# field_boundaries_vect <- terra::vect(field_boundaries) +# } else { +# field_boundaries_vect <- field_boundaries +# } - # Ensure field_boundaries_vect is valid and matches field_boundaries dimensions - n_fields_vect <- length(field_boundaries_vect) - n_fields_sf <- nrow(field_boundaries) +# # Ensure field_boundaries_vect is valid and matches field_boundaries dimensions +# n_fields_vect <- length(field_boundaries_vect) +# n_fields_sf <- nrow(field_boundaries) - if (n_fields_sf != n_fields_vect) { - warning(paste("Field boundary mismatch: nrow(field_boundaries)=", n_fields_sf, "vs length(field_boundaries_vect)=", n_fields_vect, ". Using actual SpatVector length.")) - } +# if (n_fields_sf != n_fields_vect) { +# warning(paste("Field boundary mismatch: nrow(field_boundaries)=", n_fields_sf, "vs length(field_boundaries_vect)=", n_fields_vect, ". Using actual SpatVector length.")) +# } - field_results <- data.frame() +# field_results <- data.frame() - for (i in seq_len(nrow(field_boundaries))) { - field_name <- field_boundaries$field[i] - sub_field_name <- field_boundaries$sub_field[i] - field_vect <- field_boundaries_vect[i] +# for (i in seq_len(nrow(field_boundaries))) { +# field_name <- field_boundaries$field[i] +# sub_field_name <- field_boundaries$sub_field[i] +# field_vect <- field_boundaries_vect[i] - # Extract CI values using helper function - ci_values <- extract_ci_values(ci_raster, field_vect) - valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)] +# # Extract CI values using helper function +# ci_values <- extract_ci_values(ci_raster, field_vect) +# valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)] - if (length(valid_values) > 1) { - # Gap score using 2σ below median to detect outliers - median_ci <- median(valid_values) - sd_ci <- sd(valid_values) - outlier_threshold <- median_ci - (2 * sd_ci) - low_ci_pixels <- sum(valid_values < outlier_threshold) - total_pixels <- length(valid_values) - gap_score <- round((low_ci_pixels / total_pixels) * 100, 2) +# if (length(valid_values) > 1) { +# # Gap score using 2σ below median to detect outliers +# median_ci <- median(valid_values) +# sd_ci <- sd(valid_values) +# outlier_threshold <- median_ci - (2 * sd_ci) +# low_ci_pixels <- sum(valid_values < outlier_threshold) +# total_pixels <- length(valid_values) +# gap_score <- round((low_ci_pixels / total_pixels) * 100, 2) - # Classify gap severity - gap_level <- dplyr::case_when( - gap_score < 10 ~ "Minimal", - gap_score < 25 ~ "Moderate", - TRUE ~ "Significant" - ) +# # Classify gap severity +# gap_level <- dplyr::case_when( +# gap_score < 10 ~ "Minimal", +# gap_score < 25 ~ "Moderate", +# TRUE ~ "Significant" +# ) - field_results <- rbind(field_results, data.frame( - field = field_name, - sub_field = sub_field_name, - gap_level = gap_level, - gap_score = gap_score, - mean_ci = mean(valid_values), - outlier_threshold = outlier_threshold - )) - } else { - # Not enough valid data, fill with NA row - field_results <- rbind(field_results, data.frame( - field = field_name, - sub_field = sub_field_name, - gap_level = NA_character_, - gap_score = NA_real_, - mean_ci = NA_real_, - outlier_threshold = NA_real_ - )) - } - } - return(list(field_results = field_results)) -} +# field_results <- rbind(field_results, data.frame( +# field = field_name, +# sub_field = sub_field_name, +# gap_level = gap_level, +# gap_score = gap_score, +# mean_ci = mean(valid_values), +# outlier_threshold = outlier_threshold +# )) +# } else { +# # Not enough valid data, fill with NA row +# field_results <- rbind(field_results, data.frame( +# field = field_name, +# sub_field = sub_field_name, +# gap_level = NA_character_, +# gap_score = NA_real_, +# mean_ci = NA_real_, +# outlier_threshold = NA_real_ +# )) +# } +# } +# return(list(field_results = field_results)) +# } -#' Calculate gap filling scores for all per-field mosaics -#' This is a wrapper function that processes multiple per-field mosaic files -#' and calculates gap scores for each field. -#' @param per_field_files Character vector of paths to per-field mosaic TIFFs -#' @param field_boundaries_sf sf object with field geometries -#' @return data.frame with Field_id and gap_score columns -calculate_gap_scores <- function(per_field_files, field_boundaries_sf) { - message("\nCalculating gap filling scores (2σ method)...") - message(paste(" Using per-field mosaics for", length(per_field_files), "fields")) +# #' Calculate gap filling scores for all per-field mosaics +# #' This is a wrapper function that processes multiple per-field mosaic files +# #' and calculates gap scores for each field. +# #' @param per_field_files Character vector of paths to per-field mosaic TIFFs +# #' @param field_boundaries_sf sf object with field geometries +# #' @return data.frame with Field_id and gap_score columns +# calculate_gap_scores <- function(per_field_files, field_boundaries_sf) { +# message("\nCalculating gap filling scores (2σ method)...") +# message(paste(" Using per-field mosaics for", length(per_field_files), "fields")) - field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field) +# field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field) - process_gap_for_field <- function(field_file) { - field_id <- basename(dirname(field_file)) - field_bounds <- field_boundaries_by_id[[field_id]] +# process_gap_for_field <- function(field_file) { +# field_id <- basename(dirname(field_file)) +# field_bounds <- field_boundaries_by_id[[field_id]] - if (is.null(field_bounds) || nrow(field_bounds) == 0) { - return(data.frame(Field_id = field_id, gap_score = NA_real_)) - } +# if (is.null(field_bounds) || nrow(field_bounds) == 0) { +# return(data.frame(Field_id = field_id, gap_score = NA_real_)) +# } - tryCatch({ - field_raster <- terra::rast(field_file) - ci_band_name <- "CI" - if (!(ci_band_name %in% names(field_raster))) { - return(data.frame(Field_id = field_id, gap_score = NA_real_)) - } - field_ci_band <- field_raster[[ci_band_name]] - names(field_ci_band) <- "CI" +# tryCatch({ +# field_raster <- terra::rast(field_file) +# ci_band_name <- "CI" +# if (!(ci_band_name %in% names(field_raster))) { +# return(data.frame(Field_id = field_id, gap_score = NA_real_)) +# } +# field_ci_band <- field_raster[[ci_band_name]] +# names(field_ci_band) <- "CI" - gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds) +# gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds) - if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) { - return(data.frame(Field_id = field_id, gap_score = NA_real_)) - } +# if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) { +# return(data.frame(Field_id = field_id, gap_score = NA_real_)) +# } - gap_scores <- gap_result$field_results - gap_scores$Field_id <- gap_scores$field - gap_scores <- gap_scores[, c("Field_id", "gap_score")] +# gap_scores <- gap_result$field_results +# gap_scores$Field_id <- gap_scores$field +# gap_scores <- gap_scores[, c("Field_id", "gap_score")] - stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE)) - }, error = function(e) { - message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message)) - data.frame(Field_id = field_id, gap_score = NA_real_) - }) - } +# stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE)) +# }, error = function(e) { +# message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message)) +# data.frame(Field_id = field_id, gap_score = NA_real_) +# }) +# } - # Process fields sequentially with progress bar - message(" Processing gap scores for ", length(per_field_files), " fields...") - pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50) +# # Process fields sequentially with progress bar +# message(" Processing gap scores for ", length(per_field_files), " fields...") +# pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50) - results_list <- lapply(seq_along(per_field_files), function(idx) { - result <- process_gap_for_field(per_field_files[[idx]]) - utils::setTxtProgressBar(pb, idx) - result - }) - close(pb) +# results_list <- lapply(seq_along(per_field_files), function(idx) { +# result <- process_gap_for_field(per_field_files[[idx]]) +# utils::setTxtProgressBar(pb, idx) +# result +# }) +# close(pb) - gap_scores_df <- dplyr::bind_rows(results_list) +# gap_scores_df <- dplyr::bind_rows(results_list) - if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) { - gap_scores_df <- gap_scores_df %>% - dplyr::group_by(Field_id) %>% - dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop") +# if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) { +# gap_scores_df <- gap_scores_df %>% +# dplyr::group_by(Field_id) %>% +# dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop") - message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields")) - message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", - round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%")) - } else { - message(" WARNING: No gap scores calculated from per-field mosaics") - gap_scores_df <- NULL - } +# message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields")) +# message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", +# round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%")) +# } else { +# message(" WARNING: No gap scores calculated from per-field mosaics") +# gap_scores_df <- NULL +# } - return(gap_scores_df) -} +# return(gap_scores_df) +# } #' Build complete per-field KPI dataframe with all 22 columns #' @param current_stats data.frame with current week statistics from load_or_calculate_weekly_stats From e16d920eeaa7ec02f5814eb134d4740cfc224cb5 Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Mon, 16 Feb 2026 15:06:37 +0100 Subject: [PATCH 16/30] 1. removed gap score 2.fixed morans I to calculate per-field --- r_app/80_utils_agronomic_support.R | 362 ++++++++++++++++++++--------- 1 file changed, 251 insertions(+), 111 deletions(-) diff --git a/r_app/80_utils_agronomic_support.R b/r_app/80_utils_agronomic_support.R index aecc837..b874ee9 100644 --- a/r_app/80_utils_agronomic_support.R +++ b/r_app/80_utils_agronomic_support.R @@ -80,7 +80,8 @@ prepare_predictions <- function(harvest_model, field_data, scenario = "optimisti #' @param ci_band Raster band with CI values #' #' @return Data frame with field_idx, cv_value, morans_i, uniformity_score, interpretation -calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_sf, ci_band = NULL) { +calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_sf, ci_band = NULL, + mosaic_dir = NULL, week_file = NULL) { result <- data.frame( field_idx = integer(), cv_value = numeric(), @@ -90,6 +91,9 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_ stringsAsFactors = FALSE ) + # Determine if we're using per-field structure + is_per_field <- !is.null(mosaic_dir) && !is.null(week_file) + for (field_idx in seq_len(nrow(field_boundaries_sf))) { ci_pixels <- ci_pixels_by_field[[field_idx]] @@ -107,10 +111,31 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_ cv_val <- calculate_cv(ci_pixels) + # Calculate Moran's I morans_i <- NA_real_ - if (!is.null(ci_band) && inherits(ci_band, "SpatRaster")) { + if (is_per_field) { + # Load individual field raster for per-field structure + field_name <- field_boundaries_sf$field[field_idx] + field_mosaic_path <- file.path(mosaic_dir, field_name, week_file) + + if (file.exists(field_mosaic_path)) { + tryCatch({ + field_raster <- terra::rast(field_mosaic_path)[["CI"]] + single_field <- field_boundaries_sf[field_idx, ] + morans_result <- calculate_spatial_autocorrelation(field_raster, single_field) + + if (is.list(morans_result)) { + morans_i <- morans_result$morans_i + } else { + morans_i <- morans_result + } + }, error = function(e) { + message(paste(" Warning: Spatial autocorrelation failed for field", field_name, ":", e$message)) + }) + } + } else if (!is.null(ci_band) && inherits(ci_band, "SpatRaster")) { + # Use single raster for single-file structure tryCatch({ - # Get single field geometry single_field <- field_boundaries_sf[field_idx, ] morans_result <- calculate_spatial_autocorrelation(ci_band, single_field) @@ -121,12 +146,11 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_ } }, error = function(e) { message(paste(" Warning: Spatial autocorrelation failed for field", field_idx, ":", e$message)) - morans_i <<- NA_real_ }) } # Normalize CV (0-1 scale, invert so lower CV = higher score) - cv_normalized <- min(cv_val / 0.3, 1) # 0.3 = threshold for CV + cv_normalized <- min(cv_val / 0.3, 1) cv_score <- 1 - cv_normalized # Normalize Moran's I (-1 to 1 scale, shift to 0-1) @@ -174,23 +198,54 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_ #' #' @return Data frame with field-level CI changes calculate_area_change_kpi <- function(current_stats, previous_stats) { - result <- calculate_change_percentages(current_stats, previous_stats) - # Add interpretation - result$interpretation <- NA_character_ + # Initialize result data frame + result <- data.frame( + field_idx = seq_len(nrow(current_stats)), + mean_ci_pct_change = NA_real_, + interpretation = NA_character_, + stringsAsFactors = FALSE + ) - for (i in seq_len(nrow(result))) { - change <- result$mean_ci_pct_change[i] + if (is.null(previous_stats) || nrow(previous_stats) == 0) { + result$interpretation <- "No previous data" + return(result) + } + + # Match fields between current and previous stats + for (i in seq_len(nrow(current_stats))) { + field_id <- current_stats$Field_id[i] - if (is.na(change)) { + # Find matching field in previous stats + prev_idx <- which(previous_stats$Field_id == field_id) + + if (length(prev_idx) == 0) { result$interpretation[i] <- "No previous data" - } else if (change > 15) { + next + } + + prev_idx <- prev_idx[1] # Take first match + + current_ci <- current_stats$Mean_CI[i] + previous_ci <- previous_stats$Mean_CI[prev_idx] + + if (is.na(current_ci) || is.na(previous_ci) || previous_ci == 0) { + result$interpretation[i] <- "No previous data" + next + } + + # Calculate percentage change + pct_change <- ((current_ci - previous_ci) / previous_ci) * 100 + result$mean_ci_pct_change[i] <- round(pct_change, 2) + + # Add interpretation + if (pct_change > 15) { result$interpretation[i] <- "Rapid growth" - } else if (change > 5) { + } else if (pct_change > 5) { result$interpretation[i] <- "Positive growth" - } else if (change > -5) { + } else if (pct_change > -5) { result$interpretation[i] <- "Stable" - } else if (change > -15) { + } else if (pct_change > -15) { result$interpretation[i] <- "Declining" } else { result$interpretation[i] <- "Rapid decline" @@ -210,9 +265,37 @@ calculate_area_change_kpi <- function(current_stats, previous_stats) { #' #' @return Data frame with field-level TCH forecasts calculate_tch_forecasted_kpi <- function(field_statistics, harvesting_data = NULL, field_boundaries_sf = NULL) { + + # Handle both naming conventions (Field_id/Mean_CI vs field_idx/mean_ci) + if ("Field_id" %in% names(field_statistics)) { + # Add field_idx to match field_boundaries row numbers + field_statistics <- field_statistics %>% + mutate(field_idx = match(Field_id, field_boundaries_sf$field)) + mean_ci_col <- "Mean_CI" + } else { + mean_ci_col <- "mean_ci" + } + + # Filter out any fields without a match + field_statistics <- field_statistics %>% + filter(!is.na(field_idx)) + + if (nrow(field_statistics) == 0) { + warning("No fields matched between statistics and boundaries") + return(data.frame( + field_idx = integer(), + mean_ci = numeric(), + tch_forecasted = numeric(), + tch_lower_bound = numeric(), + tch_upper_bound = numeric(), + confidence = character(), + stringsAsFactors = FALSE + )) + } + result <- data.frame( field_idx = field_statistics$field_idx, - mean_ci = field_statistics$mean_ci, + mean_ci = field_statistics[[mean_ci_col]], tch_forecasted = NA_real_, tch_lower_bound = NA_real_, tch_upper_bound = NA_real_, @@ -360,73 +443,6 @@ calculate_weed_presence_kpi <- function(ci_pixels_by_field) { return(result) } -#' Calculate Gap Filling Score KPI (placeholder) -#' @param ci_raster Current week CI raster -#' @param field_boundaries Field boundaries -#' @return List with summary data frame and field-level results data frame -calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { - # Handle both sf and SpatVector inputs - if (!inherits(field_boundaries, "SpatVector")) { - field_boundaries_vect <- terra::vect(field_boundaries) - } else { - field_boundaries_vect <- field_boundaries - } - - field_results <- data.frame() - - for (i in seq_len(nrow(field_boundaries))) { - field_name <- if ("field" %in% names(field_boundaries)) field_boundaries$field[i] else NA_character_ - sub_field_name <- if ("sub_field" %in% names(field_boundaries)) field_boundaries$sub_field[i] else NA_character_ - field_vect <- field_boundaries_vect[i] - - # Extract CI values using helper function - ci_values <- extract_ci_values(ci_raster, field_vect) - valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)] - - if (length(valid_values) > 1) { - # Gap score using 2σ below median to detect outliers - median_ci <- median(valid_values) - sd_ci <- sd(valid_values) - outlier_threshold <- median_ci - (2 * sd_ci) - low_ci_pixels <- sum(valid_values < outlier_threshold) - total_pixels <- length(valid_values) - gap_score <- round((low_ci_pixels / total_pixels) * 100, 2) - - # Classify gap severity - gap_level <- dplyr::case_when( - gap_score < 10 ~ "Minimal", - gap_score < 25 ~ "Moderate", - TRUE ~ "Significant" - ) - - field_results <- rbind(field_results, data.frame( - field = field_name, - sub_field = sub_field_name, - gap_level = gap_level, - gap_score = gap_score, - mean_ci = mean(valid_values), - outlier_threshold = outlier_threshold - )) - } else { - # Not enough valid data, fill with NA row - field_results <- rbind(field_results, data.frame( - field = field_name, - sub_field = sub_field_name, - gap_level = NA_character_, - gap_score = NA_real_, - mean_ci = NA_real_, - outlier_threshold = NA_real_ - )) - } - } - # Summarize results - gap_summary <- field_results %>% - dplyr::group_by(gap_level) %>% - dplyr::summarise(field_count = n(), .groups = 'drop') %>% - dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1)) - - return(list(summary = gap_summary, field_results = field_results)) -} # ============================================================================ # KPI ORCHESTRATOR AND REPORTING @@ -627,41 +643,135 @@ calculate_all_field_analysis_agronomic_support <- function( message("\n============ AURA KPI CALCULATION (6 KPIs) ============") - # Load current week mosaic - message("Loading current week mosaic...") - current_mosaic <- load_weekly_ci_mosaic(current_week, current_year, current_mosaic_dir) + # DETECT STRUCTURE FIRST - before any use of is_per_field + week_file <- sprintf("week_%02d_%d.tif", current_week, current_year) + field_dirs <- list.dirs(current_mosaic_dir, full.names = FALSE, recursive = FALSE) + field_dirs <- field_dirs[field_dirs != ""] - if (is.null(current_mosaic)) { - stop("Could not load current week mosaic") - } + is_per_field <- length(field_dirs) > 0 && + file.exists(file.path(current_mosaic_dir, field_dirs[1], week_file)) - # Extract field statistics - message("Extracting field statistics from current mosaic...") - current_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf) - #Extract CI pixels for each field individually - ci_pixels_by_field <- list() - for (i in seq_len(nrow(field_boundaries_sf))) { - field_vect <- terra::vect(field_boundaries_sf[i, ]) - ci_pixels_by_field[[i]] <- extract_ci_values(current_mosaic, field_vect) + if (is_per_field) { + message("Detected per-field mosaic structure...") + message("Using field-by-field extraction (similar to cane supply workflow)...") + + # Use the same extraction method as cane supply + current_stats <- calculate_field_statistics( + field_boundaries_sf, + current_week, + current_year, + current_mosaic_dir, + report_date = Sys.Date() + ) + + # Extract CI pixels for each field from their individual mosaics + ci_pixels_by_field <- list() + for (i in seq_len(nrow(field_boundaries_sf))) { + field_name <- field_boundaries_sf$field[i] + field_mosaic_path <- file.path(current_mosaic_dir, field_name, week_file) + + if (file.exists(field_mosaic_path)) { + tryCatch({ + field_raster <- terra::rast(field_mosaic_path) + ci_band <- field_raster[["CI"]] + field_vect <- terra::vect(field_boundaries_sf[i, ]) + ci_pixels_by_field[[i]] <- extract_ci_values(ci_band, field_vect) + }, error = function(e) { + message(paste(" Warning: Could not extract CI for field", field_name, ":", e$message)) + ci_pixels_by_field[[i]] <- NULL + }) + } else { + ci_pixels_by_field[[i]] <- NULL + } + } + + # For uniformity calculations that need a reference raster, load first available + current_mosaic <- NULL + for (field_name in field_dirs) { + field_mosaic_path <- file.path(current_mosaic_dir, field_name, week_file) + if (file.exists(field_mosaic_path)) { + tryCatch({ + current_mosaic <- terra::rast(field_mosaic_path)[["CI"]] + break + }, error = function(e) { + next + }) + } + } + + } else { + # Single-file mosaic (original behavior) + message("Loading current week mosaic...") + current_mosaic <- load_weekly_ci_mosaic(current_week, current_year, current_mosaic_dir) + + if (is.null(current_mosaic)) { + stop("Could not load current week mosaic") + } + + message("Extracting field statistics from current mosaic...") + current_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf) + + # Extract CI pixels for each field individually + ci_pixels_by_field <- list() + for (i in seq_len(nrow(field_boundaries_sf))) { + field_vect <- terra::vect(field_boundaries_sf[i, ]) + ci_pixels_by_field[[i]] <- extract_ci_values(current_mosaic, field_vect) + } } # Load previous week mosaic (if available) previous_stats <- NULL - if (!is.null(previous_mosaic_dir)) { + if (!is.null(previous_mosaic_dir) || is_per_field) { target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1) message(paste("Loading previous week mosaic (week", target_prev$week, target_prev$year, ")...")) - previous_mosaic <- load_weekly_ci_mosaic(target_prev$week, target_prev$year, previous_mosaic_dir) - if (!is.null(previous_mosaic)) { - previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf) - } else { - message("Previous week mosaic not available - skipping area change KPI") + if (is_per_field) { + # Try loading previous week from the same directory structure + prev_week_file <- sprintf("week_%02d_%d.tif", target_prev$week, target_prev$year) + prev_field_exists <- any(sapply(field_dirs, function(field) { + file.exists(file.path(current_mosaic_dir, field, prev_week_file)) + })) + + if (prev_field_exists) { + message(" Found previous week per-field mosaics, calculating statistics...") + previous_stats <- calculate_field_statistics( + field_boundaries_sf, + target_prev$week, + target_prev$year, + current_mosaic_dir, + report_date = Sys.Date() - 7 + ) + } else { + message(" Previous week mosaic not available - skipping area change KPI") + } + } else if (!is.null(previous_mosaic_dir)) { + previous_mosaic <- load_weekly_ci_mosaic(target_prev$week, target_prev$year, previous_mosaic_dir) + + if (!is.null(previous_mosaic)) { + previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf) + } else { + message(" Previous week mosaic not available - skipping area change KPI") + } } } # Calculate 6 KPIs message("\nCalculating KPI 1: Field Uniformity...") - uniformity_kpi <- calculate_field_uniformity_kpi(ci_pixels_by_field, field_boundaries_sf, current_mosaic) + if (is_per_field) { + uniformity_kpi <- calculate_field_uniformity_kpi( + ci_pixels_by_field, + field_boundaries_sf, + ci_band = NULL, + mosaic_dir = current_mosaic_dir, + week_file = week_file + ) + } else { + uniformity_kpi <- calculate_field_uniformity_kpi( + ci_pixels_by_field, + field_boundaries_sf, + current_mosaic + ) + } message("Calculating KPI 2: Area Change...") if (!is.null(previous_stats)) { @@ -679,19 +789,49 @@ calculate_all_field_analysis_agronomic_support <- function( message("Calculating KPI 4: Growth Decline...") growth_decline_kpi <- calculate_growth_decline_kpi( - list(ci_pixels_by_field) # Would need historical data for real trend + ci_pixels_by_field ) message("Calculating KPI 5: Weed Presence...") weed_kpi <- calculate_weed_presence_kpi(ci_pixels_by_field) message("Calculating KPI 6: Gap Filling...") - gap_filling_result <- calculate_gap_filling_kpi(current_mosaic, field_boundaries_sf) - - # Add field_idx to gap filling results - gap_filling_kpi <- gap_filling_result$field_results %>% - mutate(field_idx = row_number()) %>% - select(field_idx, gap_score, gap_level, mean_ci, outlier_threshold) + # Build list of per-field files for this week + per_field_files <- c() + for (field_name in field_dirs) { + field_mosaic_path <- file.path(current_mosaic_dir, field_name, week_file) + if (file.exists(field_mosaic_path)) { + per_field_files <- c(per_field_files, field_mosaic_path) + } + } + + if (length(per_field_files) > 0) { + # Use the common wrapper function (same as cane supply) + gap_scores_result <- calculate_gap_scores(per_field_files, field_boundaries_sf) + + # Convert to the format expected by orchestrator + gap_filling_kpi <- gap_scores_result %>% + mutate(field_idx = match(Field_id, field_boundaries_sf$field)) %>% + select(field_idx, gap_score) %>% + mutate( + gap_level = dplyr::case_when( + gap_score < 10 ~ "Minimal", + gap_score < 25 ~ "Moderate", + TRUE ~ "Significant" + ), + mean_ci = NA_real_, + outlier_threshold = NA_real_ + ) + } else { + # Fallback: no per-field files + gap_filling_kpi <- data.frame( + field_idx = seq_len(nrow(field_boundaries_sf)), + gap_score = NA_real_, + gap_level = NA_character_, + mean_ci = NA_real_, + outlier_threshold = NA_real_ + ) + } # Compile results all_kpis <- list( From e966d778f4afda4bda3af909c120e48ce56b1b1e Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Mon, 16 Feb 2026 16:05:54 +0100 Subject: [PATCH 17/30] Enhance area change KPI calculation to handle multiple field naming conventions and improve interpretation logic for missing previous data --- r_app/80_utils_agronomic_support.R | 73 +++++++++++++++++------------- 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/r_app/80_utils_agronomic_support.R b/r_app/80_utils_agronomic_support.R index b874ee9..8e55893 100644 --- a/r_app/80_utils_agronomic_support.R +++ b/r_app/80_utils_agronomic_support.R @@ -207,48 +207,57 @@ calculate_area_change_kpi <- function(current_stats, previous_stats) { stringsAsFactors = FALSE ) + # Handle case where previous stats is NULL or empty if (is.null(previous_stats) || nrow(previous_stats) == 0) { result$interpretation <- "No previous data" return(result) } # Match fields between current and previous stats + # Handle both naming conventions (Field_id vs field_idx) + if ("Field_id" %in% names(current_stats)) { + current_field_col <- "Field_id" + prev_field_col <- "Field_id" + ci_col <- "Mean_CI" + } else { + current_field_col <- "field_idx" + prev_field_col <- "field_idx" + ci_col <- "mean_ci" + } + + # Create lookup for previous stats + prev_lookup <- setNames( + previous_stats[[ci_col]], + previous_stats[[prev_field_col]] + ) + + # Calculate percentage change for each field for (i in seq_len(nrow(current_stats))) { - field_id <- current_stats$Field_id[i] + current_field_id <- current_stats[[current_field_col]][i] + current_ci <- current_stats[[ci_col]][i] - # Find matching field in previous stats - prev_idx <- which(previous_stats$Field_id == field_id) + # Find matching previous CI value + prev_ci <- prev_lookup[[as.character(current_field_id)]] - if (length(prev_idx) == 0) { - result$interpretation[i] <- "No previous data" - next - } - - prev_idx <- prev_idx[1] # Take first match - - current_ci <- current_stats$Mean_CI[i] - previous_ci <- previous_stats$Mean_CI[prev_idx] - - if (is.na(current_ci) || is.na(previous_ci) || previous_ci == 0) { - result$interpretation[i] <- "No previous data" - next - } - - # Calculate percentage change - pct_change <- ((current_ci - previous_ci) / previous_ci) * 100 - result$mean_ci_pct_change[i] <- round(pct_change, 2) - - # Add interpretation - if (pct_change > 15) { - result$interpretation[i] <- "Rapid growth" - } else if (pct_change > 5) { - result$interpretation[i] <- "Positive growth" - } else if (pct_change > -5) { - result$interpretation[i] <- "Stable" - } else if (pct_change > -15) { - result$interpretation[i] <- "Declining" + if (!is.null(prev_ci) && !is.na(prev_ci) && !is.na(current_ci) && prev_ci > 0) { + # Calculate percentage change + pct_change <- ((current_ci - prev_ci) / prev_ci) * 100 + result$mean_ci_pct_change[i] <- round(pct_change, 2) + + # Add interpretation + if (pct_change > 15) { + result$interpretation[i] <- "Rapid growth" + } else if (pct_change > 5) { + result$interpretation[i] <- "Positive growth" + } else if (pct_change > -5) { + result$interpretation[i] <- "Stable" + } else if (pct_change > -15) { + result$interpretation[i] <- "Declining" + } else { + result$interpretation[i] <- "Rapid decline" + } } else { - result$interpretation[i] <- "Rapid decline" + result$interpretation[i] <- "No previous data" } } From 2e683d0c6df109a1049ec043ab95f820e1b431e5 Mon Sep 17 00:00:00 2001 From: DimitraVeropoulou Date: Mon, 16 Feb 2026 17:25:03 +0100 Subject: [PATCH 18/30] Refactor KPI report generation to improve data handling and summary table creation; update field details mapping and enhance logging for better traceability. --- r_app/.gitignore | 1 + r_app/80_utils_cane_supply.R | 147 ----------- ..._CI_report_with_kpis_agronomic_support.Rmd | 244 +++++++++++++----- 3 files changed, 176 insertions(+), 216 deletions(-) diff --git a/r_app/.gitignore b/r_app/.gitignore index ec29223..d159461 100644 --- a/r_app/.gitignore +++ b/r_app/.gitignore @@ -8,6 +8,7 @@ renv *.tmp *.swp *.save +*.png # Ignore files related to Rproj .Rproj.user/ diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R index 6c93221..2d3a7a7 100644 --- a/r_app/80_utils_cane_supply.R +++ b/r_app/80_utils_cane_supply.R @@ -166,153 +166,6 @@ calculate_status_alert <- function(imminent_prob, age_week, weekly_ci_change, me NA_character_ } -# #' Calculate Gap Filling Score KPI (2σ method) -# #' @param ci_raster Current week CI raster -# #' @param field_boundaries Field boundaries -# #' @return Data frame with field-level gap filling scores -# calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { -# safe_log("Calculating Gap Filling Score KPI (placeholder)") - -# # Handle both sf and SpatVector inputs -# if (!inherits(field_boundaries, "SpatVector")) { -# field_boundaries_vect <- terra::vect(field_boundaries) -# } else { -# field_boundaries_vect <- field_boundaries -# } - -# # Ensure field_boundaries_vect is valid and matches field_boundaries dimensions -# n_fields_vect <- length(field_boundaries_vect) -# n_fields_sf <- nrow(field_boundaries) - -# if (n_fields_sf != n_fields_vect) { -# warning(paste("Field boundary mismatch: nrow(field_boundaries)=", n_fields_sf, "vs length(field_boundaries_vect)=", n_fields_vect, ". Using actual SpatVector length.")) -# } - -# field_results <- data.frame() - -# for (i in seq_len(nrow(field_boundaries))) { -# field_name <- field_boundaries$field[i] -# sub_field_name <- field_boundaries$sub_field[i] -# field_vect <- field_boundaries_vect[i] - -# # Extract CI values using helper function -# ci_values <- extract_ci_values(ci_raster, field_vect) -# valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)] - -# if (length(valid_values) > 1) { -# # Gap score using 2σ below median to detect outliers -# median_ci <- median(valid_values) -# sd_ci <- sd(valid_values) -# outlier_threshold <- median_ci - (2 * sd_ci) -# low_ci_pixels <- sum(valid_values < outlier_threshold) -# total_pixels <- length(valid_values) -# gap_score <- round((low_ci_pixels / total_pixels) * 100, 2) - -# # Classify gap severity -# gap_level <- dplyr::case_when( -# gap_score < 10 ~ "Minimal", -# gap_score < 25 ~ "Moderate", -# TRUE ~ "Significant" -# ) - -# field_results <- rbind(field_results, data.frame( -# field = field_name, -# sub_field = sub_field_name, -# gap_level = gap_level, -# gap_score = gap_score, -# mean_ci = mean(valid_values), -# outlier_threshold = outlier_threshold -# )) -# } else { -# # Not enough valid data, fill with NA row -# field_results <- rbind(field_results, data.frame( -# field = field_name, -# sub_field = sub_field_name, -# gap_level = NA_character_, -# gap_score = NA_real_, -# mean_ci = NA_real_, -# outlier_threshold = NA_real_ -# )) -# } -# } -# return(list(field_results = field_results)) -# } - - -# #' Calculate gap filling scores for all per-field mosaics -# #' This is a wrapper function that processes multiple per-field mosaic files -# #' and calculates gap scores for each field. -# #' @param per_field_files Character vector of paths to per-field mosaic TIFFs -# #' @param field_boundaries_sf sf object with field geometries -# #' @return data.frame with Field_id and gap_score columns -# calculate_gap_scores <- function(per_field_files, field_boundaries_sf) { -# message("\nCalculating gap filling scores (2σ method)...") -# message(paste(" Using per-field mosaics for", length(per_field_files), "fields")) - -# field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field) - -# process_gap_for_field <- function(field_file) { -# field_id <- basename(dirname(field_file)) -# field_bounds <- field_boundaries_by_id[[field_id]] - -# if (is.null(field_bounds) || nrow(field_bounds) == 0) { -# return(data.frame(Field_id = field_id, gap_score = NA_real_)) -# } - -# tryCatch({ -# field_raster <- terra::rast(field_file) -# ci_band_name <- "CI" -# if (!(ci_band_name %in% names(field_raster))) { -# return(data.frame(Field_id = field_id, gap_score = NA_real_)) -# } -# field_ci_band <- field_raster[[ci_band_name]] -# names(field_ci_band) <- "CI" - -# gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds) - -# if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) { -# return(data.frame(Field_id = field_id, gap_score = NA_real_)) -# } - -# gap_scores <- gap_result$field_results -# gap_scores$Field_id <- gap_scores$field -# gap_scores <- gap_scores[, c("Field_id", "gap_score")] - -# stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE)) -# }, error = function(e) { -# message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message)) -# data.frame(Field_id = field_id, gap_score = NA_real_) -# }) -# } - -# # Process fields sequentially with progress bar -# message(" Processing gap scores for ", length(per_field_files), " fields...") -# pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50) - -# results_list <- lapply(seq_along(per_field_files), function(idx) { -# result <- process_gap_for_field(per_field_files[[idx]]) -# utils::setTxtProgressBar(pb, idx) -# result -# }) -# close(pb) - -# gap_scores_df <- dplyr::bind_rows(results_list) - -# if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) { -# gap_scores_df <- gap_scores_df %>% -# dplyr::group_by(Field_id) %>% -# dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop") - -# message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields")) -# message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", -# round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%")) -# } else { -# message(" WARNING: No gap scores calculated from per-field mosaics") -# gap_scores_df <- NULL -# } - -# return(gap_scores_df) -# } #' Build complete per-field KPI dataframe with all 22 columns #' @param current_stats data.frame with current week statistics from load_or_calculate_weekly_stats diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index c2d7d3d..7411e33 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -107,15 +107,15 @@ safe_log(paste("weekly_CI_mosaic path:", weekly_CI_mosaic)) # NO workspace-wide fallback that might load wrong project # Build expected KPI file path strictly from project_dir -kpi_data_dir <- paths$kpi_reports_dir # Should be: laravel_app/storage/app/{project}/reports/kpis/field_level +kpi_data_dir <- file.path(paths$reports_dir, "kpis") # Should be: laravel_app/storage/app/{project}/reports/kpis # Calculate week from report_date current_week <- as.numeric(format(as.Date(report_date), "%V")) current_year <- as.numeric(format(as.Date(report_date), "%G")) # The ACTUAL filename format from 80_calculate_kpis.R output (after fix) -# Format: {project_dir}_kpi_summary_tables_week{WW}_{YYYY}.rds -kpi_rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", +# Format: {project_dir}_field_analysis_week{WW}_{YYYY}.rds +kpi_rds_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, current_year), ".rds") kpi_rds_path <- file.path(kpi_data_dir, kpi_rds_filename) @@ -139,30 +139,114 @@ if (dir.exists(kpi_data_dir)) { } ) - # Handle new RDS structure (list with $summary_tables, $all_kpis, $field_details) + # Handle RDS structure from 80_utils_agronomic_support.R + # Expected: list(field_analysis = dataframe, kpis = list, summary_tables = list, ...) + # OR just a dataframe (for backward compatibility) + if (!is.null(loaded_data)) { - if (is.list(loaded_data) && "summary_tables" %in% names(loaded_data)) { - # New structure: extract summary_tables from the list - summary_tables <- loaded_data$summary_tables - if (!is.null(loaded_data$field_details)) { - field_details_table <- loaded_data$field_details + # Try to extract field_analysis from different possible structures + if (is.data.frame(loaded_data)) { + # Direct dataframe (simplest case) + field_details_table <- loaded_data + safe_log("✓ Loaded field_analysis dataframe directly") + } else if (is.list(loaded_data)) { + # List structure - try different key names + if ("field_analysis_df" %in% names(loaded_data)) { + field_details_table <- loaded_data$field_analysis_df + safe_log("✓ Loaded field_analysis_df from list") + } else if ("field_analysis" %in% names(loaded_data)) { + field_details_table <- loaded_data$field_analysis + safe_log("✓ Loaded field_analysis from list") + } else if ("kpis" %in% names(loaded_data)) { + # Might be the full output from orchestrator - create combined table + safe_log("✓ Found kpis list in loaded data") + # For now, skip - we need the combined field table + } + + # Also check if summary_tables already exists in the RDS + if ("summary_tables" %in% names(loaded_data)) { + summary_tables <- loaded_data$summary_tables + safe_log("✓ Loaded pre-computed summary_tables from RDS") } - safe_log("✓ Loaded KPI data (new structure with summary_tables)") - kpi_files_exist <- TRUE - } else if (is.list(loaded_data) && length(loaded_data) > 0) { - # Legacy structure: directly use as summary_tables - summary_tables <- loaded_data - safe_log("✓ Loaded KPI tables (legacy structure)") - kpi_files_exist <- TRUE } - if (kpi_files_exist) { - safe_log(paste("✓ Available KPI tables:", paste(names(summary_tables), collapse=", "))) + # If we successfully loaded field_details_table, transform it into summary_tables + if (!is.null(field_details_table) && nrow(field_details_table) > 0) { + safe_log(paste("✓ Loaded field_details_table with", nrow(field_details_table), "fields")) + safe_log(paste(" Columns:", paste(names(field_details_table), collapse=", "))) + + # Only create summary_tables if not already loaded from RDS + if (is.null(summary_tables)) { + summary_tables <- list() + + # 1. Uniformity summary - GROUP BY Uniformity_Interpretation and COUNT + if ("Uniformity_Interpretation" %in% names(field_details_table)) { + summary_tables$uniformity <- field_details_table %>% + group_by(interpretation = Uniformity_Interpretation) %>% + summarise(field_count = n(), .groups = 'drop') + safe_log(" ✓ Created uniformity summary") + } + + # 2. Area change summary - GROUP BY Area_Change_Interpretation and COUNT + if ("Area_Change_Interpretation" %in% names(field_details_table)) { + summary_tables$area_change <- field_details_table %>% + group_by(interpretation = Area_Change_Interpretation) %>% + summarise(field_count = n(), .groups = 'drop') + safe_log(" ✓ Created area_change summary") + } + + # 3. Growth decline summary - GROUP BY Trend_Interpretation and COUNT + if ("Trend_Interpretation" %in% names(field_details_table)) { + summary_tables$growth_decline <- field_details_table %>% + group_by(trend_interpretation = Trend_Interpretation) %>% + summarise(field_count = n(), .groups = 'drop') + safe_log(" ✓ Created growth_decline summary") + } + + # 4. Weed pressure summary - GROUP BY Weed_Pressure_Risk and COUNT + if ("Weed_Pressure_Risk" %in% names(field_details_table)) { + summary_tables$weed_pressure <- field_details_table %>% + group_by(weed_pressure_risk = Weed_Pressure_Risk) %>% + summarise(field_count = n(), .groups = 'drop') + safe_log(" ✓ Created weed_pressure summary") + } + + # 5. TCH forecast summary - bin into categories and COUNT + if ("TCH_Forecasted" %in% names(field_details_table)) { + summary_tables$tch_forecast <- field_details_table %>% + filter(!is.na(TCH_Forecasted)) %>% + mutate( + tch_category = case_when( + TCH_Forecasted >= quantile(TCH_Forecasted, 0.75, na.rm = TRUE) ~ "Top 25%", + TCH_Forecasted >= quantile(TCH_Forecasted, 0.25, na.rm = TRUE) ~ "Average", + TRUE ~ "Lowest 25%" + ) + ) %>% + group_by(tch_category) %>% + summarise(field_count = n(), .groups = 'drop') + safe_log(" ✓ Created tch_forecast summary") + } + + # 6. Gap filling summary - GROUP BY Gap_Level and COUNT + if ("Gap_Level" %in% names(field_details_table)) { + summary_tables$gap_filling <- field_details_table %>% + group_by(gap_level = Gap_Level) %>% + summarise(field_count = n(), .groups = 'drop') + safe_log(" ✓ Created gap_filling summary") + } + + safe_log(paste("✓ Created", length(summary_tables), "summary tables from field_details")) + } + + kpi_files_exist <- TRUE + + } else { + safe_log("ERROR: Could not extract field_analysis dataframe from RDS", "ERROR") } } + } else { - safe_log(paste("KPI file not found in:", kpi_rds_path), "WARNING") - safe_log(paste("Expected file:", kpi_rds_filename), "WARNING") + safe_log(paste("KPI file not found:", kpi_rds_path), "WARNING") safe_log(paste("Files in directory:", paste(list.files(kpi_data_dir, pattern="\\.rds$"), collapse=", ")), "WARNING") } } else { @@ -172,6 +256,20 @@ if (dir.exists(kpi_data_dir)) { if (!kpi_files_exist) { safe_log(paste("Skipping KPI sections - no data for", project_dir, "on", report_date), "WARNING") summary_tables <- NULL + field_details_table <- NULL +} + +# DEBUG: Log what was actually loaded +if (exists("summary_tables") && !is.null(summary_tables)) { + safe_log(paste("✓ summary_tables available with", length(summary_tables), "KPIs")) + for (kpi_name in names(summary_tables)) { + kpi_df <- summary_tables[[kpi_name]] + if (!is.null(kpi_df) && is.data.frame(kpi_df)) { + safe_log(paste(" -", kpi_name, ":", nrow(kpi_df), "rows")) + } + } +} else { + safe_log("WARNING: summary_tables is NULL or does not exist", "WARNING") } ``` @@ -306,7 +404,7 @@ if (!is.null(CI_quadrant) && nrow(CI_quadrant) > 0) { ## Key Insights -```{r key_insights, echo=FALSE, results='asis'} +```{r key_insights, echo=TRUE, message=TRUE, warning=TRUE, results='asis'} # Calculate key insights from KPI data if (exists("summary_tables") && !is.null(summary_tables) && length(summary_tables) > 0) { @@ -319,7 +417,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table group_by(interpretation) %>% summarise(count = n(), .groups = 'drop') - for (i in 1:nrow(uniformity_counts)) { + for (i in seq_len(nrow(uniformity_counts))) { status <- uniformity_counts$interpretation[i] count <- uniformity_counts$count[i] if (!is.na(status) && !is.na(count) && count > 0) { @@ -335,7 +433,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table group_by(interpretation) %>% summarise(count = n(), .groups = 'drop') - for (i in 1:nrow(area_counts)) { + for (i in seq_len(nrow(area_counts))) { status <- area_counts$interpretation[i] count <- area_counts$count[i] if (!is.na(status) && !is.na(count) && count > 0) { @@ -367,7 +465,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table group_by(weed_pressure_risk) %>% summarise(count = n(), .groups = 'drop') - for (i in 1:nrow(weed_counts)) { + for (i in seq_len(nrow(weed_counts))) { risk <- weed_counts$weed_pressure_risk[i] count <- weed_counts$count[i] if (!is.na(risk) && !is.na(count) && count > 0) { @@ -398,12 +496,12 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table tryCatch({ # KPI metadata for display kpi_display_order <- list( - uniformity = list(display = "Field Uniformity", level_col = "Status", count_col = "Field Count"), - area_change = list(display = "Area Change", level_col = "Status", count_col = "Field Count"), - tch_forecast = list(display = "TCH Forecasted", level_col = NULL, count_col = "Fields"), - growth_decline = list(display = "Growth Decline", level_col = "Trend", count_col = "Field Count"), - weed_pressure = list(display = "Weed Presence", level_col = "Risk Level", count_col = "Field Count"), - gap_filling = list(display = "Gap Filling", level_col = NULL, count_col = NULL) + uniformity = list(display = "Field Uniformity", level_col = "interpretation", count_col = "field_count"), + area_change = list(display = "Area Change", level_col = "interpretation", count_col = "field_count"), + tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", count_col = "field_count"), + growth_decline = list(display = "Growth Decline", level_col = "trend_interpretation", count_col = "field_count"), + weed_pressure = list(display = "Weed Presence", level_col = "weed_pressure_risk", count_col = "field_count"), + gap_filling = list(display = "Gap Filling", level_col = "gap_level", count_col = "field_count") ) standardize_kpi <- function(df, level_col, count_col) { @@ -479,7 +577,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table } } - print(ft) + ft } else { cat("No valid KPI summary tables found for display.\n") } @@ -494,6 +592,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table } ``` +\newpage ## Field Alerts ```{r field_alerts_table, echo=FALSE, results='asis'} @@ -839,6 +938,7 @@ tryCatch({ }) ``` +\newpage ### Chlorophyll Index (CI) Overview Map - Current Week ```{r render_farm_ci_map, echo=FALSE, message=FALSE, warning=FALSE, fig.height=5.5, fig.width=6.5, dpi=150, dev='png', message=FALSE, warning=FALSE} @@ -947,6 +1047,7 @@ tryCatch({ }) ``` +\newpage ### Weekly Chlorophyll Index Difference Map ```{r render_farm_ci_diff_map, echo=FALSE, fig.height=5.5, fig.width=6.5, dpi=150, dev='png', message=FALSE, warning=FALSE} @@ -1064,10 +1165,10 @@ tryCatch({ This section provides detailed, field-specific analyses including chlorophyll index maps, trend graphs, and performance metrics. Each field is analyzed individually to support targeted interventions. **Key Elements per Field:** -- Current and historical CI maps -- Week-over-week change visualizations -- Cumulative growth trends -- Field-specific KPI summaries +- Current and historical CI maps +- Week-over-week change visualizations +- Cumulative growth trends +- Field-specific KPI summaries *Navigate to the following pages for individual field reports.* @@ -1294,6 +1395,7 @@ tryCatch({ }) ``` +\newpage ## KPI Summary by Field ## Detailed Field Performance Summary @@ -1337,46 +1439,50 @@ if (!is.null(CI_quadrant) && nrow(CI_quadrant) > 0) { safe_log("CI quadrant data unavailable - field ages will not be included in detailed table", "WARNING") } -# Clean up the field details table - remove sub field column and round numeric values -# Check if field_details_table was loaded successfully -if (!exists("field_details_table") || is.null(field_details_table)) { - # Initialize empty tibble with expected columns - field_details_clean <- tibble( - Field = character(), - `Field Size (ha)` = numeric(), - `Growth Uniformity` = character(), - `Yield Forecast (t/ha)` = numeric(), - `Gap Score` = numeric(), - `Decline Risk` = character(), - `Weed Risk` = character(), - `Mean CI` = numeric(), - `CV Value` = numeric() - ) +# Transform field_details_table to display format with proper column names +if (!exists("field_details_table") || is.null(field_details_table) || nrow(field_details_table) == 0) { + safe_log("No field details available for table", "WARNING") + cat("No field-level KPI data available for this report period.\n") + } else { + # Map raw KPI columns to display names field_details_clean <- field_details_table %>% + mutate( + Field = Field_id, + `Field Size (ha)` = NA_real_, # Not available in KPI output, would need to come from boundaries + `Growth Uniformity` = Uniformity_Interpretation, + `Yield Forecast (t/ha)` = TCH_Forecasted, + `Gap Score` = Gap_Score, + `Decline Risk` = Decline_Severity, + `Weed Risk` = Weed_Pressure_Risk, + `Mean CI` = Mean_CI, + `CV Value` = CV + ) %>% left_join(field_ages, by = "Field") %>% mutate( - `Yield Forecast (t/ha)` = ifelse(is.na(Age_days) | Age_days < 240, NA_real_, `Yield Forecast (t/ha)`) + # Only show yield forecast for fields >= 240 days old + `Yield Forecast (t/ha)` = if_else(is.na(Age_days) | Age_days < 240, + NA_real_, + `Yield Forecast (t/ha)`), + # Round numeric columns + `Mean CI` = round(`Mean CI`, 2), + `CV Value` = round(`CV Value`, 2), + `Gap Score` = round(`Gap Score`, 0), + `Yield Forecast (t/ha)` = round(`Yield Forecast (t/ha)`, 1) ) %>% - select(Field, `Field Size (ha)`, `Growth Uniformity`, `Yield Forecast (t/ha)`, `Gap Score`, `Decline Risk`, `Weed Risk`, `Mean CI`, `CV Value`) %>% # Reorder columns as requested - mutate( - `Mean CI` = round(`Mean CI`, 2), # Round to 2 decimal places - `CV Value` = round(`CV Value`, 2), # Round to 2 decimal places - `Gap Score` = round(`Gap Score`, 0) # Round to nearest integer - ) + select(Field, `Field Size (ha)`, `Growth Uniformity`, `Yield Forecast (t/ha)`, + `Gap Score`, `Decline Risk`, `Weed Risk`, `Mean CI`, `CV Value`) + + # Display the cleaned field table with flextable + col_widths <- c(0.97, 0.73, 0.80, 0.80, 0.65, 0.73, 0.65, 0.56, 0.48) + + ft <- flextable(field_details_clean) %>% + set_caption("Detailed Field Performance Summary") %>% + width(width = col_widths) %>% + theme_booktabs() + + knit_print(ft) } - - -# Display the cleaned field table with flextable -# Set column widths to fit page (approximately 6.5 inches for 1-inch margins) -# Scale widths proportionally: original total = 8.0 inches, scale to 6.2 inches -col_widths <- c(0.97, 0.73, 0.80, 0.80, 0.65, 0.73, 0.65, 0.56, 0.48) # inches for each column - -ft <- flextable(field_details_clean) %>% - set_caption("Detailed Field Performance Summary") %>% - width(width = col_widths) - -ft ``` \newpage From e4e19df0c71219a679577ed6f187a8704ef9be14 Mon Sep 17 00:00:00 2001 From: Timon Date: Tue, 17 Feb 2026 13:46:43 +0100 Subject: [PATCH 19/30] Update CI report parameters and improve map legend configurations - Changed report date in CI report for cane supply to "2026-02-04". - Updated output file naming convention for agronomic support report to reflect new report date. - Enhanced map creation functions to allow customizable legend positions and improved layout settings. - Adjusted widths for map arrangements to ensure better visual representation. - Fixed minor issues in ggplot aesthetics for clearer legend positioning and improved readability. - Corrected field size unit from hectares to acres in KPI summary generation. --- python_app/.gitignore | 1 - r_app/80_utils_cane_supply.R | 12 +- ..._CI_report_with_kpis_agronomic_support.Rmd | 310 +++++++++--------- r_app/91_CI_report_with_kpis_cane_supply.Rmd | 8 +- r_app/MANUAL_PIPELINE_RUNNER.R | 6 +- r_app/report_utils.R | 226 ++++++++----- 6 files changed, 319 insertions(+), 244 deletions(-) diff --git a/python_app/.gitignore b/python_app/.gitignore index 5199317..4fca4c8 100644 --- a/python_app/.gitignore +++ b/python_app/.gitignore @@ -39,7 +39,6 @@ dist/ *.bak *.swp *.swo -*.swp *.png diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R index 2d3a7a7..ad2cdf5 100644 --- a/r_app/80_utils_cane_supply.R +++ b/r_app/80_utils_cane_supply.R @@ -49,14 +49,14 @@ calculate_field_acreages <- function(field_boundaries_sf) { ) # Calculate area for valid geometries - for (idx in which(lookup_df$geometry_valid)) { + valid_indices <- which(lookup_df$geometry_valid) + areas_ha <- vapply(valid_indices, function(idx) { tryCatch({ area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ])) - lookup_df$area_ha[idx] <- area_m2 / 10000 - }, error = function(e) { - lookup_df$area_ha[idx] <<- NA_real_ - }) - } + area_m2 / 10000 + }, error = function(e) NA_real_) + }, numeric(1)) + lookup_df$area_ha[valid_indices] <- areas_ha # Convert hectares to acres lookup_df %>% diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 7411e33..0aa02f7 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -3,7 +3,7 @@ params: ref: "word-styles-reference-var1.docx" output_file: "CI_report.docx" report_date: !r Sys.Date() - data_dir: "angata" + data_dir: "aura" mail_day: "Wednesday" borders: FALSE ci_plot_type: "both" @@ -61,6 +61,10 @@ suppressPackageStartupMessages({ library(officer) # For Word document manipulation (custom formatting, headers, footers) }) +# Configure tmap for static plotting (required for legend.outside to work) +tmap_mode("plot") # CRITICAL: Must be "plot" mode for legends outside to render properly +tmap_options(component.autoscale = FALSE) + # Load custom utility functions tryCatch({ source("report_utils.R") @@ -271,6 +275,8 @@ if (exists("summary_tables") && !is.null(summary_tables)) { } else { safe_log("WARNING: summary_tables is NULL or does not exist", "WARNING") } + +# summary_tables # Uncomment for debugging ``` ```{r calculate_dates_and_weeks, message=FALSE, warning=FALSE, include=FALSE} @@ -388,6 +394,15 @@ if (!is.null(CI_quadrant) && nrow(CI_quadrant) > 0) { } ``` + +::: {custom-style="Cover_title" style="text-align:center; margin-top:120px;"} +Satellite Based Field Reporting +::: + +::: {custom-style="Cover_subtitle" style="text-align:center; margin-top:18px;"} +Chlorophyll Index (CI) Monitoring Report — `r toupper(params$data_dir)` Farm (Week `r { rd <- params$report_date; rd <- if (inherits(rd, "Date")) rd else suppressWarnings(as.Date(rd)); if (is.na(rd)) rd <- Sys.Date(); if (!is.null(params$week)) params$week else format(rd, '%V') }`, `r { rd <- params$report_date; rd <- if (inherits(rd, "Date")) rd else suppressWarnings(as.Date(rd)); if (is.na(rd)) rd <- Sys.Date(); format(rd, '%Y') }`) +::: + ## Report Summary **Farm Location:** `r toupper(project_dir)` Estate @@ -404,7 +419,7 @@ if (!is.null(CI_quadrant) && nrow(CI_quadrant) > 0) { ## Key Insights -```{r key_insights, echo=TRUE, message=TRUE, warning=TRUE, results='asis'} +```{r key_insights, echo=FALSE, results='asis'} # Calculate key insights from KPI data if (exists("summary_tables") && !is.null(summary_tables) && length(summary_tables) > 0) { @@ -414,8 +429,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table if (!is.null(summary_tables$uniformity) && nrow(summary_tables$uniformity) > 0) { cat("**Field Uniformity:**\n") uniformity_counts <- summary_tables$uniformity %>% - group_by(interpretation) %>% - summarise(count = n(), .groups = 'drop') + dplyr::select(interpretation, count = field_count) for (i in seq_len(nrow(uniformity_counts))) { status <- uniformity_counts$interpretation[i] @@ -430,8 +444,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table if (!is.null(summary_tables$area_change) && nrow(summary_tables$area_change) > 0) { cat("\n**Area Change Status:**\n") area_counts <- summary_tables$area_change %>% - group_by(interpretation) %>% - summarise(count = n(), .groups = 'drop') + dplyr::select(interpretation, count = field_count) for (i in seq_len(nrow(area_counts))) { status <- area_counts$interpretation[i] @@ -446,10 +459,9 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table if (!is.null(summary_tables$growth_decline) && nrow(summary_tables$growth_decline) > 0) { cat("\n**Growth Trends (4-Week):**\n") growth_counts <- summary_tables$growth_decline %>% - group_by(trend_interpretation) %>% - summarise(count = n(), .groups = 'drop') + dplyr::select(trend_interpretation, count = field_count) - for (i in 1:nrow(growth_counts)) { + for (i in seq_len(nrow(growth_counts))) { trend <- growth_counts$trend_interpretation[i] count <- growth_counts$count[i] if (!is.na(trend) && !is.na(count) && count > 0) { @@ -462,8 +474,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table if (!is.null(summary_tables$weed_pressure) && nrow(summary_tables$weed_pressure) > 0) { cat("\n**Weed/Pest Pressure Risk:**\n") weed_counts <- summary_tables$weed_pressure %>% - group_by(weed_pressure_risk) %>% - summarise(count = n(), .groups = 'drop') + dplyr::select(weed_pressure_risk, count = field_count) for (i in seq_len(nrow(weed_counts))) { risk <- weed_counts$weed_pressure_risk[i] @@ -475,7 +486,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table } # 5. Total fields analyzed - total_fields <- nrow(summary_tables$uniformity) + total_fields <- sum(summary_tables$uniformity$field_count, na.rm = TRUE) cat("\n**Total Fields Analyzed:** ", total_fields, "\n", sep="") } else { @@ -515,11 +526,11 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table dplyr::transmute( Level = as.character(.data[[level_col]]), Count = as.integer(round(as.numeric(.data[[count_col]]))), - Percent = dplyr::if_else( - is.na(total), - NA_real_, + Percent = if (is.na(total)) { + NA_real_ + } else { round(Count / total * 100, 1) - ) + } ) } @@ -554,21 +565,22 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table if (nrow(combined_df) > 0) { combined_df <- combined_df %>% + dplyr::mutate(KPI_group = KPI) %>% dplyr::group_by(KPI) %>% dplyr::mutate( KPI_display = if_else(dplyr::row_number() == 1, KPI, "") ) %>% - dplyr::ungroup() %>% + dplyr::ungroup() + + kpi_group_sizes <- rle(combined_df$KPI_group)$lengths + + display_df <- combined_df %>% dplyr::select(KPI = KPI_display, Level, Count, Percent) - ft <- flextable(combined_df) %>% + ft <- flextable(display_df) %>% merge_v(j = "KPI") %>% autofit() - kpi_group_sizes <- combined_df %>% - dplyr::group_by(KPI) %>% - dplyr::tally() %>% - dplyr::pull(n) cum_rows <- cumsum(kpi_group_sizes) for (i in seq_along(cum_rows)) { if (i < length(cum_rows)) { @@ -603,7 +615,7 @@ generate_field_alerts <- function(field_details_table) { } # Check for required columns - required_cols <- c("Field", "Field Size (ha)", "Growth Uniformity", "Yield Forecast (t/ha)", + required_cols <- c("Field", "Field Size (acres)", "Growth Uniformity", "Yield Forecast (t/ha)", "Gap Score", "Decline Risk", "Weed Risk", "Mean CI", "CV Value", "Moran's I") missing_cols <- setdiff(required_cols, colnames(field_details_table)) @@ -623,7 +635,7 @@ generate_field_alerts <- function(field_details_table) { # Aggregate data for the field field_summary <- field_data %>% summarise( - field_size = sum(`Field Size (ha)`, na.rm = TRUE), + field_size = sum(`Field Size (acres)`, na.rm = TRUE), uniformity_levels = paste(unique(`Growth Uniformity`), collapse = "/"), avg_yield_forecast = mean(`Yield Forecast (t/ha)`, na.rm = TRUE), max_gap_score = max(`Gap Score`, na.rm = TRUE), @@ -765,7 +777,7 @@ if (!exists("field_details_table") || is.null(field_details_table)) { # Try to calculate field sizes (area) from geometry if available field_sizes <- if (!is.null(sf::st_geometry(AllPivots0)) && !all(sf::st_is_empty(sf::st_geometry(AllPivots0)))) { - sf::st_area(sf::st_geometry(AllPivots0)) / 10000 # Convert m² to hectares + sf::st_area(sf::st_geometry(AllPivots0)) / 4046.86 # Convert m² to acres (1 acre = 4046.86 m²) } else { rep(NA_real_, length(field_names)) } @@ -773,7 +785,7 @@ if (!exists("field_details_table") || is.null(field_details_table)) { # Create minimal field details table with actual data we have + NAs for missing KPI columns field_details_table <- tibble::tibble( Field = field_names, - `Field Size (ha)` = as.numeric(field_sizes), + `Field Size (acres)` = as.numeric(field_sizes), `Growth Uniformity` = NA_character_, `Yield Forecast (t/ha)` = NA_real_, `Gap Score` = NA_real_, @@ -791,8 +803,6 @@ if (!exists("field_details_table") || is.null(field_details_table)) { } ``` -## Farm-Level Overview Maps - ```{r aggregate_farm_level_rasters, message=FALSE, warning=FALSE, include=FALSE} # Aggregate per-field weekly mosaics into single farm-level rasters for visualization # This creates on-the-fly mosaics for current week and historical weeks without saving intermediate files @@ -939,9 +949,8 @@ tryCatch({ ``` \newpage -### Chlorophyll Index (CI) Overview Map - Current Week -```{r render_farm_ci_map, echo=FALSE, message=FALSE, warning=FALSE, fig.height=5.5, fig.width=6.5, dpi=150, dev='png', message=FALSE, warning=FALSE} +```{r render_farm_ci_map, echo=FALSE, message=FALSE, warning=FALSE, fig.height=6.8, fig.width=8.5, dpi=150, dev='png'} # Create farm-level chlorophyll index map with OpenStreetMap basemap tryCatch({ if (!is.null(farm_ci_current_ll)) { @@ -1015,13 +1024,13 @@ tryCatch({ map <- map + # Add scale bar and theme ggspatial::annotation_scale( - location = "br", + location = "tr", width_hint = 0.25 ) + ggplot2::theme_void() + ggplot2::theme( - legend.position = "bottom", - legend.direction = "horizontal", + legend.position = "right", + legend.direction = "vertical", legend.title = ggplot2::element_text(size = 10), legend.text = ggplot2::element_text(size = 9), plot.title = ggplot2::element_text(hjust = 0.5, size = 12, face = "bold"), @@ -1047,10 +1056,7 @@ tryCatch({ }) ``` -\newpage -### Weekly Chlorophyll Index Difference Map - -```{r render_farm_ci_diff_map, echo=FALSE, fig.height=5.5, fig.width=6.5, dpi=150, dev='png', message=FALSE, warning=FALSE} +```{r render_farm_ci_diff_map, echo=FALSE, message=FALSE, warning=FALSE, fig.height=6.8, fig.width=8.5, dpi=150, dev='png'} # Create farm-level CI difference map (week-over-week change) tryCatch({ if (!is.null(farm_ci_diff_week_ll)) { @@ -1125,13 +1131,13 @@ tryCatch({ map <- map + # Add scale bar and theme ggspatial::annotation_scale( - location = "br", + location = "tr", width_hint = 0.25 ) + ggplot2::theme_void() + ggplot2::theme( - legend.position = "bottom", - legend.direction = "horizontal", + legend.position = "right", + legend.direction = "vertical", legend.title = ggplot2::element_text(size = 10), legend.text = ggplot2::element_text(size = 9), plot.title = ggplot2::element_text(hjust = 0.5, size = 12, face = "bold"), @@ -1157,8 +1163,6 @@ tryCatch({ }) ``` -\newpage - # Section 2: Field-by-Field Analysis ## Overview of Field-Level Insights @@ -1174,33 +1178,10 @@ This section provides detailed, field-specific analyses including chlorophyll in \newpage -```{r generate_field_visualizations, eval=TRUE, fig.height=3.8, fig.width=6.5, dpi=150, dev='png', message=TRUE, echo=FALSE, warning=TRUE, include=TRUE, results='asis'} +```{r generate_field_visualizations, echo=FALSE, fig.height=3.8, fig.width=10, dev='png', dpi=150, results='asis'} # Generate detailed visualizations for each field using purrr::walk -# DIAGNOSTIC MODE - Remove this after debugging -cat("\n## DIAGNOSTIC: Starting field visualization processing\n\n") tryCatch({ - # Check prerequisites - cat("- Fields to process:", nrow(AllPivots_merged), "\n") - cat("- Field names:", paste(AllPivots_merged$field, collapse = ", "), "\n") - cat("- Weekly mosaic directory:", weekly_CI_mosaic, "\n") - cat("- CI quadrant data available:", !is.null(CI_quadrant), "\n") - cat("- Harvesting data available:", !is.null(harvesting_data), "\n\n") - - # Check if ci_plot function exists - if (!exists("ci_plot")) { - cat("**ERROR: ci_plot() function not found!**\n\n") - stop("ci_plot function missing") - } - - if (!exists("cum_ci_plot")) { - cat("**ERROR: cum_ci_plot() function not found!**\n\n") - stop("cum_ci_plot function missing") - } - - cat("- ci_plot() function:", "FOUND", "\n") - cat("- cum_ci_plot() function:", "FOUND", "\n\n") - # Prepare merged field list and week/year info AllPivots_merged <- AllPivots0 %>% dplyr::filter(!is.na(field), !is.na(sub_field)) %>% @@ -1227,9 +1208,7 @@ tryCatch({ # Helper function to safely load per-field mosaic if it exists load_per_field_mosaic <- function(base_dir, field_name, week, year) { path <- file.path(base_dir, field_name, paste0("week_", sprintf("%02d", week), "_", year, ".tif")) - cat(paste(" [DEBUG] Field:", field_name, "trying path:", path, "\n")) if (file.exists(path)) { - cat(paste(" ✓ File found\n")) tryCatch({ rast_obj <- terra::rast(path) # Extract CI band if present, otherwise first band @@ -1242,8 +1221,6 @@ tryCatch({ message(paste("Warning: Could not load", path, ":", e$message)) return(NULL) }) - } else { - cat(paste(" ✗ File NOT found\n")) } return(NULL) } @@ -1254,7 +1231,7 @@ tryCatch({ tryCatch({ # Add page break before each field (except first) if (!is_first_field) { - cat("\\newpage\n\n") + cat("\\newpage\n") } is_first_field <<- FALSE @@ -1301,7 +1278,7 @@ tryCatch({ borders = borders, colorblind_friendly = colorblind_friendly ) - cat("\n\n") + #cat("\n\n") } else { message(paste("Warning: No raster data found for field", field_name)) } @@ -1332,20 +1309,51 @@ tryCatch({ benchmark_percentiles = c(10, 50, 90), benchmark_data = benchmarks ) - cat("\n\n") + #cat("\n") } # Add field-specific KPI summary if available - # NOTE: generate_field_kpi_summary function not yet implemented - # Skipping field-level KPI text for now; KPI tables are available in Section 1 - if (FALSE) { # Disabled pending function implementation - # if (exists("field_details_table") && !is.null(field_details_table) && nrow(field_details_table) > 0) { - # kpi_summary <- generate_field_kpi_summary(field_name, field_details_table, CI_quadrant) - # if (!is.null(kpi_summary)) { - # cat(kpi_summary) - # cat("\n\n") - # } - # } + if (exists("field_details_table") && !is.null(field_details_table) && nrow(field_details_table) > 0) { + field_kpi <- field_details_table %>% + dplyr::filter(Field_id == field_name) + + if (nrow(field_kpi) > 0) { + # Format KPIs as compact single line (no interpretations, just values) + kpi_parts <- c( + sprintf("**CV:** %.2f", field_kpi$CV), + sprintf("**Mean CI:** %.2f", field_kpi$Mean_CI) + ) + + # Add Weekly_CI_Change if available (note: capital C and I) + if (!is.null(field_kpi$Weekly_CI_Change) && !is.na(field_kpi$Weekly_CI_Change)) { + change_sign <- ifelse(field_kpi$Weekly_CI_Change >= 0, "+", "") + kpi_parts <- c(kpi_parts, sprintf("**Δ CI:** %s%.2f", change_sign, field_kpi$Weekly_CI_Change)) + } + + # Compact trend display with symbols + trend_compact <- case_when( + grepl("Strong growth", field_kpi$Trend_Interpretation, ignore.case = TRUE) ~ "↑↑", + grepl("Growth|Increasing", field_kpi$Trend_Interpretation, ignore.case = TRUE) ~ "↑", + grepl("Stable|No growth", field_kpi$Trend_Interpretation, ignore.case = TRUE) ~ "→", + grepl("Slight decline", field_kpi$Trend_Interpretation, ignore.case = TRUE) ~ "↓", + grepl("Strong decline|Severe", field_kpi$Trend_Interpretation, ignore.case = TRUE) ~ "↓↓", + TRUE ~ field_kpi$Trend_Interpretation + ) + kpi_parts <- c(kpi_parts, sprintf("**Trend:** %s", trend_compact)) + + if (!is.na(field_kpi$TCH_Forecasted) && field_kpi$TCH_Forecasted > 0) { + kpi_parts <- c(kpi_parts, sprintf("**Yield:** %.1f t/ha", field_kpi$TCH_Forecasted)) + } + + kpi_parts <- c( + kpi_parts, + sprintf("**Gap:** %.0f", field_kpi$Gap_Score), + sprintf("**Weed:** %s", field_kpi$Weed_Pressure_Risk), + sprintf("**Decline:** %s", field_kpi$Decline_Severity) + ) + + cat(paste(kpi_parts, collapse = " | "), "\n") + } } }, error = function(e) { @@ -1396,90 +1404,96 @@ tryCatch({ ``` \newpage -## KPI Summary by Field - -## Detailed Field Performance Summary +## Detailed Field Performance Summary by Field The following table provides a comprehensive overview of all monitored fields with their key performance metrics from the KPI analysis. ```{r detailed_field_table, echo=FALSE, results='asis'} # Detailed field performance table -report_date_obj <- as.Date(report_date) -# Initialize empty dataframe for field_ages if CI_quadrant is unavailable -field_ages <- data.frame(Field = character(), Age_days = numeric()) - -# Try to get field ages from CI quadrant if available -if (!is.null(CI_quadrant) && nrow(CI_quadrant) > 0) { - tryCatch({ - # Identify the current season for each field based on report_date - current_seasons <- CI_quadrant %>% - filter(Date <= report_date_obj) %>% - group_by(field, season) %>% - summarise( - season_start = min(Date), - season_end = max(Date), - .groups = 'drop' - ) %>% - group_by(field) %>% - filter(season == max(season)) %>% - select(field, season) - - # Get current field ages (most recent DOY for each field in their CURRENT SEASON only) - field_ages <- CI_quadrant %>% - inner_join(current_seasons, by = c("field", "season")) %>% - group_by(field) %>% - filter(DOY == max(DOY)) %>% - select(field, DOY) %>% - rename(Field = field, Age_days = DOY) - }, error = function(e) { - safe_log(paste("Error extracting field ages:", e$message), "WARNING") - }) -} else { - safe_log("CI quadrant data unavailable - field ages will not be included in detailed table", "WARNING") -} - -# Transform field_details_table to display format with proper column names if (!exists("field_details_table") || is.null(field_details_table) || nrow(field_details_table) == 0) { safe_log("No field details available for table", "WARNING") cat("No field-level KPI data available for this report period.\n") } else { - # Map raw KPI columns to display names - field_details_clean <- field_details_table %>% + # Calculate field sizes from boundaries (convert to acres) + field_sizes_source <- if (exists("AllPivots_merged") && inherits(AllPivots_merged, "sf")) AllPivots_merged else AllPivots0 + field_sizes_df <- field_sizes_source %>% mutate( - Field = Field_id, - `Field Size (ha)` = NA_real_, # Not available in KPI output, would need to come from boundaries - `Growth Uniformity` = Uniformity_Interpretation, - `Yield Forecast (t/ha)` = TCH_Forecasted, - `Gap Score` = Gap_Score, - `Decline Risk` = Decline_Severity, - `Weed Risk` = Weed_Pressure_Risk, - `Mean CI` = Mean_CI, - `CV Value` = CV + field_size_acres = as.numeric(sf::st_area(geometry) / 4046.86) # m² to acres ) %>% - left_join(field_ages, by = "Field") %>% + sf::st_drop_geometry() %>% + select(field, field_size_acres) + + # Get field ages from CI quadrant if available + field_ages_df <- if (!is.null(CI_quadrant) && nrow(CI_quadrant) > 0) { + tryCatch({ + # Get current season and age for each field + CI_quadrant %>% + filter(Date <= as.Date(report_date)) %>% + group_by(field, season) %>% + summarise(last_date = max(Date), last_doy = max(DOY), .groups = 'drop') %>% + group_by(field) %>% + filter(season == max(season)) %>% + select(field, Age_days = last_doy) + }, error = function(e) { + data.frame(field = character(), Age_days = numeric()) + }) + } else { + data.frame(field = character(), Age_days = numeric()) + } + + # Join field sizes and ages to KPI data, simplified column selection + field_details_clean <- field_details_table %>% + left_join(field_sizes_df, by = c("Field_id" = "field")) %>% + left_join(field_ages_df, by = c("Field_id" = "field")) %>% mutate( # Only show yield forecast for fields >= 240 days old - `Yield Forecast (t/ha)` = if_else(is.na(Age_days) | Age_days < 240, - NA_real_, - `Yield Forecast (t/ha)`), + TCH_Forecasted = if_else(is.na(Age_days) | Age_days < 240, NA_real_, TCH_Forecasted), # Round numeric columns - `Mean CI` = round(`Mean CI`, 2), - `CV Value` = round(`CV Value`, 2), - `Gap Score` = round(`Gap Score`, 0), - `Yield Forecast (t/ha)` = round(`Yield Forecast (t/ha)`, 1) - ) %>% - select(Field, `Field Size (ha)`, `Growth Uniformity`, `Yield Forecast (t/ha)`, - `Gap Score`, `Decline Risk`, `Weed Risk`, `Mean CI`, `CV Value`) + field_size_acres = round(field_size_acres, 1), + Mean_CI = round(Mean_CI, 2), + CV = round(CV, 2), + Gap_Score = round(Gap_Score, 0), + TCH_Forecasted = round(TCH_Forecasted, 1) + ) - # Display the cleaned field table with flextable - col_widths <- c(0.97, 0.73, 0.80, 0.80, 0.65, 0.73, 0.65, 0.56, 0.48) + # Add Weekly_CI_Change if it exists in the data (note: capital C and I) + if ("Weekly_CI_Change" %in% names(field_details_clean)) { + field_details_clean <- field_details_clean %>% + mutate(Weekly_CI_Change = round(Weekly_CI_Change, 2)) %>% + select( + Field = Field_id, + `Field Size (acres)` = field_size_acres, + `Growth Uniformity` = Uniformity_Interpretation, + `Mean CI` = Mean_CI, + `Weekly CI Change` = Weekly_CI_Change, + `Yield Forecast (t/ha)` = TCH_Forecasted, + `Gap Score` = Gap_Score, + `Decline Risk` = Decline_Severity, + `Weed Risk` = Weed_Pressure_Risk, + `CV Value` = CV + ) + } else { + field_details_clean <- field_details_clean %>% + select( + Field = Field_id, + `Field Size (acres)` = field_size_acres, + `Growth Uniformity` = Uniformity_Interpretation, + `Mean CI` = Mean_CI, + `Yield Forecast (t/ha)` = TCH_Forecasted, + `Gap Score` = Gap_Score, + `Decline Risk` = Decline_Severity, + `Weed Risk` = Weed_Pressure_Risk, + `CV Value` = CV + ) + } + # Display the cleaned field table with flextable (fit to page width) ft <- flextable(field_details_clean) %>% set_caption("Detailed Field Performance Summary") %>% - width(width = col_widths) %>% - theme_booktabs() + theme_booktabs() %>% + set_table_properties(width = 1, layout = "autofit") # Fit to 100% page width with auto-adjust knit_print(ft) } @@ -1595,4 +1609,4 @@ ft <- flextable(metadata_info) %>% ft ``` -*This report was automatically generated by the SmartCane monitoring system. For questions or additional analysis, please contact the technical team.* \ No newline at end of file +*This report was automatically generated by the SmartCane monitoring system. For questions or additional analysis, please contact the technical team at info@smartcane.ag.* \ No newline at end of file diff --git a/r_app/91_CI_report_with_kpis_cane_supply.Rmd b/r_app/91_CI_report_with_kpis_cane_supply.Rmd index 085476d..3536556 100644 --- a/r_app/91_CI_report_with_kpis_cane_supply.Rmd +++ b/r_app/91_CI_report_with_kpis_cane_supply.Rmd @@ -2,7 +2,7 @@ params: ref: "word-styles-reference-var1.docx" output_file: CI_report.docx - report_date: "2025-09-30" + report_date: "2026-02-04" data_dir: "angata" mail_day: "Wednesday" borders: FALSE @@ -61,6 +61,10 @@ suppressPackageStartupMessages({ library(flextable) # For formatted tables in Word output (professional table styling) }) +# Configure tmap for static plotting (required for legend.outside to work) +tmap_mode("plot") # CRITICAL: Must be "plot" mode for legends outside to render properly +tmap_options(component.autoscale = FALSE) + # Load custom utility functions tryCatch({ source("r_app/report_utils.R") @@ -1043,4 +1047,4 @@ ft <- flextable(metadata_info) %>% ft ``` -*This report was automatically generated by the SmartCane monitoring system. For questions or additional analysis, please contact the technical team.* \ No newline at end of file +*This report was automatically generated by the SmartCane monitoring system. For questions or additional analysis, please contact the technical team at info@smartcane.ag.* \ No newline at end of file diff --git a/r_app/MANUAL_PIPELINE_RUNNER.R b/r_app/MANUAL_PIPELINE_RUNNER.R index b2c20db..7ea203e 100644 --- a/r_app/MANUAL_PIPELINE_RUNNER.R +++ b/r_app/MANUAL_PIPELINE_RUNNER.R @@ -438,8 +438,8 @@ # rmarkdown::render( rmarkdown::render( "r_app/90_CI_report_with_kpis_agronomic_support.Rmd", - params = list(data_dir = "aura", report_date = as.Date("2022-12-08")), - output_file = "SmartCane_Report_agronomic_support_aura_2022-12-08.docx", + params = list(data_dir = "aura", report_date = as.Date("2026-02-04")), + output_file = "SmartCane_Report_agronomic_support_aura_2026-02-04.docx", output_dir = "laravel_app/storage/app/aura/reports" ) # @@ -450,7 +450,7 @@ rmarkdown::render( rmarkdown::render( "r_app/91_CI_report_with_kpis_cane_supply.Rmd", params = list(data_dir = "angata", report_date = as.Date("2026-02-04")), - output_file = "SmartCane_Report_basemap_test.docx", + output_file = "SmartCane_Report_cane_supply_angata_2026-02-04.docx", output_dir = "laravel_app/storage/app/angata/reports" ) # diff --git a/r_app/report_utils.R b/r_app/report_utils.R index 855d78c..f4346a6 100644 --- a/r_app/report_utils.R +++ b/r_app/report_utils.R @@ -24,7 +24,7 @@ subchunkify <- function(g, fig_height=7, fig_width=5) { "\n`","`` ") - cat(knitr::knit(text = knitr::knit_expand(text = sub_chunk), quiet = TRUE)) + cat(knitr::knit(text = knitr::knit_expand(text = sub_chunk), quiet = TRUE)) } #' Creates a Chlorophyll Index map for a pivot @@ -34,12 +34,13 @@ subchunkify <- function(g, fig_height=7, fig_width=5) { #' @param pivot_spans Additional boundary data for the field #' @param show_legend Whether to show the legend (default: FALSE) #' @param legend_is_portrait Whether to show the legend in portrait orientation (default: FALSE) +#' @param legend_position Position for the legend when shown: "left", "right", "top", "bottom" (default: "bottom") #' @param week Week number to display in the title #' @param age Age of the crop in weeks #' @param borders Whether to display field borders (default: FALSE) #' @return A tmap object with the CI map #' -create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week, age, borders = FALSE, colorblind = FALSE){ +create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, legend_position = "bottom", week, age, borders = FALSE, colorblind = FALSE){ # Input validation if (missing(pivot_raster) || is.null(pivot_raster)) { stop("pivot_raster is required") @@ -64,26 +65,29 @@ create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = map <- tm_shape(pivot_raster, unit = "m") # Add raster with continuous spectrum (fixed scale 8-1 for consistent comparison, reversed) - map <- map + tm_raster("CI", - col_scale = tm_scale_continuous(values = palette, - limits = c(1,8)), - col_legend = tm_legend(title = "CI", - orientation = if(legend_is_portrait) "portrait" else "landscape", - show = show_legend, - position = if(show_legend) tm_pos_out("left", "center") else c("left", "bottom"), - reverse = TRUE - )) + map <- map + tm_raster( + "CI", + col.scale = tm_scale_continuous( + values = palette, + limits = c(1, 8), + ticks = seq(1, 8, by = 1), + outliers.trunc = c(TRUE, TRUE) + ), + col.legend = tm_legend( + title = "CI", + orientation = if (legend_is_portrait) "portrait" else "landscape", + show = show_legend, + position = if (show_legend) tm_pos_out(legend_position, "center") else c("left", "bottom"), + reverse = TRUE + ) + ) # Add layout elements - map <- map + tm_title(text = paste0("Max CI week ", week,"\n", age, " weeks (", age * 7, " days) old"), - size = 0.7) - # Add layout configuration to prevent legend rescaling - map <- map + tm_layout(legend.position = c("left", "bottom"), - legend.outside = FALSE, - inner.margins = 0.05, - asp = 1) # Force 1:1 aspect ratio for consistent sizing - - # Add bounds/view settings for fixed aspect ratio - map <- map + tm_view(asp = 1) + map <- map + tm_layout( + main.title = paste0("Max CI week ", week,"\n", age, " weeks (", age * 7, " days) old"), + main.title.size = 0.7, + #legend.height = 0.85, # Constrain vertical legend height to not exceed map + asp = 1 # Fixed aspect ratio + ) # Add borders if requested if (borders) { @@ -105,13 +109,14 @@ create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = #' @param pivot_spans Additional boundary data for the field #' @param show_legend Whether to show the legend (default: FALSE) #' @param legend_is_portrait Whether to show the legend in portrait orientation (default: FALSE) +#' @param legend_position Position for the legend when shown: "left", "right", "top", "bottom" (default: "bottom") #' @param week_1 First week number for comparison #' @param week_2 Second week number for comparison #' @param age Age of the crop in weeks #' @param borders Whether to display field borders (default: TRUE) #' @return A tmap object with the CI difference map #' -create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week_1, week_2, age, borders = TRUE, colorblind = FALSE){ +create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, legend_position = "bottom", week_1, week_2, age, borders = TRUE, colorblind = FALSE){ # Input validation if (missing(pivot_raster) || is.null(pivot_raster)) { stop("pivot_raster is required") @@ -136,27 +141,30 @@ create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_lege map <- tm_shape(pivot_raster, unit = "m") # Add raster with continuous spectrum (centered at 0 for difference maps, fixed scale, reversed) - map <- map + tm_raster("CI", - col_scale = tm_scale_continuous(values = palette, - midpoint = 0, - limits = c(-3, 3)), - col_legend = tm_legend(title = "CI diff.", - orientation = if(legend_is_portrait) "portrait" else "landscape", - show = show_legend, - position = if(show_legend) tm_pos_out("right", "center") else c("left", "bottom"), - reverse = TRUE - )) + map <- map + tm_raster( + "CI", + col.scale = tm_scale_continuous( + values = palette, + limits = c(-3, 3), + ticks = seq(-3, 3, by = 1), + midpoint = 0, + outliers.trunc = c(TRUE, TRUE) + ), + col.legend = tm_legend( + title = "CI diff.", + orientation = if (legend_is_portrait) "portrait" else "landscape", + show = show_legend, + position = if (show_legend) tm_pos_out(legend_position, "center") else c("left", "bottom"), + reverse = TRUE + ) + ) # Add layout elements - map <- map + tm_title(text = paste0("CI change week ", week_1, " - week ", week_2, "\n", age, " weeks (", age * 7, " days) old"), - size = 0.7) - # Add layout configuration to prevent legend rescaling - map <- map + tm_layout(legend.position = c("right", "bottom"), - legend.outside = FALSE, - inner.margins = 0.05, - asp = 1) # Force 1:1 aspect ratio for consistent sizing - - # Add bounds/view settings for fixed aspect ratio - map <- map + tm_view(asp = 1) + map <- map + tm_layout( + main.title = paste0("CI change week ", week_1, " - week ", week_2, "\n", age, " weeks (", age * 7, " days) old"), + main.title.size = 0.7, + #legend.height = 0.85, # Constrain vertical legend height to not exceed map + asp = 1 # Fixed aspect ratio + ) # Add borders if requested if (borders) { @@ -271,18 +279,16 @@ ci_plot <- function(pivotName, # Create historical maps only if data is available # Build list with all available maps - order matches original: [m2, m1, current, diff_1w, diff_3w] - # Widths match original hardcoded: c(0.23, 0.18, 0.18, 0.18, 0.23) maps_to_arrange <- list() - widths_to_use <- c() field_heading_note <- "" # Try to create 2-week ago map (legend on left) if (!is.null(singlePivot_m2)) { CImap_m2 <- create_CI_map(singlePivot_m2, AllPivots2, joined_spans2, show_legend = TRUE, legend_is_portrait = TRUE, + legend_position = "left", week = week_minus_2, age = age - 2, borders = borders, colorblind = colorblind_friendly) maps_to_arrange <- c(maps_to_arrange, list(CImap_m2)) - widths_to_use <- c(widths_to_use, 0.24) } # Try to create 1-week ago map @@ -291,12 +297,10 @@ ci_plot <- function(pivotName, show_legend = FALSE, legend_is_portrait = FALSE, week = week_minus_1, age = age - 1, borders = borders, colorblind = colorblind_friendly) maps_to_arrange <- c(maps_to_arrange, list(CImap_m1)) - widths_to_use <- c(widths_to_use, 0.17) } # Always add current week map (center position) maps_to_arrange <- c(maps_to_arrange, list(CImap)) - widths_to_use <- c(widths_to_use, 0.17) # Try to create 1-week difference map if (!is.null(abs_CI_last_week)) { @@ -304,21 +308,17 @@ ci_plot <- function(pivotName, show_legend = FALSE, legend_is_portrait = FALSE, week_1 = week, week_2 = week_minus_1, age = age, borders = borders, colorblind = colorblind_friendly) maps_to_arrange <- c(maps_to_arrange, list(CI_max_abs_last_week)) - widths_to_use <- c(widths_to_use, 0.17) } # Try to create 3-week difference map (legend on right) if (!is.null(abs_CI_three_week)) { CI_max_abs_three_week <- create_CI_diff_map(abs_CI_three_week, AllPivots2, joined_spans2, show_legend = TRUE, legend_is_portrait = TRUE, + legend_position = "right", week_1 = week, week_2 = week_minus_3, age = age, borders = borders, colorblind = colorblind_friendly) maps_to_arrange <- c(maps_to_arrange, list(CI_max_abs_three_week)) - widths_to_use <- c(widths_to_use, 0.24) } - # Normalize widths to sum to 1 - widths_to_use <- widths_to_use / sum(widths_to_use) - # Add note if historical data is limited if (length(maps_to_arrange) == 1) { field_heading_note <- " (Current week only - historical data not yet available)" @@ -326,8 +326,21 @@ ci_plot <- function(pivotName, field_heading_note <- " (Limited historical data)" } - # Arrange the maps with normalized widths - tst <- do.call(tmap_arrange, c(maps_to_arrange, list(nrow = 1, widths = widths_to_use))) + # Arrange the maps in a row with more width for first and last (for legends) + # Give maps with legends (1st and 5th) more space: 23%, middle maps get 18% each + widths <- if (length(maps_to_arrange) == 5) { + c(0.23, 0.18, 0.18, 0.18, 0.23) + } else if (length(maps_to_arrange) == 4) { + c(0.25, 0.25, 0.25, 0.25) # Equal if only 4 maps + } else if (length(maps_to_arrange) == 3) { + c(0.33, 0.33, 0.34) # Equal if only 3 maps + } else if (length(maps_to_arrange) == 2) { + c(0.5, 0.5) # Equal if only 2 maps + } else { + NULL # Single map or other cases + } + + tst <- do.call(tmap_arrange, c(maps_to_arrange, list(nrow = 1, widths = widths))) # Output heading and map to R Markdown age_months <- round(age / 4.348, 1) @@ -448,7 +461,14 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " if (facet_on) { g <- ggplot2::ggplot(data = plot_data) + ggplot2::facet_wrap(~season, scales = "free_x") + - ggplot2::geom_line(ggplot2::aes_string(x = x_var, y = "ci_value", col = "sub_field", group = "sub_field")) + + ggplot2::geom_line( + ggplot2::aes( + x = .data[[x_var]], + y = .data[["ci_value"]], + col = .data[["sub_field"]], + group = .data[["sub_field"]] + ) + ) + ggplot2::labs(title = paste("Plot of", y_label), color = "Field Name", y = y_label, @@ -458,10 +478,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " breaks = scales::breaks_pretty(), labels = function(x) round(as.numeric(x - min(x)) / 30.44, 1))) + ggplot2::theme_minimal() + - ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5), + ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5), axis.text.x.top = ggplot2::element_text(hjust = 0.5), axis.title.x.top = ggplot2::element_text(size = 8), - legend.justification = c(1, 0), legend.position = c(1, 0), + legend.justification = c(1, 0), + legend.position = "inside", + legend.position.inside = c(1, 0), legend.title = ggplot2::element_text(size = 8), legend.text = ggplot2::element_text(size = 8)) + ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE)) @@ -490,22 +512,36 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " ) ggplot2::geom_smooth( data = benchmark_subset, - ggplot2::aes_string(x = "benchmark_x", y = "benchmark_value", group = "factor(percentile)"), - color = "gray70", size = 0.5, se = FALSE, inherit.aes = FALSE + ggplot2::aes( + x = .data[["benchmark_x"]], + y = .data[["benchmark_value"]], + group = factor(.data[["percentile"]]) + ), + color = "gray70", linewidth = 0.5, se = FALSE, inherit.aes = FALSE ) } } + # Plot older seasons with lighter lines ggplot2::geom_line( data = plot_data %>% dplyr::filter(!is_latest), - ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"), - size = 0.7, alpha = 0.4 + ggplot2::aes( + x = .data[[x_var]], + y = .data[["ci_value"]], + col = .data[["season"]], + group = .data[["season"]] + ), + linewidth = 0.7, alpha = 0.4 ) + # Plot latest season with thicker, more prominent line ggplot2::geom_line( data = plot_data %>% dplyr::filter(is_latest), - ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"), - size = 1.5, alpha = 1 + ggplot2::aes( + x = .data[[x_var]], + y = .data[["ci_value"]], + col = .data[["season"]], + group = .data[["season"]] + ), + linewidth = 1.5, alpha = 1 ) + ggplot2::labs(title = paste("Plot of", y_label, "for Field", pivotName, title_suffix), color = "Season", @@ -520,10 +556,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " } } + ggplot2::theme_minimal() + - ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5), + ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5), axis.text.x.top = ggplot2::element_text(hjust = 0.5), axis.title.x.top = ggplot2::element_text(size = 8), - legend.justification = c(1, 0), legend.position = c(1, 0), + legend.justification = c(1, 0), + legend.position = "inside", + legend.position.inside = c(1, 0), legend.title = ggplot2::element_text(size = 8), legend.text = ggplot2::element_text(size = 8)) + ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE)) @@ -597,8 +635,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " ) ggplot2::geom_smooth( data = benchmark_subset, - ggplot2::aes_string(x = "benchmark_x", y = "benchmark_value", group = "factor(percentile)"), - color = "gray70", size = 0.5, se = FALSE, inherit.aes = FALSE + ggplot2::aes( + x = .data[["benchmark_x"]], + y = .data[["benchmark_value"]], + group = factor(.data[["percentile"]]) + ), + color = "gray70", linewidth = 0.5, se = FALSE, inherit.aes = FALSE ) } } + @@ -606,14 +648,24 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " # Plot older seasons with lighter lines ggplot2::geom_line( data = plot_data_both %>% dplyr::filter(!is_latest), - ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"), - size = 0.7, alpha = 0.4 + ggplot2::aes( + x = .data[[x_var]], + y = .data[["ci_value"]], + col = .data[["season"]], + group = .data[["season"]] + ), + linewidth = 0.7, alpha = 0.4 ) + # Plot latest season with thicker, more prominent line ggplot2::geom_line( data = plot_data_both %>% dplyr::filter(is_latest), - ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"), - size = 1.5, alpha = 1 + ggplot2::aes( + x = .data[[x_var]], + y = .data[["ci_value"]], + col = .data[["season"]], + group = .data[["season"]] + ), + linewidth = 1.5, alpha = 1 ) + ggplot2::labs(title = paste("CI Analysis for Field", pivotName), color = "Season", @@ -630,12 +682,14 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " } } + ggplot2::theme_minimal() + - ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5), - axis.text.x.top = ggplot2::element_text(hjust = 0.5), - axis.title.x.top = ggplot2::element_text(size = 8), - legend.justification = c(1, 0), legend.position = c(1, 0), - legend.title = ggplot2::element_text(size = 8), - legend.text = ggplot2::element_text(size = 8)) + + ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5), + axis.text.x.top = ggplot2::element_text(hjust = 0.5), + axis.title.x.top = ggplot2::element_text(size = 8), + legend.justification = c(1, 0), + legend.position = "inside", + legend.position.inside = c(1, 0), + legend.title = ggplot2::element_text(size = 8), + legend.text = ggplot2::element_text(size = 8)) + ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE)) # For the rolling mean data, we want to set reasonable y-axis limits @@ -653,9 +707,11 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " dummy_data[["season"]] <- factor("dummy", levels = levels(plot_data_both[["season"]])) g_both <- g_both + - ggplot2::geom_point(data = dummy_data, - ggplot2::aes_string(x = x_var, y = "ci_value"), - alpha = 0, size = 0) # Invisible points to set scale + ggplot2::geom_point( + data = dummy_data, + ggplot2::aes(x = .data[[x_var]], y = .data[["ci_value"]]), + alpha = 0, size = 0 + ) # Invisible points to set scale # Display the combined faceted plot subchunkify(g_both, 2.8, 10) @@ -692,9 +748,11 @@ cum_ci_plot2 <- function(pivotName){ x = "Date", y = "CI Rate") + theme_minimal() + theme(axis.text.x = element_text(hjust = 0.5), - legend.justification = c(1, 0), legend.position = c(1, 0), - legend.title = element_text(size = 8), - legend.text = element_text(size = 8)) + + legend.justification = c(1, 0), + legend.position = "inside", + legend.position.inside = c(1, 0), + legend.title = element_text(size = 8), + legend.text = element_text(size = 8)) + annotate("text", x = midpoint_date, y = 2, label = "No data available", size = 6, hjust = 0.5) subchunkify(g, 3.2, 10) @@ -1076,7 +1134,7 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr # For categorical data, take the most common value or highest risk level field_summary <- field_data %>% summarise( - field_size = sum(`Field Size (ha)`, na.rm = TRUE), + field_size = sum(`Field Size (acres)`, na.rm = TRUE), uniformity_levels = paste(unique(`Growth Uniformity`), collapse = "/"), avg_yield_forecast = ifelse(is.na(`Yield Forecast (t/ha)`[1]), NA, mean(`Yield Forecast (t/ha)`, na.rm = TRUE)), max_gap_score = max(`Gap Score`, na.rm = TRUE), From 14bd0fa47a51d69eaf30c2de4a53db9bf4b10859 Mon Sep 17 00:00:00 2001 From: Timon Date: Tue, 17 Feb 2026 14:00:32 +0100 Subject: [PATCH 20/30] Normalize field detail column names for consistency and improve centroid calculation in cane supply report --- ..._CI_report_with_kpis_agronomic_support.Rmd | 79 ++++++++++++++++--- r_app/91_CI_report_with_kpis_cane_supply.Rmd | 34 ++------ 2 files changed, 73 insertions(+), 40 deletions(-) diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 0aa02f7..0b33379 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -2,7 +2,7 @@ params: ref: "word-styles-reference-var1.docx" output_file: "CI_report.docx" - report_date: !r Sys.Date() + report_date: "2026-02-04" #!r Sys.Date() data_dir: "aura" mail_day: "Wednesday" borders: FALSE @@ -179,6 +179,41 @@ if (dir.exists(kpi_data_dir)) { safe_log(paste("✓ Loaded field_details_table with", nrow(field_details_table), "fields")) safe_log(paste(" Columns:", paste(names(field_details_table), collapse=", "))) + # NORMALIZATION: Ensure critical column names match downstream expectations + # Handle both "Field" and "Field_id" naming conventions + if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) { + field_details_table <- field_details_table %>% + dplyr::rename(Field_id = Field) + safe_log(" ✓ Normalized: renamed Field → Field_id") + } + + # Normalize other common column name variations + column_mappings <- list( + c("CV Value", "CV"), + c("CV", "CV"), # Keep as-is + c("Mean CI", "Mean_CI"), + c("Mean_CI", "Mean_CI"), # Keep as-is + c("Yield Forecast (t/ha)", "TCH_Forecasted"), + c("TCH_Forecasted", "TCH_Forecasted"), # Keep as-is + c("Gap Score", "Gap_Score"), + c("Gap_Score", "Gap_Score"), # Keep as-is + c("Growth Uniformity", "Growth_Uniformity"), + c("Decline Risk", "Decline_Risk"), + c("Weed Risk", "Weed_Risk"), + c("Moran's I", "Morans_I") + ) + + for (mapping in column_mappings) { + old_name <- mapping[1] + new_name <- mapping[2] + if (old_name != new_name && old_name %in% names(field_details_table) && !new_name %in% names(field_details_table)) { + field_details_table <- field_details_table %>% + dplyr::rename(!!new_name := old_name) + safe_log(paste(" ✓ Normalized:", old_name, "→", new_name)) + } + } + + # Only create summary_tables if not already loaded from RDS if (is.null(summary_tables)) { summary_tables <- list() @@ -783,17 +818,18 @@ if (!exists("field_details_table") || is.null(field_details_table)) { } # Create minimal field details table with actual data we have + NAs for missing KPI columns + # IMPORTANT: Use column names that match downstream code expectations (no spaces, match exact names) field_details_table <- tibble::tibble( - Field = field_names, - `Field Size (acres)` = as.numeric(field_sizes), - `Growth Uniformity` = NA_character_, - `Yield Forecast (t/ha)` = NA_real_, - `Gap Score` = NA_real_, - `Decline Risk` = NA_character_, - `Weed Risk` = NA_character_, - `Mean CI` = NA_real_, - `CV Value` = NA_real_, - `Moran's I` = NA_real_ + Field_id = field_names, + Acreage = as.numeric(field_sizes), + Growth_Uniformity = NA_character_, + TCH_Forecasted = NA_real_, + Gap_Score = NA_real_, + Decline_Risk = NA_character_, + Weed_Risk = NA_character_, + Mean_CI = NA_real_, + CV = NA_real_, + Morans_I = NA_real_ ) safe_log(paste("Created field_details_table from geometries for", nrow(field_details_table), "fields")) } @@ -1353,6 +1389,7 @@ tryCatch({ ) cat(paste(kpi_parts, collapse = " | "), "\n") + cat("\n") # Extra newline for paragraph separation without creating empty pages } } @@ -1444,6 +1481,26 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field } # Join field sizes and ages to KPI data, simplified column selection + # DEFENSIVE: Normalize field_details_table column names one more time before joining + # Ensure it has Field_id column (regardless of whether it was from RDS or fallback) + if (!is.null(field_details_table) && nrow(field_details_table) > 0) { + # If Field exists but Field_id doesn't, rename it + if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) { + field_details_table <- field_details_table %>% + dplyr::rename(Field_id = Field) + } + + # Ensure all expected KPI columns exist; add as NA if missing + expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score", + "Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation", + "Decline_Severity", "Weed_Pressure_Risk") + for (col in expected_cols) { + if (!col %in% names(field_details_table)) { + field_details_table[[col]] <- NA + } + } + } + field_details_clean <- field_details_table %>% left_join(field_sizes_df, by = c("Field_id" = "field")) %>% left_join(field_ages_df, by = c("Field_id" = "field")) %>% diff --git a/r_app/91_CI_report_with_kpis_cane_supply.Rmd b/r_app/91_CI_report_with_kpis_cane_supply.Rmd index 3536556..ea0f92f 100644 --- a/r_app/91_CI_report_with_kpis_cane_supply.Rmd +++ b/r_app/91_CI_report_with_kpis_cane_supply.Rmd @@ -514,6 +514,8 @@ tryCatch({ TARGET_CRS <- 4326 # WGS84 for web basemap compatibility (was 32736 UTM) # Process polygons into points + # IMPORTANT: Calculate centroids in projected CRS (UTM 36S for southern Africa) to avoid + # st_centroid warnings about longitude/latitude data, then transform back to WGS84 points_processed <- field_boundaries_sf %>% st_make_valid() %>% mutate( @@ -529,8 +531,9 @@ tryCatch({ analysis_data %>% select(Field_id, Status_trigger), by = c("field" = "Field_id") ) %>% - st_transform(crs = TARGET_CRS) %>% + st_transform(crs = 32736) %>% # UTM zone 36S (southern Africa) st_centroid() %>% + st_transform(crs = TARGET_CRS) %>% bind_cols(st_coordinates(.)) # Validate coordinates - check for NaN, Inf, or missing values @@ -557,30 +560,8 @@ tryCatch({ labels_vec[length(labels_vec)] <- ">30" labels_vec[1] <- "0.1" - # Create dummy point to anchor hexbin grids for consistency - dummy_point <- data.frame( - field = NA, - sub_field = NA, - area_ac = 0, - Status_trigger = NA, - X = min(points_processed$X, na.rm = TRUE), - Y = min(points_processed$Y, na.rm = TRUE), - geometry = NA - ) - - # Convert dummy point to sf and add xy coordinates - dummy_point <- st_as_sf(dummy_point, coords = c("X", "Y"), crs = st_crs(points_ready)) - dummy_point <- cbind(dummy_point, st_coordinates(dummy_point)) - - # Mark dummy point with anchor flag before binding - # Referenced: dummy_point, st_as_sf, st_coordinates, area_ac - dummy_point$anchor_dummy <- TRUE - - # Add dummy point to ensure consistent hexbin grid anchoring - points_ready <- rbind(points_ready, dummy_point) - points_not_ready <- rbind(points_not_ready, dummy_point) - # Calculate data bounds for coordinate limits (prevents basemap scale conflicts) + # Use actual data bounds without dummy points to avoid column mismatch x_limits <- c( floor(min(points_processed$X, na.rm = TRUE) * 20) / 20, # Round down to avoid edge clipping ceiling(max(points_processed$X, na.rm = TRUE) * 20) / 20 # Round up for padding @@ -657,11 +638,6 @@ tryCatch({ ) ) - # Remove dummy point rows after grid anchoring to prevent dummy cells in plot - # Referenced: points_ready, points_not_ready, anchor_dummy flag filtering - points_ready <- points_ready %>% filter(!anchor_dummy, na.rm = TRUE) - points_not_ready <- points_not_ready %>% filter(!anchor_dummy, na.rm = TRUE) - }, error = function(e) { warning("Error creating hexbin map:", e$message) }) From 640e23981568418d392bd80329936fb123b592f7 Mon Sep 17 00:00:00 2001 From: Timon Date: Tue, 17 Feb 2026 14:13:51 +0100 Subject: [PATCH 21/30] Refactor field processing logic to simplify page breaks and enhance markdown formatting in CI report --- r_app/90_CI_report_with_kpis_agronomic_support.Rmd | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 0b33379..9945571 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -1262,15 +1262,8 @@ tryCatch({ } # Iterate through fields using purrr::walk - is_first_field <- TRUE purrr::walk(AllPivots_merged$field, function(field_name) { tryCatch({ - # Add page break before each field (except first) - if (!is_first_field) { - cat("\\newpage\n") - } - is_first_field <<- FALSE - message(paste("Processing field:", field_name)) # Load per-field rasters for all 4 weeks @@ -1388,8 +1381,7 @@ tryCatch({ sprintf("**Decline:** %s", field_kpi$Decline_Severity) ) - cat(paste(kpi_parts, collapse = " | "), "\n") - cat("\n") # Extra newline for paragraph separation without creating empty pages + cat(paste(kpi_parts, collapse = " | "), "\n\n") # Double newline for markdown paragraph break } } From 34159b3003ff01327f443855300713b7d346f664 Mon Sep 17 00:00:00 2001 From: Timon Date: Tue, 17 Feb 2026 15:02:30 +0100 Subject: [PATCH 22/30] Enhance yield prediction capabilities by integrating Random Forest model with Forward Feature Selection; add robust data loading and validation for harvesting data across multiple utility scripts. --- r_app/80_calculate_kpis.R | 19 +- r_app/80_utils_agronomic_support.R | 166 +++++++------- r_app/80_utils_cane_supply.R | 53 +++++ r_app/80_utils_common.R | 334 +++++++++++++++++++++++++++++ 4 files changed, 476 insertions(+), 96 deletions(-) diff --git a/r_app/80_calculate_kpis.R b/r_app/80_calculate_kpis.R index 208a373..f73a002 100644 --- a/r_app/80_calculate_kpis.R +++ b/r_app/80_calculate_kpis.R @@ -141,6 +141,10 @@ suppressPackageStartupMessages({ library(writexl) # For writing Excel outputs (KPI summary tables) library(progress) # For progress bars during field processing + # ML models (for yield prediction KPI) + library(caret) # For training Random Forest with cross-validation + library(CAST) # For Forward Feature Selection in caret models + # ML/Analysis (optional - only for harvest model inference) tryCatch({ library(torch) # For PyTorch model inference (harvest readiness prediction) @@ -359,11 +363,8 @@ main <- function() { stop("ERROR loading field boundaries: ", e$message) }) - # Load harvesting data - if (!exists("harvesting_data")) { - warning("harvesting_data not loaded. TCH KPI will use placeholder values.") - harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric()) - } + # Load harvesting data for yield prediction (using common helper function) + harvesting_data <- load_harvest_data(setup$data_dir) # Extract current week/year from end_date current_week <- as.numeric(format(end_date, "%V")) @@ -379,6 +380,7 @@ main <- function() { ci_rds_path = NULL, harvesting_data = harvesting_data, output_dir = reports_dir_kpi, + data_dir = setup$data_dir, project_dir = project_dir ) @@ -419,11 +421,8 @@ main <- function() { stop("ERROR loading field boundaries: ", e$message) }) - # Load harvesting data - if (!exists("harvesting_data")) { - warning("harvesting_data not loaded. TCH KPI will use placeholder values.") - harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric()) - } + # Load harvesting data for yield prediction (using common helper function) + harvesting_data <- load_harvest_data(setup$data_dir) # Extract current week/year from end_date current_week <- as.numeric(format(end_date, "%V")) diff --git a/r_app/80_utils_agronomic_support.R b/r_app/80_utils_agronomic_support.R index 8e55893..a71ea28 100644 --- a/r_app/80_utils_agronomic_support.R +++ b/r_app/80_utils_agronomic_support.R @@ -43,29 +43,6 @@ library(CAST) # These are now sourced from common utils and shared by all client types. # ============================================================================ -#' Prepare harvest predictions and ensure proper alignment with field data -prepare_predictions <- function(harvest_model, field_data, scenario = "optimistic") { - if (is.null(harvest_model) || is.null(field_data)) { - return(NULL) - } - - tryCatch({ - scenario_factor <- switch(scenario, - "pessimistic" = 0.85, - "realistic" = 1.0, - "optimistic" = 1.15, - 1.0) - - predictions <- field_data %>% - mutate(tch_forecasted = field_data$mean_ci * scenario_factor) - - return(predictions) - }, error = function(e) { - message(paste("Error preparing predictions:", e$message)) - return(NULL) - }) -} - # ============================================================================ # AURA KPI CALCULATION FUNCTIONS (6 KPIS) # ============================================================================ @@ -266,74 +243,91 @@ calculate_area_change_kpi <- function(current_stats, previous_stats) { #' KPI 3: Calculate TCH forecasted (tonnes of cane per hectare) #' -#' Projects final harvest tonnage based on CI growth trajectory +#' Projects final harvest tonnage based on historical yield data and CI growth trajectory. +#' Uses a Random Forest model trained on harvest data to predict yields for mature fields. +#' Delegates to calculate_yield_prediction_kpi() in 80_utils_common.R. #' -#' @param field_statistics Current field statistics -#' @param harvesting_data Historical harvest data (with yield observations) -#' @param field_boundaries_sf Field geometries +#' @param field_statistics Current field statistics (dataframe with Mean_CI or mean_ci column) +#' @param harvesting_data Historical harvest data frame (with tonnage_ha column) +#' @param field_boundaries_sf SF object with field geometries +#' @param cumulative_CI_vals_dir Directory with combined CI RDS files (optional) +#' @param data_dir Project data directory (from setup_project_directories or parameters_project.R) +#' Used to build cumulative_CI_vals_dir path if not provided directly (optional) +#' @param project_dir Deprecated: only used if data_dir not provided (optional) #' -#' @return Data frame with field-level TCH forecasts -calculate_tch_forecasted_kpi <- function(field_statistics, harvesting_data = NULL, field_boundaries_sf = NULL) { +#' @return Data frame with field-level yield forecasts ready for orchestrator +#' Columns: field_idx, tch_forecasted (yields in t/ha) +calculate_tch_forecasted_kpi <- function(field_statistics, harvesting_data = NULL, + field_boundaries_sf = NULL, + cumulative_CI_vals_dir = NULL, + data_dir = NULL, + project_dir = NULL) { - # Handle both naming conventions (Field_id/Mean_CI vs field_idx/mean_ci) - if ("Field_id" %in% names(field_statistics)) { - # Add field_idx to match field_boundaries row numbers - field_statistics <- field_statistics %>% - mutate(field_idx = match(Field_id, field_boundaries_sf$field)) - mean_ci_col <- "Mean_CI" - } else { - mean_ci_col <- "mean_ci" - } + # Use common utils yield prediction function (handles all ML logic) + # This replaces the previous linear model (TCH = 50 + CI*10) with proper ML prediction - # Filter out any fields without a match - field_statistics <- field_statistics %>% - filter(!is.na(field_idx)) - - if (nrow(field_statistics) == 0) { - warning("No fields matched between statistics and boundaries") + # Validate required parameters + if (is.null(field_boundaries_sf)) { + safe_log("field_boundaries_sf is NULL in calculate_tch_forecasted_kpi", "WARNING") return(data.frame( field_idx = integer(), - mean_ci = numeric(), tch_forecasted = numeric(), - tch_lower_bound = numeric(), - tch_upper_bound = numeric(), - confidence = character(), stringsAsFactors = FALSE )) } - - result <- data.frame( - field_idx = field_statistics$field_idx, - mean_ci = field_statistics[[mean_ci_col]], - tch_forecasted = NA_real_, - tch_lower_bound = NA_real_, - tch_upper_bound = NA_real_, - confidence = NA_character_, - stringsAsFactors = FALSE - ) - # Base TCH model: TCH = 50 + (CI * 10) - # This is a simplified model; production use should include more variables - - for (i in seq_len(nrow(result))) { - if (is.na(result$mean_ci[i])) { - result$confidence[i] <- "No data" - next + # Determine cumulative CI directory + if (is.null(cumulative_CI_vals_dir)) { + # Priority 1: Use provided data_dir parameter + if (!is.null(data_dir)) { + cumulative_CI_vals_dir <- file.path(data_dir, "extracted_ci", "cumulative_vals") + } else if (exists("data_dir", envir = .GlobalEnv)) { + # Priority 2: Fallback to global data_dir from parameters_project.R + cumulative_CI_vals_dir <- file.path(get("data_dir", envir = .GlobalEnv), "extracted_ci", "cumulative_vals") + } else { + # Priority 3: Last resort - log warning and fail gracefully + safe_log("Missing project data directory configuration: provide data_dir parameter or ensure parameters_project.R has set data_dir globally", "WARNING") + safe_log("No training data available for yield prediction", "WARNING") + return(data.frame( + field_idx = integer(), + tch_forecasted = numeric(), + stringsAsFactors = FALSE + )) } - - ci_val <- result$mean_ci[i] - - # Simple linear model - tch_est <- 50 + (ci_val * 10) - - # Confidence interval based on CI range - tch_lower <- tch_est * 0.85 - tch_upper <- tch_est * 1.15 - - result$tch_forecasted[i] <- round(tch_est, 2) - result$tch_lower_bound[i] <- round(tch_lower, 2) - result$tch_upper_bound[i] <- round(tch_upper, 2) - result$confidence[i] <- "Medium" + } + + # Call the shared yield prediction function from common utils + yield_result <- calculate_yield_prediction_kpi(field_boundaries_sf, harvesting_data, cumulative_CI_vals_dir) + + # Extract field-level results from the list + field_results <- yield_result$field_results + + # Convert to format expected by orchestrator + # If no predictions, return empty data frame + if (is.null(field_results) || nrow(field_results) == 0) { + return(data.frame( + field_idx = integer(), + tch_forecasted = numeric(), + stringsAsFactors = FALSE + )) + } + + # Map field names to field_idx using field_boundaries_sf + result <- field_results %>% + mutate( + field_idx = match(field, field_boundaries_sf$field), + tch_forecasted = yield_forecast_t_ha + ) %>% + filter(!is.na(field_idx)) %>% + select(field_idx, tch_forecasted) + + # Ensure result has proper structure even if empty + if (nrow(result) == 0) { + return(data.frame( + field_idx = integer(), + tch_forecasted = numeric(), + stringsAsFactors = FALSE + )) } return(result) @@ -471,7 +465,7 @@ create_summary_tables <- function(all_kpis) { select(field_idx, mean_ci_pct_change, interpretation), tch_forecast = all_kpis$tch_forecasted %>% - select(field_idx, mean_ci, tch_forecasted, tch_lower_bound, tch_upper_bound, confidence), + select(field_idx, tch_forecasted), growth_decline = all_kpis$growth_decline %>% select(field_idx, four_week_trend, trend_interpretation, decline_severity), @@ -479,9 +473,9 @@ create_summary_tables <- function(all_kpis) { weed_pressure = all_kpis$weed_presence %>% select(field_idx, fragmentation_index, weed_pressure_risk), - gap_filling = if (!is.null(all_kpis$gap_filling)) { + gap_filling = if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) { all_kpis$gap_filling %>% - select(field_idx, gap_score, gap_level, mean_ci) + select(field_idx, gap_score, gap_level) } else { NULL } @@ -527,9 +521,7 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee ) %>% left_join( all_kpis$tch_forecasted %>% - select(field_idx, Mean_CI = mean_ci, TCH_Forecasted = tch_forecasted, - TCH_Lower = tch_lower_bound, TCH_Upper = tch_upper_bound, - TCH_Confidence = confidence), + select(field_idx, TCH_Forecasted = tch_forecasted), by = "field_idx" ) %>% left_join( @@ -647,6 +639,7 @@ calculate_all_field_analysis_agronomic_support <- function( ci_rds_path = NULL, harvesting_data = NULL, output_dir = NULL, + data_dir = NULL, project_dir = NULL ) { @@ -794,7 +787,8 @@ calculate_all_field_analysis_agronomic_support <- function( } message("Calculating KPI 3: TCH Forecasted...") - tch_kpi <- calculate_tch_forecasted_kpi(current_stats, harvesting_data, field_boundaries_sf) + tch_kpi <- calculate_tch_forecasted_kpi(current_stats, harvesting_data, field_boundaries_sf, + data_dir = data_dir, project_dir = project_dir) message("Calculating KPI 4: Growth Decline...") growth_decline_kpi <- calculate_growth_decline_kpi( diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R index ad2cdf5..fa14f64 100644 --- a/r_app/80_utils_cane_supply.R +++ b/r_app/80_utils_cane_supply.R @@ -166,6 +166,59 @@ calculate_status_alert <- function(imminent_prob, age_week, weekly_ci_change, me NA_character_ } +#' Calculate yield prediction for CANE_SUPPLY workflows (Wrapper) +#' +#' This function wraps the shared yield prediction model from 80_utils_common.R +#' to provide CANE_SUPPLY clients (e.g., ANGATA) with ML-based yield forecasting. +#' +#' Uses Random Forest with Forward Feature Selection trained on: +#' - Cumulative Canopy Index (CI) from growth model +#' - Days of Year (DOY) / crop age +#' - CI-per-day (growth velocity) +#' +#' Predicts yields for mature fields (DOY >= 240, ~8 months) into quartiles: +#' - Top 25%: High-yield fields +#' - Average: Mid-range yield fields +#' - Lowest 25%: Lower-yield fields +#' +#' @param field_boundaries_sf SF object with field geometries +#' @param harvesting_data Data frame with harvest history (must have tonnage_ha column) +#' @param cumulative_CI_vals_dir Directory with combined CI RDS files +#' +#' @return List with: +#' - summary: Data frame with field_groups, count, and yield quartile predictions +#' - field_results: Data frame with field-level forecasts (yield_forecast_t_ha in t/ha) +#' +#' @details +#' **Data Requirements:** +#' - harvesting_data must include tonnage_ha column (yield in t/ha) for training +#' - cumulative_CI_vals_dir must contain "All_pivots_Cumulative_CI_quadrant_year_v2.rds" +#' - If either is missing, returns graceful fallback with NA values (not fake numbers) +#' +#' **Integration:** +#' This can be called from calculate_all_field_kpis() in cane_supply workflow to add +#' a new "Yield_Forecast" column to the 22-column KPI output. +#' +#' **Example:** +#' ```r +#' yield_result <- calculate_yield_prediction_kpi_cane_supply( +#' field_boundaries_sf, +#' harvesting_data, +#' file.path(data_dir, "combined_CI") +#' ) +#' # yield_result$summary has quartiles +#' # yield_result$field_results has per-field forecasts +#' ``` +calculate_yield_prediction_kpi_cane_supply <- function(field_boundaries_sf, + harvesting_data, + cumulative_CI_vals_dir) { + + # Call the shared yield prediction function from 80_utils_common.R + result <- calculate_yield_prediction_kpi(field_boundaries_sf, harvesting_data, cumulative_CI_vals_dir) + + return(result) +} + #' Build complete per-field KPI dataframe with all 22 columns #' @param current_stats data.frame with current week statistics from load_or_calculate_weekly_stats diff --git a/r_app/80_utils_common.R b/r_app/80_utils_common.R index 78962e9..29739cf 100644 --- a/r_app/80_utils_common.R +++ b/r_app/80_utils_common.R @@ -8,8 +8,13 @@ # - Field statistics extraction # - Week/year calculations for consistent date handling # - Excel/CSV/RDS export utilities +# - Yield prediction using ML models (Random Forest with Feature Selection) # # Used by: 80_calculate_kpis.R, all client-specific utils files +# +# NOTE: Libraries required by yield prediction (caret, CAST, here) are loaded +# in the main script 80_calculate_kpis.R, not here. This keeps dependencies +# centralized in the orchestrator script. # ============================================================================ # ============================================================================ @@ -590,6 +595,81 @@ extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) }) } +# ============================================================================ +# DATA LOADING HELPERS +# ============================================================================ + +#' Load and validate harvest data from harvest.xlsx +#' +#' Encapsulates harvest data loading with validation, type coercion, and error handling. +#' Returns a data frame with required columns (field, year, tonnage_ha) or an empty +#' data frame with proper structure if loading fails. +#' +#' @param data_dir Path to data directory containing harvest.xlsx +#' +#' @return data.frame with columns: field (character), year (numeric), tonnage_ha (numeric) +#' - On success: data frame with N rows of harvest records +#' - On failure: empty data frame with correct structure (0 rows, 3 columns) +#' +#' @details +#' **File Location**: Expected at `file.path(data_dir, "harvest.xlsx")` +#' +#' **Validation**: +#' - Checks file existence before reading +#' - Validates required columns: field, year, tonnage_ha +#' - Coerces year and tonnage_ha to numeric +#' - Logs status messages for debugging +#' +#' **Error Handling**: +#' - Missing file: Returns empty DF (logs NOTE) +#' - Missing columns: Returns empty DF (logs WARNING) +#' - Read errors: Returns empty DF (logs WARNING) +#' - Always returns valid data frame structure (won't return NULL) +#' +#' **Usage**: +#' ```r +#' harvesting_data <- load_harvest_data(setup$data_dir) +#' # harvesting_data is guaranteed to be a data.frame with 3 columns +#' # even if harvest.xlsx is unavailable or invalid +#' ``` +load_harvest_data <- function(data_dir) { + harvesting_data <- NULL + harvest_file <- file.path(data_dir, "harvest.xlsx") + + if (file.exists(harvest_file)) { + tryCatch({ + harvesting_data <- readxl::read_excel(harvest_file) + + # Ensure required columns are present + required_cols <- c("field", "year", "tonnage_ha") + if (all(required_cols %in% names(harvesting_data))) { + # Convert to data frame and ensure column types + harvesting_data <- as.data.frame(harvesting_data) + harvesting_data$year <- as.numeric(harvesting_data$year) + harvesting_data$tonnage_ha <- as.numeric(harvesting_data$tonnage_ha) + + message(paste(" ✓ Loaded harvest data:", nrow(harvesting_data), "records from harvest.xlsx")) + return(harvesting_data) + } else { + message(paste(" WARNING: harvest.xlsx missing required columns. Expected: field, year, tonnage_ha")) + harvesting_data <- NULL + } + }, error = function(e) { + message(paste(" WARNING: Could not read harvest.xlsx:", e$message)) + }) + } else { + message(paste(" NOTE: harvest.xlsx not found at", harvest_file)) + } + + # Fallback: create empty data frame if loading failed + if (is.null(harvesting_data)) { + message(" WARNING: No harvest data available. TCH yield prediction will use graceful fallback (NA values)") + harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric()) + } + + return(harvesting_data) +} + # ============================================================================ # FIELD STATISTICS EXTRACTION # ============================================================================ @@ -1386,3 +1466,257 @@ prepare_predictions <- function(predictions, newdata) { dplyr::left_join(., newdata, by = c("field", "sub_field", "season")) ) } + +# ============================================================================ +# YIELD PREDICTION KPI (SHARED ML-BASED MODEL FOR ALL CLIENT TYPES) +# ============================================================================ + +#' Helper function for graceful fallback when training data unavailable +#' +#' @param field_boundaries Field boundaries (sf or SpatVector) +#' @return List with summary and field_results (both with NA values) +create_fallback_result <- function(field_boundaries) { + # Convert to SpatVector if needed (for terra::project) + if (!inherits(field_boundaries, "SpatVector")) { + field_boundaries <- terra::vect(field_boundaries) + } + field_boundaries_projected <- terra::project(field_boundaries, "EPSG:6933") # Equal Earth projection + field_areas <- terra::expanse(field_boundaries_projected) / 10000 # Convert m² to hectares + total_area <- sum(field_areas) + + summary_result <- data.frame( + field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"), + count = c(0, 0, 0, nrow(field_boundaries)), + value = c(NA_real_, NA_real_, NA_real_, round(total_area, 1)), + stringsAsFactors = FALSE + ) + + field_results <- data.frame( + field = character(0), + sub_field = character(0), + Age_days = numeric(0), + yield_forecast_t_ha = numeric(0), + season = numeric(0), + stringsAsFactors = FALSE + ) + + return(list(summary = summary_result, field_results = field_results)) +} + +#' Calculate yield prediction KPI using Random Forest with Feature Selection +#' +#' Trains a Random Forest model on historical harvest data with cumulative CI, +#' days of year (DOY), and CI-per-day as predictors. Uses CAST::ffs() for +#' Forward Feature Selection. Predicts yields for mature fields (DOY >= 240). +#' +#' @param field_boundaries Field boundaries (sf or SpatVector) +#' @param harvesting_data Data frame with harvest data including tonnage_ha column +#' @param cumulative_CI_vals_dir Directory containing "All_pivots_Cumulative_CI_quadrant_year_v2.rds" +#' +#' @return List with: +#' - summary: Data frame with field_groups, count, value (quartiles and total area) +#' - field_results: Data frame with field-level yield forecasts (yield_forecast_t_ha) +#' +#' @details +#' **Training Data Requirements:** +#' - cumulative_CI_vals_dir must contain "All_pivots_Cumulative_CI_quadrant_year_v2.rds" +#' - harvesting_data must have tonnage_ha column with numeric yield values +#' - Training stops gracefully if either is missing (returns NA values, not fake numbers) +#' +#' **Model Specifications:** +#' - Algorithm: Random Forest (caret + CAST) +#' - Feature Selection: Forward Feature Selection (CAST::ffs) +#' - Cross-validation: 5-fold CV +#' - Predictors: cumulative_CI, DOY, CI_per_day +#' - Mature field threshold: DOY >= 240 (8 months) +#' - Output: Field-level yield forecasts grouped by quartile +#' +#' **Error Handling:** +#' - Missing tonnage_ha: Returns graceful fallback with NA (not zero) values +#' - No training data: Logs WARNING, returns empty field_results +#' - RDS file missing: Returns graceful fallback +#' - Prediction errors: Wrapped in tryCatch, returns fallback on failure +calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cumulative_CI_vals_dir) { + safe_log("Calculating yield prediction KPI using Random Forest with Feature Selection") + + tryCatch({ + # Check if tonnage_ha column is present and has valid data + if (is.null(harvesting_data) || + !("tonnage_ha" %in% names(harvesting_data)) || + all(is.na(harvesting_data$tonnage_ha))) { + safe_log("No harvest data available: lacking tonnage_ha column or all values are NA", "WARNING") + return(create_fallback_result(field_boundaries)) + } + + # Check if CI quadrant RDS file exists + ci_quadrant_path <- file.path(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds") + if (!file.exists(ci_quadrant_path)) { + safe_log(paste("CI quadrant file not found at:", ci_quadrant_path), "WARNING") + return(create_fallback_result(field_boundaries)) + } + + # Load CI quadrant data and fill missing field/sub_field values + CI_quadrant <- readRDS(ci_quadrant_path) %>% + dplyr::group_by(model) %>% + tidyr::fill(field, sub_field, .direction = "downup") %>% + dplyr::ungroup() + + # Rename year column to season for consistency + harvesting_data_renamed <- harvesting_data %>% dplyr::rename(season = year) + + # Join CI and yield data + CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed, + by = c("field", "sub_field", "season")) %>% + dplyr::group_by(sub_field, season) %>% + dplyr::slice(which.max(DOY)) %>% + dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>% + dplyr::mutate(CI_per_day = cumulative_CI / DOY) + + # Define predictors and response variables + predictors <- c("cumulative_CI", "DOY", "CI_per_day") + response <- "tonnage_ha" + + # Prepare training dataset (fields with harvest data) + CI_and_yield_test <- CI_and_yield %>% + as.data.frame() %>% + dplyr::filter(!is.na(tonnage_ha)) + + # Check if we have minimum training data + if (nrow(CI_and_yield_test) == 0) { + safe_log("No training data available: no fields with tonnage_ha observations", "WARNING") + return(create_fallback_result(field_boundaries)) + } + + # Pre-clean training data: remove rows with any NAs in predictors or response + # This is required because CAST::ffs doesn't support na.rm parameter + CI_and_yield_test <- CI_and_yield_test %>% + dplyr::filter(!dplyr::if_any(dplyr::all_of(c(predictors, response)), is.na)) + + if (nrow(CI_and_yield_test) == 0) { + safe_log("No complete training data after removing NAs in predictors/response", "WARNING") + return(create_fallback_result(field_boundaries)) + } + + # Prepare prediction dataset (fields without harvest data, mature fields only) + prediction_yields <- CI_and_yield %>% + as.data.frame() %>% + dplyr::filter(is.na(tonnage_ha) & DOY >= 240) # Mature fields only + + # Configure model training parameters + ctrl <- caret::trainControl( + method = "cv", + savePredictions = TRUE, + allowParallel = TRUE, + number = 5, + verboseIter = FALSE + ) + + # Train the model with forward feature selection + set.seed(202) # For reproducibility + safe_log("Training Random Forest model with Forward Feature Selection...") + model_ffs_rf <- CAST::ffs( + CI_and_yield_test[, predictors], + CI_and_yield_test[, response], + method = "rf", + trControl = ctrl, + importance = TRUE, + withinSE = TRUE, + tuneLength = 5 + ) + + # Predict yields on validation data (same as training data for RMSE calculation) + pred_ffs_rf <- prepare_predictions( + stats::predict(model_ffs_rf, newdata = CI_and_yield_test), + CI_and_yield_test + ) + + # Extract cross-validated RMSE from the model object (more honest than in-sample RMSE) + # The CAST::ffs model stores CV results in $results data frame + # We extract the RMSE from the best model (lowest RMSE across folds) + if (!is.null(model_ffs_rf$results) && "RMSE" %in% names(model_ffs_rf$results)) { + # Get minimum RMSE from cross-validation results (best model from feature selection) + rmse_value <- min(model_ffs_rf$results$RMSE, na.rm = TRUE) + safe_log(paste("Yield prediction RMSE (cross-validated):", round(rmse_value, 2), "t/ha")) + } else { + # Fallback: compute in-sample RMSE if CV results unavailable, but label it clearly + rmse_value <- sqrt(mean((pred_ffs_rf$predicted_Tcha - CI_and_yield_test$tonnage_ha)^2, na.rm = TRUE)) + safe_log(paste("Yield prediction RMSE (in-sample/training):", round(rmse_value, 2), "t/ha")) + } + + # Predict yields for current season (mature fields >= 240 days) + if (nrow(prediction_yields) > 0) { + pred_rf_current_season <- prepare_predictions( + stats::predict(model_ffs_rf, newdata = prediction_yields), + prediction_yields + ) %>% + dplyr::filter(Age_days >= 240) %>% + dplyr::select(c("field", "Age_days", "predicted_Tcha", "season")) + } else { + pred_rf_current_season <- data.frame() + } + + # Calculate summary statistics for KPI + if (nrow(pred_rf_current_season) > 0) { + safe_log(paste("Number of fields with yield predictions:", nrow(pred_rf_current_season))) + safe_log(paste("Predicted yield range:", + round(min(pred_rf_current_season$predicted_Tcha, na.rm = TRUE), 1), + "-", + round(max(pred_rf_current_season$predicted_Tcha, na.rm = TRUE), 1), + "t/ha")) + + # Calculate quartiles for grouping + yield_quartiles <- quantile(pred_rf_current_season$predicted_Tcha, + probs = c(0.25, 0.5, 0.75), na.rm = TRUE) + + # Count fields in each quartile + top_25_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[3], na.rm = TRUE) + average_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[1] & + pred_rf_current_season$predicted_Tcha < yield_quartiles[3], na.rm = TRUE) + lowest_25_count <- sum(pred_rf_current_season$predicted_Tcha < yield_quartiles[1], na.rm = TRUE) + + # Calculate total area + if (!inherits(field_boundaries, "SpatVector")) { + field_boundaries_vect <- terra::vect(field_boundaries) + } else { + field_boundaries_vect <- field_boundaries + } + + # Handle both sf and SpatVector inputs for area calculation + if (inherits(field_boundaries, "sf")) { + field_boundaries_projected <- sf::st_transform(field_boundaries, "EPSG:6933") + field_areas <- sf::st_area(field_boundaries_projected) / 10000 # m² to hectares + } else { + field_boundaries_projected <- terra::project(field_boundaries_vect, "EPSG:6933") + field_areas <- terra::expanse(field_boundaries_projected) / 10000 + } + total_area <- sum(as.numeric(field_areas)) + + safe_log(paste("Total area:", round(total_area, 1), "hectares")) + + # Build summary result + result <- data.frame( + field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"), + count = c(top_25_count, average_count, lowest_25_count, nrow(field_boundaries)), + value = c(round(yield_quartiles[3], 1), round(yield_quartiles[2], 1), + round(yield_quartiles[1], 1), round(total_area, 1)), + stringsAsFactors = FALSE + ) + + # Prepare field-level results + field_level_results <- pred_rf_current_season %>% + dplyr::select(field, Age_days, predicted_Tcha, season) %>% + dplyr::rename(yield_forecast_t_ha = predicted_Tcha) + + safe_log("✓ Yield prediction complete") + return(list(summary = result, field_results = field_level_results)) + } else { + safe_log("No fields meet maturity threshold (DOY >= 240) for prediction", "WARNING") + return(list(summary = create_fallback_result(field_boundaries)$summary, + field_results = data.frame())) + } + + }, error = function(e) { + safe_log(paste("Error in yield prediction:", e$message), "ERROR") + return(create_fallback_result(field_boundaries)) + }) +} From 253ff51ca261d17807e159b2fd2dc4bc61ca976c Mon Sep 17 00:00:00 2001 From: Timon Date: Tue, 17 Feb 2026 21:57:34 +0100 Subject: [PATCH 23/30] Add field patchiness KPI calculation and update summaries; replace weed pressure metrics with patchiness metrics in reports --- r_app/80_utils_agronomic_support.R | 190 ++++++++++++++++-- ..._CI_report_with_kpis_agronomic_support.Rmd | 107 ++++++---- 2 files changed, 238 insertions(+), 59 deletions(-) diff --git a/r_app/80_utils_agronomic_support.R b/r_app/80_utils_agronomic_support.R index a71ea28..df5232a 100644 --- a/r_app/80_utils_agronomic_support.R +++ b/r_app/80_utils_agronomic_support.R @@ -64,6 +64,7 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_ cv_value = numeric(), morans_i = numeric(), uniformity_score = numeric(), + uniformity_category = character(), interpretation = character(), stringsAsFactors = FALSE ) @@ -80,6 +81,7 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_ cv_value = NA_real_, morans_i = NA_real_, uniformity_score = NA_real_, + uniformity_category = "No data", interpretation = "No data", stringsAsFactors = FALSE )) @@ -142,16 +144,22 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_ # Interpretation if (is.na(cv_val)) { interpretation <- "No data" + uniformity_category <- "No data" } else if (cv_val < 0.08) { interpretation <- "Excellent uniformity" + uniformity_category <- "Excellent" } else if (cv_val < 0.15) { interpretation <- "Good uniformity" + uniformity_category <- "Good" } else if (cv_val < 0.25) { interpretation <- "Acceptable uniformity" + uniformity_category <- "Acceptable" } else if (cv_val < 0.4) { interpretation <- "Poor uniformity" + uniformity_category <- "Poor" } else { interpretation <- "Very poor uniformity" + uniformity_category <- "Very poor" } result <- rbind(result, data.frame( @@ -159,6 +167,7 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_ cv_value = cv_val, morans_i = morans_i, uniformity_score = round(uniformity_score, 3), + uniformity_category = uniformity_category, interpretation = interpretation, stringsAsFactors = FALSE )) @@ -446,6 +455,72 @@ calculate_weed_presence_kpi <- function(ci_pixels_by_field) { return(result) } +#' KPI 5: Calculate field patchiness (combines fragmentation into Gini-like metric + risk) +#' +#' @param weed_kpi_result Data frame from calculate_weed_presence_kpi() +#' @param mean_ci_values Optional vector of mean CI values per field +#' +#' @return Data frame with patchiness indicators (gini_coefficient, patchiness_risk, interpretation) +calculate_patchiness_kpi <- function(weed_kpi_result, ci_pixels_by_field = NULL, mean_ci_values = NULL) { + result <- weed_kpi_result %>% + mutate( + # Calculate Gini coefficient from CI pixels (proper calculation) + gini_coefficient = NA_real_, + mean_ci = if (!is.null(mean_ci_values)) mean_ci_values[field_idx] else NA_real_, + # Map weed_pressure_risk to patchiness_risk + patchiness_risk = weed_pressure_risk, + # Create interpretation based on gini and risk + patchiness_interpretation = case_when( + is.na(gini_coefficient) ~ "No data", + gini_coefficient < 0.2 & patchiness_risk %in% c("Low", "Minimal") ~ "Uniform growth", + gini_coefficient < 0.4 & patchiness_risk %in% c("Low", "Medium") ~ "Moderate patchiness", + gini_coefficient >= 0.4 & patchiness_risk %in% c("High", "Medium") ~ "High patchiness", + TRUE ~ "Mixed heterogeneity" + ) + ) %>% + select(field_idx, gini_coefficient, mean_ci, + patchiness_interpretation, patchiness_risk) + + # Calculate actual Gini coefficients if CI pixels provided + if (!is.null(ci_pixels_by_field)) { + for (i in seq_len(nrow(result))) { + ci_pixels <- ci_pixels_by_field[[i]] + + if (!is.null(ci_pixels) && length(ci_pixels) > 0) { + ci_pixels <- ci_pixels[!is.na(ci_pixels)] + + if (length(ci_pixels) > 1) { + # Calculate Gini coefficient + # Formula: Gini = (2 * sum(i * x_i)) / (n * sum(x_i)) - (n+1)/n + # where x_i are sorted values + ci_sorted <- sort(ci_pixels) + n <- length(ci_sorted) + + numerator <- 2 * sum(seq_len(n) * ci_sorted) + denominator <- n * sum(ci_sorted) + + gini <- (numerator / denominator) - (n + 1) / n + # Clamp to 0-1 range (should be within this by formula but guard against numerical errors) + gini <- max(0, min(1, gini)) + + result$gini_coefficient[i] <- gini + + # Update interpretation based on calculated Gini + result$patchiness_interpretation[i] <- dplyr::case_when( + gini < 0.2 ~ "Uniform growth", + gini < 0.4 & result$patchiness_risk[i] %in% c("Low", "Medium", "Minimal") ~ "Moderate patchiness", + gini >= 0.4 & result$patchiness_risk[i] %in% c("High", "Medium") ~ "High patchiness", + TRUE ~ "Mixed heterogeneity" + ) + } + } + } + } + + return(result) +} + + # ============================================================================ # KPI ORCHESTRATOR AND REPORTING @@ -459,7 +534,14 @@ calculate_weed_presence_kpi <- function(ci_pixels_by_field) { create_summary_tables <- function(all_kpis) { kpi_summary <- list( uniformity = all_kpis$uniformity %>% - select(field_idx, cv_value, morans_i, uniformity_score, interpretation), + select(field_idx, cv_value, uniformity_category, interpretation), + + spatial_clustering = if (!is.null(all_kpis$spatial_clustering) && nrow(all_kpis$spatial_clustering) > 0) { + all_kpis$spatial_clustering %>% + select(field_idx, morans_i) + } else { + NULL + }, area_change = all_kpis$area_change %>% select(field_idx, mean_ci_pct_change, interpretation), @@ -470,8 +552,8 @@ create_summary_tables <- function(all_kpis) { growth_decline = all_kpis$growth_decline %>% select(field_idx, four_week_trend, trend_interpretation, decline_severity), - weed_pressure = all_kpis$weed_presence %>% - select(field_idx, fragmentation_index, weed_pressure_risk), + patchiness = all_kpis$patchiness %>% + select(field_idx, gini_coefficient, patchiness_interpretation, patchiness_risk), gap_filling = if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) { all_kpis$gap_filling %>% @@ -497,47 +579,83 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee result <- field_boundaries_sf %>% sf::st_drop_geometry() %>% mutate( - field_idx = row_number(), # ADD THIS: match the integer index used in KPI functions + field_idx = row_number(), Field_id = field, Field_name = field, Week = current_week, Year = current_year ) %>% - select(field_idx, Field_id, Field_name, Week, Year) # Include field_idx first + select(field_idx, Field_id, Field_name, Week, Year) - # Join all KPI results (now field_idx matches on both sides) + # ============================================ + # GROUP 1: FIELD UNIFORMITY (KPI 1) + # ============================================ result <- result %>% left_join( all_kpis$uniformity %>% - select(field_idx, CV = cv_value, Uniformity_Score = uniformity_score, - Morans_I = morans_i, Uniformity_Interpretation = interpretation), + select(field_idx, CV = cv_value, + Uniformity_Category = uniformity_category), by = "field_idx" - ) %>% + ) + + # ============================================ + # GROUP 2: GROWTH & TREND ANALYSIS (KPI 2 + KPI 4) + # ============================================ + # KPI 2: Area Change + result <- result %>% left_join( all_kpis$area_change %>% select(field_idx, Weekly_CI_Change = mean_ci_pct_change, Area_Change_Interpretation = interpretation), by = "field_idx" - ) %>% - left_join( - all_kpis$tch_forecasted %>% - select(field_idx, TCH_Forecasted = tch_forecasted), - by = "field_idx" - ) %>% + ) + + # KPI 4: Growth Decline + result <- result %>% left_join( all_kpis$growth_decline %>% select(field_idx, Four_Week_Trend = four_week_trend, Trend_Interpretation = trend_interpretation, Decline_Severity = decline_severity), by = "field_idx" - ) %>% + ) + + # ============================================ + # GROUP 3: FIELD HETEROGENEITY/PATCHINESS (KPI 5 + Spatial Clustering) + # ============================================ + # KPI 5: Field Patchiness + result <- result %>% left_join( - all_kpis$weed_presence %>% - select(field_idx, Fragmentation_Index = fragmentation_index, - Weed_Pressure_Risk = weed_pressure_risk), + all_kpis$patchiness %>% + select(field_idx, Gini_Coefficient = gini_coefficient, Mean_CI = mean_ci, + Patchiness_Interpretation = patchiness_interpretation, + Patchiness_Risk = patchiness_risk), by = "field_idx" ) + # Moran's I (spatial clustering - used in patchiness calculation) + if (!is.null(all_kpis$spatial_clustering) && nrow(all_kpis$spatial_clustering) > 0) { + result <- result %>% + left_join( + all_kpis$spatial_clustering %>% + select(field_idx, Morans_I = morans_i), + by = "field_idx" + ) + } + + # ============================================ + # GROUP 4: YIELD FORECAST (KPI 3) + # ============================================ + result <- result %>% + left_join( + all_kpis$tch_forecasted %>% + select(field_idx, TCH_Forecasted = tch_forecasted), + by = "field_idx" + ) + + # ============================================ + # GROUP 5: DATA QUALITY / GAP FILLING (KPI 6) + # ============================================ # Add gap filling if available if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) { result <- result %>% @@ -548,7 +666,7 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee ) } - # Remove field_idx from final output (it was only needed for joining) + # Remove field_idx from final output result <- result %>% select(-field_idx) @@ -795,9 +913,16 @@ calculate_all_field_analysis_agronomic_support <- function( ci_pixels_by_field ) - message("Calculating KPI 5: Weed Presence...") + message("Calculating KPI 5: Field Patchiness...") weed_kpi <- calculate_weed_presence_kpi(ci_pixels_by_field) + # Convert weed metrics to patchiness metrics (Gini-like + risk combination) + mean_ci_values <- current_stats$Mean_CI + if (is.null(mean_ci_values) || length(mean_ci_values) != nrow(field_boundaries_sf)) { + mean_ci_values <- rep(NA_real_, nrow(field_boundaries_sf)) + } + patchiness_kpi <- calculate_patchiness_kpi(weed_kpi, ci_pixels_by_field, mean_ci_values) + message("Calculating KPI 6: Gap Filling...") # Build list of per-field files for this week per_field_files <- c() @@ -842,10 +967,31 @@ calculate_all_field_analysis_agronomic_support <- function( area_change = area_change_kpi, tch_forecasted = tch_kpi, growth_decline = growth_decline_kpi, - weed_presence = weed_kpi, + patchiness = patchiness_kpi, + spatial_clustering = uniformity_kpi %>% select(field_idx, morans_i), gap_filling = gap_filling_kpi ) + # Deduplicate KPI dataframes to ensure one row per field_idx + # (sometimes joins or calculations can create duplicate rows) + message("Deduplicating KPI results (keeping first occurrence per field)...") + all_kpis$uniformity <- all_kpis$uniformity %>% + distinct(field_idx, .keep_all = TRUE) + all_kpis$area_change <- all_kpis$area_change %>% + distinct(field_idx, .keep_all = TRUE) + all_kpis$tch_forecasted <- all_kpis$tch_forecasted %>% + distinct(field_idx, .keep_all = TRUE) + all_kpis$growth_decline <- all_kpis$growth_decline %>% + distinct(field_idx, .keep_all = TRUE) + all_kpis$patchiness <- all_kpis$patchiness %>% + distinct(field_idx, .keep_all = TRUE) + if (!is.null(all_kpis$spatial_clustering)) { + all_kpis$spatial_clustering <- all_kpis$spatial_clustering %>% + distinct(field_idx, .keep_all = TRUE) + } + all_kpis$gap_filling <- all_kpis$gap_filling %>% + distinct(field_idx, .keep_all = TRUE) + # Built single-sheet field detail table with all KPIs message("\nBuilding comprehensive field detail table...") field_detail_df <- create_field_detail_table( diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 9945571..4d873b2 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -218,10 +218,10 @@ if (dir.exists(kpi_data_dir)) { if (is.null(summary_tables)) { summary_tables <- list() - # 1. Uniformity summary - GROUP BY Uniformity_Interpretation and COUNT - if ("Uniformity_Interpretation" %in% names(field_details_table)) { + # 1. Uniformity summary - GROUP BY Uniformity_Category and COUNT + if ("Uniformity_Category" %in% names(field_details_table)) { summary_tables$uniformity <- field_details_table %>% - group_by(interpretation = Uniformity_Interpretation) %>% + group_by(interpretation = Uniformity_Category) %>% summarise(field_count = n(), .groups = 'drop') safe_log(" ✓ Created uniformity summary") } @@ -242,28 +242,62 @@ if (dir.exists(kpi_data_dir)) { safe_log(" ✓ Created growth_decline summary") } - # 4. Weed pressure summary - GROUP BY Weed_Pressure_Risk and COUNT - if ("Weed_Pressure_Risk" %in% names(field_details_table)) { - summary_tables$weed_pressure <- field_details_table %>% - group_by(weed_pressure_risk = Weed_Pressure_Risk) %>% - summarise(field_count = n(), .groups = 'drop') - safe_log(" ✓ Created weed_pressure summary") - } - - # 5. TCH forecast summary - bin into categories and COUNT - if ("TCH_Forecasted" %in% names(field_details_table)) { - summary_tables$tch_forecast <- field_details_table %>% - filter(!is.na(TCH_Forecasted)) %>% + # 4. Patchiness summary - GROUP BY Patchiness_Risk + Gini ranges + if ("Patchiness_Risk" %in% names(field_details_table) && "Gini_Coefficient" %in% names(field_details_table)) { + summary_tables$patchiness <- field_details_table %>% mutate( - tch_category = case_when( - TCH_Forecasted >= quantile(TCH_Forecasted, 0.75, na.rm = TRUE) ~ "Top 25%", - TCH_Forecasted >= quantile(TCH_Forecasted, 0.25, na.rm = TRUE) ~ "Average", - TRUE ~ "Lowest 25%" + gini_category = case_when( + Gini_Coefficient < 0.2 ~ "Uniform (Gini<0.2)", + Gini_Coefficient < 0.4 ~ "Moderate (Gini 0.2-0.4)", + TRUE ~ "High (Gini≥0.4)" ) ) %>% - group_by(tch_category) %>% + group_by(gini_category, patchiness_risk = Patchiness_Risk) %>% summarise(field_count = n(), .groups = 'drop') - safe_log(" ✓ Created tch_forecast summary") + safe_log(" ✓ Created patchiness summary") + } + + # 5. TCH forecast summary - show actual value ranges (quartiles) instead of counts + if ("TCH_Forecasted" %in% names(field_details_table)) { + tch_values <- field_details_table %>% + filter(!is.na(TCH_Forecasted)) %>% + pull(TCH_Forecasted) + + if (length(tch_values) > 0) { + # Defensive check: if all TCH values are identical, handle as special case + if (length(unique(tch_values)) == 1) { + # All values are identical + identical_value <- tch_values[1] + summary_tables$tch_forecast <- tibble::tibble( + tch_category = "All equal", + range = paste0(round(identical_value, 1), " t/ha"), + field_count = length(tch_values) + ) + safe_log(" ✓ Created tch_forecast summary (all TCH values identical)") + } else { + # Multiple distinct values - use three-quartile approach + q25 <- quantile(tch_values, 0.25, na.rm = TRUE) + q50 <- quantile(tch_values, 0.50, na.rm = TRUE) + q75 <- quantile(tch_values, 0.75, na.rm = TRUE) + min_val <- min(tch_values, na.rm = TRUE) + max_val <- max(tch_values, na.rm = TRUE) + + summary_tables$tch_forecast <- tibble::tibble( + tch_category = c("Top 25%", "Middle 50%", "Bottom 25%"), + range = c( + paste0(round(q75, 1), "-", round(max_val, 1), " t/ha"), + paste0(round(q25, 1), "-", round(q75, 1), " t/ha"), + paste0(round(min_val, 1), "-", round(q25, 1), " t/ha") + ), + field_count = c( + nrow(field_details_table %>% filter(TCH_Forecasted >= q75, !is.na(TCH_Forecasted))), + nrow(field_details_table %>% filter(TCH_Forecasted >= q25 & TCH_Forecasted < q75, !is.na(TCH_Forecasted))), + nrow(field_details_table %>% filter(TCH_Forecasted < q25, !is.na(TCH_Forecasted))) + ) + ) + safe_log(" ✓ Created tch_forecast summary with value ranges") + } + } } # 6. Gap filling summary - GROUP BY Gap_Level and COUNT @@ -544,22 +578,29 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table kpi_display_order <- list( uniformity = list(display = "Field Uniformity", level_col = "interpretation", count_col = "field_count"), area_change = list(display = "Area Change", level_col = "interpretation", count_col = "field_count"), - tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", count_col = "field_count"), - growth_decline = list(display = "Growth Decline", level_col = "trend_interpretation", count_col = "field_count"), - weed_pressure = list(display = "Weed Presence", level_col = "weed_pressure_risk", count_col = "field_count"), + growth_decline = list(display = "Growth Decline (4-Week Trend)", level_col = "trend_interpretation", count_col = "field_count"), + patchiness = list(display = "Field Patchiness", level_col = "gini_category", count_col = "field_count", detail_col = "patchiness_risk"), + tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", detail_col = "range", count_col = "field_count"), gap_filling = list(display = "Gap Filling", level_col = "gap_level", count_col = "field_count") ) - standardize_kpi <- function(df, level_col, count_col) { + standardize_kpi <- function(df, level_col, count_col, detail_col = NULL) { if (is.null(level_col) || !(level_col %in% names(df)) || is.null(count_col) || !(count_col %in% names(df))) { return(NULL) } total <- sum(df[[count_col]], na.rm = TRUE) total <- ifelse(total == 0, NA_real_, total) + # If detail_col is specified, use it as the Level instead + if (!is.null(detail_col) && detail_col %in% names(df)) { + display_level <- df[[detail_col]] + } else { + display_level <- df[[level_col]] + } + df %>% dplyr::transmute( - Level = as.character(.data[[level_col]]), + Level = as.character(display_level), Count = as.integer(round(as.numeric(.data[[count_col]]))), Percent = if (is.na(total)) { NA_real_ @@ -578,17 +619,9 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table kpi_df <- summary_tables[[kpi_key]] if (is.null(kpi_df) || !is.data.frame(kpi_df) || nrow(kpi_df) == 0) next - kpi_rows <- standardize_kpi(kpi_df, config$level_col, config$count_col) - if (is.null(kpi_rows) && kpi_key %in% c("tch_forecast", "gap_filling")) { - numeric_cols <- names(kpi_df)[vapply(kpi_df, is.numeric, logical(1))] - if (length(numeric_cols) > 0) { - kpi_rows <- tibble::tibble( - Level = numeric_cols, - Count = round(as.numeric(kpi_df[1, numeric_cols]), 2), - Percent = NA_real_ - ) - } - } + # Pass detail_col if it exists in config + detail_col <- if (!is.null(config$detail_col)) config$detail_col else NULL + kpi_rows <- standardize_kpi(kpi_df, config$level_col, config$count_col, detail_col) if (!is.null(kpi_rows)) { kpi_rows$KPI <- config$display From f0a3afad5236253304e62e3d884eb3d10b5f49a0 Mon Sep 17 00:00:00 2001 From: Timon Date: Tue, 17 Feb 2026 22:15:33 +0100 Subject: [PATCH 24/30] Refactor risk assessment terminology in reports; replace 'Weed Risk' with 'Patchiness Risk' and update related calculations and alerts for consistency --- r_app/80_utils_cane_supply.R | 14 ++--- ..._CI_report_with_kpis_agronomic_support.Rmd | 54 +++++++++---------- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R index fa14f64..100d231 100644 --- a/r_app/80_utils_cane_supply.R +++ b/r_app/80_utils_cane_supply.R @@ -138,28 +138,28 @@ categorize_cv_trend_long_term <- function(cv_slope) { #' Determine status alert based on harvest probability and crop health #' Priority order: -#' 1. Ready for harvest-check (imminent + mature ≥12 months) -#' 2. Strong decline in crop health (drop ≥2 points but still >1.5) -#' 3. Harvested/bare (Mean CI < 1.5) +#' 1. harvest_ready (imminent + mature ≥12 months) +#' 2. decline_stress (drop ≥2 points but still >1.5) +#' 3. harvested_bare (Mean CI < 1.5) #' @param imminent_prob Numeric harvest probability #' @param age_week Numeric age in weeks #' @param weekly_ci_change Numeric weekly CI change #' @param mean_ci Numeric mean CI value -#' @return Character status alert or NA +#' @return Character status alert code or NA calculate_status_alert <- function(imminent_prob, age_week, weekly_ci_change, mean_ci) { # Priority 1: Ready for harvest-check if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_week) && age_week >= 52) { - return("Ready for harvest-check") + return("harvest_ready") } # Priority 2: Strong decline if (!is.na(weekly_ci_change) && weekly_ci_change <= -2.0 && !is.na(mean_ci) && mean_ci > 1.5) { - return("Strong decline in crop health") + return("decline_stress") } # Priority 3: Harvested/bare if (!is.na(mean_ci) && mean_ci < 1.5) { - return("Harvested/bare") + return("harvested_bare") } # Fallback: no alert diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 4d873b2..216d722 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -199,7 +199,7 @@ if (dir.exists(kpi_data_dir)) { c("Gap_Score", "Gap_Score"), # Keep as-is c("Growth Uniformity", "Growth_Uniformity"), c("Decline Risk", "Decline_Risk"), - c("Weed Risk", "Weed_Risk"), + c("Patchiness Risk", "Patchiness_Risk"), c("Moran's I", "Morans_I") ) @@ -539,15 +539,15 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table } } - # 4. Weed pressure insights - group by weed_pressure_risk - if (!is.null(summary_tables$weed_pressure) && nrow(summary_tables$weed_pressure) > 0) { - cat("\n**Weed/Pest Pressure Risk:**\n") - weed_counts <- summary_tables$weed_pressure %>% - dplyr::select(weed_pressure_risk, count = field_count) + # 4. Patchiness insights - group by patchiness_risk + if (!is.null(summary_tables$patchiness) && nrow(summary_tables$patchiness) > 0) { + cat("\n**Field Patchiness Risk:**\n") + patchiness_counts <- summary_tables$patchiness %>% + dplyr::select(patchiness_risk, count = field_count) - for (i in seq_len(nrow(weed_counts))) { - risk <- weed_counts$weed_pressure_risk[i] - count <- weed_counts$count[i] + for (i in seq_len(nrow(patchiness_counts))) { + risk <- patchiness_counts$patchiness_risk[i] + count <- patchiness_counts$count[i] if (!is.na(risk) && !is.na(count) && count > 0) { cat("- ", count, " field(s) at ", risk, " risk\n", sep="") } @@ -684,7 +684,7 @@ generate_field_alerts <- function(field_details_table) { # Check for required columns required_cols <- c("Field", "Field Size (acres)", "Growth Uniformity", "Yield Forecast (t/ha)", - "Gap Score", "Decline Risk", "Weed Risk", "Mean CI", "CV Value", "Moran's I") + "Gap Score", "Decline Risk", "Patchiness Risk", "Mean CI", "CV Value", "Moran's I") missing_cols <- setdiff(required_cols, colnames(field_details_table)) if (length(missing_cols) > 0) { @@ -714,10 +714,10 @@ generate_field_alerts <- function(field_details_table) { any(`Decline Risk` == "Low") ~ "Low", TRUE ~ "Unknown" ), - highest_weed_risk = case_when( - any(`Weed Risk` == "High") ~ "High", - any(`Weed Risk` == "Moderate") ~ "Moderate", - any(`Weed Risk` == "Low") ~ "Low", + highest_patchiness_risk = case_when( + any(`Patchiness Risk` == "High") ~ "High", + any(`Patchiness Risk` == "Medium") ~ "Medium", + any(`Patchiness Risk` == "Minimal") ~ "Minimal", TRUE ~ "Unknown" ), avg_mean_ci = mean(`Mean CI`, na.rm = TRUE), @@ -743,12 +743,12 @@ generate_field_alerts <- function(field_details_table) { } # Priority 3: No alert (no stress) - # Keep other alerts for decline risk, weed risk, gap score + # Keep other alerts for decline risk, patchiness risk, gap score if (field_summary$highest_decline_risk %in% c("High", "Very-high")) { field_alerts <- c(field_alerts, "Growth decline observed") } - if (field_summary$highest_weed_risk == "High") { - field_alerts <- c(field_alerts, "Increased weed presence") + if (field_summary$highest_patchiness_risk == "High") { + field_alerts <- c(field_alerts, "High patchiness detected - recommend scouting") } if (field_summary$max_gap_score > 20) { field_alerts <- c(field_alerts, "Gaps present - recommend review") @@ -859,7 +859,7 @@ if (!exists("field_details_table") || is.null(field_details_table)) { TCH_Forecasted = NA_real_, Gap_Score = NA_real_, Decline_Risk = NA_character_, - Weed_Risk = NA_character_, + Patchiness_Risk = NA_character_, Mean_CI = NA_real_, CV = NA_real_, Morans_I = NA_real_ @@ -1410,7 +1410,7 @@ tryCatch({ kpi_parts <- c( kpi_parts, sprintf("**Gap:** %.0f", field_kpi$Gap_Score), - sprintf("**Weed:** %s", field_kpi$Weed_Pressure_Risk), + sprintf("**Patchiness:** %s", field_kpi$Patchiness_Risk), sprintf("**Decline:** %s", field_kpi$Decline_Severity) ) @@ -1518,7 +1518,7 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field # Ensure all expected KPI columns exist; add as NA if missing expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score", "Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation", - "Decline_Severity", "Weed_Pressure_Risk") + "Decline_Severity", "Patchiness_Risk") for (col in expected_cols) { if (!col %in% names(field_details_table)) { field_details_table[[col]] <- NA @@ -1553,7 +1553,7 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field `Yield Forecast (t/ha)` = TCH_Forecasted, `Gap Score` = Gap_Score, `Decline Risk` = Decline_Severity, - `Weed Risk` = Weed_Pressure_Risk, + `Patchiness Risk` = Patchiness_Risk, `CV Value` = CV ) } else { @@ -1566,7 +1566,7 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field `Yield Forecast (t/ha)` = TCH_Forecasted, `Gap Score` = Gap_Score, `Decline Risk` = Decline_Severity, - `Weed Risk` = Weed_Pressure_Risk, + `Patchiness Risk` = Patchiness_Risk, `CV Value` = CV ) } @@ -1634,11 +1634,11 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to - **TCH Forecasted:** Provides yield predictions (tons cane per hectare) for mature fields (typically over 240 days old), using a machine learning model trained on historical CI and yield data. Helps plan harvest timing and logistics. - - **Weed Presence Score:** Detects rapid CI increases between weeks as a proxy for weed outbreaks in young fields (< 8 months old). After 8 months, canopy closure prevents weed growth. Risk levels based on percentage of pixels showing rapid growth (> 2.0 CI units increase): - - **Low:** < 10% of field area (minimal weed presence) - - **Moderate:** 10–25% (monitor and scout) - - **High:** > 25% (requires immediate intervention) - - **Note:** Mature fields (≥ 8 months) show "Canopy closed - Low weed risk" as the closed canopy suppresses weed growth. + - **Field Patchiness Score:** Measures field heterogeneity using the Gini coefficient, detecting spatial variation in crop health. High patchiness (Gini > 0.12) may indicate irrigation, pest, or fertility issues requiring targeted scouting: + - **Low:** Gini < 0.08 (excellent uniformity, minimal intervention needed) + - **Medium:** Gini 0.08–0.12 (acceptable variation, routine monitoring) + - **High:** Gini > 0.12 (poor uniformity, recommend field scouting) + - **Note:** Young crops (< 3 months) naturally show higher patchiness as they establish; this decreases with canopy closure. - **Gap Filling Score:** Indicates the proportion of a field with low CI values (lowest 25% of the distribution), highlighting areas with poor crop establishment or gaps that may need replanting. From 1f677f2626fea15f7bac623ce46097673b5b0746 Mon Sep 17 00:00:00 2001 From: Timon Date: Wed, 18 Feb 2026 09:36:54 +0100 Subject: [PATCH 25/30] Update terminology from DOY to DAH across multiple scripts and reports for consistency in crop age calculations --- python_app/22_harvest_baseline_prediction.py | 2 +- python_app/harvest_date_pred_utils.py | 36 ++++++------- r_app/21_convert_ci_rds_to_csv.R | 12 ++--- r_app/30_growth_model_utils.R | 2 +- r_app/80_utils_cane_supply.R | 4 +- r_app/80_utils_common.R | 26 +++++----- ..._CI_report_with_kpis_agronomic_support.Rmd | 14 ++--- r_app/MANUAL_PIPELINE_RUNNER.R | 2 +- r_app/parameters_project.R | 8 ++- r_app/report_utils.R | 52 +++++++++---------- 10 files changed, 82 insertions(+), 76 deletions(-) diff --git a/python_app/22_harvest_baseline_prediction.py b/python_app/22_harvest_baseline_prediction.py index 4184608..f20f937 100644 --- a/python_app/22_harvest_baseline_prediction.py +++ b/python_app/22_harvest_baseline_prediction.py @@ -111,7 +111,7 @@ def main(): # [3/4] Run model predictions with two-step detection print("\n[3/4] Running two-step harvest detection...") - print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DOY reset)") + print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DAH reset)") refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device, phase1_threshold=0.3, phase1_consecutive=2) diff --git a/python_app/harvest_date_pred_utils.py b/python_app/harvest_date_pred_utils.py index 012a9f2..eacb11f 100644 --- a/python_app/harvest_date_pred_utils.py +++ b/python_app/harvest_date_pred_utils.py @@ -144,7 +144,7 @@ def create_model(model_type: str, input_size: int, hidden_size: int = 128, # FEATURE ENGINEERING (from src/feature_engineering.py, simplified for inline) # ============================================================================ -def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> pd.DataFrame: +def compute_ci_features(ci_series: pd.Series, dah_series: pd.Series = None) -> pd.DataFrame: """Compute all CI-based features (state, velocity, acceleration, min/max/range/std/CV).""" features = pd.DataFrame(index=ci_series.index) @@ -177,9 +177,9 @@ def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> p ma = ci_series.rolling(window=window, min_periods=1).mean() features[f'{window}d_CV'] = features[f'{window}d_std'] / (ma + 1e-6) - # DOY normalized - if doy_series is not None: - features['DOY_normalized'] = doy_series / 450.0 + # DAH normalized (Days After Harvest) + if dah_series is not None: + features['DAH_normalized'] = dah_series / 450.0 return features.fillna(0) @@ -193,8 +193,8 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column: data_df: DataFrame with Date and CI data (may be a window after a harvest) feature_names: List of feature names to extract ci_column: Name of CI column - season_anchor_day: Day in FULL sequence where this season started (for DOY reset) - DOY will be recalculated as: 1, 2, 3, ... from this point + season_anchor_day: Day in FULL sequence where this season started (for DAH reset) + DAH will be recalculated as: 1, 2, 3, ... from this point lookback_start: Starting index in original full data (for season reset calculation) Returns: @@ -203,23 +203,23 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column: # Compute all CI features ci_series = data_df[ci_column].astype(float) - # Compute DOY (age/days since season start) - NOT day-of-year! - # DOY is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365) + # Compute DAH (age/days since season start) - NOT day-of-year! + # DAH is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365) # It only resets to 1 after a harvest is detected (new season) - doy_series = None - if 'DOY_normalized' in feature_names: + dah_series = None + if 'DAH_normalized' in feature_names: if season_anchor_day is not None and lookback_start >= season_anchor_day: - # Season was reset after harvest. Recalculate DOY as simple counter from 1 - # This is a window starting at or after harvest, so DOY should be: 1, 2, 3, ... - doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index) - elif 'DOY' in data_df.columns: - # Use DOY directly from CSV - already calculated as continuous age counter - doy_series = pd.Series(data_df['DOY'].astype(float).values, index=data_df.index) + # Season was reset after harvest. Recalculate DAH as simple counter from 1 + # This is a window starting at or after harvest, so DAH should be: 1, 2, 3, ... + dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index) + elif 'DAH' in data_df.columns: + # Use DAH directly from CSV - already calculated as continuous age counter + dah_series = pd.Series(data_df['DAH'].astype(float).values, index=data_df.index) else: # Fallback: create continuous age counter (1, 2, 3, ...) - doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index) + dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index) - all_features = compute_ci_features(ci_series, doy_series) + all_features = compute_ci_features(ci_series, dah_series) # Select requested features requested = [f for f in feature_names if f in all_features.columns] diff --git a/r_app/21_convert_ci_rds_to_csv.R b/r_app/21_convert_ci_rds_to_csv.R index 78af1bb..282e87f 100644 --- a/r_app/21_convert_ci_rds_to_csv.R +++ b/r_app/21_convert_ci_rds_to_csv.R @@ -14,7 +14,7 @@ # OUTPUT DATA: # - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/ # - Format: CSV (long format) -# - Columns: field, sub_field, Date, FitData, DOY, value +# - Columns: field, sub_field, Date, FitData, DAH, value # # USAGE: # Rscript 21_convert_ci_rds_to_csv.R [project] @@ -38,7 +38,7 @@ # NOTES: # - Data source: Uses interpolated CI data from Script 30 (growth model output) # - Handles both wide format and long format inputs from growth model -# - DOY (Day of Year): Calculated from date for seasonal analysis +# - DAH (Days After Harvest): Calculated from date; represents crop age in days # - Python integration: CSV format compatible with pandas/scikit-learn workflows # - Used by: Python harvest detection models (harvest_date_prediction.py) # - Exports complete growth curves with interpolated values for ML training @@ -82,13 +82,13 @@ wide_to_long_ci_data <- function(ci_data_wide) { filter(!is.na(FitData)) } -#' Create daily interpolated sequences with DOY for each field +#' Create daily interpolated sequences with DAH for each field #' #' For each field/sub_field combination, creates complete daily sequences from first to last date, #' fills in measurements, and interpolates missing dates. #' #' @param ci_data_long Long format tibble: field, sub_field, Date, FitData -#' @return Tibble with: field, sub_field, Date, FitData, DOY, value +#' @return Tibble with: field, sub_field, Date, FitData, DAH, value create_interpolated_daily_sequences <- function(ci_data_long) { ci_data_long %>% group_by(field, sub_field) %>% @@ -106,7 +106,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) { Date = date_seq, value = NA_real_, FitData = NA_real_, - DOY = seq_along(date_seq) # Continuous day counter: 1, 2, 3, ... + DAH = seq_along(date_seq) # Continuous day counter: 1, 2, 3, ... ) # Fill in actual measurement values @@ -124,7 +124,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) { }) ) %>% unnest(data) %>% - select(field, sub_field, Date, FitData, DOY, value) %>% + select(field, sub_field, Date, FitData, DAH, value) %>% arrange(field, Date) } diff --git a/r_app/30_growth_model_utils.R b/r_app/30_growth_model_utils.R index 647b811..cca107e 100644 --- a/r_app/30_growth_model_utils.R +++ b/r_app/30_growth_model_utils.R @@ -208,7 +208,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season, # Add additional columns CI <- CI %>% dplyr::mutate( - DOY = seq(1, n(), 1), + DAH = seq(1, n(), 1), model = paste0("Data", season, " : ", field_name), season = season, subField = field_name diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R index 100d231..3877c1c 100644 --- a/r_app/80_utils_cane_supply.R +++ b/r_app/80_utils_cane_supply.R @@ -173,10 +173,10 @@ calculate_status_alert <- function(imminent_prob, age_week, weekly_ci_change, me #' #' Uses Random Forest with Forward Feature Selection trained on: #' - Cumulative Canopy Index (CI) from growth model -#' - Days of Year (DOY) / crop age +#' - Days After Harvest (DAH) / crop age #' - CI-per-day (growth velocity) #' -#' Predicts yields for mature fields (DOY >= 240, ~8 months) into quartiles: +#' Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD, ~8 months) into quartiles: #' - Top 25%: High-yield fields #' - Average: Mid-range yield fields #' - Lowest 25%: Lower-yield fields diff --git a/r_app/80_utils_common.R b/r_app/80_utils_common.R index 29739cf..37ca957 100644 --- a/r_app/80_utils_common.R +++ b/r_app/80_utils_common.R @@ -1457,7 +1457,7 @@ prepare_predictions <- function(predictions, newdata) { dplyr::mutate( sub_field = newdata$sub_field, field = newdata$field, - Age_days = newdata$DOY, + Age_days = newdata$DAH, total_CI = round(newdata$cumulative_CI, 0), predicted_Tcha = round(predicted_Tcha, 0), season = newdata$season @@ -1506,8 +1506,8 @@ create_fallback_result <- function(field_boundaries) { #' Calculate yield prediction KPI using Random Forest with Feature Selection #' #' Trains a Random Forest model on historical harvest data with cumulative CI, -#' days of year (DOY), and CI-per-day as predictors. Uses CAST::ffs() for -#' Forward Feature Selection. Predicts yields for mature fields (DOY >= 240). +#' days after harvest (DAH), and CI-per-day as predictors. Uses CAST::ffs() for +#' Forward Feature Selection. Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD). #' #' @param field_boundaries Field boundaries (sf or SpatVector) #' @param harvesting_data Data frame with harvest data including tonnage_ha column @@ -1527,8 +1527,8 @@ create_fallback_result <- function(field_boundaries) { #' - Algorithm: Random Forest (caret + CAST) #' - Feature Selection: Forward Feature Selection (CAST::ffs) #' - Cross-validation: 5-fold CV -#' - Predictors: cumulative_CI, DOY, CI_per_day -#' - Mature field threshold: DOY >= 240 (8 months) +#' - Predictors: cumulative_CI, DAH, CI_per_day +#' - Mature field threshold: DAH >= DAH_MATURITY_THRESHOLD (8 months, ~240 days) #' - Output: Field-level yield forecasts grouped by quartile #' #' **Error Handling:** @@ -1568,12 +1568,12 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed, by = c("field", "sub_field", "season")) %>% dplyr::group_by(sub_field, season) %>% - dplyr::slice(which.max(DOY)) %>% - dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>% - dplyr::mutate(CI_per_day = cumulative_CI / DOY) + dplyr::slice(which.max(DAH)) %>% + dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DAH, season, sub_area) %>% + dplyr::mutate(CI_per_day = cumulative_CI / DAH) # Define predictors and response variables - predictors <- c("cumulative_CI", "DOY", "CI_per_day") + predictors <- c("cumulative_CI", "DAH", "CI_per_day") response <- "tonnage_ha" # Prepare training dataset (fields with harvest data) @@ -1600,7 +1600,7 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu # Prepare prediction dataset (fields without harvest data, mature fields only) prediction_yields <- CI_and_yield %>% as.data.frame() %>% - dplyr::filter(is.na(tonnage_ha) & DOY >= 240) # Mature fields only + dplyr::filter(is.na(tonnage_ha) & DAH >= DAH_MATURITY_THRESHOLD) # Mature fields only # Configure model training parameters ctrl <- caret::trainControl( @@ -1643,13 +1643,13 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu safe_log(paste("Yield prediction RMSE (in-sample/training):", round(rmse_value, 2), "t/ha")) } - # Predict yields for current season (mature fields >= 240 days) + # Predict yields for current season (mature fields >= DAH_MATURITY_THRESHOLD days) if (nrow(prediction_yields) > 0) { pred_rf_current_season <- prepare_predictions( stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields ) %>% - dplyr::filter(Age_days >= 240) %>% + dplyr::filter(Age_days >= DAH_MATURITY_THRESHOLD) %>% dplyr::select(c("field", "Age_days", "predicted_Tcha", "season")) } else { pred_rf_current_season <- data.frame() @@ -1710,7 +1710,7 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu safe_log("✓ Yield prediction complete") return(list(summary = result, field_results = field_level_results)) } else { - safe_log("No fields meet maturity threshold (DOY >= 240) for prediction", "WARNING") + safe_log(paste("No fields meet maturity threshold (DAH >=", DAH_MATURITY_THRESHOLD, ") for prediction"), "WARNING") return(list(summary = create_fallback_result(field_boundaries)$summary, field_results = data.frame())) } diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 216d722..55b81de 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -300,7 +300,7 @@ if (dir.exists(kpi_data_dir)) { } } - # 6. Gap filling summary - GROUP BY Gap_Level and COUNT + # 6. Gaps summary - GROUP BY Gap_Level and COUNT if ("Gap_Level" %in% names(field_details_table)) { summary_tables$gap_filling <- field_details_table %>% group_by(gap_level = Gap_Level) %>% @@ -581,7 +581,7 @@ if (exists("summary_tables") && !is.null(summary_tables) && length(summary_table growth_decline = list(display = "Growth Decline (4-Week Trend)", level_col = "trend_interpretation", count_col = "field_count"), patchiness = list(display = "Field Patchiness", level_col = "gini_category", count_col = "field_count", detail_col = "patchiness_risk"), tch_forecast = list(display = "TCH Forecasted", level_col = "tch_category", detail_col = "range", count_col = "field_count"), - gap_filling = list(display = "Gap Filling", level_col = "gap_level", count_col = "field_count") + gap_filling = list(display = "Gaps", level_col = "gap_level", count_col = "field_count") ) standardize_kpi <- function(df, level_col, count_col, detail_col = NULL) { @@ -1349,8 +1349,8 @@ tryCatch({ ci_quadrant_data <- if (project_dir == "esa" && field_name == "00F25") { CI_quadrant %>% dplyr::filter(field == "00F25") %>% - dplyr::arrange(DOY) %>% - dplyr::group_by(DOY) %>% + dplyr::arrange(DAH) %>% + dplyr::group_by(DAH) %>% dplyr::slice(1) %>% dplyr::ungroup() } else { @@ -1494,10 +1494,10 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field CI_quadrant %>% filter(Date <= as.Date(report_date)) %>% group_by(field, season) %>% - summarise(last_date = max(Date), last_doy = max(DOY), .groups = 'drop') %>% + summarise(last_date = max(Date), last_dah = max(DAH), .groups = 'drop') %>% group_by(field) %>% filter(season == max(season)) %>% - select(field, Age_days = last_doy) + select(field, Age_days = last_dah) }, error = function(e) { data.frame(field = character(), Age_days = numeric()) }) @@ -1640,7 +1640,7 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to - **High:** Gini > 0.12 (poor uniformity, recommend field scouting) - **Note:** Young crops (< 3 months) naturally show higher patchiness as they establish; this decreases with canopy closure. - - **Gap Filling Score:** Indicates the proportion of a field with low CI values (lowest 25% of the distribution), highlighting areas with poor crop establishment or gaps that may need replanting. + - **Gap Score:** Indicates the proportion of a field with low CI values (lowest 25% of the distribution), highlighting areas with poor crop establishment or gaps that may need replanting. 2. **Overview Map: Growth on Farm:** Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting. diff --git a/r_app/MANUAL_PIPELINE_RUNNER.R b/r_app/MANUAL_PIPELINE_RUNNER.R index 7ea203e..0a6f7d6 100644 --- a/r_app/MANUAL_PIPELINE_RUNNER.R +++ b/r_app/MANUAL_PIPELINE_RUNNER.R @@ -239,7 +239,7 @@ # # OUTPUT: # - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv -# - Columns: field, sub_field, Date, FitData, DOY, value +# - Columns: field, sub_field, Date, FitData, DAH, value # # PARAMETERS: # PROJECT: angata, chemba, xinavane, esa, simba diff --git a/r_app/parameters_project.R b/r_app/parameters_project.R index f0b5802..daae442 100644 --- a/r_app/parameters_project.R +++ b/r_app/parameters_project.R @@ -33,8 +33,14 @@ suppressPackageStartupMessages({ }) # ============================================================================== -# SECTION 2: CLIENT TYPE MAPPING & CONFIGURATION +# SECTION 2: GLOBAL AGRONOMIC THRESHOLDS & CLIENT TYPE MAPPING # ============================================================================== + +# Maturity threshold for yield prediction: crop age in Days After Harvest (DAH) +# Only fields >= DAH_MATURITY_THRESHOLD days old receive yield forecasts +# ~240 days ≈ 8 months, typical sugarcane maturity window +DAH_MATURITY_THRESHOLD <- 240 + # Maps project names to client types for pipeline control # This determines which scripts run and what outputs they produce diff --git a/r_app/report_utils.R b/r_app/report_utils.R index f4346a6..3827593 100644 --- a/r_app/report_utils.R +++ b/r_app/report_utils.R @@ -357,10 +357,10 @@ ci_plot <- function(pivotName, #' Creates a plot showing Chlorophyll Index data over time for a pivot field #' #' @param pivotName The name or ID of the pivot field to visualize -#' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DOY, cumulative_CI, value and season columns +#' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DAH, cumulative_CI, value and season columns #' @param plot_type Type of plot to generate: "absolute", "cumulative", or "both" #' @param facet_on Whether to facet the plot by season (TRUE) or overlay all seasons (FALSE) -#' @param x_unit Unit for x-axis: "days" for DOY or "weeks" for week number (default: "days") +#' @param x_unit Unit for x-axis: "days" for DAH or "weeks" for week number (default: "days") #' @param colorblind_friendly Whether to use colorblind-friendly color schemes (default: FALSE) #' @param show_benchmarks Whether to show historical benchmark lines (default: FALSE) #' @param estate_name Name of the estate for benchmark calculation (required if show_benchmarks = TRUE) @@ -393,7 +393,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " # Process data data_ci2 <- data_ci %>% - dplyr::mutate(CI_rate = cumulative_CI / DOY, + dplyr::mutate(CI_rate = cumulative_CI / DAH, week = lubridate::week(Date)) %>% dplyr::group_by(field) %>% dplyr::mutate(mean_CIrate_rolling_10_days = zoo::rollapplyr(CI_rate, width = 10, FUN = mean, partial = TRUE), @@ -448,7 +448,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " # Determine x-axis variable based on x_unit parameter x_var <- if (x_unit == "days") { - if (facet_on) "Date" else "DOY" + if (facet_on) "Date" else "DAH" } else { "week" } @@ -457,7 +457,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " "days" = if (facet_on) "Date" else "Age of Crop (Days)", "weeks" = "Week Number") - # Create plot with either facets by season or overlay by DOY/week + # Create plot with either facets by season or overlay by DAH/week if (facet_on) { g <- ggplot2::ggplot(data = plot_data) + ggplot2::facet_wrap(~season, scales = "free_x") + @@ -502,12 +502,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " benchmark_subset <- benchmark_data %>% dplyr::filter(ci_type == ci_type_filter) %>% dplyr::mutate( - benchmark_x = if (x_var == "DOY") { - DOY + benchmark_x = if (x_var == "DAH") { + DAH } else if (x_var == "week") { - DOY / 7 # Approximate conversion + DAH / 7 # Approximate conversion } else { - DOY # For Date, use DOY as is (may not align perfectly) + DAH # For Date, use DAH as is (may not align perfectly) } ) ggplot2::geom_smooth( @@ -549,7 +549,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " x = x_label) + color_scale + { - if (x_var == "DOY") { + if (x_var == "DAH") { ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1))) } else if (x_var == "week") { ggplot2::scale_x_continuous(breaks = seq(0, 64, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1))) @@ -597,7 +597,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " # Determine x-axis variable based on x_unit parameter x_var <- if (x_unit == "days") { - if (facet_on) "Date" else "DOY" + if (facet_on) "Date" else "DAH" } else { "week" } @@ -620,12 +620,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " if (!is.null(benchmark_data)) { benchmark_subset <- benchmark_data %>% dplyr::mutate( - benchmark_x = if (x_var == "DOY") { - DOY + benchmark_x = if (x_var == "DAH") { + DAH } else if (x_var == "week") { - DOY / 7 + DAH / 7 } else { - DOY + DAH }, ci_type_label = case_when( ci_type == "value" ~ "10-Day Rolling Mean CI", @@ -673,7 +673,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = " x = x_label) + color_scale + { - if (x_var == "DOY") { + if (x_var == "DAH") { ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1))) } else if (x_var == "week") { ggplot2::scale_x_continuous(breaks = seq(0, 64, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1))) @@ -840,11 +840,11 @@ get_week_path <- function(mosaic_path, input_date, week_offset) { #' Computes historical percentile benchmarks for CI data per estate #' -#' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DOY, cumulative_CI, value, season columns +#' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DAH, cumulative_CI, value, season columns #' @param estate_name Name of the estate/client to filter data for #' @param percentiles Vector of percentiles to compute (e.g., c(10, 50, 90)) #' @param min_seasons Minimum number of seasons required for reliable benchmarks (default: 3) -#' @return Data frame with DOY, percentile, ci_type, benchmark_value, or NULL if insufficient data +#' @return Data frame with DAH, percentile, ci_type, benchmark_value, or NULL if insufficient data #' compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c(10, 50, 90), min_seasons = 3) { # Input validation @@ -873,7 +873,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c # Prepare data for both CI types data_prepared <- data_filtered %>% dplyr::ungroup() %>% # Ensure no existing groupings - dplyr::select(DOY, value, cumulative_CI, season) %>% + dplyr::select(DAH, value, cumulative_CI, season) %>% tidyr::pivot_longer( cols = c("value", "cumulative_CI"), names_to = "ci_type", @@ -881,9 +881,9 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c ) %>% dplyr::filter(!is.na(ci_value)) # Remove NA values - # Compute percentiles for each DOY and ci_type + # Compute percentiles for each DAH and ci_type benchmarks <- data_prepared %>% - dplyr::group_by(DOY, ci_type) %>% + dplyr::group_by(DAH, ci_type) %>% dplyr::summarise( p10 = tryCatch(quantile(ci_value, 0.1, na.rm = TRUE), error = function(e) NA_real_), p50 = tryCatch(quantile(ci_value, 0.5, na.rm = TRUE), error = function(e) NA_real_), @@ -891,7 +891,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c n_observations = n(), .groups = 'drop' ) %>% - dplyr::filter(n_observations >= min_seasons) %>% # Only include DOYs with sufficient data + dplyr::filter(n_observations >= min_seasons) %>% # Only include DAHs with sufficient data tidyr::pivot_longer( cols = c(p10, p50, p90), names_to = "percentile", @@ -908,7 +908,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c # Rename columns for clarity benchmarks <- benchmarks %>% - dplyr::select(DOY, ci_type, percentile, benchmark_value) + dplyr::select(DAH, ci_type, percentile, benchmark_value) safe_log(paste("Computed CI benchmarks for estate", estate_name, "with", length(unique_seasons), "seasons and", nrow(benchmarks), "benchmark points"), "INFO") @@ -1095,7 +1095,7 @@ get_field_priority_level <- function(cv, morans_i) { #' #' @param field_name Name of the field to summarize #' @param field_details_table Data frame with field-level KPI details -#' @param CI_quadrant Data frame containing CI quadrant data with Date, DOY, season columns +#' @param CI_quadrant Data frame containing CI quadrant data with Date, DAH, season columns #' @param report_date Report date (used for filtering current season data) #' @return Formatted text string with field KPI summary #' @@ -1116,10 +1116,10 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr } current_season <- current_season_data %>% pull(season) - # Get the most recent DOY from the current season + # Get the most recent DAH from the current season field_age_data <- CI_quadrant %>% filter(field == field_name, season == current_season) %>% - pull(DOY) + pull(DAH) field_age <- if (length(field_age_data) > 0) max(field_age_data, na.rm = TRUE) else NA_real_ # Filter data for this specific field From af5c53e084435cb37a13b94ba4818e7bee11a63e Mon Sep 17 00:00:00 2001 From: Timon Date: Wed, 18 Feb 2026 10:09:10 +0100 Subject: [PATCH 26/30] Refactor patchiness KPI calculation to combine Gini Coefficient and Moran's I for comprehensive field heterogeneity assessment; update report to reflect new metrics and interpretations. --- r_app/80_utils_agronomic_support.R | 253 +++++++++--------- ..._CI_report_with_kpis_agronomic_support.Rmd | 83 ++++-- 2 files changed, 195 insertions(+), 141 deletions(-) diff --git a/r_app/80_utils_agronomic_support.R b/r_app/80_utils_agronomic_support.R index df5232a..adb629a 100644 --- a/r_app/80_utils_agronomic_support.R +++ b/r_app/80_utils_agronomic_support.R @@ -400,121 +400,143 @@ calculate_growth_decline_kpi <- function(ci_values_list) { return(result) } -#' KPI 5: Calculate weed presence indicator + #' -#' Detects field fragmentation/patchiness (potential weed/pest pressure) +#' Combines two complementary metrics for comprehensive heterogeneity assessment: +#' - Gini Coefficient: Distribution inequality of CI values (0=uniform, 1=unequal) +#' - Moran's I: Spatial autocorrelation (-1 to +1, indicates clustering vs dispersal) #' #' @param ci_pixels_by_field List of CI pixel arrays for each field +#' @param field_boundaries_sf SF object with field geometries +#' @param mosaic_dir Directory path to per-field mosaic files (for Moran's I) +#' @param week_file Week file pattern (for Moran's I calculation) +#' @param mean_ci_values Optional vector of mean CI values per field #' -#' @return Data frame with fragmentation indicators -calculate_weed_presence_kpi <- function(ci_pixels_by_field) { +#' @return Data frame with gini_coefficient, morans_i, patchiness_risk, patchiness_interpretation +calculate_patchiness_kpi <- function(ci_pixels_by_field, field_boundaries_sf = NULL, + mosaic_dir = NULL, week_file = NULL, mean_ci_values = NULL) { + + n_fields <- length(ci_pixels_by_field) + result <- data.frame( - field_idx = seq_len(length(ci_pixels_by_field)), - cv_value = NA_real_, - low_ci_percent = NA_real_, - fragmentation_index = NA_real_, - weed_pressure_risk = NA_character_, + field_idx = seq_len(n_fields), + gini_coefficient = NA_real_, + morans_i = NA_real_, + patchiness_risk = NA_character_, + patchiness_interpretation = NA_character_, stringsAsFactors = FALSE ) - for (field_idx in seq_len(length(ci_pixels_by_field))) { - ci_pixels <- ci_pixels_by_field[[field_idx]] + # Determine if per-field structure available + is_per_field <- !is.null(mosaic_dir) && !is.null(week_file) && !is.null(field_boundaries_sf) + + for (i in seq_len(n_fields)) { + ci_pixels <- ci_pixels_by_field[[i]] if (is.null(ci_pixels) || length(ci_pixels) == 0) { - result$weed_pressure_risk[field_idx] <- "No data" + result$patchiness_risk[i] <- "No data" + result$patchiness_interpretation[i] <- "No data" next } ci_pixels <- ci_pixels[!is.na(ci_pixels)] if (length(ci_pixels) == 0) { - result$weed_pressure_risk[field_idx] <- "No data" + result$patchiness_risk[i] <- "No data" + result$patchiness_interpretation[i] <- "No data" next } - cv_val <- calculate_cv(ci_pixels) - low_ci_pct <- sum(ci_pixels < 1.5) / length(ci_pixels) * 100 - fragmentation <- cv_val * low_ci_pct / 100 - - result$cv_value[field_idx] <- cv_val - result$low_ci_percent[field_idx] <- round(low_ci_pct, 2) - result$fragmentation_index[field_idx] <- round(fragmentation, 3) - - if (is.na(fragmentation)) { - result$weed_pressure_risk[field_idx] <- "No data" - } else if (fragmentation > 0.15) { - result$weed_pressure_risk[field_idx] <- "High" - } else if (fragmentation > 0.08) { - result$weed_pressure_risk[field_idx] <- "Medium" - } else if (fragmentation > 0.04) { - result$weed_pressure_risk[field_idx] <- "Low" - } else { - result$weed_pressure_risk[field_idx] <- "Minimal" + # ========================================= + # METRIC 1: Calculate Gini Coefficient + # ========================================= + gini <- NA_real_ + if (length(ci_pixels) > 1) { + ci_sorted <- sort(ci_pixels) + n <- length(ci_sorted) + numerator <- 2 * sum(seq_len(n) * ci_sorted) + denominator <- n * sum(ci_sorted) + gini <- (numerator / denominator) - (n + 1) / n + gini <- max(0, min(1, gini)) # Clamp to 0-1 } - } - - return(result) -} - -#' KPI 5: Calculate field patchiness (combines fragmentation into Gini-like metric + risk) -#' -#' @param weed_kpi_result Data frame from calculate_weed_presence_kpi() -#' @param mean_ci_values Optional vector of mean CI values per field -#' -#' @return Data frame with patchiness indicators (gini_coefficient, patchiness_risk, interpretation) -calculate_patchiness_kpi <- function(weed_kpi_result, ci_pixels_by_field = NULL, mean_ci_values = NULL) { - result <- weed_kpi_result %>% - mutate( - # Calculate Gini coefficient from CI pixels (proper calculation) - gini_coefficient = NA_real_, - mean_ci = if (!is.null(mean_ci_values)) mean_ci_values[field_idx] else NA_real_, - # Map weed_pressure_risk to patchiness_risk - patchiness_risk = weed_pressure_risk, - # Create interpretation based on gini and risk - patchiness_interpretation = case_when( - is.na(gini_coefficient) ~ "No data", - gini_coefficient < 0.2 & patchiness_risk %in% c("Low", "Minimal") ~ "Uniform growth", - gini_coefficient < 0.4 & patchiness_risk %in% c("Low", "Medium") ~ "Moderate patchiness", - gini_coefficient >= 0.4 & patchiness_risk %in% c("High", "Medium") ~ "High patchiness", - TRUE ~ "Mixed heterogeneity" - ) - ) %>% - select(field_idx, gini_coefficient, mean_ci, - patchiness_interpretation, patchiness_risk) - - # Calculate actual Gini coefficients if CI pixels provided - if (!is.null(ci_pixels_by_field)) { - for (i in seq_len(nrow(result))) { - ci_pixels <- ci_pixels_by_field[[i]] + result$gini_coefficient[i] <- gini + + # ========================================= + # METRIC 2: Calculate Moran's I (spatial clustering) + # ========================================= + morans_i <- NA_real_ + if (is_per_field) { + field_name <- field_boundaries_sf$field[i] + field_mosaic_path <- file.path(mosaic_dir, field_name, week_file) - if (!is.null(ci_pixels) && length(ci_pixels) > 0) { - ci_pixels <- ci_pixels[!is.na(ci_pixels)] - - if (length(ci_pixels) > 1) { - # Calculate Gini coefficient - # Formula: Gini = (2 * sum(i * x_i)) / (n * sum(x_i)) - (n+1)/n - # where x_i are sorted values - ci_sorted <- sort(ci_pixels) - n <- length(ci_sorted) + if (file.exists(field_mosaic_path)) { + tryCatch({ + field_raster <- terra::rast(field_mosaic_path)[["CI"]] + single_field <- field_boundaries_sf[i, ] + morans_result <- calculate_spatial_autocorrelation(field_raster, single_field) - numerator <- 2 * sum(seq_len(n) * ci_sorted) - denominator <- n * sum(ci_sorted) - - gini <- (numerator / denominator) - (n + 1) / n - # Clamp to 0-1 range (should be within this by formula but guard against numerical errors) - gini <- max(0, min(1, gini)) - - result$gini_coefficient[i] <- gini - - # Update interpretation based on calculated Gini - result$patchiness_interpretation[i] <- dplyr::case_when( - gini < 0.2 ~ "Uniform growth", - gini < 0.4 & result$patchiness_risk[i] %in% c("Low", "Medium", "Minimal") ~ "Moderate patchiness", - gini >= 0.4 & result$patchiness_risk[i] %in% c("High", "Medium") ~ "High patchiness", - TRUE ~ "Mixed heterogeneity" - ) - } + if (is.list(morans_result)) { + morans_i <- morans_result$morans_i + } else { + morans_i <- morans_result + } + }, error = function(e) { + safe_log(paste("Warning: Moran's I failed for field", field_name, ":", e$message), "WARNING") + }) } } + result$morans_i[i] <- morans_i + + # ========================================= + # RISK DETERMINATION: Gini + Moran's I combination + # ========================================= + # Logic: + # - High Gini (>0.3) + High Moran's I (>0.85) = High patchiness (localized clusters) + # - High Gini + Low Moran's I = Medium patchiness (scattered heterogeneity) + # - Low Gini (<0.15) = Minimal patchiness (uniform) + # - Moderate Gini = Low to Medium patchiness + + if (is.na(gini)) { + result$patchiness_risk[i] <- "No data" + } else if (gini < 0.15) { + result$patchiness_risk[i] <- "Minimal" + } else if (gini < 0.30) { + # Low-to-moderate Gini + if (!is.na(morans_i) && morans_i > 0.85) { + result$patchiness_risk[i] <- "Medium" # Some clustering + } else { + result$patchiness_risk[i] <- "Low" + } + } else if (gini < 0.50) { + # High Gini + if (!is.na(morans_i) && morans_i > 0.85) { + result$patchiness_risk[i] <- "High" # Localized problem clusters + } else { + result$patchiness_risk[i] <- "Medium" # Scattered issues + } + } else { + # Very high Gini (>0.5) + result$patchiness_risk[i] <- "High" + } + + # ========================================= + # INTERPRETATION: Combined Gini + Moran's I narrative + # ========================================= + result$patchiness_interpretation[i] <- dplyr::case_when( + is.na(gini) ~ "No data", + gini < 0.15 & (is.na(morans_i) | morans_i < 0.75) ~ + "Excellent uniformity - minimal patchiness", + gini < 0.30 & (is.na(morans_i) | morans_i < 0.75) ~ + "Good uniformity - low patchiness", + gini < 0.30 & !is.na(morans_i) & morans_i > 0.85 ~ + "Moderate uniformity with localized clustering", + gini < 0.50 & (is.na(morans_i) | morans_i < 0.75) ~ + "Poor uniformity - scattered heterogeneity", + gini < 0.50 & !is.na(morans_i) & morans_i > 0.85 ~ + "Poor uniformity with clustered problem areas", + gini >= 0.50 ~ + "Severe heterogeneity - requires field investigation", + TRUE ~ "Mixed heterogeneity" + ) } return(result) @@ -536,13 +558,6 @@ create_summary_tables <- function(all_kpis) { uniformity = all_kpis$uniformity %>% select(field_idx, cv_value, uniformity_category, interpretation), - spatial_clustering = if (!is.null(all_kpis$spatial_clustering) && nrow(all_kpis$spatial_clustering) > 0) { - all_kpis$spatial_clustering %>% - select(field_idx, morans_i) - } else { - NULL - }, - area_change = all_kpis$area_change %>% select(field_idx, mean_ci_pct_change, interpretation), @@ -553,7 +568,7 @@ create_summary_tables <- function(all_kpis) { select(field_idx, four_week_trend, trend_interpretation, decline_severity), patchiness = all_kpis$patchiness %>% - select(field_idx, gini_coefficient, patchiness_interpretation, patchiness_risk), + select(field_idx, gini_coefficient, morans_i, patchiness_interpretation, patchiness_risk), gap_filling = if (!is.null(all_kpis$gap_filling) && nrow(all_kpis$gap_filling) > 0) { all_kpis$gap_filling %>% @@ -621,28 +636,19 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee ) # ============================================ - # GROUP 3: FIELD HETEROGENEITY/PATCHINESS (KPI 5 + Spatial Clustering) + # GROUP 3: FIELD HETEROGENEITY/PATCHINESS (KPI 5) # ============================================ - # KPI 5: Field Patchiness + # KPI 5: Field Patchiness (Gini + Moran's I combination) result <- result %>% left_join( all_kpis$patchiness %>% - select(field_idx, Gini_Coefficient = gini_coefficient, Mean_CI = mean_ci, + select(field_idx, Gini_Coefficient = gini_coefficient, + Morans_I = morans_i, Patchiness_Interpretation = patchiness_interpretation, Patchiness_Risk = patchiness_risk), by = "field_idx" ) - # Moran's I (spatial clustering - used in patchiness calculation) - if (!is.null(all_kpis$spatial_clustering) && nrow(all_kpis$spatial_clustering) > 0) { - result <- result %>% - left_join( - all_kpis$spatial_clustering %>% - select(field_idx, Morans_I = morans_i), - by = "field_idx" - ) - } - # ============================================ # GROUP 4: YIELD FORECAST (KPI 3) # ============================================ @@ -914,14 +920,14 @@ calculate_all_field_analysis_agronomic_support <- function( ) message("Calculating KPI 5: Field Patchiness...") - weed_kpi <- calculate_weed_presence_kpi(ci_pixels_by_field) - - # Convert weed metrics to patchiness metrics (Gini-like + risk combination) - mean_ci_values <- current_stats$Mean_CI - if (is.null(mean_ci_values) || length(mean_ci_values) != nrow(field_boundaries_sf)) { - mean_ci_values <- rep(NA_real_, nrow(field_boundaries_sf)) - } - patchiness_kpi <- calculate_patchiness_kpi(weed_kpi, ci_pixels_by_field, mean_ci_values) + # Calculate patchiness using both Gini coefficient and Moran's I spatial clustering + patchiness_kpi <- calculate_patchiness_kpi( + ci_pixels_by_field, + field_boundaries_sf = field_boundaries_sf, + mosaic_dir = current_mosaic_dir, + week_file = week_file, + mean_ci_values = current_stats$Mean_CI + ) message("Calculating KPI 6: Gap Filling...") # Build list of per-field files for this week @@ -968,7 +974,6 @@ calculate_all_field_analysis_agronomic_support <- function( tch_forecasted = tch_kpi, growth_decline = growth_decline_kpi, patchiness = patchiness_kpi, - spatial_clustering = uniformity_kpi %>% select(field_idx, morans_i), gap_filling = gap_filling_kpi ) @@ -985,10 +990,6 @@ calculate_all_field_analysis_agronomic_support <- function( distinct(field_idx, .keep_all = TRUE) all_kpis$patchiness <- all_kpis$patchiness %>% distinct(field_idx, .keep_all = TRUE) - if (!is.null(all_kpis$spatial_clustering)) { - all_kpis$spatial_clustering <- all_kpis$spatial_clustering %>% - distinct(field_idx, .keep_all = TRUE) - } all_kpis$gap_filling <- all_kpis$gap_filling %>% distinct(field_idx, .keep_all = TRUE) diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 55b81de..4725229 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -1622,25 +1622,79 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to ### What You'll Find in This Report: 1. **Key Performance Indicators (KPIs):** - The report provides a farm-wide analysis based on the Chlorophyll Index (CI) changes. KPIs are calculated field by field and summarized in tables. The current KPIs included are: + The report provides a farm-wide analysis based on weekly Chlorophyll Index (CI) measurements. Five comprehensive KPIs are calculated field by field to assess crop health: + + - **KPI 1: Field Uniformity** — Measures how consistently crop is developing across the field + - **Metric:** Coefficient of Variation (CV) of CI pixel values + - **Calculation:** CV = (Standard Deviation of CI) / (Mean CI) + - **Categories:** + - **Excellent:** CV < 0.08 (very uniform growth, minimal intervention needed) + - **Good:** CV < 0.15 (acceptable uniformity, routine monitoring) + - **Acceptable:** CV < 0.25 (moderate variation, monitor irrigation/fertility) + - **Poor:** CV < 0.4 (high variation, investigate management issues) + - **Very poor:** CV ≥ 0.4 (severe variation, immediate field scout required) + - **Why it matters:** Uniform fields are easier to manage and typically produce better yields. Uneven growth suggests irrigation problems, fertility gaps, pests, or disease. - - **Field Uniformity:** Assesses the consistency of crop growth within each field using the coefficient of variation (CV) of CI values. Uniformity levels are classified as: - - **Excellent:** CV < 0.08 (very uniform growth) - - **Good:** CV < 0.15 (acceptable uniformity) - - **Moderate:** CV < 0.25 (some variation present) - - **Poor:** CV ≥ 0.25 (high variation - investigate irrigation, fertility, or pests) + - **KPI 2: Area Change (Weekly Growth)** — Tracks week-over-week CI changes to detect rapid improvements or declines + - **Calculation:** ((Current Mean CI − Previous Mean CI) / Previous Mean CI) × 100 + - **Categories:** + - **Rapid growth:** > 15% increase (excellent weekly progress) + - **Positive growth:** 5–15% increase (steady improvement) + - **Stable:** −5% to +5% (field maintained, no significant change) + - **Declining:** −5% to −15% (slow decline, warrant closer monitoring) + - **Rapid decline:** < −15% (alert: urgent issue requiring investigation) + - **Why it matters:** Week-to-week changes reveal developing problems early, enabling timely intervention. - - **Area Change:** Summarizes the proportion of field area that is improving, stable, or declining week-over-week, based on CI changes. Helps identify fields requiring immediate attention. + - **KPI 3: TCH Forecasted (Yield Prediction)** — Predicts final harvest tonnage for mature fields + - **Applies to:** Fields ≥ 240 days old (mature stage) + - **Method:** Random Forest machine learning model trained on historical harvest data and CI trajectories + - **Inputs:** Days after harvest (DAH) and CI growth rate (CI_per_day) + - **Output:** Predicted tons of cane per hectare (t/ha) + - **Why it matters:** Helps plan harvest timing, mill throughput, and revenue forecasting for mature crops. - - **TCH Forecasted:** Provides yield predictions (tons cane per hectare) for mature fields (typically over 240 days old), using a machine learning model trained on historical CI and yield data. Helps plan harvest timing and logistics. + - **KPI 4: Growth Decline (4-Week Trend)** — Assesses short-term growth trajectory using linear regression + - **Calculation:** Linear slope of CI values over the previous 4 weeks + - **Categories:** + - **Strong growth:** Slope > 0.1 CI units/week (excellent sustained progress) + - **Weak growth:** Slope 0–0.1 (slow improvement, monitor closely) + - **Slight decline:** Slope −0.1–0 (low severity, non-urgent observation) + - **Moderate decline:** Slope −0.3 to −0.1 (medium severity, scouting recommended) + - **Strong decline:** Slope < −0.3 (high severity, immediate field investigation required) + - **Why it matters:** Trend analysis reveals whether crop is accelerating, stalling, or stressed over time. - - **Field Patchiness Score:** Measures field heterogeneity using the Gini coefficient, detecting spatial variation in crop health. High patchiness (Gini > 0.12) may indicate irrigation, pest, or fertility issues requiring targeted scouting: - - **Low:** Gini < 0.08 (excellent uniformity, minimal intervention needed) - - **Medium:** Gini 0.08–0.12 (acceptable variation, routine monitoring) - - **High:** Gini > 0.12 (poor uniformity, recommend field scouting) - - **Note:** Young crops (< 3 months) naturally show higher patchiness as they establish; this decreases with canopy closure. + - **KPI 5: Field Patchiness (Heterogeneity)** — Combines two complementary spatial metrics for comprehensive heterogeneity assessment + - **Metric 1: Gini Coefficient** — Statistical measure of distribution inequality in CI pixel values + - **Formula:** (2 × Σ(i × sorted_CI)) / (n × Σ(sorted_CI)) − (n+1)/n + - **Range:** 0 (perfectly uniform) to 1 (highly unequal) + - **Interpretation:** Low Gini (< 0.15) = good uniformity; High Gini (> 0.3) = significant heterogeneity + - **Metric 2: Moran's I** — Spatial autocorrelation indicating whether high/low areas are clustered or scattered + - **Range:** −1 (dispersed pattern) to +1 (strong clustering) + - **Thresholds:** Moran's I > 0.85 indicates clustered problem areas; < 0.75 suggests scattered issues + - **Risk Determination (Gini + Moran's I Combined):** + - **Minimal Risk:** Gini < 0.15 (excellent uniformity regardless of spatial pattern) + - **Low Risk:** Gini 0.15–0.30, Moran's I < 0.85 (moderate variation, scattered distribution) + - **Medium Risk:** Gini 0.15–0.30, Moran's I > 0.85 OR Gini 0.30–0.50, Moran's I < 0.85 (notable issues) + - **High Risk:** Gini > 0.30, Moran's I > 0.85 (severe heterogeneity with localized clusters—urgent scouting needed) + - **Why it matters:** High patchiness may indicate irrigation inefficiencies, localized pest pressure, fertility variation, or disease spread. Combined Gini + Moran's I reveals not just *how much* variation exists, but also *how it's distributed* spatially. CI reflects chlorophyll = nitrogen status + plant health + vigor. High CV/Patchiness often signals N gaps, water stress, pests (borers), or ratoon decline. + + - **Uniformity vs. Patchiness — What's the Difference?** + Both KPIs measure variation, but they answer different questions and drive different management actions: + - **Uniformity (CV-based)** answers: "*Is* growth even across the field?" — it detects whether a problem exists but not where. + - **Patchiness (Gini + Moran's I)** answers: "*Where* are problems and how are they arranged?" — it reveals the spatial pattern. + + **Practical example:** Two fields both score "Poor" on Uniformity (CV = 0.25). However: + - Field A has scattered low-CI patches (Moran's I = 0.6) → suggests *random* stress (disease pressure, uneven irrigation) + - Field B has clustered low-CI in one corner (Moran's I = 0.95) → suggests *localized* problem (drainage, compaction, pest hotspot) + + Your scouting and remediation strategy should differ: Field A might need systemic irrigation adjustment or disease management; Field B might need soil remediation in the affected corner. **Patchiness tells you *where to focus your effort*.** - - **Gap Score:** Indicates the proportion of a field with low CI values (lowest 25% of the distribution), highlighting areas with poor crop establishment or gaps that may need replanting. + - **KPI 6: Gap Score (Establishment Quality)** — Quantifies field gaps and areas of poor crop establishment + - **Metric:** Percentage of field area with very low CI values (lowest 25% of pixel distribution) + - **Levels:** + - **Minimal:** < 10% of field (good establishment, limited replanting needed) + - **Moderate:** 10–25% of field (monitor gap expansion, coordinate with agronomy) + - **Significant:** ≥ 25% of field (consider targeted replanting or rehabilitation) + - **Why it matters:** High gap scores indicate potential yield losses and may warrant management intervention. 2. **Overview Map: Growth on Farm:** Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting. @@ -1659,7 +1713,6 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to --- - ### Historical Benchmark Lines The CI time series graphs include historical benchmark lines for the 10th, 50th, and 90th percentiles of CI values across all fields and seasons. From f1821dab592bcd5532166cad52d1b9d8a0bcfcbb Mon Sep 17 00:00:00 2001 From: Timon Date: Wed, 18 Feb 2026 10:54:42 +0100 Subject: [PATCH 27/30] Enhance project configuration and reporting utilities; update .gitignore for PNG exceptions, add CI change thresholds, and improve historical data handling in KPI calculations. --- r_app/.gitignore | 6 + r_app/80_utils_agronomic_support.R | 126 ++++++++++++++++-- r_app/80_utils_cane_supply.R | 29 +++- r_app/80_utils_common.R | 39 +++++- ..._CI_report_with_kpis_agronomic_support.Rmd | 19 +-- r_app/91_CI_report_with_kpis_cane_supply.Rmd | 10 +- r_app/parameters_project.R | 27 +++- r_app/report_utils.R | 29 +--- 8 files changed, 224 insertions(+), 61 deletions(-) diff --git a/r_app/.gitignore b/r_app/.gitignore index d159461..7cd7b7e 100644 --- a/r_app/.gitignore +++ b/r_app/.gitignore @@ -8,8 +8,14 @@ renv *.tmp *.swp *.save + +# Ignore ALL PNG files by default (generated outputs, analysis plots, etc.) *.png +# EXCEPTIONS: Explicitly track intentional PNG assets +# Uncomment or add lines below for PNG files that should be committed to git +!r_app/CI_graph_example.png + # Ignore files related to Rproj .Rproj.user/ .Rhistory diff --git a/r_app/80_utils_agronomic_support.R b/r_app/80_utils_agronomic_support.R index adb629a..9dcf3db 100644 --- a/r_app/80_utils_agronomic_support.R +++ b/r_app/80_utils_agronomic_support.R @@ -24,8 +24,6 @@ library(tidyr) library(readxl) library(writexl) library(spdep) -library(caret) -library(CAST) # ============================================================================ # SHARED HELPER FUNCTIONS (NOW IN 80_UTILS_COMMON.R) @@ -588,7 +586,7 @@ create_summary_tables <- function(all_kpis) { #' @param current_year Current year #' #' @return Data frame with one row per field, all KPI columns -create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_week, current_year) { +create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_week, current_year, current_stats = NULL) { # Start with field identifiers AND field_idx for joining result <- field_boundaries_sf %>% @@ -602,6 +600,20 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee ) %>% select(field_idx, Field_id, Field_name, Week, Year) + # ============================================ + # GROUP 0: MEAN CI (from field statistics) + # ============================================ + if (!is.null(current_stats)) { + result <- result %>% + left_join( + current_stats %>% + select(Field_id, Mean_CI), + by = "Field_id" + ) + } else { + result$Mean_CI <- NA_real_ + } + # ============================================ # GROUP 1: FIELD UNIFORMITY (KPI 1) # ============================================ @@ -609,7 +621,8 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee left_join( all_kpis$uniformity %>% select(field_idx, CV = cv_value, - Uniformity_Category = uniformity_category), + Uniformity_Category = uniformity_category, + Uniformity_Interpretation = interpretation), by = "field_idx" ) @@ -915,9 +928,95 @@ calculate_all_field_analysis_agronomic_support <- function( data_dir = data_dir, project_dir = project_dir) message("Calculating KPI 4: Growth Decline...") - growth_decline_kpi <- calculate_growth_decline_kpi( - ci_pixels_by_field - ) + + # Load historical field statistics to build weekly mean CI time series per field + # (growth_decline_kpi expects temporal series, not spatial pixel arrays) + weekly_mean_ci_by_field <- list() + + # Build list of weekly mean CI values for each field (4-week lookback) + for (field_idx in seq_len(nrow(field_boundaries_sf))) { + weekly_ci_values <- c() + } + + # Try to load historical data for trend calculation + if (!is.null(output_dir) && !is.null(project_dir)) { + tryCatch({ + historical_data <- load_historical_field_data( + project_dir = project_dir, + current_week = current_week, + current_year = current_year, + reports_dir = output_dir, + num_weeks = 4, + auto_generate = FALSE, + field_boundaries_sf = field_boundaries_sf + ) + + if (!is.null(historical_data) && length(historical_data) > 0) { + message(" Building weekly mean CI time series from historical data...") + + # Initialize list with empty vectors for each field + for (field_idx in seq_len(nrow(field_boundaries_sf))) { + weekly_mean_ci_by_field[[field_idx]] <- c() + } + + # Extract Mean_CI from each historical week (reverse order to go chronologically) + for (hist_idx in rev(seq_along(historical_data))) { + hist_week <- historical_data[[hist_idx]] + hist_data <- hist_week$data + + # Extract Mean_CI column if available + if ("Mean_CI" %in% names(hist_data)) { + # Match fields between historical data and field_boundaries + for (field_idx in seq_len(nrow(field_boundaries_sf))) { + field_name <- field_boundaries_sf$field[field_idx] + + # Find matching row in historical data by field name/ID + field_row <- which( + (hist_data$Field_id == field_name | hist_data$Field_name == field_name) & + !is.na(hist_data$Mean_CI) + ) + + if (length(field_row) > 0) { + mean_ci_val <- as.numeric(hist_data$Mean_CI[field_row[1]]) + if (!is.na(mean_ci_val)) { + weekly_mean_ci_by_field[[field_idx]] <- c(weekly_mean_ci_by_field[[field_idx]], mean_ci_val) + } + } + } + } + } + + message(paste(" ✓ Loaded weekly Mean_CI for", sum(sapply(weekly_mean_ci_by_field, length) > 0), "fields")) + } + }, error = function(e) { + message(paste(" Note: Could not load historical field data for trend analysis:", e$message)) + }) + } + + # If no historical data available, create empty vectors (will result in "Insufficient data") + if (length(weekly_mean_ci_by_field) == 0 || all(sapply(weekly_mean_ci_by_field, length) == 0)) { + message(" Warning: No historical weekly CI data available - using current week only") + for (field_idx in seq_len(nrow(field_boundaries_sf))) { + # Use current week mean CI as single-point series (insufficient for trend) + if (!is.null(current_stats) && nrow(current_stats) > 0) { + field_name <- field_boundaries_sf$field[field_idx] + matching_row <- which( + (current_stats$Field_id == field_name | current_stats$Field_name == field_name) & + !is.na(current_stats$Mean_CI) + ) + if (length(matching_row) > 0) { + weekly_mean_ci_by_field[[field_idx]] <- c(as.numeric(current_stats$Mean_CI[matching_row[1]])) + } else { + weekly_mean_ci_by_field[[field_idx]] <- NA_real_ + } + } else { + weekly_mean_ci_by_field[[field_idx]] <- NA_real_ + } + } + } + + # Calculate growth decline using weekly time series (not spatial pixel arrays) + growth_decline_kpi <- calculate_growth_decline_kpi(weekly_mean_ci_by_field) message("Calculating KPI 5: Field Patchiness...") # Calculate patchiness using both Gini coefficient and Moran's I spatial clustering @@ -943,6 +1042,16 @@ calculate_all_field_analysis_agronomic_support <- function( # Use the common wrapper function (same as cane supply) gap_scores_result <- calculate_gap_scores(per_field_files, field_boundaries_sf) + # Guard against NULL or empty result from calculate_gap_scores + if (is.null(gap_scores_result) || nrow(gap_scores_result) == 0) { + message(" Warning: calculate_gap_scores returned NULL/empty - creating fallback") + gap_scores_result <- data.frame( + Field_id = field_boundaries_sf$field, + gap_score = NA_real_, + stringsAsFactors = FALSE + ) + } + # Convert to the format expected by orchestrator gap_filling_kpi <- gap_scores_result %>% mutate(field_idx = match(Field_id, field_boundaries_sf$field)) %>% @@ -999,7 +1108,8 @@ calculate_all_field_analysis_agronomic_support <- function( field_boundaries_sf = field_boundaries_sf, all_kpis = all_kpis, current_week = current_week, - current_year = current_year + current_year = current_year, + current_stats = current_stats ) # Create summary tables diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R index 3877c1c..c4c1afa 100644 --- a/r_app/80_utils_cane_supply.R +++ b/r_app/80_utils_cane_supply.R @@ -27,6 +27,15 @@ library(tidyr) library(readxl) library(writexl) +# ============================================================================ +# ALERT THRESHOLDS & CONFIGURATION CONSTANTS +# ============================================================================ + +# CI change thresholds for alert categorization +# These values are project-standard and should be consistent across all workflows +CI_CHANGE_DECLINE_THRESHOLD <- -0.5 # Weekly CI change threshold for decline alerts +CI_CHANGE_INCREASE_THRESHOLD <- 0.5 # Weekly CI change threshold for increase alerts + # ============================================================================ # ANGATA-SPECIFIC HELPER FUNCTIONS (Placeholder Section) # ============================================================================ @@ -139,7 +148,7 @@ categorize_cv_trend_long_term <- function(cv_slope) { #' Determine status alert based on harvest probability and crop health #' Priority order: #' 1. harvest_ready (imminent + mature ≥12 months) -#' 2. decline_stress (drop ≥2 points but still >1.5) +#' 2. decline_stress (drop ≥CI_CHANGE_DECLINE_THRESHOLD but still >1.5) #' 3. harvested_bare (Mean CI < 1.5) #' @param imminent_prob Numeric harvest probability #' @param age_week Numeric age in weeks @@ -152,8 +161,8 @@ calculate_status_alert <- function(imminent_prob, age_week, weekly_ci_change, me return("harvest_ready") } - # Priority 2: Strong decline - if (!is.na(weekly_ci_change) && weekly_ci_change <= -2.0 && !is.na(mean_ci) && mean_ci > 1.5) { + # Priority 2: Strong decline (using configurable threshold) + if (!is.na(weekly_ci_change) && weekly_ci_change <= CI_CHANGE_DECLINE_THRESHOLD && !is.na(mean_ci) && mean_ci > 1.5) { return("decline_stress") } @@ -582,7 +591,7 @@ calculate_field_analysis_cane_supply <- function(setup, # ========== PHASE 6: LOAD HARVEST PROBABILITIES ========== message("\n4. Loading harvest probabilities from script 31...") - harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats") + harvest_prob_dir <- setup$kpi_field_stats_dir harvest_prob_file <- file.path(harvest_prob_dir, sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year)) message(paste(" Looking for:", harvest_prob_file)) @@ -634,13 +643,23 @@ calculate_field_analysis_cane_supply <- function(setup, # print(head(field_analysis_df[, available_cols], 10)) # } - # # ========== PHASE 10: CALCULATE FARM-LEVEL KPIS ========== + # ========== PHASE 10: CALCULATE FARM-LEVEL KPIS ========== # farm_kpi_results <- calculate_farm_level_kpis( # field_analysis_df, # current_week, # current_year, # end_date # ) + + # For now, farm-level KPIs are not implemented in CANE_SUPPLY workflow + farm_kpi_results <- NULL + + # ========== RETURN RESULTS ========== + return(list( + field_analysis_df = field_analysis_df, + farm_kpi_results = farm_kpi_results, + export_paths = export_paths + )) } # ============================================================================ diff --git a/r_app/80_utils_common.R b/r_app/80_utils_common.R index 37ca957..d3d2ca2 100644 --- a/r_app/80_utils_common.R +++ b/r_app/80_utils_common.R @@ -17,6 +17,25 @@ # centralized in the orchestrator script. # ============================================================================ +# ============================================================================ +# LOAD PROJECT CONFIGURATION (Guard against re-sourcing) +# ============================================================================ +# Ensure parameters_project.R has been sourced to provide global configuration +# (PROJECT, data_dir, field_boundaries_path, etc.). Use a sentinel to avoid double-sourcing. +if (!exists("PROJECT", envir = .GlobalEnv)) { + tryCatch({ + source(here::here("r_app", "parameters_project.R")) + }, error = function(e) { + # Fallback: try relative path if here() doesn't work + tryCatch({ + source("parameters_project.R") + }, error = function(e2) { + warning(paste("Could not source parameters_project.R:", e2$message, + "- using defaults or expecting caller to set PROJECT/data_dir")) + }) + }) +} + # ============================================================================ # CONSTANTS (from 80_calculate_kpis.R) # ============================================================================ @@ -495,8 +514,15 @@ calculate_gap_scores <- function(per_field_files, field_boundaries_sf) { dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop") message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields")) - message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", - round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%")) + + # Guard against all-NA values which would produce Inf/-Inf warnings + if (any(is.finite(gap_scores_df$gap_score))) { + min_score <- round(min(gap_scores_df$gap_score, na.rm = TRUE), 2) + max_score <- round(max(gap_scores_df$gap_score, na.rm = TRUE), 2) + message(paste(" Gap score range:", min_score, "-", max_score, "%")) + } else { + message(" Gap score range: All values are NA (no valid gap scores)") + } } else { message(" WARNING: No gap scores calculated from per-field mosaics") gap_scores_df <- NULL @@ -645,6 +671,8 @@ load_harvest_data <- function(data_dir) { if (all(required_cols %in% names(harvesting_data))) { # Convert to data frame and ensure column types harvesting_data <- as.data.frame(harvesting_data) + # CRITICAL: Coerce field to character to preserve leading zeros (e.g., "01", "02") + harvesting_data$field <- as.character(harvesting_data$field) harvesting_data$year <- as.numeric(harvesting_data$year) harvesting_data$tonnage_ha <- as.numeric(harvesting_data$tonnage_ha) @@ -664,7 +692,12 @@ load_harvest_data <- function(data_dir) { # Fallback: create empty data frame if loading failed if (is.null(harvesting_data)) { message(" WARNING: No harvest data available. TCH yield prediction will use graceful fallback (NA values)") - harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric()) + harvesting_data <- data.frame( + field = character(), # Explicitly character to preserve leading zeros when data is added + year = numeric(), + tonnage_ha = numeric(), + stringsAsFactors = FALSE + ) } return(harvesting_data) diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 4725229..666e6b3 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -376,13 +376,13 @@ prev_week_1_date <- report_date_obj - 7 prev_week_2_date <- report_date_obj - 14 prev_week_3_date <- report_date_obj - 21 -week_minus_1 <- lubridate::isoweek(prev_week_1_date) +week_minus_1 <- sprintf("%02d", lubridate::isoweek(prev_week_1_date)) week_minus_1_year <- lubridate::isoyear(prev_week_1_date) -week_minus_2 <- lubridate::isoweek(prev_week_2_date) +week_minus_2 <- sprintf("%02d", lubridate::isoweek(prev_week_2_date)) week_minus_2_year <- lubridate::isoyear(prev_week_2_date) -week_minus_3 <- lubridate::isoweek(prev_week_3_date) +week_minus_3 <- sprintf("%02d", lubridate::isoweek(prev_week_3_date)) week_minus_3_year <- lubridate::isoyear(prev_week_3_date) # Format current week with leading zeros @@ -468,10 +468,14 @@ if (!is.null(CI_quadrant) && nrow(CI_quadrant) > 0) { Satellite Based Field Reporting ::: + + ::: {custom-style="Cover_subtitle" style="text-align:center; margin-top:18px;"} Chlorophyll Index (CI) Monitoring Report — `r toupper(params$data_dir)` Farm (Week `r { rd <- params$report_date; rd <- if (inherits(rd, "Date")) rd else suppressWarnings(as.Date(rd)); if (is.na(rd)) rd <- Sys.Date(); if (!is.null(params$week)) params$week else format(rd, '%V') }`, `r { rd <- params$report_date; rd <- if (inherits(rd, "Date")) rd else suppressWarnings(as.Date(rd)); if (is.na(rd)) rd <- Sys.Date(); format(rd, '%Y') }`) ::: +\newpage + ## Report Summary **Farm Location:** `r toupper(project_dir)` Estate @@ -907,8 +911,8 @@ tryCatch({ # Aggregate mosaics for three weeks: current, week-1, week-3 farm_mosaic_current <- aggregate_mosaics_safe(current_week, current_iso_year, "current week") - farm_mosaic_minus_1 <- aggregate_mosaics_safe(week_minus_1, week_minus_1_year, "week-1") - farm_mosaic_minus_3 <- aggregate_mosaics_safe(week_minus_3, week_minus_3_year, "week-3") + farm_mosaic_minus_1 <- aggregate_mosaics_safe(as.numeric(week_minus_1), week_minus_1_year, "week-1") + farm_mosaic_minus_3 <- aggregate_mosaics_safe(as.numeric(week_minus_3), week_minus_3_year, "week-3") # Extract CI band (5th band, or named "CI") from each aggregated mosaic farm_ci_current <- NULL @@ -1547,7 +1551,6 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field select( Field = Field_id, `Field Size (acres)` = field_size_acres, - `Growth Uniformity` = Uniformity_Interpretation, `Mean CI` = Mean_CI, `Weekly CI Change` = Weekly_CI_Change, `Yield Forecast (t/ha)` = TCH_Forecasted, @@ -1561,7 +1564,6 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field select( Field = Field_id, `Field Size (acres)` = field_size_acres, - `Growth Uniformity` = Uniformity_Interpretation, `Mean CI` = Mean_CI, `Yield Forecast (t/ha)` = TCH_Forecasted, `Gap Score` = Gap_Score, @@ -1616,8 +1618,9 @@ The Chlorophyll Index (CI) is a vegetation index that measures the relative amou CI values typically range from 0 (bare soil or severely stressed vegetation) to 7+ (very healthy, dense vegetation). For sugarcane, values between 3-7 generally indicate good crop health, depending on the growth stage. +
![Chlorophyll Index Example](CI_graph_example.png) - +
### What You'll Find in This Report: diff --git a/r_app/91_CI_report_with_kpis_cane_supply.Rmd b/r_app/91_CI_report_with_kpis_cane_supply.Rmd index ea0f92f..6929b80 100644 --- a/r_app/91_CI_report_with_kpis_cane_supply.Rmd +++ b/r_app/91_CI_report_with_kpis_cane_supply.Rmd @@ -487,7 +487,7 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na ## 1.1 Overview of cane supply area, showing zones with number of acres being harvest ready -```{r overview_map, fig.width=9, fig.height=7, fig.align="center", echo=FALSE, message=FALSE, warning=FALSE} +```{r overview_map, fig.width=8, fig.height=7, fig.align="center", echo=FALSE, message=FALSE, warning=FALSE} # Create a hexbin overview map with ggplot tryCatch({ # Use per-field field_analysis data from RDS (already loaded in load_kpi_data chunk) @@ -643,7 +643,7 @@ tryCatch({ }) ``` -\newpage + ## 1.2 Key Performance Indicators ```{r combined_kpi_table, echo=FALSE, results='asis'} @@ -931,9 +931,9 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to -```{r ci_fig, echo=FALSE, fig.align='right', out.width='40%', fig.cap="Chlorophyll Index Example"} -knitr::include_graphics("CI_graph_example.png") -``` +
+![Chlorophyll Index Example](CI_graph_example.png) +
### Data File Structure and Columns diff --git a/r_app/parameters_project.R b/r_app/parameters_project.R index daae442..517fd48 100644 --- a/r_app/parameters_project.R +++ b/r_app/parameters_project.R @@ -623,13 +623,19 @@ detect_tile_structure_from_merged_final <- function(merged_final_tif_dir, daily_ #' Detect mosaic mode from project structure #' +#' Determine mosaic architecture (legacy detection function) +#' +#' NOTE: This is a legacy function kept for backward compatibility. +#' The project has moved to per-field (single-file) architecture. +#' `weekly_tile_max` is no longer created in all_dirs, so this will always return "single-file" +#' #' Determines if project uses "tiled" (legacy) or "single-file" (per-field) mosaics #' #' @param project_dir Character. Project name -#' @return Character. "tiled" or "single-file" +#' @return Character. "tiled" or "single-file" (now always "single-file") detect_mosaic_mode <- function(project_dir) { # Per-field architecture is standard - always return "single-file" - # unless weekly_tile_max directory exists with content + # Legacy support: check if weekly_tile_max exists (it won't in standard setup) mosaic_tiled_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max") if (dir.exists(mosaic_tiled_dir) && length(list.files(mosaic_tiled_dir)) > 0) { @@ -662,11 +668,15 @@ get_project_storage_path <- function(project_dir, subdir = NULL) { return(path) } -#' Get mosaic directory +#' Get mosaic directory (legacy function) +#' +#' NOTE: This is a legacy helper kept for backward compatibility. +#' In the standard per-field workflow, this returns weekly_mosaic directory. +#' The "tiled" mode is no longer created (weekly_tile_max_dir was removed from all_dirs). #' #' @param project_dir Character. Project name -#' @param mosaic_mode Character. "tiled" or "single-file" -#' @return Character. Full path to mosaic directory +#' @param mosaic_mode Character. "tiled" or "single-file" (auto-detects if "auto") +#' @return Character. Full path to mosaic directory (typically weekly_mosaic) get_mosaic_dir <- function(project_dir, mosaic_mode = "auto") { if (mosaic_mode == "auto") { mosaic_mode <- detect_mosaic_mode(project_dir) @@ -718,11 +728,14 @@ check_harvest_output_exists <- function(project_dir, week_num, year_num) { file.exists(path) } -#' Get mosaic verification directory +#' Get mosaic verification directory (legacy function) +#' +#' NOTE: This is a legacy helper kept for backward compatibility. +#' Standard workflow uses weekly_mosaic; tiled mode is no longer created. #' #' @param project_dir Character. Project name #' @param mosaic_mode Character. "tiled" or "single-file" -#' @return Character. Full path to mosaic directory +#' @return Character. Full path to mosaic directory for verification get_mosaic_verification_dir <- function(project_dir, mosaic_mode) { base <- file.path("laravel_app", "storage", "app", project_dir) diff --git a/r_app/report_utils.R b/r_app/report_utils.R index 3827593..8b3afa1 100644 --- a/r_app/report_utils.R +++ b/r_app/report_utils.R @@ -802,30 +802,9 @@ get_week_path <- function(mosaic_path, input_date, week_offset) { target_week <- sprintf("%02d", lubridate::isoweek(target_date)) # Left-pad week number with a zero if needed target_year <- lubridate::isoyear(target_date) - # Primary approach: Try single-file mosaic path first + # Load single-file mosaic for the given week path_to_week <- here::here(mosaic_path, paste0("week_", target_week, "_", target_year, ".tif")) - # Smart fallback: If single-file doesn't exist AND path contains "weekly_mosaic", check for tiles - if (!file.exists(path_to_week) && grepl("weekly_mosaic", mosaic_path)) { - # Try to locate tile-based mosaics in weekly_tile_max instead - tile_mosaic_path <- sub("weekly_mosaic", "weekly_tile_max", mosaic_path) - - # Look for any tile files matching the week pattern (e.g., week_XX_YYYY_00.tif, week_XX_YYYY_01.tif, etc.) - if (dir.exists(tile_mosaic_path)) { - tile_files <- list.files(tile_mosaic_path, - pattern = paste0("^week_", target_week, "_", target_year, "_(\\d{2})\\.tif$"), - full.names = TRUE) - - if (length(tile_files) > 0) { - # Found tiles - return the first tile as primary, note that multiple tiles exist - safe_log(paste("Single-file mosaic not found for week", target_week, target_year, - "but found", length(tile_files), "tile files in weekly_tile_max. Using tile approach."), "INFO") - # Return first tile - caller should aggregate if needed - path_to_week <- tile_files[1] # Return first tile; downstream can handle multiple tiles - } - } - } - # Log the path calculation safe_log(paste("Calculated path for week", target_week, "of year", target_year, ":", path_to_week), "INFO") @@ -1169,10 +1148,10 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr } kpi_text <- paste0( - "Size: ", round(field_summary$field_size, 1), " ha | Growth Uniformity: ", field_summary$uniformity_levels, + "Size: ", round(field_summary$field_size * 0.404686, 1), " ha | Mean CI: ", round(field_summary$avg_mean_ci, 2), + " | Growth Uniformity: ", field_summary$uniformity_levels, " | ", yield_text, " | Gap Score: ", round(field_summary$max_gap_score, 1), - " | Decline Risk: ", field_summary$highest_decline_risk, " | Weed Risk: ", field_summary$highest_weed_risk, - " | Mean CI: ", round(field_summary$avg_mean_ci, 2) + " | Decline Risk: ", field_summary$highest_decline_risk, " | Weed Risk: ", field_summary$highest_weed_risk ) # Wrap in smaller text HTML tags for Word output From b2d4093601f3d7d24aff4ae6547f072fc0dfd851 Mon Sep 17 00:00:00 2001 From: Timon Date: Wed, 18 Feb 2026 12:02:25 +0100 Subject: [PATCH 28/30] Refactor KPI reporting and status alert logic; streamline directory structure for KPI reports and enhance field analysis summaries with additional metrics and alerts. --- r_app/80_utils_cane_supply.R | 81 ++++-- ..._CI_report_with_kpis_agronomic_support.Rmd | 2 +- r_app/91_CI_report_with_kpis_cane_supply.Rmd | 232 ++++++++++++------ r_app/parameters_project.R | 13 +- 4 files changed, 226 insertions(+), 102 deletions(-) diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R index c4c1afa..6de2778 100644 --- a/r_app/80_utils_cane_supply.R +++ b/r_app/80_utils_cane_supply.R @@ -145,33 +145,68 @@ categorize_cv_trend_long_term <- function(cv_slope) { } } -#' Determine status alert based on harvest probability and crop health +#' Determine status alert for CANE_SUPPLY client (harvest/milling workflow) +#' +#' Alerts focus on: harvest readiness, crop health monitoring, exception detection +#' Uses WEEKLY trends (Four_week_trend) not daily noise for consistency +#' Designed for harvest/milling clients who manage expectation, not daily operations +#' #' Priority order: -#' 1. harvest_ready (imminent + mature ≥12 months) -#' 2. decline_stress (drop ≥CI_CHANGE_DECLINE_THRESHOLD but still >1.5) -#' 3. harvested_bare (Mean CI < 1.5) -#' @param imminent_prob Numeric harvest probability -#' @param age_week Numeric age in weeks -#' @param weekly_ci_change Numeric weekly CI change -#' @param mean_ci Numeric mean CI value +#' 1. harvest_ready → Schedule harvest operations +#' 2. harvested_bare → Record completion / detect unexpected harvest +#' 3. stress_detected → Monitor crop health (consistent decline) +#' 4. germination_delayed → Early warning for young fields +#' 5. growth_on_track → Positive signal (no action needed) +#' 6. NA → Normal growth (no alert) +#' +#' @param imminent_prob Numeric harvest probability (0-1) +#' @param age_week Numeric age in weeks since planting/harvest +#' @param mean_ci Numeric mean Chlorophyll Index +#' @param four_week_trend Numeric 4-week trend in CI (slope of growth) +#' @param weekly_ci_change Numeric week-over-week CI change +#' @param cv Numeric coefficient of variation (field uniformity) #' @return Character status alert code or NA -calculate_status_alert <- function(imminent_prob, age_week, weekly_ci_change, mean_ci) { - # Priority 1: Ready for harvest-check +calculate_status_alert <- function(imminent_prob, age_week, mean_ci, + four_week_trend, weekly_ci_change, cv) { + + # Priority 1: HARVEST READY - highest business priority + # Field is mature (≥12 months) AND harvest model predicts imminent harvest if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_week) && age_week >= 52) { return("harvest_ready") } - # Priority 2: Strong decline (using configurable threshold) - if (!is.na(weekly_ci_change) && weekly_ci_change <= CI_CHANGE_DECLINE_THRESHOLD && !is.na(mean_ci) && mean_ci > 1.5) { - return("decline_stress") - } - - # Priority 3: Harvested/bare + # Priority 2: HARVESTED/BARE - indicator of completion (or unexpected harvest) + # Mean CI dropped below vegetative threshold if (!is.na(mean_ci) && mean_ci < 1.5) { return("harvested_bare") } - # Fallback: no alert + # Priority 3: STRESS DETECTED - consistent health decline (weekly trend) + # Uses Four_week_trend (smooth trend) NOT daily fluctuation to avoid noise + # Crop declining but not yet bare → opportunity to investigate + if (!is.na(four_week_trend) && four_week_trend < -0.3 && + !is.na(mean_ci) && mean_ci > 1.5) { + return("stress_detected") + } + + # Priority 4: GERMINATION DELAYED - early warning for young fields + # Age 4-8 weeks is typical germination window; low CI = slow start + if (!is.na(age_week) && age_week >= 4 && age_week <= 8 && + !is.na(mean_ci) && mean_ci < 1.5) { + return("germination_delayed") + } + + # Priority 5: GROWTH ON TRACK - positive operational status + # Field is healthy, uniform, and growing steadily (no action needed) + # Conditions: good uniformity (CV < 0.15) AND stable/positive weekly trend + if (!is.na(cv) && cv < 0.15 && + !is.na(four_week_trend) && four_week_trend >= -0.2 && + !is.na(weekly_ci_change) && weekly_ci_change >= -0.2) { + return("growth_on_track") + } + + # Default: NORMAL GROWTH (no specific alert) + # Field is growing but may have minor variability; continues normal monitoring NA_character_ } @@ -290,14 +325,16 @@ calculate_all_field_kpis <- function(current_stats, } }, - # Column 14: Status_Alert (multi-priority logic) + # Column 14: Status_Alert (multi-priority logic for harvest/milling workflow) Status_Alert = { sapply(seq_len(nrow(current_stats)), function(idx) { calculate_status_alert( - Imminent_prob[idx], - Age_week[idx], - Weekly_ci_change[idx], - Mean_CI[idx] + imminent_prob = Imminent_prob[idx], + age_week = Age_week[idx], + mean_ci = Mean_CI[idx], + four_week_trend = Four_week_trend[idx], + weekly_ci_change = Weekly_ci_change[idx], + cv = CV[idx] ) }) }, diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 666e6b3..60f9d93 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -111,7 +111,7 @@ safe_log(paste("weekly_CI_mosaic path:", weekly_CI_mosaic)) # NO workspace-wide fallback that might load wrong project # Build expected KPI file path strictly from project_dir -kpi_data_dir <- file.path(paths$reports_dir, "kpis") # Should be: laravel_app/storage/app/{project}/reports/kpis +kpi_data_dir <- paths$kpi_reports_dir # file.path(paths$reports_dir, "kpis") # Should be: laravel_app/storage/app/{project}/reports/kpis # Calculate week from report_date current_week <- as.numeric(format(as.Date(report_date), "%V")) diff --git a/r_app/91_CI_report_with_kpis_cane_supply.Rmd b/r_app/91_CI_report_with_kpis_cane_supply.Rmd index 6929b80..f9edca8 100644 --- a/r_app/91_CI_report_with_kpis_cane_supply.Rmd +++ b/r_app/91_CI_report_with_kpis_cane_supply.Rmd @@ -144,12 +144,12 @@ week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year) # Candidate filenames we expect (exact and common variants) expected_summary_names <- c( - paste0(project_dir, "_kpi_summary_tables_", week_suffix, ".rds"), - paste0(project_dir, "_kpi_summary_tables_", date_suffix, ".rds"), - paste0(project_dir, "_kpi_summary_tables.rds"), - "kpi_summary_tables.rds", - paste0("kpi_summary_tables_", week_suffix, ".rds"), - paste0("kpi_summary_tables_", date_suffix, ".rds") + paste0(project_dir, "_field_analysis_", week_suffix, ".rds"), + paste0(project_dir, "_field_analysis_", date_suffix, ".rds"), + paste0(project_dir, "_field_analysis.rds"), + "field_analysis.rds", + paste0("field_analysis_", week_suffix, ".rds"), + paste0("field_analysis_", date_suffix, ".rds") ) expected_field_details_names <- c( @@ -169,13 +169,26 @@ try_load_from_dir <- function(dir, candidates) { return(NULL) } -# Try primary directory first +# Try primary directory first (field_level/) summary_file <- try_load_from_dir(kpi_data_dir, expected_summary_names) field_details_file <- try_load_from_dir(kpi_data_dir, expected_field_details_names) -# If not found, perform a workspace-wide search (slower) limited to laravel_app storage +# If not found, try parent directory (kpis/) where RDS is often saved by Script 80 if (is.null(summary_file) || is.null(field_details_file)) { - safe_log(paste("KPI files not found in", kpi_data_dir, "—searching workspace for RDS files")) + parent_dir <- dirname(kpi_data_dir) # One level up: reports/kpis/ + safe_log(paste("KPI files not found in", kpi_data_dir, "—trying parent directory:", parent_dir)) + + if (is.null(summary_file)) { + summary_file <- try_load_from_dir(parent_dir, expected_summary_names) + } + if (is.null(field_details_file)) { + field_details_file <- try_load_from_dir(parent_dir, expected_field_details_names) + } +} + +# If still not found, perform a workspace-wide search (slower) limited to laravel_app storage +if (is.null(summary_file) || is.null(field_details_file)) { + safe_log(paste("KPI files not found in", kpi_data_dir, "or parent directory—searching workspace for RDS files")) # List rds files under laravel_app/storage/app recursively files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "\\.rds$", recursive = TRUE, full.names = TRUE) # Try to match by expected names @@ -487,7 +500,7 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na ## 1.1 Overview of cane supply area, showing zones with number of acres being harvest ready -```{r overview_map, fig.width=8, fig.height=7, fig.align="center", echo=FALSE, message=FALSE, warning=FALSE} +```{r overview_map, fig.width=7, fig.height=6, fig.align="center", echo=FALSE, message=FALSE, warning=FALSE} # Create a hexbin overview map with ggplot tryCatch({ # Use per-field field_analysis data from RDS (already loaded in load_kpi_data chunk) @@ -647,8 +660,8 @@ tryCatch({ ## 1.2 Key Performance Indicators ```{r combined_kpi_table, echo=FALSE, results='asis'} -# Create summary KPI table from field_analysis_summary data -# This shows: Phases, Triggers, Area Change, and Total Farm acreage +# Create consolidated KPI table from field_analysis data +# Shows: Phases, Triggers, Area Change, Cloud Influence, and Total Farm if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) { # Load field analysis data @@ -659,24 +672,46 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na !is.data.frame(summary_data$field_analysis_summary)) { # Create summary by aggregating by Status_Alert and Phase categories - # This groups fields by their phase and status to show distribution phase_summary <- field_analysis_df %>% filter(!is.na(Phase)) %>% group_by(Phase) %>% - summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>% + summarise( + Acreage = sum(Acreage, na.rm = TRUE), + Field_count = n_distinct(Field_id), + .groups = "drop" + ) %>% mutate(Category = Phase) %>% - select(Category, Acreage) + select(Category, Acreage, Field_count) - # Try to create Status trigger summary - use Status_Alert if available, otherwise use empty + # Create Status trigger summary - includes both active alerts and "No active triggers" trigger_summary <- tryCatch({ - field_analysis_df %>% + # Active alerts (fields with non-NA Status_Alert) + active_alerts <- field_analysis_df %>% filter(!is.na(Status_Alert), Status_Alert != "") %>% group_by(Status_Alert) %>% - summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>% + summarise( + Acreage = sum(Acreage, na.rm = TRUE), + Field_count = n_distinct(Field_id), + .groups = "drop" + ) %>% mutate(Category = Status_Alert) %>% - select(Category, Acreage) + select(Category, Acreage, Field_count) + + # No active triggers (fields with NA Status_Alert) + no_alerts <- field_analysis_df %>% + filter(is.na(Status_Alert) | Status_Alert == "") %>% + summarise( + Acreage = sum(Acreage, na.rm = TRUE), + Field_count = n_distinct(Field_id), + .groups = "drop" + ) %>% + mutate(Category = "No active triggers") %>% + select(Category, Acreage, Field_count) + + # Combine active alerts and no-alert fields + bind_rows(active_alerts, no_alerts) }, error = function(e) { - data.frame(Category = character(), Acreage = numeric()) + data.frame(Category = character(), Acreage = numeric(), Field_count = numeric()) }) # Combine into summary @@ -689,25 +724,38 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na # Phase names and trigger names to extract from summary phase_names <- c("Germination", "Tillering", "Grand Growth", "Maturation", "Unknown Phase") - trigger_names <- c("Harvest Ready", "Strong Recovery", "Growth On Track", "Stress Detected", - "Germination Complete", "Germination Started", "No Active Trigger", - "Ready for harvest-check", "Strong decline in crop health", "Harvested/bare") + # Trigger names now include both active alerts AND "No active triggers" (calculated dynamically above) + trigger_names <- c("harvest_ready", "harvested_bare", "stress_detected", + "germination_delayed", "growth_on_track", "No active triggers") # Extract phase distribution - match on category names directly if (!is.null(field_analysis_summary) && nrow(field_analysis_summary) > 0) { + # Phase rows with field count phase_rows <- field_analysis_summary %>% filter(Category %in% phase_names) %>% - select(Category, Acreage) %>% + select(Category, Acreage, Field_count) %>% mutate(KPI_Group = "PHASE DISTRIBUTION", .before = 1) - # Extract status triggers - match on category names directly + # Trigger rows with field count trigger_rows <- field_analysis_summary %>% filter(Category %in% trigger_names) %>% - select(Category, Acreage) %>% - mutate(KPI_Group = "STATUS TRIGGERS", .before = 1) + select(Category, Acreage, Field_count) %>% + mutate(KPI_Group = "OPERATIONAL ALERTS", .before = 1) + + # If no triggers found, add a placeholder row + if (nrow(trigger_rows) == 0) { + trigger_rows <- data.frame( + KPI_Group = "OPERATIONAL ALERTS", + Category = "No active triggers", + Acreage = 0, + Field_count = 0, + stringsAsFactors = FALSE + ) + } # Calculate area change from field_analysis data total_acreage <- sum(field_analysis_df$Acreage, na.rm = TRUE) + total_fields <- n_distinct(field_analysis_df$Field_id) # Parse Weekly_ci_change to determine improvement/decline parse_ci_change <- function(change_str) { @@ -721,10 +769,20 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na field_analysis_df$ci_change_numeric <- sapply(field_analysis_df$Weekly_ci_change, parse_ci_change) - improving_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric > 0.2], na.rm = TRUE) - declining_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric < -0.2], na.rm = TRUE) - stable_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric >= -0.2 & - field_analysis_df$ci_change_numeric <= 0.2], na.rm = TRUE) + # Area change rows with field count + improving_df <- field_analysis_df %>% + filter(ci_change_numeric > 0.2) + stable_df <- field_analysis_df %>% + filter(ci_change_numeric >= -0.2 & ci_change_numeric <= 0.2) + declining_df <- field_analysis_df %>% + filter(ci_change_numeric < -0.2) + + improving_acreage <- sum(improving_df$Acreage, na.rm = TRUE) + improving_field_count <- n_distinct(improving_df$Field_id) + stable_acreage <- sum(stable_df$Acreage, na.rm = TRUE) + stable_field_count <- n_distinct(stable_df$Field_id) + declining_acreage <- sum(declining_df$Acreage, na.rm = TRUE) + declining_field_count <- n_distinct(declining_df$Field_id) improving_pct <- ifelse(total_acreage > 0, round(improving_acreage / total_acreage * 100, 1), 0) declining_pct <- ifelse(total_acreage > 0, round(declining_acreage / total_acreage * 100, 1), 0) @@ -741,24 +799,54 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na KPI_Group = "AREA CHANGE", Category = c("Improving", "Stable", "Declining"), Acreage = c(round(improving_acreage, 2), round(stable_acreage, 2), round(declining_acreage, 2)), + Field_count = c(improving_field_count, stable_field_count, declining_field_count), Percent = c(paste0(improving_pct, "%"), paste0(stable_pct, "%"), paste0(declining_pct, "%")), stringsAsFactors = FALSE ) + # Cloud influence rows with field count - aggregate by Cloud_category + cloud_rows <- tryCatch({ + field_analysis_df %>% + filter(!is.na(Cloud_category)) %>% + group_by(Cloud_category) %>% + summarise( + Acreage = sum(Acreage, na.rm = TRUE), + Field_count = n_distinct(Field_id), + .groups = "drop" + ) %>% + mutate( + KPI_Group = "CLOUD INFLUENCE", + Category = Cloud_category, + Percent = paste0(round(Acreage / total_acreage * 100, 1), "%"), + Acreage = round(Acreage, 2) + ) %>% + select(KPI_Group, Category, Acreage, Field_count, Percent) + }, error = function(e) { + data.frame( + KPI_Group = character(), + Category = character(), + Acreage = numeric(), + Field_count = numeric(), + Percent = character() + ) + }) + # Total farm row total_row <- data.frame( KPI_Group = "TOTAL FARM", Category = "Total Acreage", Acreage = round(total_acreage, 2), + Field_count = total_fields, Percent = "100%", stringsAsFactors = FALSE ) - # Combine all rows with percentages for all + # Combine all rows combined_df <- bind_rows( phase_pcts, trigger_pcts, area_change_rows, + cloud_rows, total_row ) @@ -769,7 +857,7 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na KPI_display = if_else(row_number() == 1, KPI_Group, "") ) %>% ungroup() %>% - select(KPI_display, Category, Acreage, Percent) + select(KPI_display, Category, Acreage, Percent, Field_count) # Render as flextable with merged cells ft <- flextable(combined_df) %>% @@ -777,7 +865,8 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na KPI_display = "KPI Category", Category = "Item", Acreage = "Acreage", - Percent = "Percent" + Percent = "Percentage of total fields", + Field_count = "# Fields" ) %>% merge_v(j = "KPI_display") %>% autofit() @@ -787,8 +876,9 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na phase_count <- nrow(phase_rows) trigger_count <- nrow(trigger_rows) area_count <- nrow(area_change_rows) + cloud_count <- nrow(cloud_rows) - # Add lines after phases, triggers, and area change groups (before totals) + # Add lines after phases, triggers, area change, and cloud groups (before totals) if (phase_count > 0) { ft <- ft %>% hline(i = phase_count, border = officer::fp_border(width = 1)) } @@ -798,6 +888,9 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na if (area_count > 0) { ft <- ft %>% hline(i = phase_count + trigger_count + area_count, border = officer::fp_border(width = 1)) } + if (cloud_count > 0) { + ft <- ft %>% hline(i = phase_count + trigger_count + area_count + cloud_count, border = officer::fp_border(width = 1)) + } ft } else { @@ -808,40 +901,6 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na } ``` -## Cloud Coverage Summary - -```{r cloud_coverage_summary, echo=FALSE} -# Display cloud coverage summary aggregated by category -# Cloud coverage data is included in the field_analysis RDS from Script 80 -if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) { - field_analysis_df <- summary_data$field_analysis - - # Aggregate cloud coverage by category - cloud_summary <- field_analysis_df %>% - filter(!is.na(Cloud_category)) %>% - group_by(Cloud_category) %>% - summarise( - "Number of Fields" = n(), - "Total Acreage" = round(sum(Acreage, na.rm = TRUE), 1), - .groups = "drop" - ) %>% - rename("Cloud Category" = Cloud_category) %>% - arrange(`Cloud Category`) - if (nrow(cloud_summary) > 0) { - # Create flextable - ft <- flextable(cloud_summary) %>% - autofit() %>% - theme_vanilla() - - ft - } else { - cat("Cloud coverage data not available for summary.\n") - } -} else { - cat("Field analysis data not available for cloud coverage summary.\n") -} -``` - ```{r data, message=TRUE, warning=TRUE, include=FALSE} # All data comes from the field analysis performed in 09_field_analysis_weekly.R # The report renders KPI tables and field summaries from that data @@ -1005,15 +1064,42 @@ Both algorithms are not always in sync, and can have contradictory results. Wide ## Report Metadata ```{r report_metadata, echo=FALSE} +# Calculate total area from field analysis data +total_area_acres <- 0 +if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) { + total_area_acres <- sum(summary_data$field_analysis$Acreage, na.rm = TRUE) +} + +# Calculate total fields +total_fields_count <- 0 +if (exists("AllPivots0")) { + total_fields_count <- nrow(AllPivots0 %>% filter(!is.na(field)) %>% group_by(field) %>% summarise()) +} else if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) { + total_fields_count <- n_distinct(summary_data$field_analysis$Field_id) +} + metadata_info <- data.frame( - Metric = c("Report Generated", "Data Source", "Analysis Period", "Total Fields", "Next Update"), + Metric = c( + "Report Generated", + "Data Source", + "Analysis Period", + "Total Fields [number]", + "Total Area [acres]", + "Next Update", + "Service provided", + "Starting date service" + ), Value = c( format(Sys.time(), "%Y-%m-%d %H:%M:%S"), paste("Project", toupper(project_dir)), paste("Week", current_week, "of", year), - ifelse(exists("AllPivots0"), nrow(AllPivots0 %>% filter(!is.na(field)) %>% group_by(field) %>% summarise()), "Unknown"), - "Next Wednesday" - ) + ifelse(total_fields_count > 0, total_fields_count, "Unknown"), + ifelse(total_area_acres > 0, round(total_area_acres, 0), "Unknown"), + "Next Wednesday", + "Cane Supply Office - Weekly", + "23 dec 2025" + ), + stringsAsFactors = FALSE ) ft <- flextable(metadata_info) %>% diff --git a/r_app/parameters_project.R b/r_app/parameters_project.R index 517fd48..7ee1267 100644 --- a/r_app/parameters_project.R +++ b/r_app/parameters_project.R @@ -173,9 +173,10 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") { # TIER 6: KPI & REPORTING (Scripts 80/90/91 output) reports_dir <- here(laravel_storage_dir, "reports") - kpi_reports_dir <- here(reports_dir, "kpis", "field_level") - kpi_field_stats_dir <- here(reports_dir, "kpis", "field_stats") - kpi_field_analysis_dir <- here(reports_dir, "kpis", "field_analysis") + kpi_reports_dir <- here(reports_dir, "kpis") + #kpi_reports_dir <- here(reports_dir, "kpis", "field_level") + #kpi_field_stats_dir <- here(reports_dir, "kpis", "field_stats") + #kpi_field_analysis_dir <- here(reports_dir, "kpis", "field_analysis") # TIER 7: SUPPORT (various scripts) vrt_dir <- here(data_dir, "vrt") # Virtual Raster files created during CI extraction @@ -188,7 +189,7 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") { extracted_ci_base_dir, daily_ci_vals_dir, cumulative_ci_vals_dir, ci_for_python_dir, growth_model_interpolated_dir, weekly_mosaic_dir, - reports_dir, kpi_reports_dir, kpi_field_stats_dir, kpi_field_analysis_dir, + reports_dir, kpi_reports_dir, #kpi_field_stats_dir, kpi_field_analysis_dir, data_dir, vrt_dir, harvest_dir, log_dir ) @@ -227,8 +228,8 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") { # TIER 6: KPI & reporting reports_dir = reports_dir, kpi_reports_dir = kpi_reports_dir, - kpi_field_stats_dir = kpi_field_stats_dir, - kpi_field_analysis_dir = kpi_field_analysis_dir, + #kpi_field_stats_dir = kpi_field_stats_dir, + #kpi_field_analysis_dir = kpi_field_analysis_dir, # TIER 7: Support data_dir = data_dir, From 1500bbcb1ce4dcf49bbda253a53cdb376fc11208 Mon Sep 17 00:00:00 2001 From: Timon Date: Wed, 18 Feb 2026 13:26:32 +0100 Subject: [PATCH 29/30] Refactor KPI calculations and reporting utilities; normalize field details columns, update area change metrics, and enhance .gitignore for PNG exceptions. --- python_app/harvest_date_pred_utils.py | 4 +- r_app/.gitignore | 3 +- r_app/80_utils_agronomic_support.R | 37 +++++----- r_app/80_utils_cane_supply.R | 34 +++------ r_app/80_utils_common.R | 8 ++- ..._CI_report_with_kpis_agronomic_support.Rmd | 69 +++++++------------ r_app/report_utils.R | 31 ++++++++- 7 files changed, 93 insertions(+), 93 deletions(-) diff --git a/python_app/harvest_date_pred_utils.py b/python_app/harvest_date_pred_utils.py index eacb11f..d746e3a 100644 --- a/python_app/harvest_date_pred_utils.py +++ b/python_app/harvest_date_pred_utils.py @@ -303,9 +303,9 @@ def load_harvest_data(data_file: Path) -> pd.DataFrame: def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device, threshold=0.45, consecutive_days=2): """ - Phase 1: Growing window detection with DOY season reset. + Phase 1: Growing window detection with DAH season reset. - For each detected harvest, reset DOY counter for the next season. + For each detected harvest, reset DAH counter for the next season. This allows the model to detect multiple consecutive harvests in multi-year data. """ harvest_dates = [] diff --git a/r_app/.gitignore b/r_app/.gitignore index 7cd7b7e..3efde43 100644 --- a/r_app/.gitignore +++ b/r_app/.gitignore @@ -14,8 +14,7 @@ renv # EXCEPTIONS: Explicitly track intentional PNG assets # Uncomment or add lines below for PNG files that should be committed to git -!r_app/CI_graph_example.png - +!CI_graph_example.png # Ignore files related to Rproj .Rproj.user/ .Rhistory diff --git a/r_app/80_utils_agronomic_support.R b/r_app/80_utils_agronomic_support.R index 9dcf3db..9354d44 100644 --- a/r_app/80_utils_agronomic_support.R +++ b/r_app/80_utils_agronomic_support.R @@ -181,12 +181,18 @@ calculate_field_uniformity_kpi <- function(ci_pixels_by_field, field_boundaries_ #' @param previous_stats Previous week field statistics #' #' @return Data frame with field-level CI changes -calculate_area_change_kpi <- function(current_stats, previous_stats) { +calculate_area_change_kpi <- function(current_stats, previous_stats, field_boundaries_sf = NULL) { + + # Initialize field index vector + field_idx_vec <- seq_len(nrow(current_stats)) + if (!is.null(field_boundaries_sf) && "Field_id" %in% names(current_stats)) { + field_idx_vec <- match(current_stats$Field_id, field_boundaries_sf$field) + } # Initialize result data frame result <- data.frame( - field_idx = seq_len(nrow(current_stats)), - mean_ci_pct_change = NA_real_, + field_idx = field_idx_vec, + mean_ci_abs_change = NA_real_, interpretation = NA_character_, stringsAsFactors = FALSE ) @@ -223,19 +229,19 @@ calculate_area_change_kpi <- function(current_stats, previous_stats) { # Find matching previous CI value prev_ci <- prev_lookup[[as.character(current_field_id)]] - if (!is.null(prev_ci) && !is.na(prev_ci) && !is.na(current_ci) && prev_ci > 0) { - # Calculate percentage change - pct_change <- ((current_ci - prev_ci) / prev_ci) * 100 - result$mean_ci_pct_change[i] <- round(pct_change, 2) + if (!is.null(prev_ci) && !is.na(prev_ci) && !is.na(current_ci)) { + # Calculate absolute change (CI units) + abs_change <- current_ci - prev_ci + result$mean_ci_abs_change[i] <- round(abs_change, 2) # Add interpretation - if (pct_change > 15) { + if (abs_change > 0.5) { result$interpretation[i] <- "Rapid growth" - } else if (pct_change > 5) { + } else if (abs_change > 0.2) { result$interpretation[i] <- "Positive growth" - } else if (pct_change > -5) { + } else if (abs_change >= -0.2) { result$interpretation[i] <- "Stable" - } else if (pct_change > -15) { + } else if (abs_change >= -0.5) { result$interpretation[i] <- "Declining" } else { result$interpretation[i] <- "Rapid decline" @@ -243,8 +249,7 @@ calculate_area_change_kpi <- function(current_stats, previous_stats) { } else { result$interpretation[i] <- "No previous data" } - } - + } return(result) } @@ -557,7 +562,7 @@ create_summary_tables <- function(all_kpis) { select(field_idx, cv_value, uniformity_category, interpretation), area_change = all_kpis$area_change %>% - select(field_idx, mean_ci_pct_change, interpretation), + select(field_idx, mean_ci_abs_change, interpretation), tch_forecast = all_kpis$tch_forecasted %>% select(field_idx, tch_forecasted), @@ -633,7 +638,7 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee result <- result %>% left_join( all_kpis$area_change %>% - select(field_idx, Weekly_CI_Change = mean_ci_pct_change, + select(field_idx, Weekly_CI_Change = mean_ci_abs_change, Area_Change_Interpretation = interpretation), by = "field_idx" ) @@ -918,7 +923,7 @@ calculate_all_field_analysis_agronomic_support <- function( } else { area_change_kpi <- data.frame( field_idx = seq_len(nrow(field_boundaries_sf)), - mean_ci_pct_change = NA_real_, + mean_ci_abs_change = NA_real_, interpretation = rep("No previous data", nrow(field_boundaries_sf)) ) } diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R index 6de2778..693fe0f 100644 --- a/r_app/80_utils_cane_supply.R +++ b/r_app/80_utils_cane_supply.R @@ -45,13 +45,16 @@ CI_CHANGE_INCREASE_THRESHOLD <- 0.5 # Weekly CI change threshold for increase #' @return data.frame with field and acreage columns calculate_field_acreages <- function(field_boundaries_sf) { tryCatch({ - lookup_df <- field_boundaries_sf %>% + # Project to equal-area CRS (EPSG:6933) for accurate area calculations + field_boundaries_proj <- sf::st_transform(field_boundaries_sf, "EPSG:6933") + + lookup_df <- field_boundaries_proj %>% sf::st_drop_geometry() %>% as.data.frame() %>% mutate( - geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) { + geometry_valid = sapply(seq_len(nrow(field_boundaries_proj)), function(idx) { tryCatch({ - sf::st_is_valid(field_boundaries_sf[idx, ]) + sf::st_is_valid(field_boundaries_proj[idx, ]) }, error = function(e) FALSE) }), area_ha = 0 @@ -61,7 +64,7 @@ calculate_field_acreages <- function(field_boundaries_sf) { valid_indices <- which(lookup_df$geometry_valid) areas_ha <- vapply(valid_indices, function(idx) { tryCatch({ - area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ])) + area_m2 <- as.numeric(sf::st_area(field_boundaries_proj[idx, ])) area_m2 / 10000 }, error = function(e) NA_real_) }, numeric(1)) @@ -555,23 +558,7 @@ calculate_field_analysis_cane_supply <- function(setup, # ========== PHASE 3: LOAD PLANTING DATES ========== message("\nLoading harvest data from harvest.xlsx for planting dates...") - harvest_file_path <- file.path(data_dir, "harvest.xlsx") - - harvesting_data <- tryCatch({ - if (file.exists(harvest_file_path)) { - harvest_raw <- readxl::read_excel(harvest_file_path) - harvest_raw$season_start <- as.Date(harvest_raw$season_start) - harvest_raw$season_end <- as.Date(harvest_raw$season_end) - message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows")) - harvest_raw - } else { - message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path)) - NULL - } - }, error = function(e) { - message(paste(" ERROR loading harvest.xlsx:", e$message)) - NULL - }) + harvesting_data <- load_harvesting_data(data_dir) planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf) @@ -628,9 +615,8 @@ calculate_field_analysis_cane_supply <- function(setup, # ========== PHASE 6: LOAD HARVEST PROBABILITIES ========== message("\n4. Loading harvest probabilities from script 31...") - harvest_prob_dir <- setup$kpi_field_stats_dir - harvest_prob_file <- file.path(harvest_prob_dir, - sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year)) + # Use get_harvest_output_path() to safely build path (avoids NULL setup$kpi_field_stats_dir) + harvest_prob_file <- get_harvest_output_path(project_dir, current_week, current_year) message(paste(" Looking for:", harvest_prob_file)) imminent_prob_data <- tryCatch({ diff --git a/r_app/80_utils_common.R b/r_app/80_utils_common.R index d3d2ca2..071b025 100644 --- a/r_app/80_utils_common.R +++ b/r_app/80_utils_common.R @@ -406,7 +406,7 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { # Gap score using 2σ below median to detect outliers median_ci <- median(valid_values) sd_ci <- sd(valid_values) - outlier_threshold <- median_ci - (2 * sd_ci) + outlier_threshold <- median_ci - (1 * sd_ci) low_ci_pixels <- sum(valid_values < outlier_threshold) total_pixels <- length(valid_values) gap_score <- round((low_ci_pixels / total_pixels) * 100, 2) @@ -415,7 +415,8 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) { gap_level <- dplyr::case_when( gap_score < 10 ~ "Minimal", gap_score < 25 ~ "Moderate", - TRUE ~ "Significant" + gap_score >= 25 ~ "Significant", + TRUE ~ NA_character_ ) field_results <- rbind(field_results, data.frame( @@ -1603,7 +1604,8 @@ calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cu dplyr::group_by(sub_field, season) %>% dplyr::slice(which.max(DAH)) %>% dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DAH, season, sub_area) %>% - dplyr::mutate(CI_per_day = cumulative_CI / DAH) + dplyr::mutate(CI_per_day = ifelse(DAH > 0, cumulative_CI / DAH, NA_real_)) + # Define predictors and response variables predictors <- c("cumulative_CI", "DAH", "CI_per_day") diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 60f9d93..5b8f268 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -179,24 +179,15 @@ if (dir.exists(kpi_data_dir)) { safe_log(paste("✓ Loaded field_details_table with", nrow(field_details_table), "fields")) safe_log(paste(" Columns:", paste(names(field_details_table), collapse=", "))) - # NORMALIZATION: Ensure critical column names match downstream expectations - # Handle both "Field" and "Field_id" naming conventions - if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) { - field_details_table <- field_details_table %>% - dplyr::rename(Field_id = Field) - safe_log(" ✓ Normalized: renamed Field → Field_id") - } + # NORMALIZATION: Normalize column structure (Field→Field_id rename + expected_cols injection) + field_details_table <- normalize_field_details_columns(field_details_table) # Normalize other common column name variations column_mappings <- list( c("CV Value", "CV"), - c("CV", "CV"), # Keep as-is c("Mean CI", "Mean_CI"), - c("Mean_CI", "Mean_CI"), # Keep as-is c("Yield Forecast (t/ha)", "TCH_Forecasted"), - c("TCH_Forecasted", "TCH_Forecasted"), # Keep as-is c("Gap Score", "Gap_Score"), - c("Gap_Score", "Gap_Score"), # Keep as-is c("Growth Uniformity", "Growth_Uniformity"), c("Decline Risk", "Decline_Risk"), c("Patchiness Risk", "Patchiness_Risk"), @@ -721,6 +712,7 @@ generate_field_alerts <- function(field_details_table) { highest_patchiness_risk = case_when( any(`Patchiness Risk` == "High") ~ "High", any(`Patchiness Risk` == "Medium") ~ "Medium", + any(`Patchiness Risk` == "Low") ~ "Low", any(`Patchiness Risk` == "Minimal") ~ "Minimal", TRUE ~ "Unknown" ), @@ -1510,25 +1502,9 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field } # Join field sizes and ages to KPI data, simplified column selection - # DEFENSIVE: Normalize field_details_table column names one more time before joining - # Ensure it has Field_id column (regardless of whether it was from RDS or fallback) - if (!is.null(field_details_table) && nrow(field_details_table) > 0) { - # If Field exists but Field_id doesn't, rename it - if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) { - field_details_table <- field_details_table %>% - dplyr::rename(Field_id = Field) - } - - # Ensure all expected KPI columns exist; add as NA if missing - expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score", - "Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation", - "Decline_Severity", "Patchiness_Risk") - for (col in expected_cols) { - if (!col %in% names(field_details_table)) { - field_details_table[[col]] <- NA - } - } - } + # DEFENSIVE: Normalize field_details_table column structure before joining + # Uses shared normalization function to ensure Field_id exists and all expected columns are present + field_details_table <- normalize_field_details_columns(field_details_table) field_details_clean <- field_details_table %>% left_join(field_sizes_df, by = c("Field_id" = "field")) %>% @@ -1540,7 +1516,7 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field field_size_acres = round(field_size_acres, 1), Mean_CI = round(Mean_CI, 2), CV = round(CV, 2), - Gap_Score = round(Gap_Score, 0), + Gap_Score = round(Gap_Score, 2), TCH_Forecasted = round(TCH_Forecasted, 1) ) @@ -1554,7 +1530,7 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field `Mean CI` = Mean_CI, `Weekly CI Change` = Weekly_CI_Change, `Yield Forecast (t/ha)` = TCH_Forecasted, - `Gap Score` = Gap_Score, + `Gap Score %` = Gap_Score, `Decline Risk` = Decline_Severity, `Patchiness Risk` = Patchiness_Risk, `CV Value` = CV @@ -1566,7 +1542,7 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field `Field Size (acres)` = field_size_acres, `Mean CI` = Mean_CI, `Yield Forecast (t/ha)` = TCH_Forecasted, - `Gap Score` = Gap_Score, + `Gap Score %` = Gap_Score, `Decline Risk` = Decline_Severity, `Patchiness Risk` = Patchiness_Risk, `CV Value` = CV @@ -1639,13 +1615,13 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to - **Why it matters:** Uniform fields are easier to manage and typically produce better yields. Uneven growth suggests irrigation problems, fertility gaps, pests, or disease. - **KPI 2: Area Change (Weekly Growth)** — Tracks week-over-week CI changes to detect rapid improvements or declines - - **Calculation:** ((Current Mean CI − Previous Mean CI) / Previous Mean CI) × 100 + - **Calculation:** Current Mean CI − Previous Mean CI (absolute change in CI units) - **Categories:** - - **Rapid growth:** > 15% increase (excellent weekly progress) - - **Positive growth:** 5–15% increase (steady improvement) - - **Stable:** −5% to +5% (field maintained, no significant change) - - **Declining:** −5% to −15% (slow decline, warrant closer monitoring) - - **Rapid decline:** < −15% (alert: urgent issue requiring investigation) + - **Rapid growth:** > +0.5 (excellent weekly progress) + - **Positive growth:** +0.2 to +0.5 (steady improvement) + - **Stable:** −0.2 to +0.2 (field maintained, no significant change) + - **Declining:** −0.5 to −0.2 (slow decline, warrant closer monitoring) + - **Rapid decline:** < −0.5 (alert: urgent issue requiring investigation) - **Why it matters:** Week-to-week changes reveal developing problems early, enabling timely intervention. - **KPI 3: TCH Forecasted (Yield Prediction)** — Predicts final harvest tonnage for mature fields @@ -1692,12 +1668,15 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to Your scouting and remediation strategy should differ: Field A might need systemic irrigation adjustment or disease management; Field B might need soil remediation in the affected corner. **Patchiness tells you *where to focus your effort*.** - **KPI 6: Gap Score (Establishment Quality)** — Quantifies field gaps and areas of poor crop establishment - - **Metric:** Percentage of field area with very low CI values (lowest 25% of pixel distribution) - - **Levels:** - - **Minimal:** < 10% of field (good establishment, limited replanting needed) - - **Moderate:** 10–25% of field (monitor gap expansion, coordinate with agronomy) - - **Significant:** ≥ 25% of field (consider targeted replanting or rehabilitation) - - **Why it matters:** High gap scores indicate potential yield losses and may warrant management intervention. + - **Calculation Method:** Statistical outlier detection (2σ method) + - Identifies pixels with CI below: **Median CI − (2 × Standard Deviation)** + - Calculates: **Gap Score = (Outlier Pixels / Total Pixels) × 100** + - Example: If 2 of 100 pixels fall below threshold → Gap Score = 2% + - **Score Ranges & Interpretation:** + - **0–10%:** Minimal gaps (excellent establishment, healthy field) + - **10–25%:** Moderate gaps (monitor for expansion, coordinate with agronomy) + - **≥ 25%:** Significant gaps (consider targeted replanting or rehabilitation) + - **Why it matters:** Gap scores reveal areas of poor establishment that may indicate early growth problems or harvest-related residue issues. Lower is better (0–3% is typical for healthy fields). 2. **Overview Map: Growth on Farm:** Provides a traffic light overview of field-by-field growth status for quick prioritization and reporting. diff --git a/r_app/report_utils.R b/r_app/report_utils.R index 8b3afa1..38ecbd2 100644 --- a/r_app/report_utils.R +++ b/r_app/report_utils.R @@ -1165,4 +1165,33 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr }) } - +#' Normalize field_details_table column structure +#' +#' Standardizes column names and ensures all expected KPI columns exist. +#' Handles Field → Field_id rename and injects missing columns as NA. +#' +#' @param field_details_table data.frame to normalize +#' @return data.frame with standardized column structure +normalize_field_details_columns <- function(field_details_table) { + if (is.null(field_details_table) || nrow(field_details_table) == 0) { + return(field_details_table) + } + + # Rename Field → Field_id if needed + if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) { + field_details_table <- field_details_table %>% + dplyr::rename(Field_id = Field) + } + + # Ensure all expected KPI columns exist; add as NA if missing + expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score", + "Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation", + "Decline_Severity", "Patchiness_Risk") + for (col in expected_cols) { + if (!col %in% names(field_details_table)) { + field_details_table[[col]] <- NA + } + } + + return(field_details_table) +} From 29a85357a52703e2163d6cba48b8c8d941fe83c8 Mon Sep 17 00:00:00 2001 From: Timon Date: Wed, 18 Feb 2026 14:53:44 +0100 Subject: [PATCH 30/30] Refactor field analysis utilities and reporting; update column names for alerts, enhance directory setup, and improve KPI calculation robustness. --- python_app/model_config.json | 2 +- r_app/80_utils_agronomic_support.R | 12 +++- r_app/80_utils_cane_supply.R | 55 +++++++++++++------ ..._CI_report_with_kpis_agronomic_support.Rmd | 38 ++++++++++++- r_app/parameters_project.R | 7 +-- 5 files changed, 87 insertions(+), 27 deletions(-) diff --git a/python_app/model_config.json b/python_app/model_config.json index 3be1268..c0dcbd3 100644 --- a/python_app/model_config.json +++ b/python_app/model_config.json @@ -44,7 +44,7 @@ "7d_std", "14d_std", "21d_std", - "DOY_normalized" + "DAH_normalized" ], "model": { "type": "LSTM", diff --git a/r_app/80_utils_agronomic_support.R b/r_app/80_utils_agronomic_support.R index 9354d44..9408b2b 100644 --- a/r_app/80_utils_agronomic_support.R +++ b/r_app/80_utils_agronomic_support.R @@ -789,8 +789,14 @@ calculate_all_field_analysis_agronomic_support <- function( # DETECT STRUCTURE FIRST - before any use of is_per_field week_file <- sprintf("week_%02d_%d.tif", current_week, current_year) - field_dirs <- list.dirs(current_mosaic_dir, full.names = FALSE, recursive = FALSE) - field_dirs <- field_dirs[field_dirs != ""] + + # Safely identify immediate child directories (not including root) + # Use list.files + dir.exists filter instead of list.dirs for robustness + all_entries <- list.files(current_mosaic_dir, full.names = FALSE) + field_dirs <- all_entries[sapply( + file.path(current_mosaic_dir, all_entries), + dir.exists + )] is_per_field <- length(field_dirs) > 0 && file.exists(file.path(current_mosaic_dir, field_dirs[1], week_file)) @@ -919,7 +925,7 @@ calculate_all_field_analysis_agronomic_support <- function( message("Calculating KPI 2: Area Change...") if (!is.null(previous_stats)) { - area_change_kpi <- calculate_area_change_kpi(current_stats, previous_stats) + area_change_kpi <- calculate_area_change_kpi(current_stats, previous_stats, field_boundaries_sf) } else { area_change_kpi <- data.frame( field_idx = seq_len(nrow(field_boundaries_sf)), diff --git a/r_app/80_utils_cane_supply.R b/r_app/80_utils_cane_supply.R index 693fe0f..7c75958 100644 --- a/r_app/80_utils_cane_supply.R +++ b/r_app/80_utils_cane_supply.R @@ -31,10 +31,16 @@ library(writexl) # ALERT THRESHOLDS & CONFIGURATION CONSTANTS # ============================================================================ -# CI change thresholds for alert categorization +# CI change thresholds for alert categorization and status determination # These values are project-standard and should be consistent across all workflows -CI_CHANGE_DECLINE_THRESHOLD <- -0.5 # Weekly CI change threshold for decline alerts -CI_CHANGE_INCREASE_THRESHOLD <- 0.5 # Weekly CI change threshold for increase alerts +CI_CHANGE_RAPID_GROWTH_THRESHOLD <- 0.5 # Weekly CI change for positive growth alert +CI_CHANGE_POSITIVE_GROWTH_THRESHOLD <- 0.2 # Weekly CI change for acceptable growth +CI_CHANGE_STABLE_THRESHOLD <- -0.2 # Weekly CI change for stable status (between -0.2 and +0.2) +CI_CHANGE_STRESS_TREND_THRESHOLD <- -0.3 # 4-week trend threshold for stress detection +CI_CHANGE_RAPID_DECLINE_THRESHOLD <- -0.5 # Weekly CI change for rapid decline alert +# Deprecated aliases (for backward compatibility if needed): +CI_CHANGE_DECLINE_THRESHOLD <- CI_CHANGE_RAPID_DECLINE_THRESHOLD # Weekly CI change threshold for decline alerts +CI_CHANGE_INCREASE_THRESHOLD <- CI_CHANGE_RAPID_GROWTH_THRESHOLD # Weekly CI change threshold for increase alerts # ============================================================================ # ANGATA-SPECIFIC HELPER FUNCTIONS (Placeholder Section) @@ -73,6 +79,9 @@ calculate_field_acreages <- function(field_boundaries_sf) { # Convert hectares to acres lookup_df %>% mutate(acreage = area_ha / 0.404686) %>% + # Aggregate by field to handle multi-row fields (e.g., sub_fields) + group_by(field) %>% + summarise(acreage = sum(acreage, na.rm = TRUE), .groups = "drop") %>% select(field, acreage) }, error = function(e) { message(paste("Warning: Could not calculate acreages from geometries -", e$message)) @@ -94,15 +103,26 @@ calculate_age_week <- function(planting_date, reference_date) { #' Assign crop phase based on age in weeks #' +#' Determines crop phase from age in weeks using canonical PHASE_DEFINITIONS +#' from 80_utils_common.R for consistency across all workflows. +#' #' @param age_week Numeric age in weeks -#' @return Character phase name +#' @return Character phase name (from PHASE_DEFINITIONS or "Unknown") +#' +#' @details +#' Uses the shared PHASE_DEFINITIONS to ensure identical phase boundaries +#' across all scripts. This wrapper delegates to get_phase_by_age() which +#' is the authoritative phase lookup function. +#' +#' Phase boundaries (from PHASE_DEFINITIONS): +#' - Germination: 0-6 weeks +#' - Tillering: 4-16 weeks +#' - Grand Growth: 17-39 weeks +#' - Maturation: 39+ weeks calculate_phase <- function(age_week) { - if (is.na(age_week)) return(NA_character_) - if (age_week >= 0 & age_week < 4) return("Germination") - if (age_week >= 4 & age_week < 17) return("Tillering") - if (age_week >= 17 & age_week < 39) return("Grand Growth") - if (age_week >= 39) return("Maturation") - NA_character_ + # Delegate to canonical get_phase_by_age() from 80_utils_common.R + # This ensures all phase boundaries are consistent across workflows + get_phase_by_age(age_week) } #' Bin percentage into 10% intervals with special handling for 90-100% @@ -187,7 +207,8 @@ calculate_status_alert <- function(imminent_prob, age_week, mean_ci, # Priority 3: STRESS DETECTED - consistent health decline (weekly trend) # Uses Four_week_trend (smooth trend) NOT daily fluctuation to avoid noise # Crop declining but not yet bare → opportunity to investigate - if (!is.na(four_week_trend) && four_week_trend < -0.3 && + # References: CI_CHANGE_STRESS_TREND_THRESHOLD for 4-week trend detection + if (!is.na(four_week_trend) && four_week_trend < CI_CHANGE_STRESS_TREND_THRESHOLD && !is.na(mean_ci) && mean_ci > 1.5) { return("stress_detected") } @@ -202,9 +223,10 @@ calculate_status_alert <- function(imminent_prob, age_week, mean_ci, # Priority 5: GROWTH ON TRACK - positive operational status # Field is healthy, uniform, and growing steadily (no action needed) # Conditions: good uniformity (CV < 0.15) AND stable/positive weekly trend + # References: CI_CHANGE_STABLE_THRESHOLD (±0.2 = stable, no change) if (!is.na(cv) && cv < 0.15 && - !is.na(four_week_trend) && four_week_trend >= -0.2 && - !is.na(weekly_ci_change) && weekly_ci_change >= -0.2) { + !is.na(four_week_trend) && four_week_trend >= CI_CHANGE_STABLE_THRESHOLD && + !is.na(weekly_ci_change) && weekly_ci_change >= CI_CHANGE_STABLE_THRESHOLD) { return("growth_on_track") } @@ -322,7 +344,7 @@ calculate_all_field_kpis <- function(current_stats, # Column 13: Imminent_prob (from script 31 or NA) Imminent_prob = { if (!is.null(imminent_prob_data)) { - imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)] + as.numeric(imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)]) } else { rep(NA_real_, nrow(current_stats)) } @@ -558,7 +580,8 @@ calculate_field_analysis_cane_supply <- function(setup, # ========== PHASE 3: LOAD PLANTING DATES ========== message("\nLoading harvest data from harvest.xlsx for planting dates...") - harvesting_data <- load_harvesting_data(data_dir) + # Use load_harvest_data() from 80_utils_common.R for consistency with 80_calculate_kpis.R + harvesting_data <- load_harvest_data(data_dir) planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf) @@ -615,7 +638,7 @@ calculate_field_analysis_cane_supply <- function(setup, # ========== PHASE 6: LOAD HARVEST PROBABILITIES ========== message("\n4. Loading harvest probabilities from script 31...") - # Use get_harvest_output_path() to safely build path (avoids NULL setup$kpi_field_stats_dir) + # Use get_harvest_output_path() to safely build path (stored in kpi_reports_dir) harvest_prob_file <- get_harvest_output_path(project_dir, current_week, current_year) message(paste(" Looking for:", harvest_prob_file)) diff --git a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd index 5b8f268..958b2be 100644 --- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd +++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd @@ -773,7 +773,43 @@ generate_field_alerts <- function(field_details_table) { # Generate and display alerts table if (exists("field_details_table") && !is.null(field_details_table) && nrow(field_details_table) > 0) { - alerts_data <- generate_field_alerts(field_details_table) + # Adapter: Map normalized column names back to legacy names for generate_field_alerts() + # (generates from the normalized schema created by normalize_field_details_columns + column_mappings) + field_details_for_alerts <- field_details_table + + # Rename normalized columns back to legacy names (only if they exist) + if ("Field_id" %in% names(field_details_for_alerts)) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(Field = Field_id) + } + if ("Mean_CI" %in% names(field_details_for_alerts)) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Mean CI` = Mean_CI) + } + if ("CV" %in% names(field_details_for_alerts) && !("CV Value" %in% names(field_details_for_alerts))) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`CV Value` = CV) + } + if ("TCH_Forecasted" %in% names(field_details_for_alerts)) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Yield Forecast (t/ha)` = TCH_Forecasted) + } + if ("Gap_Score" %in% names(field_details_for_alerts)) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Gap Score` = Gap_Score) + } + if ("Growth_Uniformity" %in% names(field_details_for_alerts)) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Growth Uniformity` = Growth_Uniformity) + } + if ("Decline_Risk" %in% names(field_details_for_alerts)) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Decline Risk` = Decline_Risk) + } + if ("Decline_Severity" %in% names(field_details_for_alerts) && !("Decline Risk" %in% names(field_details_for_alerts))) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Decline Risk` = Decline_Severity) + } + if ("Patchiness_Risk" %in% names(field_details_for_alerts)) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Patchiness Risk` = Patchiness_Risk) + } + if ("Morans_I" %in% names(field_details_for_alerts)) { + field_details_for_alerts <- field_details_for_alerts %>% dplyr::rename(`Moran's I` = Morans_I) + } + + alerts_data <- generate_field_alerts(field_details_for_alerts) if (!is.null(alerts_data) && nrow(alerts_data) > 0) { ft <- flextable(alerts_data) %>% # set_caption("Field Alerts Summary") %>% diff --git a/r_app/parameters_project.R b/r_app/parameters_project.R index 7ee1267..761cc3f 100644 --- a/r_app/parameters_project.R +++ b/r_app/parameters_project.R @@ -174,9 +174,6 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") { # TIER 6: KPI & REPORTING (Scripts 80/90/91 output) reports_dir <- here(laravel_storage_dir, "reports") kpi_reports_dir <- here(reports_dir, "kpis") - #kpi_reports_dir <- here(reports_dir, "kpis", "field_level") - #kpi_field_stats_dir <- here(reports_dir, "kpis", "field_stats") - #kpi_field_analysis_dir <- here(reports_dir, "kpis", "field_analysis") # TIER 7: SUPPORT (various scripts) vrt_dir <- here(data_dir, "vrt") # Virtual Raster files created during CI extraction @@ -189,7 +186,7 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") { extracted_ci_base_dir, daily_ci_vals_dir, cumulative_ci_vals_dir, ci_for_python_dir, growth_model_interpolated_dir, weekly_mosaic_dir, - reports_dir, kpi_reports_dir, #kpi_field_stats_dir, kpi_field_analysis_dir, + reports_dir, kpi_reports_dir, data_dir, vrt_dir, harvest_dir, log_dir ) @@ -228,8 +225,6 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") { # TIER 6: KPI & reporting reports_dir = reports_dir, kpi_reports_dir = kpi_reports_dir, - #kpi_field_stats_dir = kpi_field_stats_dir, - #kpi_field_analysis_dir = kpi_field_analysis_dir, # TIER 7: Support data_dir = data_dir,