Update .gitignore to include cache files, enhance R Markdown report paths, and improve KPI calculation script for clarity and error handling

2026-02-10 17:50:12 +01:00 · 2026-02-10 17:50:12 +01:00 · 3d3df151d3
parent f2da320fb6
commit 3d3df151d3
6 changed files with 164 additions and 192 deletions
--- a/.gitignore
+++ b/.gitignore
@ -45,6 +45,10 @@ output/
 reports/
 *.docx
 # Cache Files
 rosm.cache/
 *.cache
 # Logs
 *.log
 package_manager.log
--- a/r_app/80_utils_agronomic_support.R
+++ b/r_app/80_utils_agronomic_support.R
@ -369,7 +369,8 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
  for (i in seq_len(nrow(field_boundaries))) {
    field_name <- if ("field" %in% names(field_boundaries)) field_boundaries$field[i] else NA_character_
-    sub_field_name <- if ("sub_field" %in% names(field_boundaries)) field_boundaries$sub_field[i] else NA_character_    field_vect <- field_boundaries_vect[i]
+    sub_field_name <- if ("sub_field" %in% names(field_boundaries)) field_boundaries$sub_field[i] else NA_character_
    field_vect <- field_boundaries_vect[i]
    # Extract CI values using helper function
    ci_values <- extract_ci_values(ci_raster, field_vect)
--- a/r_app/91_CI_report_with_kpis_cane_supply.Rmd
+++ b/r_app/91_CI_report_with_kpis_cane_supply.Rmd
@ -12,7 +12,7 @@ params:
  x_axis_unit: "days"  # x-axis unit for trend plots: "days" or "weeks"
 output:
   word_document:
-    reference_docx: !expr file.path("word-styles-reference-var1.docx")
+    reference_docx: !expr file.path(here::here("r_app/word-styles-reference-var1.docx"))
    toc: no
 editor_options:
  chunk_output_type: console
@ -53,6 +53,8 @@ suppressPackageStartupMessages({
  # Visualization
  library(tmap)       # For interactive maps (field boundary visualization)
  library(ggspatial)  # For basemap tiles (annotation_map_tile for hexbin overlay)
  library(hexbin)     # For hexbin plots in ggplot2
  # Reporting
  library(knitr)      # For R Markdown document generation (code execution and output)
@ -95,6 +97,20 @@ tryCatch({
 # Load centralized paths
 paths <- setup_project_directories(project_dir)
 # Make paths globally available for use in other chunks (especially hexbin)
 assign("paths", paths, envir = globalenv())
 # Load field boundaries for hexbin map and spatial operations
 tryCatch({
  boundaries_list <- load_field_boundaries(paths$data_dir)
  field_boundaries_sf <- boundaries_list$field_boundaries_sf
  assign("field_boundaries_sf", field_boundaries_sf, envir = globalenv())
  safe_log("✓ Field boundaries loaded successfully")
 }, error = function(e) {
  safe_log(paste("⚠ Could not load field boundaries:", e$message), "WARNING")
  safe_log("Hexbin map will not be available", "WARNING")
 })
 # Log initial configuration
 safe_log("Starting the R Markdown script with KPIs")
 safe_log(paste("mail_day params:", params$mail_day))
@ -102,7 +118,7 @@ safe_log(paste("report_date params:", params$report_date))
 safe_log(paste("mail_day variable:", mail_day))
 ```
-```{r load_kpi_data, message=FALSE, warning=FALSE}
+```{r load_kpi_data, message=FALSE, warning=FALSE, include=FALSE}
 ## SIMPLE KPI LOADING - robust lookup with fallbacks
 # First, show working directory for debugging
@ -194,6 +210,9 @@ if (!is.null(summary_file) && file.exists(summary_file)) {
  cat(paste("  File size:", file.size(summary_file), "bytes\n"))
  summary_data <- tryCatch(readRDS(summary_file), error = function(e) { cat(paste("ERROR reading RDS:", e$message, "\n")); NULL })
  # Explicitly assign to global environment for downstream chunks
  assign("summary_data", summary_data, envir = globalenv())
  if (!is.null(summary_data)) {
    cat(paste("  ✓ Loaded successfully. Class:", class(summary_data), "\n"))
    if (is.list(summary_data)) {
@ -243,7 +262,9 @@ if (!is.null(field_details_file) && file.exists(field_details_file)) {
  if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
    field_details_table <- summary_data$field_analysis %>%
      rename(`Mean CI` = Mean_CI, `CV Value` = CV, Field = Field_id)
-    safe_log("Extracted field details from field_analysis data")}
+    safe_log("Extracted field details from field_analysis data")
  }
 }
 if (kpi_files_exist) {
  safe_log("✓ KPI summary tables loaded successfully")
@ -446,39 +467,37 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
 **Section 1:** Cane supply zone analyses, summaries and Key Performance Indicators (KPIs)  
 **Section 2:** Explanation of the report, definitions, methodology, and CSV export structure 
 \newpage
 # Section 1: Farm-wide Analyses and KPIs
 ## 1.1 Overview of cane supply area, showing zones with number of acres being harvest ready  
-```{r overview_map, fig.width=9, fig.height=7, fig.align="center", echo=FALSE}
+```{r overview_map, fig.width=9, fig.height=7, fig.align="center", echo=FALSE, message=FALSE, warning=FALSE}
 # Create a hexbin overview map with ggplot
 tryCatch({
-  # Try to load in the field analysis from this week
+  # Use per-field field_analysis data from RDS (already loaded in load_kpi_data chunk)
-  tryCatch({
+  if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
-    # Ensure week is zero-padded (e.g., 04) to match filenames like *_week04_2026.xlsx
+    analysis_data <- summary_data$field_analysis %>% 
-    week_padded <- sprintf("%02d", as.numeric(current_week))
+      select(Field_id, Status_Alert) %>%
-    analysis_path <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_analysis", paste0(params$data_dir, "_field_analysis_week", week_padded, "_", year, ".xlsx"))
+      rename(Status_trigger = Status_Alert)  # Rename to Status_trigger for compatibility with hexbin logic
-    analysis_data <- read_excel(analysis_path)
+  } else {
  }, error = function(e) {
    warning("Error loading field analysis data:", e$message)
  })
  # Fallback: if analysis_data failed to load, create an empty tibble with required columns
  if (!exists("analysis_data") || is.null(analysis_data)) {
    analysis_data <- tibble(Field_id = character(), Status_trigger = character())
  }
  # Load field boundaries if not already available (should be from initialize_project_config)
  if (!exists("field_boundaries_sf")) {
    if (exists("paths") && !is.null(paths$data_dir)) {
      boundaries_list <- load_field_boundaries(paths$data_dir)
      field_boundaries_sf <- boundaries_list$field_boundaries_sf
    } else {
      stop("Cannot locate field boundaries: paths$data_dir not available")
    }
  }
  # Define constants
  ACRE_CONV <- 4046.856
-  TARGET_CRS <- 32736
+  TARGET_CRS <- 4326  # WGS84 for web basemap compatibility (was 32736 UTM)
  # Process polygons into points
  points_processed <- field_boundaries_sf %>%
@ -500,22 +519,31 @@ tryCatch({
    st_centroid() %>%
    bind_cols(st_coordinates(.))
-  # Subsetting
+  # Validate coordinates - check for NaN, Inf, or missing values
  if (any(!is.finite(points_processed$X)) || any(!is.finite(points_processed$Y))) {
    points_processed <- points_processed %>%
      filter(is.finite(X) & is.finite(Y))
  }
  # Final validation: ensure we have valid coordinates before proceeding
  valid_x <- is.finite(points_processed$X) & is.finite(points_processed$Y)
  if (!any(valid_x)) {
    stop("No valid coordinates found in field boundaries. Cannot create hexbin map.")
  }
  # Subsetting by harvest status
  points_ready <- points_processed %>% 
    filter(Status_trigger == "harvest_ready")
  points_not_ready <- points_processed %>%
    filter(Status_trigger != "harvest_ready" | is.na(Status_trigger))
-  
+
-  # Generate breaks for the plotting
+  # Generate breaks for color gradients
  breaks_vec <- c(0, 5, 10, 15, 20, 30, 35)
  labels_vec <- as.character(breaks_vec)
  labels_vec[length(labels_vec)] <- ">30"
  labels_vec[1] <- "0.1"
-  # Set CRS
+  # Create dummy point to anchor hexbin grids for consistency
  map_crs <- st_crs(points_processed)
  # Create dummy point to anchor hexbin grids
  dummy_point <- data.frame(
    field = NA,
    sub_field = NA,
@ -526,92 +554,96 @@ tryCatch({
    geometry = NA
  )
-  # Add dummy point
+  # Convert dummy point to sf and add xy coordinates
  dummy_point <- st_as_sf(dummy_point, coords = c("X", "Y"), crs = st_crs(points_ready))
  dummy_point <- cbind(dummy_point, st_coordinates(dummy_point))
  # Add dummy point to ensure consistent hexbin grid anchoring
  points_ready <- rbind(points_ready, dummy_point)
  points_not_ready <- rbind(points_not_ready, dummy_point)
-  # Create the plot
+  # Calculate data bounds for coordinate limits (prevents basemap scale conflicts)
-  hexbin <- ggplot() +
+  x_limits <- c(
-    # Add OSM basemap
+    floor(min(points_processed$X, na.rm = TRUE) * 20) / 20,  # Round down to avoid edge clipping
-    annotation_map_tile(type = "osm", zoom = 11, progress = "none", alpha = 0.5) +
+    ceiling(max(points_processed$X, na.rm = TRUE) * 20) / 20   # Round up for padding
-    # Add the hexbins for not ready points
+  )
  y_limits <- c(
    floor(min(points_processed$Y, na.rm = TRUE) * 20) / 20,
    ceiling(max(points_processed$Y, na.rm = TRUE) * 20) / 20
  )
  # Create hexbin map with enhanced aesthetics, basemap, and proper legend
  ggplot() +
    # OpenStreetMap basemap (zoom=11 appropriate for agricultural fields)
    ggspatial::annotation_map_tile(type = "osm", zoom = 11, progress = "none", alpha = 0.5) +
    # Hexbin for NOT ready fields (light background)
    geom_hex(
-        data = points_not_ready, 
+      data = points_not_ready, 
-        aes(x = X, y = Y, weight = area_ac, alpha = "Not harvest ready within 1 month."), 
+      aes(x = X, y = Y, weight = area_ac, alpha = "Not harvest ready"), 
-        binwidth = c(1500, 1500), 
+      binwidth = c(0.012, 0.012),
-        fill = "#ffffff",
+      fill = "#ffffff",
-        colour = "#0000009a",
+      colour = "#0000009a",
-        linewidth = 0.1
+      linewidth = 0.1
    ) +
-    # Add the hexbins for ready points
+    # Hexbin for READY fields (colored gradient)
    geom_hex(
-        data = points_ready, 
+      data = points_ready, 
-        aes(x = X, y = Y, weight = area_ac), 
+      aes(x = X, y = Y, weight = area_ac), 
-        binwidth = c(1500, 1500), 
+      binwidth = c(0.012, 0.012),
-        alpha = 0.9,
+      alpha = 0.9,
-        colour = "#0000009a",
+      colour = "#0000009a",
-        linewidth = 0.1
+      linewidth = 0.1
    ) +
-    # Create colour bins
+    # Color gradient scale for acreage
    scale_fill_viridis_b(
-        option = "viridis",
+      option = "viridis",
-        direction = -1,
+      direction = -1,
-        breaks = breaks_vec,        # Use our 0-50 sequence
+      breaks = breaks_vec,
-        labels = labels_vec,        # Use our custom ">50" labels
+      labels = labels_vec,
-        limits = c(0, 35),          # Limit the scale
+      limits = c(0, 35),
-        oob = scales::squish,       # Squish higher values into the top bin
+      oob = scales::squish,
-        name = "Total Acres"
+      name = "Total Acres"
    ) +
-    # Titles
+    # Alpha scale for "not ready" status indication
    labs(
        subtitle = "Acres of fields 'harvest ready within a month'"
    ) +
    # Set the CRS
    coord_sf(crs = map_crs) +
    theme_minimal() + 
     # Legend trick to add the gray colours
    scale_alpha_manual(
-        name = NULL,    # No title needed for this specific legend item
+      name = NULL,
-        values = 0.8    # This sets the actual transparency for the map
+      values = 0.8
    ) +
-    # Legend customisation
+    # Titles and subtitle
    labs(subtitle = "Acres of fields 'harvest ready within a month'") +
    # Spatial coordinate system with explicit bounds (prevents basemap calc_limits conflicts)
    coord_sf(crs = 4326, xlim = x_limits, ylim = y_limits, expand = FALSE) +
    # Theme customization
    theme_minimal() +
    theme(
-        legend.position = "right",
+      legend.position = "right",
-        legend.box = "vertical",
+      legend.box = "vertical",
-        legend.title.align = 0.5,    # Center the legend title
+      legend.title.align = 0.5,
-        plot.title = element_text(face = "bold", size = 16),
+      plot.subtitle = element_text(size = 11, face = "bold")
        plot.subtitle = element_text(size = 11)
    ) +
-    # Customise the look of the horizontal bar
+    # Custom legend guides with enhanced styling
    guides(
-        # The colour bar
+      fill = guide_coloursteps(
-        fill = guide_coloursteps(
+        barwidth = 1, 
-            barwidth = 1, 
+        barheight = 20, 
-            barheight = 20, 
+        title.position = "top",
-            title.position = "top",
+        order = 1   
-            order = 1   
+      ),
      alpha = guide_legend(
        override.aes = list(
          fill = "#ffffff",     
          colour = "#0000009a", 
          shape = 22           
        ),
-        # The not ready box
+        order = 2      
-        alpha = guide_legend(
+      )
            override.aes = list(
                fill = "#ffffff",     
                colour = "#0000009a", 
                shape = 22           
            ),
            order = 2      
        )
    )
  hexbin 
 }, error = function(e) {
  warning("Error creating hexbin map:", e$message)
 })
 ```
 \newpage
 ## 1.2 Key Performance Indicators 
@ -780,80 +812,35 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
 ## Cloud Coverage Summary
 ```{r cloud_coverage_summary, echo=FALSE}
-# Display per-field cloud coverage summary
+# Display cloud coverage summary aggregated by category
-if (cloud_coverage_available && !is.null(per_field_cloud_coverage)) {
+# Cloud coverage data is included in the field_analysis RDS from Script 80
  # Prepare cloud coverage table for display
  # Handle both old and new column naming conventions
  cloud_display <- per_field_cloud_coverage %>%
    mutate(
      Field = if_else("field" %in% names(per_field_cloud_coverage), field_id, 
                      if_else("Field" %in% names(per_field_cloud_coverage), Field, field_id)),
      Clear_Percent = pct_clear,
      Cloud_Acreage = if_else("Cloud_Acreage" %in% names(per_field_cloud_coverage), Cloud_Acreage, 
                              as.numeric(NA)),
      Total_Acreage = if_else("Total_Acreage" %in% names(per_field_cloud_coverage), Total_Acreage,
                              as.numeric(NA))
    ) %>%
    select(Field, Cloud_category, Clear_Percent, missing_pixels, clear_pixels, total_pixels) %>%
    rename(
      "Field" = Field,
      "Cloud Status" = Cloud_category,
      "Clear %" = Clear_Percent,
      "Cloud Pixels" = missing_pixels,
      "Clear Pixels" = clear_pixels,
      "Total Pixels" = total_pixels
    ) %>%
    arrange(Field)
  # Create flextable
  ft <- flextable(cloud_display) %>%
    autofit()
  ft
 } else if (exists("cloud_coverage_available") && !cloud_coverage_available) {
  cat("Cloud coverage data not available for this week.\n")
 } else {
  cat("Cloud coverage data not loaded.\n")
 }
 ```
 ## Field Alerts
 ```{r field_alerts_table, echo=FALSE}
 # Generate alerts table from field analysis status triggers
 if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
-  field_analysis_table <- summary_data$field_analysis
+  field_analysis_df <- summary_data$field_analysis
-  # Extract fields with status alerts (non-null) - use Status_Alert column (not Status_trigger)
+  # Aggregate cloud coverage by category
-  alerts_data <- field_analysis_table %>%
+  cloud_summary <- field_analysis_df %>%
-    filter(!is.na(Status_Alert), Status_Alert != "") %>%
+    filter(!is.na(Cloud_category)) %>%
-    select(Field_id, Status_Alert) %>%
+    group_by(Cloud_category) %>%
-    rename(Field = Field_id, Alert = Status_Alert)
+    summarise(
      "Number of Fields" = n(),
      "Total Acreage" = round(sum(Acreage, na.rm = TRUE), 1),
      .groups = "drop"
    ) %>%
    rename("Cloud Category" = Cloud_category) %>%
    arrange("Cloud Category")
-  if (nrow(alerts_data) > 0) {
+  if (nrow(cloud_summary) > 0) {
-    # Format alert messages for display
+    # Create flextable
-    alerts_data <- alerts_data %>%
+    ft <- flextable(cloud_summary) %>%
-      mutate(
+      autofit() %>%
-        Alert = case_when(
+      theme_vanilla()
          Alert == "germination_started" ~ "Germination started - crop emerging",
          Alert == "germination_complete" ~ "Germination complete - established",
          Alert == "stress_detected_whole_field" ~ "Stress detected - check irrigation/disease",
          Alert == "strong_recovery" ~ "Strong recovery - growth accelerating",
          Alert == "growth_on_track" ~ "Growth on track - normal progression",
          Alert == "maturation_progressing" ~ "Maturation progressing - ripening phase",
          Alert == "harvest_ready" ~ "Harvest ready - 45+ weeks old",
          TRUE ~ Alert
        )
      )
    ft <- flextable(alerts_data) %>%
      autofit()
    ft
  } else {
-    cat("No active status triggers this week.\n")
+    cat("Cloud coverage data not available for summary.\n")
  }
 } else {
-  cat("Field analysis data not available for alerts.\n")
+  cat("Field analysis data not available for cloud coverage summary.\n")
 }
 ```
--- a/r_app/FIX_INDENTATION.R
+++ b/r_app/FIX_INDENTATION.R
@ -1,20 +0,0 @@
 # Fix indentation for lines 408-1022 in 80_calculate_kpis.R
 # These lines should be inside the else-if block at the CANE_SUPPLY_WORKFLOW level
 file_path <- "r_app/80_calculate_kpis.R"
 lines <- readLines(file_path)
 # Lines 408-1021 (0-indexed: 407-1020) need 2 more spaces of indentation
 for (i in 408:1021) {
  if (i <= length(lines)) {
    line <- lines[i]
    # Skip empty or whitespace-only lines
    if (nchar(trimws(line)) > 0) {
      # Add 2 spaces
      lines[i] <- paste0("  ", line)
    }
  }
 }
 writeLines(lines, file_path)
 cat("Fixed indentation for lines 408-1022\n")
--- a/r_app/MANUAL_PIPELINE_RUNNER.R
+++ b/r_app/MANUAL_PIPELINE_RUNNER.R
@ -436,21 +436,23 @@
 #   From R console or R script:
 #
 #   rmarkdown::render(
-#     "r_app/90_CI_report_with_kpis_simple.Rmd",
+rmarkdown::render(
-#     params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
+  "r_app/90_CI_report_with_kpis_cane_supply.Rmd",
-#     output_file = "SmartCane_Report_agronomic_angata_2026-02-04.docx",
+  params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
-#     output_dir = "laravel_app/storage/app/angata/reports"
+  output_file = "SmartCane_Report_cane_supply_angata_2026-02-10_FIXED.docx",
-#   )
+  output_dir = "laravel_app/storage/app/angata/reports"
 )
 #
 # COMMAND #2 - CANE SUPPLY REPORT (ANGATA, CHEMBA, XINAVANE, ESA):
 #   From R console or R script:
 #
 #   rmarkdown::render(
-#     "r_app/91_CI_report_with_kpis_Angata.Rmd",
+rmarkdown::render(
-#     params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
+  "r_app/91_CI_report_with_kpis_cane_supply.Rmd",
-#     output_file = "SmartCane_Report_cane_supply_angata_2026-02-04.docx",
+  params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
-#     output_dir = "laravel_app/storage/app/angata/reports"
+  output_file = "SmartCane_Report_basemap_test.docx",
-#   )
+  output_dir = "laravel_app/storage/app/angata/reports"
 )
 #
 # EXPECTED OUTPUT:
 #   File: SmartCane_Report_*_{PROJECT}_{DATE}.docx
--- a/r_app/parameters_project.R
+++ b/r_app/parameters_project.R
@ -190,7 +190,6 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") {
  # TIER 8: CONFIG & METADATA PATHS
  field_boundaries_path <- here(data_dir, "pivot.geojson")
  tiling_config_path <- here(laravel_storage_dir, "tiling_config.json")
  # Return comprehensive list
  return(list(
@ -231,8 +230,7 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") {
    log_dir = log_dir,
    # TIER 8: Metadata
-    field_boundaries_path = field_boundaries_path,
+    field_boundaries_path = field_boundaries_path
    tiling_config_path = tiling_config_path
  ))
 }