Update .gitignore to include cache files, enhance R Markdown report paths, and improve KPI calculation script for clarity and error handling

2026-02-10 17:50:12 +01:00 · 2026-02-10 17:50:12 +01:00 · 3d3df151d3
parent f2da320fb6
commit 3d3df151d3
6 changed files with 164 additions and 192 deletions
--- a/.gitignore
+++ b/.gitignore
@ -45,6 +45,10 @@ output/
 reports/
 *.docx

+# Cache Files
+rosm.cache/
+*.cache
+
 # Logs
 *.log
 package_manager.log
--- a/r_app/80_utils_agronomic_support.R
+++ b/r_app/80_utils_agronomic_support.R
@ -369,7 +369,8 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {

  for (i in seq_len(nrow(field_boundaries))) {
    field_name <- if ("field" %in% names(field_boundaries)) field_boundaries$field[i] else NA_character_
-    sub_field_name <- if ("sub_field" %in% names(field_boundaries)) field_boundaries$sub_field[i] else NA_character_    field_vect <- field_boundaries_vect[i]
+    sub_field_name <- if ("sub_field" %in% names(field_boundaries)) field_boundaries$sub_field[i] else NA_character_
+    field_vect <- field_boundaries_vect[i]

    # Extract CI values using helper function
    ci_values <- extract_ci_values(ci_raster, field_vect)
--- a/r_app/91_CI_report_with_kpis_cane_supply.Rmd
+++ b/r_app/91_CI_report_with_kpis_cane_supply.Rmd
@ -12,7 +12,7 @@ params:
  x_axis_unit: "days"  # x-axis unit for trend plots: "days" or "weeks"
 output:
   word_document:
-    reference_docx: !expr file.path("word-styles-reference-var1.docx")
+    reference_docx: !expr file.path(here::here("r_app/word-styles-reference-var1.docx"))
    toc: no
 editor_options:
  chunk_output_type: console
@ -53,6 +53,8 @@ suppressPackageStartupMessages({
  
  # Visualization
  library(tmap)       # For interactive maps (field boundary visualization)
+  library(ggspatial)  # For basemap tiles (annotation_map_tile for hexbin overlay)
+  library(hexbin)     # For hexbin plots in ggplot2
  
  # Reporting
  library(knitr)      # For R Markdown document generation (code execution and output)
@ -95,6 +97,20 @@ tryCatch({
 # Load centralized paths
 paths <- setup_project_directories(project_dir)

+# Make paths globally available for use in other chunks (especially hexbin)
+assign("paths", paths, envir = globalenv())
+
+# Load field boundaries for hexbin map and spatial operations
+tryCatch({
+  boundaries_list <- load_field_boundaries(paths$data_dir)
+  field_boundaries_sf <- boundaries_list$field_boundaries_sf
+  assign("field_boundaries_sf", field_boundaries_sf, envir = globalenv())
+  safe_log("✓ Field boundaries loaded successfully")
+}, error = function(e) {
+  safe_log(paste("⚠ Could not load field boundaries:", e$message), "WARNING")
+  safe_log("Hexbin map will not be available", "WARNING")
+})
+
 # Log initial configuration
 safe_log("Starting the R Markdown script with KPIs")
 safe_log(paste("mail_day params:", params$mail_day))
@ -102,7 +118,7 @@ safe_log(paste("report_date params:", params$report_date))
 safe_log(paste("mail_day variable:", mail_day))
 ```

-```{r load_kpi_data, message=FALSE, warning=FALSE}
+```{r load_kpi_data, message=FALSE, warning=FALSE, include=FALSE}
 ## SIMPLE KPI LOADING - robust lookup with fallbacks

 # First, show working directory for debugging
@ -194,6 +210,9 @@ if (!is.null(summary_file) && file.exists(summary_file)) {
  cat(paste("  File size:", file.size(summary_file), "bytes\n"))
  summary_data <- tryCatch(readRDS(summary_file), error = function(e) { cat(paste("ERROR reading RDS:", e$message, "\n")); NULL })
  
+  # Explicitly assign to global environment for downstream chunks
+  assign("summary_data", summary_data, envir = globalenv())
+  
  if (!is.null(summary_data)) {
    cat(paste("  ✓ Loaded successfully. Class:", class(summary_data), "\n"))
    if (is.list(summary_data)) {
@ -243,7 +262,9 @@ if (!is.null(field_details_file) && file.exists(field_details_file)) {
  if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
    field_details_table <- summary_data$field_analysis %>%
      rename(`Mean CI` = Mean_CI, `CV Value` = CV, Field = Field_id)
-    safe_log("Extracted field details from field_analysis data")}
+    safe_log("Extracted field details from field_analysis data")
+  }
+}

 if (kpi_files_exist) {
  safe_log("✓ KPI summary tables loaded successfully")
@ -446,39 +467,37 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
 **Section 1:** Cane supply zone analyses, summaries and Key Performance Indicators (KPIs)  
 **Section 2:** Explanation of the report, definitions, methodology, and CSV export structure 

-
-
-
-
-
-
 \newpage

 # Section 1: Farm-wide Analyses and KPIs

 ## 1.1 Overview of cane supply area, showing zones with number of acres being harvest ready  

-```{r overview_map, fig.width=9, fig.height=7, fig.align="center", echo=FALSE}
+```{r overview_map, fig.width=9, fig.height=7, fig.align="center", echo=FALSE, message=FALSE, warning=FALSE}
 # Create a hexbin overview map with ggplot
 tryCatch({
-  # Try to load in the field analysis from this week
-  tryCatch({
-    # Ensure week is zero-padded (e.g., 04) to match filenames like *_week04_2026.xlsx
-    week_padded <- sprintf("%02d", as.numeric(current_week))
-    analysis_path <- file.path("laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_analysis", paste0(params$data_dir, "_field_analysis_week", week_padded, "_", year, ".xlsx"))
-    analysis_data <- read_excel(analysis_path)
-  }, error = function(e) {
-    warning("Error loading field analysis data:", e$message)
-  })
-  
-  # Fallback: if analysis_data failed to load, create an empty tibble with required columns
-  if (!exists("analysis_data") || is.null(analysis_data)) {
+  # Use per-field field_analysis data from RDS (already loaded in load_kpi_data chunk)
+  if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
+    analysis_data <- summary_data$field_analysis %>% 
+      select(Field_id, Status_Alert) %>%
+      rename(Status_trigger = Status_Alert)  # Rename to Status_trigger for compatibility with hexbin logic
+  } else {
    analysis_data <- tibble(Field_id = character(), Status_trigger = character())
  }
  
+  # Load field boundaries if not already available (should be from initialize_project_config)
+  if (!exists("field_boundaries_sf")) {
+    if (exists("paths") && !is.null(paths$data_dir)) {
+      boundaries_list <- load_field_boundaries(paths$data_dir)
+      field_boundaries_sf <- boundaries_list$field_boundaries_sf
+    } else {
+      stop("Cannot locate field boundaries: paths$data_dir not available")
+    }
+  }
+  
  # Define constants
  ACRE_CONV <- 4046.856
-  TARGET_CRS <- 32736
+  TARGET_CRS <- 4326  # WGS84 for web basemap compatibility (was 32736 UTM)
  
  # Process polygons into points
  points_processed <- field_boundaries_sf %>%
@ -500,22 +519,31 @@ tryCatch({
    st_centroid() %>%
    bind_cols(st_coordinates(.))
  
-  # Subsetting
+  # Validate coordinates - check for NaN, Inf, or missing values
+  if (any(!is.finite(points_processed$X)) || any(!is.finite(points_processed$Y))) {
+    points_processed <- points_processed %>%
+      filter(is.finite(X) & is.finite(Y))
+  }
+  
+  # Final validation: ensure we have valid coordinates before proceeding
+  valid_x <- is.finite(points_processed$X) & is.finite(points_processed$Y)
+  if (!any(valid_x)) {
+    stop("No valid coordinates found in field boundaries. Cannot create hexbin map.")
+  }
+  
+  # Subsetting by harvest status
  points_ready <- points_processed %>% 
    filter(Status_trigger == "harvest_ready")
  points_not_ready <- points_processed %>%
    filter(Status_trigger != "harvest_ready" | is.na(Status_trigger))
-  
-  # Generate breaks for the plotting
+
+  # Generate breaks for color gradients
  breaks_vec <- c(0, 5, 10, 15, 20, 30, 35)
  labels_vec <- as.character(breaks_vec)
  labels_vec[length(labels_vec)] <- ">30"
  labels_vec[1] <- "0.1"
  
-  # Set CRS
-  map_crs <- st_crs(points_processed)
-  
-  # Create dummy point to anchor hexbin grids
+  # Create dummy point to anchor hexbin grids for consistency
  dummy_point <- data.frame(
    field = NA,
    sub_field = NA,
@ -526,92 +554,96 @@ tryCatch({
    geometry = NA
  )
  
-  # Add dummy point
+  # Convert dummy point to sf and add xy coordinates
  dummy_point <- st_as_sf(dummy_point, coords = c("X", "Y"), crs = st_crs(points_ready))
  dummy_point <- cbind(dummy_point, st_coordinates(dummy_point))
+  
+  # Add dummy point to ensure consistent hexbin grid anchoring
  points_ready <- rbind(points_ready, dummy_point)
  points_not_ready <- rbind(points_not_ready, dummy_point)

-  # Create the plot
-  hexbin <- ggplot() +
-    # Add OSM basemap
-    annotation_map_tile(type = "osm", zoom = 11, progress = "none", alpha = 0.5) +
-    # Add the hexbins for not ready points
+  # Calculate data bounds for coordinate limits (prevents basemap scale conflicts)
+  x_limits <- c(
+    floor(min(points_processed$X, na.rm = TRUE) * 20) / 20,  # Round down to avoid edge clipping
+    ceiling(max(points_processed$X, na.rm = TRUE) * 20) / 20   # Round up for padding
+  )
+  y_limits <- c(
+    floor(min(points_processed$Y, na.rm = TRUE) * 20) / 20,
+    ceiling(max(points_processed$Y, na.rm = TRUE) * 20) / 20
+  )
+
+  # Create hexbin map with enhanced aesthetics, basemap, and proper legend
+  ggplot() +
+    # OpenStreetMap basemap (zoom=11 appropriate for agricultural fields)
+    ggspatial::annotation_map_tile(type = "osm", zoom = 11, progress = "none", alpha = 0.5) +
+    # Hexbin for NOT ready fields (light background)
    geom_hex(
-        data = points_not_ready, 
-        aes(x = X, y = Y, weight = area_ac, alpha = "Not harvest ready within 1 month."), 
-        binwidth = c(1500, 1500), 
-        fill = "#ffffff",
-        colour = "#0000009a",
-        linewidth = 0.1
+      data = points_not_ready, 
+      aes(x = X, y = Y, weight = area_ac, alpha = "Not harvest ready"), 
+      binwidth = c(0.012, 0.012),
+      fill = "#ffffff",
+      colour = "#0000009a",
+      linewidth = 0.1
    ) +
-    # Add the hexbins for ready points
+    # Hexbin for READY fields (colored gradient)
    geom_hex(
-        data = points_ready, 
-        aes(x = X, y = Y, weight = area_ac), 
-        binwidth = c(1500, 1500), 
-        alpha = 0.9,
-        colour = "#0000009a",
-        linewidth = 0.1
+      data = points_ready, 
+      aes(x = X, y = Y, weight = area_ac), 
+      binwidth = c(0.012, 0.012),
+      alpha = 0.9,
+      colour = "#0000009a",
+      linewidth = 0.1
    ) +
-    # Create colour bins
+    # Color gradient scale for acreage
    scale_fill_viridis_b(
-        option = "viridis",
-        direction = -1,
-        breaks = breaks_vec,        # Use our 0-50 sequence
-        labels = labels_vec,        # Use our custom ">50" labels
-        limits = c(0, 35),          # Limit the scale
-        oob = scales::squish,       # Squish higher values into the top bin
-        name = "Total Acres"
+      option = "viridis",
+      direction = -1,
+      breaks = breaks_vec,
+      labels = labels_vec,
+      limits = c(0, 35),
+      oob = scales::squish,
+      name = "Total Acres"
    ) +
-    # Titles
-    labs(
-        subtitle = "Acres of fields 'harvest ready within a month'"
-    ) +
-    # Set the CRS
-    coord_sf(crs = map_crs) +
-    theme_minimal() + 
-     # Legend trick to add the gray colours
+    # Alpha scale for "not ready" status indication
    scale_alpha_manual(
-        name = NULL,    # No title needed for this specific legend item
-        values = 0.8    # This sets the actual transparency for the map
+      name = NULL,
+      values = 0.8
    ) +
-    # Legend customisation
+    # Titles and subtitle
+    labs(subtitle = "Acres of fields 'harvest ready within a month'") +
+    # Spatial coordinate system with explicit bounds (prevents basemap calc_limits conflicts)
+    coord_sf(crs = 4326, xlim = x_limits, ylim = y_limits, expand = FALSE) +
+    # Theme customization
+    theme_minimal() +
    theme(
-        legend.position = "right",
-        legend.box = "vertical",
-        legend.title.align = 0.5,    # Center the legend title
-        plot.title = element_text(face = "bold", size = 16),
-        plot.subtitle = element_text(size = 11)
+      legend.position = "right",
+      legend.box = "vertical",
+      legend.title.align = 0.5,
+      plot.subtitle = element_text(size = 11, face = "bold")
    ) +
-    # Customise the look of the horizontal bar
+    # Custom legend guides with enhanced styling
    guides(
-        # The colour bar
-        fill = guide_coloursteps(
-            barwidth = 1, 
-            barheight = 20, 
-            title.position = "top",
-            order = 1   
+      fill = guide_coloursteps(
+        barwidth = 1, 
+        barheight = 20, 
+        title.position = "top",
+        order = 1   
+      ),
+      alpha = guide_legend(
+        override.aes = list(
+          fill = "#ffffff",     
+          colour = "#0000009a", 
+          shape = 22           
        ),
-        # The not ready box
-        alpha = guide_legend(
-            override.aes = list(
-                fill = "#ffffff",     
-                colour = "#0000009a", 
-                shape = 22           
-            ),
-            order = 2      
-        )
+        order = 2      
+      )
    )
-  
-  hexbin 

 }, error = function(e) {
  warning("Error creating hexbin map:", e$message)
 })
 ```

-
 \newpage
 ## 1.2 Key Performance Indicators 

@ -780,80 +812,35 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
 ## Cloud Coverage Summary

 ```{r cloud_coverage_summary, echo=FALSE}
-# Display per-field cloud coverage summary
-if (cloud_coverage_available && !is.null(per_field_cloud_coverage)) {
-  # Prepare cloud coverage table for display
-  # Handle both old and new column naming conventions
-  cloud_display <- per_field_cloud_coverage %>%
-    mutate(
-      Field = if_else("field" %in% names(per_field_cloud_coverage), field_id, 
-                      if_else("Field" %in% names(per_field_cloud_coverage), Field, field_id)),
-      Clear_Percent = pct_clear,
-      Cloud_Acreage = if_else("Cloud_Acreage" %in% names(per_field_cloud_coverage), Cloud_Acreage, 
-                              as.numeric(NA)),
-      Total_Acreage = if_else("Total_Acreage" %in% names(per_field_cloud_coverage), Total_Acreage,
-                              as.numeric(NA))
-    ) %>%
-    select(Field, Cloud_category, Clear_Percent, missing_pixels, clear_pixels, total_pixels) %>%
-    rename(
-      "Field" = Field,
-      "Cloud Status" = Cloud_category,
-      "Clear %" = Clear_Percent,
-      "Cloud Pixels" = missing_pixels,
-      "Clear Pixels" = clear_pixels,
-      "Total Pixels" = total_pixels
-    ) %>%
-    arrange(Field)
-  
-  # Create flextable
-  ft <- flextable(cloud_display) %>%
-    autofit()
-  
-  ft
-} else if (exists("cloud_coverage_available") && !cloud_coverage_available) {
-  cat("Cloud coverage data not available for this week.\n")
-} else {
-  cat("Cloud coverage data not loaded.\n")
-}
-```
-
-## Field Alerts
-
-```{r field_alerts_table, echo=FALSE}
-# Generate alerts table from field analysis status triggers
+# Display cloud coverage summary aggregated by category
+# Cloud coverage data is included in the field_analysis RDS from Script 80
 if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
-  field_analysis_table <- summary_data$field_analysis
+  field_analysis_df <- summary_data$field_analysis
  
-  # Extract fields with status alerts (non-null) - use Status_Alert column (not Status_trigger)
-  alerts_data <- field_analysis_table %>%
-    filter(!is.na(Status_Alert), Status_Alert != "") %>%
-    select(Field_id, Status_Alert) %>%
-    rename(Field = Field_id, Alert = Status_Alert)
+  # Aggregate cloud coverage by category
+  cloud_summary <- field_analysis_df %>%
+    filter(!is.na(Cloud_category)) %>%
+    group_by(Cloud_category) %>%
+    summarise(
+      "Number of Fields" = n(),
+      "Total Acreage" = round(sum(Acreage, na.rm = TRUE), 1),
+      .groups = "drop"
+    ) %>%
+    rename("Cloud Category" = Cloud_category) %>%
+    arrange("Cloud Category")
  
-  if (nrow(alerts_data) > 0) {
-    # Format alert messages for display
-    alerts_data <- alerts_data %>%
-      mutate(
-        Alert = case_when(
-          Alert == "germination_started" ~ "Germination started - crop emerging",
-          Alert == "germination_complete" ~ "Germination complete - established",
-          Alert == "stress_detected_whole_field" ~ "Stress detected - check irrigation/disease",
-          Alert == "strong_recovery" ~ "Strong recovery - growth accelerating",
-          Alert == "growth_on_track" ~ "Growth on track - normal progression",
-          Alert == "maturation_progressing" ~ "Maturation progressing - ripening phase",
-          Alert == "harvest_ready" ~ "Harvest ready - 45+ weeks old",
-          TRUE ~ Alert
-        )
-      )
+  if (nrow(cloud_summary) > 0) {
+    # Create flextable
+    ft <- flextable(cloud_summary) %>%
+      autofit() %>%
+      theme_vanilla()
    
-    ft <- flextable(alerts_data) %>%
-      autofit()
    ft
  } else {
-    cat("No active status triggers this week.\n")
+    cat("Cloud coverage data not available for summary.\n")
  }
 } else {
-  cat("Field analysis data not available for alerts.\n")
+  cat("Field analysis data not available for cloud coverage summary.\n")
 }
 ```

--- a/r_app/FIX_INDENTATION.R
+++ b/r_app/FIX_INDENTATION.R
@ -1,20 +0,0 @@
-# Fix indentation for lines 408-1022 in 80_calculate_kpis.R
-# These lines should be inside the else-if block at the CANE_SUPPLY_WORKFLOW level
-
-file_path <- "r_app/80_calculate_kpis.R"
-lines <- readLines(file_path)
-
-# Lines 408-1021 (0-indexed: 407-1020) need 2 more spaces of indentation
-for (i in 408:1021) {
-  if (i <= length(lines)) {
-    line <- lines[i]
-    # Skip empty or whitespace-only lines
-    if (nchar(trimws(line)) > 0) {
-      # Add 2 spaces
-      lines[i] <- paste0("  ", line)
-    }
-  }
-}
-
-writeLines(lines, file_path)
-cat("Fixed indentation for lines 408-1022\n")
--- a/r_app/MANUAL_PIPELINE_RUNNER.R
+++ b/r_app/MANUAL_PIPELINE_RUNNER.R
@ -436,21 +436,23 @@
 #   From R console or R script:
 #
 #   rmarkdown::render(
-#     "r_app/90_CI_report_with_kpis_simple.Rmd",
-#     params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
-#     output_file = "SmartCane_Report_agronomic_angata_2026-02-04.docx",
-#     output_dir = "laravel_app/storage/app/angata/reports"
-#   )
+rmarkdown::render(
+  "r_app/90_CI_report_with_kpis_cane_supply.Rmd",
+  params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
+  output_file = "SmartCane_Report_cane_supply_angata_2026-02-10_FIXED.docx",
+  output_dir = "laravel_app/storage/app/angata/reports"
+)
 #
 # COMMAND #2 - CANE SUPPLY REPORT (ANGATA, CHEMBA, XINAVANE, ESA):
 #   From R console or R script:
 #
 #   rmarkdown::render(
-#     "r_app/91_CI_report_with_kpis_Angata.Rmd",
-#     params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
-#     output_file = "SmartCane_Report_cane_supply_angata_2026-02-04.docx",
-#     output_dir = "laravel_app/storage/app/angata/reports"
-#   )
+rmarkdown::render(
+  "r_app/91_CI_report_with_kpis_cane_supply.Rmd",
+  params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
+  output_file = "SmartCane_Report_basemap_test.docx",
+  output_dir = "laravel_app/storage/app/angata/reports"
+)
 #
 # EXPECTED OUTPUT:
 #   File: SmartCane_Report_*_{PROJECT}_{DATE}.docx
--- a/r_app/parameters_project.R
+++ b/r_app/parameters_project.R
@ -190,7 +190,6 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") {
  
  # TIER 8: CONFIG & METADATA PATHS
  field_boundaries_path <- here(data_dir, "pivot.geojson")
-  tiling_config_path <- here(laravel_storage_dir, "tiling_config.json")
  
  # Return comprehensive list
  return(list(
@ -231,8 +230,7 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") {
    log_dir = log_dir,
    
    # TIER 8: Metadata
-    field_boundaries_path = field_boundaries_path,
-    tiling_config_path = tiling_config_path
+    field_boundaries_path = field_boundaries_path
  ))
 }