Normalize field detail column names for consistency and improve centroid calculation in cane supply report

2026-02-17 14:00:32 +01:00 · 2026-02-17 14:00:32 +01:00 · 14bd0fa47a
parent e4e19df0c7
commit 14bd0fa47a
2 changed files with 73 additions and 40 deletions
--- a/r_app/90_CI_report_with_kpis_agronomic_support.Rmd
+++ b/r_app/90_CI_report_with_kpis_agronomic_support.Rmd
@ -2,7 +2,7 @@
 params:
  ref: "word-styles-reference-var1.docx"
  output_file: "CI_report.docx"
-  report_date: !r Sys.Date()
+  report_date: "2026-02-04" #!r Sys.Date()
  data_dir: "aura"
  mail_day: "Wednesday"
  borders: FALSE
@ -179,6 +179,41 @@ if (dir.exists(kpi_data_dir)) {
        safe_log(paste("✓ Loaded field_details_table with", nrow(field_details_table), "fields"))
        safe_log(paste("  Columns:", paste(names(field_details_table), collapse=", ")))
        
+        # NORMALIZATION: Ensure critical column names match downstream expectations
+        # Handle both "Field" and "Field_id" naming conventions
+        if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
+          field_details_table <- field_details_table %>%
+            dplyr::rename(Field_id = Field)
+          safe_log("  ✓ Normalized: renamed Field → Field_id")
+        }
+        
+        # Normalize other common column name variations
+        column_mappings <- list(
+          c("CV Value", "CV"),
+          c("CV", "CV"),  # Keep as-is
+          c("Mean CI", "Mean_CI"),
+          c("Mean_CI", "Mean_CI"),  # Keep as-is
+          c("Yield Forecast (t/ha)", "TCH_Forecasted"),
+          c("TCH_Forecasted", "TCH_Forecasted"),  # Keep as-is
+          c("Gap Score", "Gap_Score"),
+          c("Gap_Score", "Gap_Score"),  # Keep as-is
+          c("Growth Uniformity", "Growth_Uniformity"),
+          c("Decline Risk", "Decline_Risk"),
+          c("Weed Risk", "Weed_Risk"),
+          c("Moran's I", "Morans_I")
+        )
+        
+        for (mapping in column_mappings) {
+          old_name <- mapping[1]
+          new_name <- mapping[2]
+          if (old_name != new_name && old_name %in% names(field_details_table) && !new_name %in% names(field_details_table)) {
+            field_details_table <- field_details_table %>%
+              dplyr::rename(!!new_name := old_name)
+            safe_log(paste("  ✓ Normalized:", old_name, "→", new_name))
+          }
+        }
+
+        
        # Only create summary_tables if not already loaded from RDS
        if (is.null(summary_tables)) {
          summary_tables <- list()
@ -783,17 +818,18 @@ if (!exists("field_details_table") || is.null(field_details_table)) {
      }
      
      # Create minimal field details table with actual data we have + NAs for missing KPI columns
+      # IMPORTANT: Use column names that match downstream code expectations (no spaces, match exact names)
      field_details_table <- tibble::tibble(
-        Field = field_names,
-        `Field Size (acres)` = as.numeric(field_sizes),
-        `Growth Uniformity` = NA_character_,
-        `Yield Forecast (t/ha)` = NA_real_,
-        `Gap Score` = NA_real_,
-        `Decline Risk` = NA_character_,
-        `Weed Risk` = NA_character_,
-        `Mean CI` = NA_real_,
-        `CV Value` = NA_real_,
-        `Moran's I` = NA_real_
+        Field_id = field_names,
+        Acreage = as.numeric(field_sizes),
+        Growth_Uniformity = NA_character_,
+        TCH_Forecasted = NA_real_,
+        Gap_Score = NA_real_,
+        Decline_Risk = NA_character_,
+        Weed_Risk = NA_character_,
+        Mean_CI = NA_real_,
+        CV = NA_real_,
+        Morans_I = NA_real_
      )
      safe_log(paste("Created field_details_table from geometries for", nrow(field_details_table), "fields"))
    }
@ -1353,6 +1389,7 @@ tryCatch({
          )
          
          cat(paste(kpi_parts, collapse = " | "), "\n")
+          cat("\n")  # Extra newline for paragraph separation without creating empty pages
        }
      }
      
@ -1444,6 +1481,26 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
  }
  
  # Join field sizes and ages to KPI data, simplified column selection
+  # DEFENSIVE: Normalize field_details_table column names one more time before joining
+  # Ensure it has Field_id column (regardless of whether it was from RDS or fallback)
+  if (!is.null(field_details_table) && nrow(field_details_table) > 0) {
+    # If Field exists but Field_id doesn't, rename it
+    if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
+      field_details_table <- field_details_table %>%
+        dplyr::rename(Field_id = Field)
+    }
+    
+    # Ensure all expected KPI columns exist; add as NA if missing
+    expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score", 
+                       "Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation",
+                       "Decline_Severity", "Weed_Pressure_Risk")
+    for (col in expected_cols) {
+      if (!col %in% names(field_details_table)) {
+        field_details_table[[col]] <- NA
+      }
+    }
+  }
+  
  field_details_clean <- field_details_table %>%
    left_join(field_sizes_df, by = c("Field_id" = "field")) %>%
    left_join(field_ages_df, by = c("Field_id" = "field")) %>%
--- a/r_app/91_CI_report_with_kpis_cane_supply.Rmd
+++ b/r_app/91_CI_report_with_kpis_cane_supply.Rmd
@ -514,6 +514,8 @@ tryCatch({
  TARGET_CRS <- 4326  # WGS84 for web basemap compatibility (was 32736 UTM)
  
  # Process polygons into points
+  # IMPORTANT: Calculate centroids in projected CRS (UTM 36S for southern Africa) to avoid 
+  # st_centroid warnings about longitude/latitude data, then transform back to WGS84
  points_processed <- field_boundaries_sf %>%
    st_make_valid() %>%
    mutate(
@ -529,8 +531,9 @@ tryCatch({
      analysis_data %>% select(Field_id, Status_trigger),
      by = c("field" = "Field_id")
    ) %>%
-    st_transform(crs = TARGET_CRS) %>%
+    st_transform(crs = 32736) %>%  # UTM zone 36S (southern Africa)
    st_centroid() %>%
+    st_transform(crs = TARGET_CRS) %>%
    bind_cols(st_coordinates(.))
  
  # Validate coordinates - check for NaN, Inf, or missing values
@ -557,30 +560,8 @@ tryCatch({
  labels_vec[length(labels_vec)] <- ">30"
  labels_vec[1] <- "0.1"
  
-  # Create dummy point to anchor hexbin grids for consistency
-  dummy_point <- data.frame(
-    field = NA,
-    sub_field = NA,
-    area_ac = 0,
-    Status_trigger = NA,
-    X = min(points_processed$X, na.rm = TRUE),
-    Y = min(points_processed$Y, na.rm = TRUE),
-    geometry = NA
-  )
-  
-  # Convert dummy point to sf and add xy coordinates
-  dummy_point <- st_as_sf(dummy_point, coords = c("X", "Y"), crs = st_crs(points_ready))
-  dummy_point <- cbind(dummy_point, st_coordinates(dummy_point))
-  
-  # Mark dummy point with anchor flag before binding
-  # Referenced: dummy_point, st_as_sf, st_coordinates, area_ac
-  dummy_point$anchor_dummy <- TRUE
-  
-  # Add dummy point to ensure consistent hexbin grid anchoring
-  points_ready <- rbind(points_ready, dummy_point)
-  points_not_ready <- rbind(points_not_ready, dummy_point)
-
  # Calculate data bounds for coordinate limits (prevents basemap scale conflicts)
+  # Use actual data bounds without dummy points to avoid column mismatch
  x_limits <- c(
    floor(min(points_processed$X, na.rm = TRUE) * 20) / 20,  # Round down to avoid edge clipping
    ceiling(max(points_processed$X, na.rm = TRUE) * 20) / 20   # Round up for padding
@ -657,11 +638,6 @@ tryCatch({
      )
    )

-  # Remove dummy point rows after grid anchoring to prevent dummy cells in plot
-  # Referenced: points_ready, points_not_ready, anchor_dummy flag filtering
-  points_ready <- points_ready %>% filter(!anchor_dummy, na.rm = TRUE)
-  points_not_ready <- points_not_ready %>% filter(!anchor_dummy, na.rm = TRUE)
-
 }, error = function(e) {
  warning("Error creating hexbin map:", e$message)
 })