Normalize field detail column names for consistency and improve centroid calculation in cane supply report

This commit is contained in:
Timon 2026-02-17 14:00:32 +01:00
parent e4e19df0c7
commit 14bd0fa47a
2 changed files with 73 additions and 40 deletions

View file

@ -2,7 +2,7 @@
params:
ref: "word-styles-reference-var1.docx"
output_file: "CI_report.docx"
report_date: !r Sys.Date()
report_date: "2026-02-04" #!r Sys.Date()
data_dir: "aura"
mail_day: "Wednesday"
borders: FALSE
@ -179,6 +179,41 @@ if (dir.exists(kpi_data_dir)) {
safe_log(paste("✓ Loaded field_details_table with", nrow(field_details_table), "fields"))
safe_log(paste(" Columns:", paste(names(field_details_table), collapse=", ")))
# NORMALIZATION: Ensure critical column names match downstream expectations
# Handle both "Field" and "Field_id" naming conventions
if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
field_details_table <- field_details_table %>%
dplyr::rename(Field_id = Field)
safe_log(" ✓ Normalized: renamed Field → Field_id")
}
# Normalize other common column name variations
column_mappings <- list(
c("CV Value", "CV"),
c("CV", "CV"), # Keep as-is
c("Mean CI", "Mean_CI"),
c("Mean_CI", "Mean_CI"), # Keep as-is
c("Yield Forecast (t/ha)", "TCH_Forecasted"),
c("TCH_Forecasted", "TCH_Forecasted"), # Keep as-is
c("Gap Score", "Gap_Score"),
c("Gap_Score", "Gap_Score"), # Keep as-is
c("Growth Uniformity", "Growth_Uniformity"),
c("Decline Risk", "Decline_Risk"),
c("Weed Risk", "Weed_Risk"),
c("Moran's I", "Morans_I")
)
for (mapping in column_mappings) {
old_name <- mapping[1]
new_name <- mapping[2]
if (old_name != new_name && old_name %in% names(field_details_table) && !new_name %in% names(field_details_table)) {
field_details_table <- field_details_table %>%
dplyr::rename(!!new_name := old_name)
safe_log(paste(" ✓ Normalized:", old_name, "→", new_name))
}
}
# Only create summary_tables if not already loaded from RDS
if (is.null(summary_tables)) {
summary_tables <- list()
@ -783,17 +818,18 @@ if (!exists("field_details_table") || is.null(field_details_table)) {
}
# Create minimal field details table with actual data we have + NAs for missing KPI columns
# IMPORTANT: Use column names that match downstream code expectations (no spaces, match exact names)
field_details_table <- tibble::tibble(
Field = field_names,
`Field Size (acres)` = as.numeric(field_sizes),
`Growth Uniformity` = NA_character_,
`Yield Forecast (t/ha)` = NA_real_,
`Gap Score` = NA_real_,
`Decline Risk` = NA_character_,
`Weed Risk` = NA_character_,
`Mean CI` = NA_real_,
`CV Value` = NA_real_,
`Moran's I` = NA_real_
Field_id = field_names,
Acreage = as.numeric(field_sizes),
Growth_Uniformity = NA_character_,
TCH_Forecasted = NA_real_,
Gap_Score = NA_real_,
Decline_Risk = NA_character_,
Weed_Risk = NA_character_,
Mean_CI = NA_real_,
CV = NA_real_,
Morans_I = NA_real_
)
safe_log(paste("Created field_details_table from geometries for", nrow(field_details_table), "fields"))
}
@ -1353,6 +1389,7 @@ tryCatch({
)
cat(paste(kpi_parts, collapse = " | "), "\n")
cat("\n") # Extra newline for paragraph separation without creating empty pages
}
}
@ -1444,6 +1481,26 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
}
# Join field sizes and ages to KPI data, simplified column selection
# DEFENSIVE: Normalize field_details_table column names one more time before joining
# Ensure it has Field_id column (regardless of whether it was from RDS or fallback)
if (!is.null(field_details_table) && nrow(field_details_table) > 0) {
# If Field exists but Field_id doesn't, rename it
if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
field_details_table <- field_details_table %>%
dplyr::rename(Field_id = Field)
}
# Ensure all expected KPI columns exist; add as NA if missing
expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score",
"Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation",
"Decline_Severity", "Weed_Pressure_Risk")
for (col in expected_cols) {
if (!col %in% names(field_details_table)) {
field_details_table[[col]] <- NA
}
}
}
field_details_clean <- field_details_table %>%
left_join(field_sizes_df, by = c("Field_id" = "field")) %>%
left_join(field_ages_df, by = c("Field_id" = "field")) %>%

View file

@ -514,6 +514,8 @@ tryCatch({
TARGET_CRS <- 4326 # WGS84 for web basemap compatibility (was 32736 UTM)
# Process polygons into points
# IMPORTANT: Calculate centroids in projected CRS (UTM 36S for southern Africa) to avoid
# st_centroid warnings about longitude/latitude data, then transform back to WGS84
points_processed <- field_boundaries_sf %>%
st_make_valid() %>%
mutate(
@ -529,8 +531,9 @@ tryCatch({
analysis_data %>% select(Field_id, Status_trigger),
by = c("field" = "Field_id")
) %>%
st_transform(crs = TARGET_CRS) %>%
st_transform(crs = 32736) %>% # UTM zone 36S (southern Africa)
st_centroid() %>%
st_transform(crs = TARGET_CRS) %>%
bind_cols(st_coordinates(.))
# Validate coordinates - check for NaN, Inf, or missing values
@ -557,30 +560,8 @@ tryCatch({
labels_vec[length(labels_vec)] <- ">30"
labels_vec[1] <- "0.1"
# Create dummy point to anchor hexbin grids for consistency
dummy_point <- data.frame(
field = NA,
sub_field = NA,
area_ac = 0,
Status_trigger = NA,
X = min(points_processed$X, na.rm = TRUE),
Y = min(points_processed$Y, na.rm = TRUE),
geometry = NA
)
# Convert dummy point to sf and add xy coordinates
dummy_point <- st_as_sf(dummy_point, coords = c("X", "Y"), crs = st_crs(points_ready))
dummy_point <- cbind(dummy_point, st_coordinates(dummy_point))
# Mark dummy point with anchor flag before binding
# Referenced: dummy_point, st_as_sf, st_coordinates, area_ac
dummy_point$anchor_dummy <- TRUE
# Add dummy point to ensure consistent hexbin grid anchoring
points_ready <- rbind(points_ready, dummy_point)
points_not_ready <- rbind(points_not_ready, dummy_point)
# Calculate data bounds for coordinate limits (prevents basemap scale conflicts)
# Use actual data bounds without dummy points to avoid column mismatch
x_limits <- c(
floor(min(points_processed$X, na.rm = TRUE) * 20) / 20, # Round down to avoid edge clipping
ceiling(max(points_processed$X, na.rm = TRUE) * 20) / 20 # Round up for padding
@ -657,11 +638,6 @@ tryCatch({
)
)
# Remove dummy point rows after grid anchoring to prevent dummy cells in plot
# Referenced: points_ready, points_not_ready, anchor_dummy flag filtering
points_ready <- points_ready %>% filter(!anchor_dummy, na.rm = TRUE)
points_not_ready <- points_not_ready %>% filter(!anchor_dummy, na.rm = TRUE)
}, error = function(e) {
warning("Error creating hexbin map:", e$message)
})