Normalize field detail column names for consistency and improve centroid calculation in cane supply report
This commit is contained in:
parent
e4e19df0c7
commit
14bd0fa47a
|
|
@ -2,7 +2,7 @@
|
|||
params:
|
||||
ref: "word-styles-reference-var1.docx"
|
||||
output_file: "CI_report.docx"
|
||||
report_date: !r Sys.Date()
|
||||
report_date: "2026-02-04" #!r Sys.Date()
|
||||
data_dir: "aura"
|
||||
mail_day: "Wednesday"
|
||||
borders: FALSE
|
||||
|
|
@ -179,6 +179,41 @@ if (dir.exists(kpi_data_dir)) {
|
|||
safe_log(paste("✓ Loaded field_details_table with", nrow(field_details_table), "fields"))
|
||||
safe_log(paste(" Columns:", paste(names(field_details_table), collapse=", ")))
|
||||
|
||||
# NORMALIZATION: Ensure critical column names match downstream expectations
|
||||
# Handle both "Field" and "Field_id" naming conventions
|
||||
if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
|
||||
field_details_table <- field_details_table %>%
|
||||
dplyr::rename(Field_id = Field)
|
||||
safe_log(" ✓ Normalized: renamed Field → Field_id")
|
||||
}
|
||||
|
||||
# Normalize other common column name variations
|
||||
column_mappings <- list(
|
||||
c("CV Value", "CV"),
|
||||
c("CV", "CV"), # Keep as-is
|
||||
c("Mean CI", "Mean_CI"),
|
||||
c("Mean_CI", "Mean_CI"), # Keep as-is
|
||||
c("Yield Forecast (t/ha)", "TCH_Forecasted"),
|
||||
c("TCH_Forecasted", "TCH_Forecasted"), # Keep as-is
|
||||
c("Gap Score", "Gap_Score"),
|
||||
c("Gap_Score", "Gap_Score"), # Keep as-is
|
||||
c("Growth Uniformity", "Growth_Uniformity"),
|
||||
c("Decline Risk", "Decline_Risk"),
|
||||
c("Weed Risk", "Weed_Risk"),
|
||||
c("Moran's I", "Morans_I")
|
||||
)
|
||||
|
||||
for (mapping in column_mappings) {
|
||||
old_name <- mapping[1]
|
||||
new_name <- mapping[2]
|
||||
if (old_name != new_name && old_name %in% names(field_details_table) && !new_name %in% names(field_details_table)) {
|
||||
field_details_table <- field_details_table %>%
|
||||
dplyr::rename(!!new_name := old_name)
|
||||
safe_log(paste(" ✓ Normalized:", old_name, "→", new_name))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# Only create summary_tables if not already loaded from RDS
|
||||
if (is.null(summary_tables)) {
|
||||
summary_tables <- list()
|
||||
|
|
@ -783,17 +818,18 @@ if (!exists("field_details_table") || is.null(field_details_table)) {
|
|||
}
|
||||
|
||||
# Create minimal field details table with actual data we have + NAs for missing KPI columns
|
||||
# IMPORTANT: Use column names that match downstream code expectations (no spaces, match exact names)
|
||||
field_details_table <- tibble::tibble(
|
||||
Field = field_names,
|
||||
`Field Size (acres)` = as.numeric(field_sizes),
|
||||
`Growth Uniformity` = NA_character_,
|
||||
`Yield Forecast (t/ha)` = NA_real_,
|
||||
`Gap Score` = NA_real_,
|
||||
`Decline Risk` = NA_character_,
|
||||
`Weed Risk` = NA_character_,
|
||||
`Mean CI` = NA_real_,
|
||||
`CV Value` = NA_real_,
|
||||
`Moran's I` = NA_real_
|
||||
Field_id = field_names,
|
||||
Acreage = as.numeric(field_sizes),
|
||||
Growth_Uniformity = NA_character_,
|
||||
TCH_Forecasted = NA_real_,
|
||||
Gap_Score = NA_real_,
|
||||
Decline_Risk = NA_character_,
|
||||
Weed_Risk = NA_character_,
|
||||
Mean_CI = NA_real_,
|
||||
CV = NA_real_,
|
||||
Morans_I = NA_real_
|
||||
)
|
||||
safe_log(paste("Created field_details_table from geometries for", nrow(field_details_table), "fields"))
|
||||
}
|
||||
|
|
@ -1353,6 +1389,7 @@ tryCatch({
|
|||
)
|
||||
|
||||
cat(paste(kpi_parts, collapse = " | "), "\n")
|
||||
cat("\n") # Extra newline for paragraph separation without creating empty pages
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1444,6 +1481,26 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
|
|||
}
|
||||
|
||||
# Join field sizes and ages to KPI data, simplified column selection
|
||||
# DEFENSIVE: Normalize field_details_table column names one more time before joining
|
||||
# Ensure it has Field_id column (regardless of whether it was from RDS or fallback)
|
||||
if (!is.null(field_details_table) && nrow(field_details_table) > 0) {
|
||||
# If Field exists but Field_id doesn't, rename it
|
||||
if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
|
||||
field_details_table <- field_details_table %>%
|
||||
dplyr::rename(Field_id = Field)
|
||||
}
|
||||
|
||||
# Ensure all expected KPI columns exist; add as NA if missing
|
||||
expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score",
|
||||
"Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation",
|
||||
"Decline_Severity", "Weed_Pressure_Risk")
|
||||
for (col in expected_cols) {
|
||||
if (!col %in% names(field_details_table)) {
|
||||
field_details_table[[col]] <- NA
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
field_details_clean <- field_details_table %>%
|
||||
left_join(field_sizes_df, by = c("Field_id" = "field")) %>%
|
||||
left_join(field_ages_df, by = c("Field_id" = "field")) %>%
|
||||
|
|
|
|||
|
|
@ -514,6 +514,8 @@ tryCatch({
|
|||
TARGET_CRS <- 4326 # WGS84 for web basemap compatibility (was 32736 UTM)
|
||||
|
||||
# Process polygons into points
|
||||
# IMPORTANT: Calculate centroids in projected CRS (UTM 36S for southern Africa) to avoid
|
||||
# st_centroid warnings about longitude/latitude data, then transform back to WGS84
|
||||
points_processed <- field_boundaries_sf %>%
|
||||
st_make_valid() %>%
|
||||
mutate(
|
||||
|
|
@ -529,8 +531,9 @@ tryCatch({
|
|||
analysis_data %>% select(Field_id, Status_trigger),
|
||||
by = c("field" = "Field_id")
|
||||
) %>%
|
||||
st_transform(crs = TARGET_CRS) %>%
|
||||
st_transform(crs = 32736) %>% # UTM zone 36S (southern Africa)
|
||||
st_centroid() %>%
|
||||
st_transform(crs = TARGET_CRS) %>%
|
||||
bind_cols(st_coordinates(.))
|
||||
|
||||
# Validate coordinates - check for NaN, Inf, or missing values
|
||||
|
|
@ -557,30 +560,8 @@ tryCatch({
|
|||
labels_vec[length(labels_vec)] <- ">30"
|
||||
labels_vec[1] <- "0.1"
|
||||
|
||||
# Create dummy point to anchor hexbin grids for consistency
|
||||
dummy_point <- data.frame(
|
||||
field = NA,
|
||||
sub_field = NA,
|
||||
area_ac = 0,
|
||||
Status_trigger = NA,
|
||||
X = min(points_processed$X, na.rm = TRUE),
|
||||
Y = min(points_processed$Y, na.rm = TRUE),
|
||||
geometry = NA
|
||||
)
|
||||
|
||||
# Convert dummy point to sf and add xy coordinates
|
||||
dummy_point <- st_as_sf(dummy_point, coords = c("X", "Y"), crs = st_crs(points_ready))
|
||||
dummy_point <- cbind(dummy_point, st_coordinates(dummy_point))
|
||||
|
||||
# Mark dummy point with anchor flag before binding
|
||||
# Referenced: dummy_point, st_as_sf, st_coordinates, area_ac
|
||||
dummy_point$anchor_dummy <- TRUE
|
||||
|
||||
# Add dummy point to ensure consistent hexbin grid anchoring
|
||||
points_ready <- rbind(points_ready, dummy_point)
|
||||
points_not_ready <- rbind(points_not_ready, dummy_point)
|
||||
|
||||
# Calculate data bounds for coordinate limits (prevents basemap scale conflicts)
|
||||
# Use actual data bounds without dummy points to avoid column mismatch
|
||||
x_limits <- c(
|
||||
floor(min(points_processed$X, na.rm = TRUE) * 20) / 20, # Round down to avoid edge clipping
|
||||
ceiling(max(points_processed$X, na.rm = TRUE) * 20) / 20 # Round up for padding
|
||||
|
|
@ -657,11 +638,6 @@ tryCatch({
|
|||
)
|
||||
)
|
||||
|
||||
# Remove dummy point rows after grid anchoring to prevent dummy cells in plot
|
||||
# Referenced: points_ready, points_not_ready, anchor_dummy flag filtering
|
||||
points_ready <- points_ready %>% filter(!anchor_dummy, na.rm = TRUE)
|
||||
points_not_ready <- points_not_ready %>% filter(!anchor_dummy, na.rm = TRUE)
|
||||
|
||||
}, error = function(e) {
|
||||
warning("Error creating hexbin map:", e$message)
|
||||
})
|
||||
|
|
|
|||
Loading…
Reference in a new issue