Normalize field detail column names for consistency and improve centroid calculation in cane supply report
This commit is contained in:
parent
e4e19df0c7
commit
14bd0fa47a
|
|
@ -2,7 +2,7 @@
|
||||||
params:
|
params:
|
||||||
ref: "word-styles-reference-var1.docx"
|
ref: "word-styles-reference-var1.docx"
|
||||||
output_file: "CI_report.docx"
|
output_file: "CI_report.docx"
|
||||||
report_date: !r Sys.Date()
|
report_date: "2026-02-04" #!r Sys.Date()
|
||||||
data_dir: "aura"
|
data_dir: "aura"
|
||||||
mail_day: "Wednesday"
|
mail_day: "Wednesday"
|
||||||
borders: FALSE
|
borders: FALSE
|
||||||
|
|
@ -179,6 +179,41 @@ if (dir.exists(kpi_data_dir)) {
|
||||||
safe_log(paste("✓ Loaded field_details_table with", nrow(field_details_table), "fields"))
|
safe_log(paste("✓ Loaded field_details_table with", nrow(field_details_table), "fields"))
|
||||||
safe_log(paste(" Columns:", paste(names(field_details_table), collapse=", ")))
|
safe_log(paste(" Columns:", paste(names(field_details_table), collapse=", ")))
|
||||||
|
|
||||||
|
# NORMALIZATION: Ensure critical column names match downstream expectations
|
||||||
|
# Handle both "Field" and "Field_id" naming conventions
|
||||||
|
if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
|
||||||
|
field_details_table <- field_details_table %>%
|
||||||
|
dplyr::rename(Field_id = Field)
|
||||||
|
safe_log(" ✓ Normalized: renamed Field → Field_id")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Normalize other common column name variations
|
||||||
|
column_mappings <- list(
|
||||||
|
c("CV Value", "CV"),
|
||||||
|
c("CV", "CV"), # Keep as-is
|
||||||
|
c("Mean CI", "Mean_CI"),
|
||||||
|
c("Mean_CI", "Mean_CI"), # Keep as-is
|
||||||
|
c("Yield Forecast (t/ha)", "TCH_Forecasted"),
|
||||||
|
c("TCH_Forecasted", "TCH_Forecasted"), # Keep as-is
|
||||||
|
c("Gap Score", "Gap_Score"),
|
||||||
|
c("Gap_Score", "Gap_Score"), # Keep as-is
|
||||||
|
c("Growth Uniformity", "Growth_Uniformity"),
|
||||||
|
c("Decline Risk", "Decline_Risk"),
|
||||||
|
c("Weed Risk", "Weed_Risk"),
|
||||||
|
c("Moran's I", "Morans_I")
|
||||||
|
)
|
||||||
|
|
||||||
|
for (mapping in column_mappings) {
|
||||||
|
old_name <- mapping[1]
|
||||||
|
new_name <- mapping[2]
|
||||||
|
if (old_name != new_name && old_name %in% names(field_details_table) && !new_name %in% names(field_details_table)) {
|
||||||
|
field_details_table <- field_details_table %>%
|
||||||
|
dplyr::rename(!!new_name := old_name)
|
||||||
|
safe_log(paste(" ✓ Normalized:", old_name, "→", new_name))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# Only create summary_tables if not already loaded from RDS
|
# Only create summary_tables if not already loaded from RDS
|
||||||
if (is.null(summary_tables)) {
|
if (is.null(summary_tables)) {
|
||||||
summary_tables <- list()
|
summary_tables <- list()
|
||||||
|
|
@ -783,17 +818,18 @@ if (!exists("field_details_table") || is.null(field_details_table)) {
|
||||||
}
|
}
|
||||||
|
|
||||||
# Create minimal field details table with actual data we have + NAs for missing KPI columns
|
# Create minimal field details table with actual data we have + NAs for missing KPI columns
|
||||||
|
# IMPORTANT: Use column names that match downstream code expectations (no spaces, match exact names)
|
||||||
field_details_table <- tibble::tibble(
|
field_details_table <- tibble::tibble(
|
||||||
Field = field_names,
|
Field_id = field_names,
|
||||||
`Field Size (acres)` = as.numeric(field_sizes),
|
Acreage = as.numeric(field_sizes),
|
||||||
`Growth Uniformity` = NA_character_,
|
Growth_Uniformity = NA_character_,
|
||||||
`Yield Forecast (t/ha)` = NA_real_,
|
TCH_Forecasted = NA_real_,
|
||||||
`Gap Score` = NA_real_,
|
Gap_Score = NA_real_,
|
||||||
`Decline Risk` = NA_character_,
|
Decline_Risk = NA_character_,
|
||||||
`Weed Risk` = NA_character_,
|
Weed_Risk = NA_character_,
|
||||||
`Mean CI` = NA_real_,
|
Mean_CI = NA_real_,
|
||||||
`CV Value` = NA_real_,
|
CV = NA_real_,
|
||||||
`Moran's I` = NA_real_
|
Morans_I = NA_real_
|
||||||
)
|
)
|
||||||
safe_log(paste("Created field_details_table from geometries for", nrow(field_details_table), "fields"))
|
safe_log(paste("Created field_details_table from geometries for", nrow(field_details_table), "fields"))
|
||||||
}
|
}
|
||||||
|
|
@ -1353,6 +1389,7 @@ tryCatch({
|
||||||
)
|
)
|
||||||
|
|
||||||
cat(paste(kpi_parts, collapse = " | "), "\n")
|
cat(paste(kpi_parts, collapse = " | "), "\n")
|
||||||
|
cat("\n") # Extra newline for paragraph separation without creating empty pages
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1444,6 +1481,26 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
|
||||||
}
|
}
|
||||||
|
|
||||||
# Join field sizes and ages to KPI data, simplified column selection
|
# Join field sizes and ages to KPI data, simplified column selection
|
||||||
|
# DEFENSIVE: Normalize field_details_table column names one more time before joining
|
||||||
|
# Ensure it has Field_id column (regardless of whether it was from RDS or fallback)
|
||||||
|
if (!is.null(field_details_table) && nrow(field_details_table) > 0) {
|
||||||
|
# If Field exists but Field_id doesn't, rename it
|
||||||
|
if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
|
||||||
|
field_details_table <- field_details_table %>%
|
||||||
|
dplyr::rename(Field_id = Field)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Ensure all expected KPI columns exist; add as NA if missing
|
||||||
|
expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score",
|
||||||
|
"Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation",
|
||||||
|
"Decline_Severity", "Weed_Pressure_Risk")
|
||||||
|
for (col in expected_cols) {
|
||||||
|
if (!col %in% names(field_details_table)) {
|
||||||
|
field_details_table[[col]] <- NA
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
field_details_clean <- field_details_table %>%
|
field_details_clean <- field_details_table %>%
|
||||||
left_join(field_sizes_df, by = c("Field_id" = "field")) %>%
|
left_join(field_sizes_df, by = c("Field_id" = "field")) %>%
|
||||||
left_join(field_ages_df, by = c("Field_id" = "field")) %>%
|
left_join(field_ages_df, by = c("Field_id" = "field")) %>%
|
||||||
|
|
|
||||||
|
|
@ -514,6 +514,8 @@ tryCatch({
|
||||||
TARGET_CRS <- 4326 # WGS84 for web basemap compatibility (was 32736 UTM)
|
TARGET_CRS <- 4326 # WGS84 for web basemap compatibility (was 32736 UTM)
|
||||||
|
|
||||||
# Process polygons into points
|
# Process polygons into points
|
||||||
|
# IMPORTANT: Calculate centroids in projected CRS (UTM 36S for southern Africa) to avoid
|
||||||
|
# st_centroid warnings about longitude/latitude data, then transform back to WGS84
|
||||||
points_processed <- field_boundaries_sf %>%
|
points_processed <- field_boundaries_sf %>%
|
||||||
st_make_valid() %>%
|
st_make_valid() %>%
|
||||||
mutate(
|
mutate(
|
||||||
|
|
@ -529,8 +531,9 @@ tryCatch({
|
||||||
analysis_data %>% select(Field_id, Status_trigger),
|
analysis_data %>% select(Field_id, Status_trigger),
|
||||||
by = c("field" = "Field_id")
|
by = c("field" = "Field_id")
|
||||||
) %>%
|
) %>%
|
||||||
st_transform(crs = TARGET_CRS) %>%
|
st_transform(crs = 32736) %>% # UTM zone 36S (southern Africa)
|
||||||
st_centroid() %>%
|
st_centroid() %>%
|
||||||
|
st_transform(crs = TARGET_CRS) %>%
|
||||||
bind_cols(st_coordinates(.))
|
bind_cols(st_coordinates(.))
|
||||||
|
|
||||||
# Validate coordinates - check for NaN, Inf, or missing values
|
# Validate coordinates - check for NaN, Inf, or missing values
|
||||||
|
|
@ -557,30 +560,8 @@ tryCatch({
|
||||||
labels_vec[length(labels_vec)] <- ">30"
|
labels_vec[length(labels_vec)] <- ">30"
|
||||||
labels_vec[1] <- "0.1"
|
labels_vec[1] <- "0.1"
|
||||||
|
|
||||||
# Create dummy point to anchor hexbin grids for consistency
|
|
||||||
dummy_point <- data.frame(
|
|
||||||
field = NA,
|
|
||||||
sub_field = NA,
|
|
||||||
area_ac = 0,
|
|
||||||
Status_trigger = NA,
|
|
||||||
X = min(points_processed$X, na.rm = TRUE),
|
|
||||||
Y = min(points_processed$Y, na.rm = TRUE),
|
|
||||||
geometry = NA
|
|
||||||
)
|
|
||||||
|
|
||||||
# Convert dummy point to sf and add xy coordinates
|
|
||||||
dummy_point <- st_as_sf(dummy_point, coords = c("X", "Y"), crs = st_crs(points_ready))
|
|
||||||
dummy_point <- cbind(dummy_point, st_coordinates(dummy_point))
|
|
||||||
|
|
||||||
# Mark dummy point with anchor flag before binding
|
|
||||||
# Referenced: dummy_point, st_as_sf, st_coordinates, area_ac
|
|
||||||
dummy_point$anchor_dummy <- TRUE
|
|
||||||
|
|
||||||
# Add dummy point to ensure consistent hexbin grid anchoring
|
|
||||||
points_ready <- rbind(points_ready, dummy_point)
|
|
||||||
points_not_ready <- rbind(points_not_ready, dummy_point)
|
|
||||||
|
|
||||||
# Calculate data bounds for coordinate limits (prevents basemap scale conflicts)
|
# Calculate data bounds for coordinate limits (prevents basemap scale conflicts)
|
||||||
|
# Use actual data bounds without dummy points to avoid column mismatch
|
||||||
x_limits <- c(
|
x_limits <- c(
|
||||||
floor(min(points_processed$X, na.rm = TRUE) * 20) / 20, # Round down to avoid edge clipping
|
floor(min(points_processed$X, na.rm = TRUE) * 20) / 20, # Round down to avoid edge clipping
|
||||||
ceiling(max(points_processed$X, na.rm = TRUE) * 20) / 20 # Round up for padding
|
ceiling(max(points_processed$X, na.rm = TRUE) * 20) / 20 # Round up for padding
|
||||||
|
|
@ -657,11 +638,6 @@ tryCatch({
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Remove dummy point rows after grid anchoring to prevent dummy cells in plot
|
|
||||||
# Referenced: points_ready, points_not_ready, anchor_dummy flag filtering
|
|
||||||
points_ready <- points_ready %>% filter(!anchor_dummy, na.rm = TRUE)
|
|
||||||
points_not_ready <- points_not_ready %>% filter(!anchor_dummy, na.rm = TRUE)
|
|
||||||
|
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
warning("Error creating hexbin map:", e$message)
|
warning("Error creating hexbin map:", e$message)
|
||||||
})
|
})
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue