SmartCane/r_app/80_utils_cane_supply.R
Timon e4e19df0c7 Update CI report parameters and improve map legend configurations
- Changed report date in CI report for cane supply to "2026-02-04".
- Updated output file naming convention for agronomic support report to reflect new report date.
- Enhanced map creation functions to allow customizable legend positions and improved layout settings.
- Adjusted widths for map arrangements to ensure better visual representation.
- Fixed minor issues in ggplot aesthetics for clearer legend positioning and improved readability.
- Corrected field size unit from hectares to acres in KPI summary generation.
2026-02-17 13:46:43 +01:00

605 lines
22 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# 80_UTILS_CANE_SUPPLY.R
# ============================================================================
# CANE SUPPLY CLIENT-SPECIFIC UTILITIES (SCRIPT 80 - CLIENT TYPE: cane_supply)
#
# Contains ANGATA and other cane supply-specific KPI and reporting functions.
#
# Currently, CANE_SUPPLY clients use the common utilities from 80_utils_common.R:
# - Weekly statistics (calculate_field_statistics, calculate_kpi_trends)
# - Field analysis reporting (generate_field_analysis_summary)
# - Excel export (export_field_analysis_excel)
#
# This file is structured to accommodate future ANGATA-specific functionality such as:
# - Custom yield models
# - Harvest readiness criteria
# - Supply chain integration hooks
# - ANGATA-specific alerting and messaging
#
# Orchestrator: (Placeholder - uses common functions)
# Dependencies: 00_common_utils.R, 80_utils_common.R
# Used by: 80_calculate_kpis.R (when client_type == "cane_supply")
# ============================================================================
library(terra)
library(sf)
library(dplyr)
library(tidyr)
library(readxl)
library(writexl)
# ============================================================================
# ANGATA-SPECIFIC HELPER FUNCTIONS (Placeholder Section)
# ============================================================================
#' Calculate acreage for each field from geometry
#' @param field_boundaries_sf sf object with field geometries
#' @return data.frame with field and acreage columns
calculate_field_acreages <- function(field_boundaries_sf) {
tryCatch({
lookup_df <- field_boundaries_sf %>%
sf::st_drop_geometry() %>%
as.data.frame() %>%
mutate(
geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) {
tryCatch({
sf::st_is_valid(field_boundaries_sf[idx, ])
}, error = function(e) FALSE)
}),
area_ha = 0
)
# Calculate area for valid geometries
valid_indices <- which(lookup_df$geometry_valid)
areas_ha <- vapply(valid_indices, function(idx) {
tryCatch({
area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ]))
area_m2 / 10000
}, error = function(e) NA_real_)
}, numeric(1))
lookup_df$area_ha[valid_indices] <- areas_ha
# Convert hectares to acres
lookup_df %>%
mutate(acreage = area_ha / 0.404686) %>%
select(field, acreage)
}, error = function(e) {
message(paste("Warning: Could not calculate acreages from geometries -", e$message))
data.frame(field = character(0), acreage = numeric(0))
})
}
#' Calculate age in weeks from planting date
#'
#' @param planting_date Date of planting
#' @param reference_date Date to calculate age relative to (typically end_date)
#' @return Numeric age in weeks (rounded to nearest week)
calculate_age_week <- function(planting_date, reference_date) {
if (is.na(planting_date)) {
return(NA_real_)
}
round(as.numeric(difftime(reference_date, planting_date, units = "weeks")), 0)
}
#' Assign crop phase based on age in weeks
#'
#' @param age_week Numeric age in weeks
#' @return Character phase name
calculate_phase <- function(age_week) {
if (is.na(age_week)) return(NA_character_)
if (age_week >= 0 & age_week < 4) return("Germination")
if (age_week >= 4 & age_week < 17) return("Tillering")
if (age_week >= 17 & age_week < 39) return("Grand Growth")
if (age_week >= 39) return("Maturation")
NA_character_
}
#' Bin percentage into 10% intervals with special handling for 90-100%
#'
#' @param pct Numeric percentage value (0-100)
#' @return Character bin label
bin_percentage <- function(pct) {
if (is.na(pct)) return(NA_character_)
if (pct >= 95) return("95-100%")
else if (pct >= 90) return("90-95%")
else if (pct >= 80) return("80-90%")
else if (pct >= 70) return("70-80%")
else if (pct >= 60) return("60-70%")
else if (pct >= 50) return("50-60%")
else if (pct >= 40) return("40-50%")
else if (pct >= 30) return("30-40%")
else if (pct >= 20) return("20-30%")
else if (pct >= 10) return("10-20%")
else return("0-10%")
}
#' Calculate germination progress from CI threshold percentage
#'
#' @param pct_pixels_ci_gte_2 Percentage of pixels with CI >= 2
#' @return Character bin label
calculate_germination_progress <- function(pct_pixels_ci_gte_2) {
bin_percentage(pct_pixels_ci_gte_2)
}
#' Categorize CV trend (long-term slope) into qualitative labels
#'
#' @param cv_slope Numeric slope from CV trend analysis
#' @return Character category: "More uniform", "Stable uniformity", or "Less uniform"
categorize_cv_trend_long_term <- function(cv_slope) {
if (is.na(cv_slope)) {
return(NA_character_)
} else if (cv_slope < -0.01) {
return("More uniform")
} else if (cv_slope > 0.01) {
return("Less uniform")
} else {
return("Stable uniformity")
}
}
#' Determine status alert based on harvest probability and crop health
#' Priority order:
#' 1. Ready for harvest-check (imminent + mature ≥12 months)
#' 2. Strong decline in crop health (drop ≥2 points but still >1.5)
#' 3. Harvested/bare (Mean CI < 1.5)
#' @param imminent_prob Numeric harvest probability
#' @param age_week Numeric age in weeks
#' @param weekly_ci_change Numeric weekly CI change
#' @param mean_ci Numeric mean CI value
#' @return Character status alert or NA
calculate_status_alert <- function(imminent_prob, age_week, weekly_ci_change, mean_ci) {
# Priority 1: Ready for harvest-check
if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_week) && age_week >= 52) {
return("Ready for harvest-check")
}
# Priority 2: Strong decline
if (!is.na(weekly_ci_change) && weekly_ci_change <= -2.0 && !is.na(mean_ci) && mean_ci > 1.5) {
return("Strong decline in crop health")
}
# Priority 3: Harvested/bare
if (!is.na(mean_ci) && mean_ci < 1.5) {
return("Harvested/bare")
}
# Fallback: no alert
NA_character_
}
#' Build complete per-field KPI dataframe with all 22 columns
#' @param current_stats data.frame with current week statistics from load_or_calculate_weekly_stats
#' @param planting_dates data.frame with field_id and planting_date columns
#' @param imminent_prob_data data.frame with Field_id and Imminent_prob_actual columns (or NULL)
#' @param gap_scores_df data.frame with Field_id and gap_score columns (or NULL)
#' @param field_boundaries_sf sf object with field geometries
#' @param end_date Date object for current report date
#' @return data.frame with all 22 KPI columns
calculate_all_field_kpis <- function(current_stats,
planting_dates,
imminent_prob_data,
gap_scores_df,
field_boundaries_sf,
end_date) {
message("\nBuilding final field analysis output...")
# Pre-calculate acreages
acreage_lookup <- calculate_field_acreages(field_boundaries_sf)
field_analysis_df <- current_stats %>%
mutate(
# Column 2: Farm_Section (user fills manually)
Farm_Section = NA_character_,
# Column 3: Field_name (from GeoJSON)
Field_name = Field_id,
# Column 4: Acreage (from geometry)
Acreage = {
acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)]
if_else(is.na(acreages_vec), 0, acreages_vec)
},
# Column 8: Last_harvest_or_planting_date (from harvest.xlsx)
Last_harvest_or_planting_date = {
planting_dates$planting_date[match(Field_id, planting_dates$field_id)]
},
# Column 9: Age_week (calculated)
Age_week = {
sapply(seq_len(nrow(current_stats)), function(idx) {
calculate_age_week(Last_harvest_or_planting_date[idx], end_date)
})
},
# Column 10: Phase (based on Age_week)
Phase = sapply(Age_week, calculate_phase),
# Column 12: Germination_progress (binned Pct_pixels_CI_gte_2)
Germination_progress = sapply(Pct_pixels_CI_gte_2, calculate_germination_progress),
# Column 13: Imminent_prob (from script 31 or NA)
Imminent_prob = {
if (!is.null(imminent_prob_data)) {
imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)]
} else {
rep(NA_real_, nrow(current_stats))
}
},
# Column 14: Status_Alert (multi-priority logic)
Status_Alert = {
sapply(seq_len(nrow(current_stats)), function(idx) {
calculate_status_alert(
Imminent_prob[idx],
Age_week[idx],
Weekly_ci_change[idx],
Mean_CI[idx]
)
})
},
# Column 19b: CV_Trend_Long_Term_Category (categorical slope)
CV_Trend_Long_Term_Category = sapply(current_stats$CV_Trend_Long_Term, categorize_cv_trend_long_term),
# Column 21: Cloud_pct_clear (binned into intervals)
Cloud_pct_clear = sapply(Cloud_pct_clear, bin_percentage),
# Column 22: Gap_score (2σ method)
Gap_score = {
if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)]
} else {
rep(NA_real_, nrow(current_stats))
}
}
) %>%
select(
all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert",
"Last_harvest_or_planting_date", "Age_week", "Phase",
"Germination_progress",
"Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles",
"CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
"Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score"))
)
message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns"))
return(field_analysis_df)
}
#' Aggregate per-field data into farm-level KPI summary
#'
#' @param field_analysis_df data.frame with per-field KPI data
#' @param current_week Numeric current week number
#' @param current_year Numeric current year
#' @param end_date Date object for current report date
#' @return List with phase_distribution, status_distribution, cloud_distribution, overall_stats
calculate_farm_level_kpis <- function(field_analysis_df, current_week, current_year, end_date) {
cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n")
# Filter to only fields with actual data
field_data <- field_analysis_df %>%
filter(!is.na(Mean_CI) & !is.na(Acreage)) %>%
filter(Acreage > 0)
if (nrow(field_data) == 0) {
message("No valid field data for farm-level aggregation")
return(NULL)
}
farm_summary <- list()
# 1. PHASE DISTRIBUTION
phase_dist <- field_data %>%
group_by(Phase) %>%
summarise(
num_fields = n(),
acreage = sum(Acreage, na.rm = TRUE),
.groups = 'drop'
) %>%
rename(Category = Phase)
farm_summary$phase_distribution <- phase_dist
# 2. STATUS ALERT DISTRIBUTION
status_dist <- field_data %>%
group_by(Status_Alert) %>%
summarise(
num_fields = n(),
acreage = sum(Acreage, na.rm = TRUE),
.groups = 'drop'
) %>%
rename(Category = Status_Alert)
farm_summary$status_distribution <- status_dist
# 3. CLOUD COVERAGE DISTRIBUTION
cloud_dist <- field_data %>%
group_by(Cloud_category) %>%
summarise(
num_fields = n(),
acreage = sum(Acreage, na.rm = TRUE),
.groups = 'drop'
) %>%
rename(Category = Cloud_category)
farm_summary$cloud_distribution <- cloud_dist
# 4. OVERALL STATISTICS
farm_summary$overall_stats <- data.frame(
total_fields = nrow(field_data),
total_acreage = sum(field_data$Acreage, na.rm = TRUE),
mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2),
median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2),
mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4),
week = current_week,
year = current_year,
date = as.character(end_date)
)
# Print summaries
cat("\n--- PHASE DISTRIBUTION ---\n")
print(phase_dist)
cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n")
print(status_dist)
cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n")
print(cloud_dist)
cat("\n--- OVERALL FARM STATISTICS ---\n")
print(farm_summary$overall_stats)
return(farm_summary)
}
# ============================================================================
# ORCHESTRATOR FOR CANE_SUPPLY WORKFLOWS
# ============================================================================
#' Main orchestrator for CANE_SUPPLY per-field KPI workflow
#'
#' This function coordinates all KPI calculations for the per-field analysis workflow.
#' It loads historical data, calculates current/previous week statistics, computes
#' all 22 KPI columns, and aggregates farm-level summaries.
#'
#' @param setup List with directory paths (kpi_reports_dir, data_dir, etc.)
#' @param client_config List with workflow configuration (script_91_compatible, outputs)
#' @param end_date Date object for current report date
#' @param project_dir Character project identifier
#' @param weekly_mosaic Character path to weekly mosaic directory
#' @param daily_vals_dir Character path to daily values directory
#' @param field_boundaries_sf sf object with field geometries
#' @param data_dir Character path to data directory
#' @return List with field_analysis_df, farm_kpi_results, export_paths
calculate_field_analysis_cane_supply <- function(setup,
client_config,
end_date,
project_dir,
weekly_mosaic,
daily_vals_dir,
field_boundaries_sf,
data_dir) {
message("\n", strrep("=", 70))
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
message(strrep("=", 70))
reports_dir <- file.path(setup$reports_dir, "kpis")
# ========== PHASE 1: WEEKLY ANALYSIS SETUP ==========
message("\n", strrep("-", 70))
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ")
message(strrep("-", 70))
weeks <- calculate_week_numbers(end_date)
current_week <- weeks$current_week
current_year <- weeks$current_year
previous_week <- weeks$previous_week
previous_year <- weeks$previous_year
message(paste("Week:", current_week, "/ Year (ISO 8601):", current_year))
# Find per-field weekly mosaics
message("Finding per-field weekly mosaics...")
if (!dir.exists(weekly_mosaic)) {
stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic,
"\nScript 40 (mosaic creation) must be run first."))
}
field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE)
field_dirs <- field_dirs[field_dirs != ""]
if (length(field_dirs) == 0) {
stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic,
"\nScript 40 must create weekly_mosaic/{FIELD}/ structure."))
}
# Verify we have mosaics for this week
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year)
per_field_files <- c()
for (field in field_dirs) {
field_mosaic_dir <- file.path(weekly_mosaic, field)
files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
if (length(files) > 0) {
per_field_files <- c(per_field_files, files)
}
}
if (length(per_field_files) == 0) {
stop(paste("ERROR: No mosaics found for week", current_week, "year", current_year,
"\nExpected pattern:", single_file_pattern,
"\nChecked:", weekly_mosaic))
}
message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics"))
# ========== PHASE 2: LOAD HISTORICAL DATA ==========
message("\nLoading historical field data for trend calculations...")
num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG)
message(paste(" Attempting to load up to", num_weeks_to_load, "weeks of historical data..."))
allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv)
historical_data <- load_historical_field_data(
project_dir, current_week, current_year, reports_dir,
num_weeks = num_weeks_to_load,
auto_generate = allow_auto_gen,
field_boundaries_sf = field_boundaries_sf,
daily_vals_dir = daily_vals_dir
)
# ========== PHASE 3: LOAD PLANTING DATES ==========
message("\nLoading harvest data from harvest.xlsx for planting dates...")
harvest_file_path <- file.path(data_dir, "harvest.xlsx")
harvesting_data <- tryCatch({
if (file.exists(harvest_file_path)) {
harvest_raw <- readxl::read_excel(harvest_file_path)
harvest_raw$season_start <- as.Date(harvest_raw$season_start)
harvest_raw$season_end <- as.Date(harvest_raw$season_end)
message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows"))
harvest_raw
} else {
message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path))
NULL
}
}, error = function(e) {
message(paste(" ERROR loading harvest.xlsx:", e$message))
NULL
})
planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
if (is.null(planting_dates) || nrow(planting_dates) == 0) {
message("WARNING: No planting dates available. Using NA for all fields.")
planting_dates <- data.frame(
field_id = field_boundaries_sf$field,
planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)),
stringsAsFactors = FALSE
)
}
# ========== PHASE 4: CALCULATE WEEKLY STATISTICS ==========
message("\nUsing modular RDS-based approach for weekly statistics...")
# Current week
message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...")
current_stats <- load_or_calculate_weekly_stats(
week_num = current_week,
year = current_year,
project_dir = project_dir,
field_boundaries_sf = field_boundaries_sf,
mosaic_dir = weekly_mosaic,
reports_dir = reports_dir,
report_date = end_date
)
message(paste(" ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week"))
# Previous week
message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...")
prev_report_date <- end_date - 7
prev_stats <- load_or_calculate_weekly_stats(
week_num = previous_week,
year = previous_year,
project_dir = project_dir,
field_boundaries_sf = field_boundaries_sf,
mosaic_dir = weekly_mosaic,
reports_dir = reports_dir,
report_date = prev_report_date
)
message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week"))
# ========== PHASE 5: CALCULATE TRENDS ==========
message("\n3. Calculating trend columns...")
current_stats <- calculate_kpi_trends(
current_stats, prev_stats,
project_dir = project_dir,
reports_dir = reports_dir,
current_week = current_week,
year = current_year
)
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
# ========== PHASE 6: LOAD HARVEST PROBABILITIES ==========
message("\n4. Loading harvest probabilities from script 31...")
harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats")
harvest_prob_file <- file.path(harvest_prob_dir,
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year))
message(paste(" Looking for:", harvest_prob_file))
imminent_prob_data <- tryCatch({
if (file.exists(harvest_prob_file)) {
prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE,
col_types = readr::cols(.default = readr::col_character()))
message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields"))
prob_df %>%
select(field, imminent_prob, detected_prob) %>%
rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob)
} else {
message(paste(" INFO: Harvest probabilities not available (script 31 not run)"))
NULL
}
}, error = function(e) {
message(paste(" WARNING: Could not load harvest probabilities:", e$message))
NULL
})
# ========== PHASE 7: CALCULATE GAP SCORES ==========
gap_scores_df <- calculate_gap_scores(per_field_files, field_boundaries_sf)
# ========== PHASE 8: BUILD FINAL PER-FIELD DATAFRAME ==========
field_analysis_df <- calculate_all_field_kpis(
current_stats = current_stats,
planting_dates = planting_dates,
imminent_prob_data = imminent_prob_data,
gap_scores_df = gap_scores_df,
field_boundaries_sf = field_boundaries_sf,
end_date = end_date
)
# ========== PHASE 9: EXPORT PER-FIELD RESULTS ==========
export_paths <- export_field_analysis_excel(
field_analysis_df,
NULL,
project_dir,
current_week,
current_year,
reports_dir
)
# cat("\n--- Per-field Results (first 10) ---\n")
# available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category")
# available_cols <- available_cols[available_cols %in% names(field_analysis_df)]
# if (length(available_cols) > 0) {
# print(head(field_analysis_df[, available_cols], 10))
# }
# # ========== PHASE 10: CALCULATE FARM-LEVEL KPIS ==========
# farm_kpi_results <- calculate_farm_level_kpis(
# field_analysis_df,
# current_week,
# current_year,
# end_date
# )
}
# ============================================================================
# FUTURE EXTENSION POINTS
# ============================================================================
# Placeholder for ANGATA-specific utilities that may be added in future:
# - Custom yield models based on ANGATA historical data
# - Field condition thresholds specific to ANGATA growing practices
# - Integration with ANGATA harvest scheduling system
# - WhatsApp messaging templates for ANGATA supply chain stakeholders
# - Cost/benefit analysis for ANGATA operational decisions
# These functions can be added here as ANGATA requirements evolve.