SmartCane/r_app/80_utils_common.R

1316 lines
45 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# 80_UTILS_COMMON.R
# ============================================================================
# SHARED UTILITY FUNCTIONS FOR ALL CLIENT TYPES (SCRIPT 80)
#
# Contains helper and infrastructure functions used by both AURA and ANGATA workflows:
# - Statistical categorization and calculations
# - Tile operations and data loading
# - Field statistics extraction
# - Week/year calculations for consistent date handling
# - Excel/CSV/RDS export utilities
#
# Used by: 80_calculate_kpis.R, all client-specific utils files
# ============================================================================
# ============================================================================
# CONSTANTS (from 80_calculate_kpis.R)
# ============================================================================
# Four-week trend thresholds
FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5
FOUR_WEEK_TREND_GROWTH_MIN <- 0.1
FOUR_WEEK_TREND_GROWTH_MAX <- 0.5
FOUR_WEEK_TREND_NO_GROWTH_RANGE <- 0.1
FOUR_WEEK_TREND_DECLINE_MAX <- -0.1
FOUR_WEEK_TREND_DECLINE_MIN <- -0.5
FOUR_WEEK_TREND_STRONG_DECLINE_MAX <- -0.5
# CV Trend thresholds (8-week slope interpretation)
CV_SLOPE_STRONG_IMPROVEMENT_MIN <- -0.03
CV_SLOPE_IMPROVEMENT_MIN <- -0.02
CV_SLOPE_IMPROVEMENT_MAX <- -0.01
CV_SLOPE_HOMOGENOUS_MIN <- -0.01
CV_SLOPE_HOMOGENOUS_MAX <- 0.01
CV_SLOPE_PATCHINESS_MIN <- 0.01
CV_SLOPE_PATCHINESS_MAX <- 0.02
CV_SLOPE_SEVERE_MIN <- 0.02
# Percentile calculations
CI_PERCENTILE_LOW <- 0.10
CI_PERCENTILE_HIGH <- 0.90
# Phase definitions (used by get_phase_by_age)
PHASE_DEFINITIONS <- data.frame(
phase = c("Germination", "Tillering", "Grand Growth", "Maturation"),
age_start = c(0, 4, 17, 39),
age_end = c(6, 16, 39, 200),
stringsAsFactors = FALSE
)
# ============================================================================
# WEEK/YEAR CALCULATION HELPERS (Consistent across all scripts)
# ============================================================================
#' Calculate week and year for a given lookback offset
#' This function handles ISO 8601 week numbering with proper year wrapping
#' when crossing year boundaries (e.g., week 01/2026 -> week 52/2025)
#'
#' @param current_week ISO week number (1-53)
#' @param current_year ISO week year (from format(..., "%G"))
#' @param offset_weeks Number of weeks to go back (0 = current week, 1 = previous week, etc.)
#'
#' @return List with: week (ISO week number), year (ISO week year)
#'
#' @details
#' This is the authoritative week/year calculation function.
#' Used by:
#' - load_historical_field_data() - to find RDS/CSV files for 4-week lookback
#' - Script 80 main - to calculate previous week with year wrapping
#' - Any other script needing to walk backwards through weeks
#'
#' Example: Week 01/2026, offset=1 -> returns list(week=52, year=2025)
calculate_target_week_and_year <- function(current_week, current_year, offset_weeks = 0) {
target_week <- current_week - offset_weeks
target_year <- current_year
# Handle wrapping: when going back from week 1, wrap to week 52 of previous year
while (target_week < 1) {
target_week <- target_week + 52
target_year <- target_year - 1
}
return(list(week = target_week, year = target_year))
}
# ============================================================================
# TILE-AWARE HELPER FUNCTIONS
# ============================================================================
#' Get tile IDs that intersect with a field geometry
get_tile_ids_for_field <- function(field_geom, tile_grid, field_id = NULL) {
if (inherits(field_geom, "sf")) {
field_bbox <- sf::st_bbox(field_geom)
field_xmin <- field_bbox["xmin"]
field_xmax <- field_bbox["xmax"]
field_ymin <- field_bbox["ymin"]
field_ymax <- field_bbox["ymax"]
} else if (inherits(field_geom, "SpatVector")) {
field_bbox <- terra::ext(field_geom)
field_xmin <- field_bbox$xmin
field_xmax <- field_bbox$xmax
field_ymin <- field_bbox$ymin
field_ymax <- field_bbox$ymax
} else {
stop("field_geom must be sf or terra::vect object")
}
intersecting_tiles <- tile_grid$id[
!(tile_grid$xmax < field_xmin |
tile_grid$xmin > field_xmax |
tile_grid$ymax < field_ymin |
tile_grid$ymin > field_ymax)
]
return(as.numeric(intersecting_tiles))
}
#' Load and merge tiles for a specific field
load_tiles_for_field <- function(field_geom, tile_ids, week_num, year, mosaic_dir) {
if (length(tile_ids) == 0) {
return(NULL)
}
tiles_list <- list()
for (tile_id in sort(tile_ids)) {
tile_filename <- sprintf("week_%02d_%d_%02d.tif", week_num, year, tile_id)
tile_path <- file.path(mosaic_dir, tile_filename)
if (file.exists(tile_path)) {
tryCatch({
tile_rast <- terra::rast(tile_path)
ci_band <- terra::subset(tile_rast, 5)
tiles_list[[length(tiles_list) + 1]] <- ci_band
}, error = function(e) {
message(paste(" Warning: Could not load tile", tile_id, ":", e$message))
})
}
}
if (length(tiles_list) == 0) {
return(NULL)
}
if (length(tiles_list) == 1) {
return(tiles_list[[1]])
} else {
tryCatch({
rsrc <- terra::sprc(tiles_list)
merged <- terra::mosaic(rsrc, fun = "max")
return(merged)
}, error = function(e) {
message(paste(" Warning: Could not merge tiles:", e$message))
return(tiles_list[[1]])
})
}
}
#' Build tile grid from available tile files
build_tile_grid <- function(mosaic_dir, week_num, year) {
# Handle grid-size subdirectories (e.g., weekly_tile_max/5x5/)
detected_grid_size <- NA
if (dir.exists(mosaic_dir)) {
subfolders <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE)
grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
if (length(grid_patterns) > 0) {
detected_grid_size <- grid_patterns[1]
mosaic_dir <- file.path(mosaic_dir, detected_grid_size)
message(paste(" Using grid-size subdirectory:", detected_grid_size))
}
}
tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
if (length(tile_files) == 0) {
stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir))
}
tile_grid <- data.frame(
id = integer(),
xmin = numeric(),
xmax = numeric(),
ymin = numeric(),
ymax = numeric(),
stringsAsFactors = FALSE
)
for (tile_file in tile_files) {
tryCatch({
matches <- regmatches(basename(tile_file), regexpr("_([0-9]{2})\\.tif$", basename(tile_file)))
if (length(matches) > 0) {
tile_id <- as.integer(sub("_|\\.tif", "", matches[1]))
tile_rast <- terra::rast(tile_file)
tile_ext <- terra::ext(tile_rast)
tile_grid <- rbind(tile_grid, data.frame(
id = tile_id,
xmin = tile_ext$xmin,
xmax = tile_ext$xmax,
ymin = tile_ext$ymin,
ymax = tile_ext$ymax,
stringsAsFactors = FALSE
))
}
}, error = function(e) {
message(paste(" Warning: Could not process tile", basename(tile_file), ":", e$message))
})
}
if (nrow(tile_grid) == 0) {
stop("Could not extract extents from any tile files")
}
return(list(
tile_grid = tile_grid,
mosaic_dir = mosaic_dir,
grid_size = detected_grid_size
))
}
# ============================================================================
# STATISTICAL CATEGORIZATION FUNCTIONS
# ============================================================================
#' Categorize four-week CI trend
categorize_four_week_trend <- function(ci_values_list) {
if (is.null(ci_values_list) || length(ci_values_list) < 2) {
return(NA_character_)
}
ci_values_list <- ci_values_list[!is.na(ci_values_list)]
if (length(ci_values_list) < 2) {
return(NA_character_)
}
weekly_changes <- diff(ci_values_list)
avg_weekly_change <- mean(weekly_changes, na.rm = TRUE)
if (avg_weekly_change >= FOUR_WEEK_TREND_STRONG_GROWTH_MIN) {
return("strong growth")
} else if (avg_weekly_change >= FOUR_WEEK_TREND_GROWTH_MIN &&
avg_weekly_change < FOUR_WEEK_TREND_GROWTH_MAX) {
return("growth")
} else if (abs(avg_weekly_change) <= FOUR_WEEK_TREND_NO_GROWTH_RANGE) {
return("no growth")
} else if (avg_weekly_change <= FOUR_WEEK_TREND_DECLINE_MIN &&
avg_weekly_change > FOUR_WEEK_TREND_STRONG_DECLINE_MAX) {
return("decline")
} else if (avg_weekly_change < FOUR_WEEK_TREND_STRONG_DECLINE_MAX) {
return("strong decline")
} else {
return("no growth")
}
}
#' Round cloud coverage to interval categories
round_cloud_to_intervals <- function(cloud_pct_clear) {
if (is.na(cloud_pct_clear)) {
return(NA_character_)
}
if (cloud_pct_clear < 10) return("0-10%")
if (cloud_pct_clear < 20) return("10-20%")
if (cloud_pct_clear < 30) return("20-30%")
if (cloud_pct_clear < 40) return("30-40%")
if (cloud_pct_clear < 50) return("40-50%")
if (cloud_pct_clear < 60) return("50-60%")
if (cloud_pct_clear < 70) return("60-70%")
if (cloud_pct_clear < 80) return("70-80%")
if (cloud_pct_clear < 90) return("80-90%")
if (cloud_pct_clear < 95) return("90-95%")
return("95-100%")
}
#' Get CI percentile range (10th to 90th)
get_ci_percentiles <- function(ci_values) {
if (is.null(ci_values) || length(ci_values) == 0) {
return(NA_character_)
}
ci_values <- ci_values[!is.na(ci_values)]
if (length(ci_values) == 0) {
return(NA_character_)
}
p10 <- quantile(ci_values, CI_PERCENTILE_LOW, na.rm = TRUE)
p90 <- quantile(ci_values, CI_PERCENTILE_HIGH, na.rm = TRUE)
return(sprintf("%.1f-%.1f", p10, p90))
}
#' Calculate short-term CV trend (current week vs previous week)
calculate_cv_trend <- function(cv_current, cv_previous) {
if (is.na(cv_current) || is.na(cv_previous)) {
return(NA_real_)
}
return(round(cv_current - cv_previous, 4))
}
#' Calculate four-week CI trend
calculate_four_week_trend <- function(mean_ci_values) {
if (is.null(mean_ci_values) || length(mean_ci_values) == 0) {
return(NA_real_)
}
ci_clean <- mean_ci_values[!is.na(mean_ci_values)]
if (length(ci_clean) < 2) {
return(NA_real_)
}
trend <- ci_clean[length(ci_clean)] - ci_clean[1]
return(round(trend, 2))
}
#' Categorize CV slope (8-week regression) into field uniformity interpretation
categorize_cv_slope <- function(slope) {
if (is.na(slope)) {
return(NA_character_)
}
if (slope <= CV_SLOPE_IMPROVEMENT_MIN) {
return("Excellent uniformity")
} else if (slope < CV_SLOPE_HOMOGENOUS_MIN) {
return("Homogenous growth")
} else if (slope <= CV_SLOPE_HOMOGENOUS_MAX) {
return("Homogenous growth")
} else if (slope <= CV_SLOPE_PATCHINESS_MAX) {
return("Minor patchiness")
} else {
return("Severe fragmentation")
}
}
#' Calculate 8-week CV trend via linear regression slope
calculate_cv_trend_long_term <- function(cv_values) {
if (is.null(cv_values) || length(cv_values) == 0) {
return(NA_real_)
}
cv_clean <- cv_values[!is.na(cv_values)]
if (length(cv_clean) < 2) {
return(NA_real_)
}
weeks <- seq_along(cv_clean)
tryCatch({
lm_fit <- lm(cv_clean ~ weeks)
slope <- coef(lm_fit)["weeks"]
return(round(as.numeric(slope), 4))
}, error = function(e) {
return(NA_real_)
})
}
#' Calculate Gap Filling Score KPI (2σ method)
#' @param ci_raster Current week CI raster
#' @param field_boundaries Field boundaries
#' @return Data frame with field-level gap filling scores
calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
safe_log("Calculating Gap Filling Score KPI (placeholder)")
# Handle both sf and SpatVector inputs
if (!inherits(field_boundaries, "SpatVector")) {
field_boundaries_vect <- terra::vect(field_boundaries)
} else {
field_boundaries_vect <- field_boundaries
}
# Ensure field_boundaries_vect is valid and matches field_boundaries dimensions
n_fields_vect <- length(field_boundaries_vect)
n_fields_sf <- nrow(field_boundaries)
if (n_fields_sf != n_fields_vect) {
warning(paste("Field boundary mismatch: nrow(field_boundaries)=", n_fields_sf, "vs length(field_boundaries_vect)=", n_fields_vect, ". Using actual SpatVector length."))
}
field_results <- data.frame()
for (i in seq_len(nrow(field_boundaries))) {
field_name <- field_boundaries$field[i]
sub_field_name <- field_boundaries$sub_field[i]
field_vect <- field_boundaries_vect[i]
# Extract CI values using helper function
ci_values <- extract_ci_values(ci_raster, field_vect)
valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)]
if (length(valid_values) > 1) {
# Gap score using 2σ below median to detect outliers
median_ci <- median(valid_values)
sd_ci <- sd(valid_values)
outlier_threshold <- median_ci - (2 * sd_ci)
low_ci_pixels <- sum(valid_values < outlier_threshold)
total_pixels <- length(valid_values)
gap_score <- (low_ci_pixels / total_pixels) * 100
# Classify gap severity
gap_level <- dplyr::case_when(
gap_score < 10 ~ "Minimal",
gap_score < 25 ~ "Moderate",
TRUE ~ "Significant"
)
field_results <- rbind(field_results, data.frame(
field = field_name,
sub_field = sub_field_name,
gap_level = gap_level,
gap_score = gap_score,
mean_ci = mean(valid_values),
outlier_threshold = outlier_threshold
))
} else {
# Not enough valid data, fill with NA row
field_results <- rbind(field_results, data.frame(
field = field_name,
sub_field = sub_field_name,
gap_level = NA_character_,
gap_score = NA_real_,
mean_ci = NA_real_,
outlier_threshold = NA_real_
))
}
}
}
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
#' Get crop phase by age in weeks
get_phase_by_age <- function(age_weeks) {
if (is.na(age_weeks)) return(NA_character_)
for (i in seq_len(nrow(PHASE_DEFINITIONS))) {
if (age_weeks >= PHASE_DEFINITIONS$age_start[i] &&
age_weeks <= PHASE_DEFINITIONS$age_end[i]) {
return(PHASE_DEFINITIONS$phase[i])
}
}
return("Unknown")
}
#' Get status trigger based on CI values and field age
get_status_trigger <- function(ci_values, ci_change, age_weeks) {
if (is.na(age_weeks) || length(ci_values) == 0) return(NA_character_)
ci_values <- ci_values[!is.na(ci_values)]
if (length(ci_values) == 0) return(NA_character_)
pct_above_2 <- sum(ci_values > 2) / length(ci_values) * 100
pct_at_or_above_2 <- sum(ci_values >= 2) / length(ci_values) * 100
ci_cv <- if (mean(ci_values, na.rm = TRUE) > 0) sd(ci_values) / mean(ci_values, na.rm = TRUE) else 0
mean_ci <- mean(ci_values, na.rm = TRUE)
if (age_weeks >= 0 && age_weeks <= 6) {
if (pct_at_or_above_2 >= 70) {
return("germination_complete")
} else if (pct_above_2 > 10) {
return("germination_started")
}
}
if (age_weeks >= 45) {
return("harvest_ready")
}
if (age_weeks > 6 && !is.na(ci_change) && ci_change < -1.5 && ci_cv < 0.25) {
return("stress_detected_whole_field")
}
if (age_weeks > 6 && !is.na(ci_change) && ci_change > 1.5) {
return("strong_recovery")
}
if (age_weeks >= 4 && age_weeks < 39 && !is.na(ci_change) && ci_change > 0.2) {
return("growth_on_track")
}
if (age_weeks >= 39 && age_weeks < 45 && mean_ci > 3.5) {
return("maturation_progressing")
}
return(NA_character_)
}
#' Extract planting dates from harvesting data
extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) {
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
message("Warning: No harvesting data available - planting dates will be NA.")
if (!is.null(field_boundaries_sf)) {
return(data.frame(
field_id = field_boundaries_sf$field,
planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)),
stringsAsFactors = FALSE
))
}
return(NULL)
}
tryCatch({
planting_dates <- harvesting_data %>%
arrange(field, desc(season_start)) %>%
distinct(field, .keep_all = TRUE) %>%
select(field, season_start) %>%
rename(field_id = field, planting_date = season_start) %>%
filter(!is.na(planting_date)) %>%
as.data.frame()
message(paste("Extracted planting dates for", nrow(planting_dates), "fields from harvest.xlsx"))
return(planting_dates)
}, error = function(e) {
message(paste("Error extracting planting dates:", e$message))
return(NULL)
})
}
# ============================================================================
# FIELD STATISTICS EXTRACTION
# ============================================================================
#' Extract CI statistics for all fields from a single CI raster band
extract_field_statistics_from_ci <- function(ci_band, field_boundaries_sf) {
extract_result <- terra::extract(ci_band, field_boundaries_sf)
stats_list <- list()
for (field_idx in seq_len(nrow(field_boundaries_sf))) {
field_pixels <- extract_result[extract_result$ID == field_idx, 2]
pixels <- as.numeric(field_pixels[!is.na(field_pixels)])
if (length(pixels) == 0) {
stats_list[[field_idx]] <- data.frame(
field_idx = field_idx,
mean_ci = NA_real_,
cv = NA_real_,
p10 = NA_real_,
p90 = NA_real_,
min_ci = NA_real_,
max_ci = NA_real_,
pixel_count_valid = 0,
pixel_count_total = 0,
stringsAsFactors = FALSE
)
next
}
mean_val <- mean(pixels, na.rm = TRUE)
cv_val <- if (mean_val > 0) sd(pixels, na.rm = TRUE) / mean_val else NA_real_
p10_val <- quantile(pixels, probs = CI_PERCENTILE_LOW, na.rm = TRUE)[[1]]
p90_val <- quantile(pixels, probs = CI_PERCENTILE_HIGH, na.rm = TRUE)[[1]]
min_val <- min(pixels, na.rm = TRUE)
max_val <- max(pixels, na.rm = TRUE)
stats_list[[field_idx]] <- data.frame(
field_idx = field_idx,
mean_ci = mean_val,
cv = cv_val,
p10 = p10_val,
p90 = p90_val,
min_ci = min_val,
max_ci = max_val,
pixel_count_valid = length(pixels),
pixel_count_total = nrow(extract_result[extract_result$ID == field_idx, ]),
stringsAsFactors = FALSE
)
}
return(dplyr::bind_rows(stats_list))
}
# ============================================================================
# EXPORT FUNCTIONS (USED BY ALL CLIENTS)
# ============================================================================
#' Generate summary statistics from field analysis data
generate_field_analysis_summary <- function(field_df) {
message("Generating summary statistics...")
total_acreage <- sum(field_df$Acreage, na.rm = TRUE)
germination_acreage <- sum(field_df$Acreage[field_df$Phase == "Germination"], na.rm = TRUE)
tillering_acreage <- sum(field_df$Acreage[field_df$Phase == "Tillering"], na.rm = TRUE)
grand_growth_acreage <- sum(field_df$Acreage[field_df$Phase == "Grand Growth"], na.rm = TRUE)
maturation_acreage <- sum(field_df$Acreage[field_df$Phase == "Maturation"], na.rm = TRUE)
unknown_phase_acreage <- sum(field_df$Acreage[field_df$Phase == "Unknown"], na.rm = TRUE)
harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE)
stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE)
recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE)
growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE)
germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE)
germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE)
no_trigger_acreage <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE)
clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE)
partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE)
no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE)
total_fields <- nrow(field_df)
clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE)
partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE)
no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE)
summary_df <- data.frame(
Category = c(
"--- PHASE DISTRIBUTION ---",
"Germination",
"Tillering",
"Grand Growth",
"Maturation",
"Unknown phase",
"--- STATUS TRIGGERS ---",
"Harvest ready",
"Stress detected",
"Strong recovery",
"Growth on track",
"Germination complete",
"Germination started",
"No trigger",
"--- CLOUD COVERAGE (FIELDS) ---",
"Clear view",
"Partial coverage",
"No image available",
"--- CLOUD COVERAGE (ACREAGE) ---",
"Clear view",
"Partial coverage",
"No image available",
"--- TOTAL ---",
"Total Acreage"
),
Acreage = c(
NA,
round(germination_acreage, 2),
round(tillering_acreage, 2),
round(grand_growth_acreage, 2),
round(maturation_acreage, 2),
round(unknown_phase_acreage, 2),
NA,
round(harvest_ready_acreage, 2),
round(stress_acreage, 2),
round(recovery_acreage, 2),
round(growth_on_track_acreage, 2),
round(germination_complete_acreage, 2),
round(germination_started_acreage, 2),
round(no_trigger_acreage, 2),
NA,
paste0(clear_fields, " fields"),
paste0(partial_fields, " fields"),
paste0(no_image_fields, " fields"),
NA,
round(clear_acreage, 2),
round(partial_acreage, 2),
round(no_image_acreage, 2),
NA,
round(total_acreage, 2)
),
stringsAsFactors = FALSE
)
return(summary_df)
}
#' Export field analysis to Excel, CSV, and RDS formats
export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, year, reports_dir) {
message("Exporting per-field analysis to Excel, CSV, and RDS...")
field_df_rounded <- field_df %>%
mutate(across(where(is.numeric), ~ round(., 2)))
# Handle NULL summary_df
summary_df_rounded <- if (!is.null(summary_df)) {
summary_df %>%
mutate(across(where(is.numeric), ~ round(., 2)))
} else {
NULL
}
output_subdir <- file.path(reports_dir, "field_analysis")
if (!dir.exists(output_subdir)) {
dir.create(output_subdir, recursive = TRUE)
}
excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".xlsx")
excel_path <- file.path(output_subdir, excel_filename)
excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
# Build sheets list dynamically
sheets <- list(
"Field Data" = field_df_rounded
)
if (!is.null(summary_df_rounded)) {
sheets[["Summary"]] <- summary_df_rounded
}
write_xlsx(sheets, excel_path)
message(paste("✓ Field analysis Excel exported to:", excel_path))
kpi_data <- list(
field_analysis = field_df_rounded,
field_analysis_summary = summary_df_rounded,
metadata = list(
current_week = current_week,
year = year,
project = project_dir,
created_at = Sys.time()
)
)
rds_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".rds")
rds_path <- file.path(output_subdir, rds_filename)
saveRDS(kpi_data, rds_path)
message(paste("✓ Field analysis RDS exported to:", rds_path))
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".csv")
csv_path <- file.path(output_subdir, csv_filename)
write_csv(field_df_rounded, csv_path)
message(paste("✓ Field analysis CSV exported to:", csv_path))
return(list(excel = excel_path, rds = rds_path, csv = csv_path))
}
# ============================================================================
# ADDITIONAL CRITICAL FUNCTIONS FROM 80_weekly_stats_utils.R (REQUIRED BY 80_calculate_kpis.R)
# ============================================================================
#' Calculate statistics for all fields from weekly mosaics
calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
mosaic_dir, report_date = Sys.Date()) {
message(paste("Calculating statistics for all fields - Week", week_num, year))
# Per-field mode: look in per-field subdirectories
single_file_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year)
# Find all field subdirectories with mosaics for this week
field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE)
field_dirs <- field_dirs[field_dirs != ""]
per_field_files <- list()
for (field in field_dirs) {
field_mosaic_dir <- file.path(mosaic_dir, field)
files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
if (length(files) > 0) {
per_field_files[[field]] <- files[1] # Take first match for this field
}
}
if (length(per_field_files) == 0) {
stop(paste("No per-field mosaic files found for week", week_num, year, "in", mosaic_dir))
}
message(paste(" Found", length(per_field_files), "per-field mosaic file(s) for week", week_num))
results_list <- list()
# Initialize progress bar
pb <- progress::progress_bar$new(
format = " [:bar] :percent | Field :current/:total",
total = length(per_field_files),
width = 60
)
# Process each field's mosaic
for (field_idx in seq_along(per_field_files)) {
pb$tick() # Update progress bar
field_name <- names(per_field_files)[field_idx]
field_file <- per_field_files[[field_name]]
tryCatch({
current_rast <- terra::rast(field_file)
ci_band <- current_rast[["CI"]]
if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) {
message(paste(" [SKIP] Field", field_name, "- CI band not found"))
next
}
# Extract CI values for this field
field_boundary <- field_boundaries_sf[field_boundaries_sf$field == field_name, ]
if (nrow(field_boundary) == 0) {
message(paste(" [SKIP] Field", field_name, "- not in field boundaries"))
next
}
extracted <- terra::extract(ci_band, field_boundary, na.rm = FALSE)
if (nrow(extracted) == 0 || all(is.na(extracted$CI))) {
message(paste(" [SKIP] Field", field_name, "- no CI values found"))
next
}
ci_vals <- extracted$CI[!is.na(extracted$CI)]
if (length(ci_vals) == 0) {
next
}
# Calculate statistics
mean_ci <- mean(ci_vals, na.rm = TRUE)
ci_std <- sd(ci_vals, na.rm = TRUE)
cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_
range_min <- min(ci_vals, na.rm = TRUE)
range_max <- max(ci_vals, na.rm = TRUE)
range_str <- sprintf("%.1f-%.1f", range_min, range_max)
ci_percentiles_str <- get_ci_percentiles(ci_vals)
num_pixels_total <- length(ci_vals)
num_pixels_gte_2 <- sum(ci_vals >= 2)
pct_pixels_gte_2 <- if (num_pixels_total > 0) round((num_pixels_gte_2 / num_pixels_total) * 100, 1) else 0
num_total <- nrow(extracted)
num_data <- sum(!is.na(extracted$CI))
pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0
cloud_cat <- if (num_data == 0) "No image available"
else if (pct_clear >= 95) "Clear view"
else "Partial coverage"
# Add to results
results_list[[length(results_list) + 1]] <- data.frame(
Field_id = field_name,
Mean_CI = round(mean_ci, 2),
CV = round(cv * 100, 2),
CI_range = range_str,
CI_Percentiles = ci_percentiles_str,
Pct_pixels_CI_gte_2 = pct_pixels_gte_2,
Cloud_pct_clear = pct_clear,
Cloud_category = cloud_cat,
stringsAsFactors = FALSE
)
}, error = function(e) {
message(paste(" [ERROR] Field", field_name, ":", e$message))
})
}
if (length(results_list) == 0) {
stop(paste("No fields processed successfully for week", week_num))
}
stats_df <- dplyr::bind_rows(results_list)
message(paste(" ✓ Successfully calculated statistics for", nrow(stats_df), "fields"))
return(stats_df)
}
#' Load or calculate weekly statistics (with RDS caching)
load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_boundaries_sf,
mosaic_dir, reports_dir, report_date = Sys.Date()) {
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, week_num, year)
rds_path <- file.path(reports_dir, "field_stats", rds_filename)
if (file.exists(rds_path)) {
message(paste("Loading cached statistics from:", basename(rds_path)))
return(readRDS(rds_path))
}
message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num))
stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year, mosaic_dir, report_date)
output_dir <- file.path(reports_dir, "field_stats")
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
}
saveRDS(stats_df, rds_path)
message(paste("Saved weekly statistics RDS:", basename(rds_path)))
csv_filename <- sprintf("%s_field_stats_week%02d_%d.csv", project_dir, week_num, year)
csv_path <- file.path(output_dir, csv_filename)
readr::write_csv(stats_df, csv_path)
message(paste("Saved weekly statistics CSV:", basename(csv_path)))
return(stats_df)
}
#' Load historical field data from CSV (4-week lookback)
load_historical_field_data <- function(project_dir, current_week, current_year, reports_dir, num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL, daily_vals_dir = NULL) {
historical_data <- list()
loaded_weeks <- c()
missing_weeks <- c()
for (lookback in 0:(num_weeks - 1)) {
target <- calculate_target_week_and_year(current_week, current_year, offset_weeks = lookback)
target_week <- target$week
target_year <- target$year
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", target_week, target_year), ".csv")
csv_path <- file.path(reports_dir, "field_analysis", csv_filename)
if (file.exists(csv_path)) {
tryCatch({
data <- readr::read_csv(csv_path, show_col_types = FALSE,
col_types = readr::cols(.default = readr::col_character()))
historical_data[[lookback + 1]] <- list(
week = target_week,
year = target_year,
data = data
)
loaded_weeks <- c(loaded_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year)))
}, error = function(e) {
message(paste(" Warning: Could not load week", target_week, "/", target_year, ":", e$message))
missing_weeks <<- c(missing_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year)))
})
} else {
missing_weeks <- c(missing_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year)))
}
}
if (length(historical_data) == 0) {
message(paste("Error: No historical field data found"))
return(NULL)
}
message(paste("✓ Loaded", length(historical_data), "weeks of historical data:",
paste(loaded_weeks, collapse = ", ")))
return(historical_data)
}
#' Calculate KPI trends (CI change, CV trends, 4-week and 8-week trends)
calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
project_dir = NULL, reports_dir = NULL,
current_week = NULL, year = NULL) {
message("Calculating KPI trends from current and previous week data")
current_stats$Weekly_ci_change <- NA_real_
current_stats$CV_Trend_Short_Term <- NA_real_
current_stats$Four_week_trend <- NA_real_
current_stats$CV_Trend_Long_Term <- NA_real_
current_stats$nmr_of_weeks_analysed <- 1L
if (is.null(prev_stats) || nrow(prev_stats) == 0) {
message(" No previous week data available - using defaults")
return(current_stats)
}
message(paste(" prev_stats has", nrow(prev_stats), "rows and", ncol(prev_stats), "columns"))
prev_lookup <- setNames(seq_len(nrow(prev_stats)), prev_stats$Field_id)
prev_field_analysis <- NULL
tryCatch({
analysis_dir <- file.path(reports_dir, "field_analysis")
if (dir.exists(analysis_dir)) {
analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE)
if (length(analysis_files) > 0) {
recent_file <- analysis_files[which.max(file.info(analysis_files)$mtime)]
prev_field_analysis <- readr::read_csv(recent_file, show_col_types = FALSE,
col_types = readr::cols(.default = readr::col_character()),
col_select = c(Field_id, nmr_of_weeks_analysed, Phase))
}
}
}, error = function(e) {
message(paste(" Note: Could not load previous field_analysis for nmr_weeks tracking:", e$message))
})
if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) {
message(paste(" Using previous field_analysis to track nmr_of_weeks_analysed"))
}
historical_4weeks <- list()
historical_8weeks <- list()
if (!is.null(project_dir) && !is.null(reports_dir) && !is.null(current_week)) {
message(" Loading historical field_stats for 4-week and 8-week trends...")
for (lookback in 1:4) {
target_week <- current_week - lookback
target_year <- year
if (target_week < 1) {
target_week <- target_week + 52
target_year <- target_year - 1
}
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
rds_path <- file.path(reports_dir, "field_stats", rds_filename)
if (file.exists(rds_path)) {
tryCatch({
stats_data <- readRDS(rds_path)
historical_4weeks[[length(historical_4weeks) + 1]] <- list(week = target_week, stats = stats_data)
}, error = function(e) {
message(paste(" Warning: Could not load week", target_week, ":", e$message))
})
}
}
for (lookback in 1:8) {
target_week <- current_week - lookback
target_year <- year
if (target_week < 1) {
target_week <- target_week + 52
target_year <- target_year - 1
}
rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
rds_path <- file.path(reports_dir, "field_stats", rds_filename)
if (file.exists(rds_path)) {
tryCatch({
stats_data <- readRDS(rds_path)
historical_8weeks[[length(historical_8weeks) + 1]] <- list(week = target_week, stats = stats_data)
}, error = function(e) {
# Silently skip
})
}
}
if (length(historical_4weeks) > 0) {
message(paste(" Loaded", length(historical_4weeks), "weeks for 4-week trend"))
}
if (length(historical_8weeks) > 0) {
message(paste(" Loaded", length(historical_8weeks), "weeks for 8-week CV trend"))
}
}
cv_trends_calculated <- 0
four_week_trends_calculated <- 0
cv_long_term_calculated <- 0
for (i in seq_len(nrow(current_stats))) {
field_id <- current_stats$Field_id[i]
prev_idx <- prev_lookup[field_id]
if (!is.na(prev_idx) && prev_idx > 0 && prev_idx <= nrow(prev_stats)) {
prev_row <- prev_stats[prev_idx, , drop = FALSE]
prev_ci <- prev_row$Mean_CI[1]
if (!is.na(prev_ci) && !is.na(current_stats$Mean_CI[i])) {
current_stats$Weekly_ci_change[i] <- round(current_stats$Mean_CI[i] - prev_ci, 2)
}
prev_cv <- prev_row$CV[1]
if (!is.na(prev_cv) && !is.na(current_stats$CV[i])) {
current_stats$CV_Trend_Short_Term[i] <- calculate_cv_trend(current_stats$CV[i], prev_cv)
cv_trends_calculated <- cv_trends_calculated + 1
}
if (length(historical_4weeks) > 0) {
ci_values_4week <- numeric()
for (hist_idx in rev(seq_along(historical_4weeks))) {
hist_data <- historical_4weeks[[hist_idx]]$stats
hist_field <- which(hist_data$Field_id == field_id)
if (length(hist_field) > 0 && !is.na(hist_data$Mean_CI[hist_field[1]])) {
ci_values_4week <- c(ci_values_4week, hist_data$Mean_CI[hist_field[1]])
}
}
ci_values_4week <- c(ci_values_4week, current_stats$Mean_CI[i])
if (length(ci_values_4week) >= 2) {
current_stats$Four_week_trend[i] <- calculate_four_week_trend(ci_values_4week)
four_week_trends_calculated <- four_week_trends_calculated + 1
}
}
if (length(historical_8weeks) > 0) {
cv_values_8week <- numeric()
for (hist_idx in rev(seq_along(historical_8weeks))) {
hist_data <- historical_8weeks[[hist_idx]]$stats
hist_field <- which(hist_data$Field_id == field_id)
if (length(hist_field) > 0 && !is.na(hist_data$CV[hist_field[1]])) {
cv_values_8week <- c(cv_values_8week, hist_data$CV[hist_field[1]])
}
}
cv_values_8week <- c(cv_values_8week, current_stats$CV[i])
if (length(cv_values_8week) >= 2) {
slope <- calculate_cv_trend_long_term(cv_values_8week)
current_stats$CV_Trend_Long_Term[i] <- slope
cv_long_term_calculated <- cv_long_term_calculated + 1
}
}
if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) {
prev_analysis_row <- prev_field_analysis %>% dplyr::filter(Field_id == field_id)
if (nrow(prev_analysis_row) > 0) {
prev_nmr_weeks_analysis <- prev_analysis_row$nmr_of_weeks_analysed[1]
if (!is.na(prev_nmr_weeks_analysis)) {
current_stats$nmr_of_weeks_analysed[i] <- prev_nmr_weeks_analysis + 1L
} else {
current_stats$nmr_of_weeks_analysed[i] <- 1L
}
}
}
}
}
message(paste(" ✓ Calculated CV_Trend_Short_Term:", cv_trends_calculated, "fields"))
message(paste(" ✓ Calculated Four_week_trend:", four_week_trends_calculated, "fields"))
message(paste(" ✓ Calculated CV_Trend_Long_Term:", cv_long_term_calculated, "fields"))
return(current_stats)
}
# ============================================================================
# INTERNAL HELPER FUNCTIONS (from 80_kpi_utils.R) - DO NOT DELETE
# ============================================================================
# Spatial autocorrelation thresholds for field pattern analysis
MORAN_THRESHOLD_HIGH <- 0.95 # Very strong clustering (problematic patterns)
MORAN_THRESHOLD_MODERATE <- 0.85 # Moderate clustering
MORAN_THRESHOLD_LOW <- 0.7 # Normal field continuity
#' Calculate coefficient of variation for uniformity assessment
calculate_cv <- function(values) {
values <- values[!is.na(values) & is.finite(values)]
if (length(values) < 2) return(NA)
cv <- sd(values) / mean(values)
return(cv)
}
#' Calculate percentage of field with positive vs negative change
calculate_change_percentages <- function(current_values, previous_values) {
if (length(current_values) != length(previous_values)) {
return(list(positive_pct = NA, negative_pct = NA, stable_pct = NA))
}
change_values <- current_values - previous_values
valid_changes <- change_values[!is.na(change_values) & is.finite(change_values)]
if (length(valid_changes) < 2) {
return(list(positive_pct = NA, negative_pct = NA, stable_pct = NA))
}
positive_pct <- sum(valid_changes > 0) / length(valid_changes) * 100
negative_pct <- sum(valid_changes < 0) / length(valid_changes) * 100
stable_pct <- sum(valid_changes == 0) / length(valid_changes) * 100
return(list(
positive_pct = positive_pct,
negative_pct = negative_pct,
stable_pct = stable_pct
))
}
#' Calculate spatial autocorrelation (Moran's I) for a field
calculate_spatial_autocorrelation <- function(ci_raster, field_boundary) {
tryCatch({
field_raster <- terra::crop(ci_raster, field_boundary)
field_raster <- terra::mask(field_raster, field_boundary)
raster_points <- terra::as.points(field_raster, na.rm = TRUE)
if (length(raster_points) < 10) {
return(list(morans_i = NA, p_value = NA, interpretation = "insufficient_data"))
}
points_sf <- sf::st_as_sf(raster_points)
coords <- sf::st_coordinates(points_sf)
k_neighbors <- min(8, max(4, floor(nrow(coords) / 10)))
knn_nb <- spdep::knearneigh(coords, k = k_neighbors)
knn_listw <- spdep::nb2listw(spdep::knn2nb(knn_nb), style = "W", zero.policy = TRUE)
ci_values <- points_sf[[1]]
moran_result <- spdep::moran.test(ci_values, knn_listw, zero.policy = TRUE)
morans_i <- moran_result$estimate[1]
p_value <- moran_result$p.value
interpretation <- if (is.na(morans_i)) {
"insufficient_data"
} else if (p_value > 0.05) {
"random"
} else if (morans_i > MORAN_THRESHOLD_HIGH) {
"very_strong_clustering"
} else if (morans_i > MORAN_THRESHOLD_MODERATE) {
"strong_clustering"
} else if (morans_i > MORAN_THRESHOLD_LOW) {
"normal_continuity"
} else if (morans_i > 0.3) {
"weak_clustering"
} else if (morans_i < -0.3) {
"dispersed"
} else {
"low_autocorrelation"
}
return(list(morans_i = morans_i, p_value = p_value, interpretation = interpretation))
}, error = function(e) {
warning(paste("Error calculating spatial autocorrelation:", e$message))
return(list(morans_i = NA, p_value = NA, interpretation = "error"))
})
}
#' Extract CI band from multi-band raster
extract_ci_values <- function(ci_raster, field_vect) {
extracted <- terra::extract(ci_raster, field_vect, fun = NULL)
if ("CI" %in% names(extracted)) {
return(extracted[, "CI"])
} else if (ncol(extracted) > 1) {
return(extracted[, ncol(extracted)])
} else {
return(extracted[, 1])
}
}
#' Calculate current and previous week numbers using ISO 8601
calculate_week_numbers <- function(report_date = Sys.Date()) {
current_week <- as.numeric(format(report_date, "%V"))
current_year <- as.numeric(format(report_date, "%G"))
previous_week <- current_week - 1
previous_year <- current_year
if (previous_week < 1) {
previous_week <- 52
previous_year <- current_year - 1
}
return(list(
current_week = current_week,
previous_week = previous_week,
current_year = current_year,
previous_year = previous_year
))
}
#' Load field CI raster (handles single-file and per-field architectures)
load_field_ci_raster <- function(ci_raster_or_obj, field_name, field_vect = NULL) {
is_per_field <- !is.null(attr(ci_raster_or_obj, "is_per_field")) && attr(ci_raster_or_obj, "is_per_field")
if (is_per_field) {
per_field_dir <- attr(ci_raster_or_obj, "per_field_dir")
week_file <- attr(ci_raster_or_obj, "week_file")
field_mosaic_path <- file.path(per_field_dir, field_name, week_file)
if (file.exists(field_mosaic_path)) {
tryCatch({
field_mosaic <- terra::rast(field_mosaic_path)
if (terra::nlyr(field_mosaic) >= 5) {
return(field_mosaic[[5]])
} else {
return(field_mosaic[[1]])
}
}, error = function(e) {
return(NULL)
})
} else {
return(NULL)
}
} else {
if (!is.null(field_vect)) {
return(terra::crop(ci_raster_or_obj, field_vect, mask = TRUE))
} else {
return(ci_raster_or_obj)
}
}
}
#' Load weekly CI mosaic (single-file or per-field)
load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) {
week_file <- sprintf("week_%02d_%d.tif", week_num, year)
week_path <- file.path(mosaic_dir, week_file)
if (file.exists(week_path)) {
tryCatch({
mosaic_raster <- terra::rast(week_path)
ci_raster <- mosaic_raster[[5]]
names(ci_raster) <- "CI"
return(ci_raster)
}, error = function(e) {
return(NULL)
})
}
if (dir.exists(mosaic_dir)) {
field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE)
field_dirs <- field_dirs[field_dirs != ""]
found_any <- FALSE
for (field in field_dirs) {
field_mosaic_path <- file.path(mosaic_dir, field, week_file)
if (file.exists(field_mosaic_path)) {
found_any <- TRUE
break
}
}
if (found_any) {
dummy_raster <- terra::rast(nrow=1, ncol=1, vals=NA)
attr(dummy_raster, "per_field_dir") <- mosaic_dir
attr(dummy_raster, "week_file") <- week_file
attr(dummy_raster, "is_per_field") <- TRUE
return(dummy_raster)
}
}
return(NULL)
}
#' Prepare predictions with consistent naming
prepare_predictions <- function(predictions, newdata) {
return(predictions %>%
as.data.frame() %>%
dplyr::rename(predicted_Tcha = ".") %>%
dplyr::mutate(
sub_field = newdata$sub_field,
field = newdata$field,
Age_days = newdata$DOY,
total_CI = round(newdata$cumulative_CI, 0),
predicted_Tcha = round(predicted_Tcha, 0),
season = newdata$season
) %>%
dplyr::select(field, sub_field, Age_days, predicted_Tcha, season) %>%
dplyr::left_join(., newdata, by = c("field", "sub_field", "season"))
)
}