- Renamed translation function from `t` to `tr_key` for clarity and consistency. - Updated all instances of translation calls in `90_CI_report_with_kpis_agronomic_support.Rmd` and `91_CI_report_with_kpis_cane_supply.Rmd` to use `tr_key`. - Introduced a new helper function `get_area_unit_label` to manage area unit preferences across the project. - Modified area calculations in `91_CI_report_with_kpis_cane_supply.Rmd` to utilize area from analysis data instead of recalculating. - Added area unit preference setting in `parameters_project.R` to allow for flexible reporting in either hectares or acres. - Updated `MANUAL_PIPELINE_RUNNER.R` to include language parameter for report generation. - Adjusted translations in the `translations.xlsx` file to reflect changes in the report structure.
1896 lines
71 KiB
R
1896 lines
71 KiB
R
# 80_UTILS_COMMON.R
|
||
# ============================================================================
|
||
# SHARED UTILITY FUNCTIONS FOR ALL CLIENT TYPES (SCRIPT 80)
|
||
#
|
||
# Contains helper and infrastructure functions used by both AURA and ANGATA workflows:
|
||
# - Statistical categorization and calculations
|
||
# - Tile operations and data loading
|
||
# - Field statistics extraction
|
||
# - Week/year calculations for consistent date handling
|
||
# - Excel/CSV/RDS export utilities
|
||
# - Yield prediction using ML models (Random Forest with Feature Selection)
|
||
#
|
||
# Used by: 80_calculate_kpis.R, all client-specific utils files
|
||
#
|
||
# NOTE: Libraries required by yield prediction (caret, CAST, here) are loaded
|
||
# in the main script 80_calculate_kpis.R, not here. This keeps dependencies
|
||
# centralized in the orchestrator script.
|
||
# ============================================================================
|
||
|
||
# ============================================================================
|
||
# LOAD PROJECT CONFIGURATION (Guard against re-sourcing)
|
||
# ============================================================================
|
||
# Ensure parameters_project.R has been sourced to provide global configuration
|
||
# (PROJECT, data_dir, field_boundaries_path, etc.). Use a sentinel to avoid double-sourcing.
|
||
# Source parameters_project.R exactly once, using PROJECT as a sentinel, so
# global configuration (PROJECT, data_dir, field_boundaries_path, ...) is
# available without double-sourcing when this utils file is loaded repeatedly.
if (!exists("PROJECT", envir = .GlobalEnv)) {
  tryCatch({
    # Preferred: resolve the path from the project root via here()
    source(here::here("r_app", "parameters_project.R"))
  }, error = function(e) {
    # Fallback: try relative path if here() doesn't work
    tryCatch({
      source("parameters_project.R")
    }, error = function(e2) {
      # Last resort: warn but continue — the caller (e.g. an orchestrator
      # script) is then expected to have set PROJECT/data_dir itself.
      warning(paste("Could not source parameters_project.R:", e2$message,
                    "- using defaults or expecting caller to set PROJECT/data_dir"))
    })
  })
}
|
||
|
||
# ============================================================================
|
||
# CONSTANTS (from 80_calculate_kpis.R)
|
||
# ============================================================================
|
||
|
||
# Four-week trend thresholds (CI units/week) - SYMMETRIC by design
|
||
# Report mapping:
|
||
# Strong growth (↑↑): Slope ≥ 0.3
|
||
# Weak growth (↑): Slope 0.1–0.3
|
||
# Stable (→): Slope −0.1 to +0.1
|
||
# Weak Decline (↓): Slope −0.3 to −0.1
|
||
# Strong Decline (↓↓): Slope < −0.3
|
||
# Four-week CI trend slope cut-points (CI units/week); see the report
# mapping in the comment block above. Boundaries are symmetric by design.
FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.3
FOUR_WEEK_TREND_GROWTH_MIN <- 0.1
FOUR_WEEK_TREND_STABLE_THRESHOLD <- 0.1
FOUR_WEEK_TREND_WEAK_DECLINE_THRESHOLD <- -0.1 # upper bound of Weak Decline / lower bound of Stable
FOUR_WEEK_TREND_STRONG_DECLINE_MAX <- -0.3
# ============================================================================
# AREA CALCULATION UNITS & CONVERSION
# ============================================================================
# Conversion constant: 1 acre = 0.404686 hectares (exact: 0.40468564224).
# NOTE(review): despite the name, this value is "hectares per acre" —
# hectares are converted to acres by DIVIDING by it (see
# calculate_area_from_geometry). The previous comment ("1 hectare =
# 0.404686 acres") had the relationship inverted; the code was correct.
HECTARE_TO_ACRE_CONVERSION <- 0.404686

# CV trend thresholds (8-week slope interpretation).
# Negative slope = field becoming more uniform; positive = patchier.
CV_SLOPE_STRONG_IMPROVEMENT_MIN <- -0.03
CV_SLOPE_IMPROVEMENT_MIN <- -0.02
CV_SLOPE_IMPROVEMENT_MAX <- -0.01
CV_SLOPE_HOMOGENOUS_MIN <- -0.01
CV_SLOPE_HOMOGENOUS_MAX <- 0.01
CV_SLOPE_PATCHINESS_MIN <- 0.01
CV_SLOPE_PATCHINESS_MAX <- 0.02
CV_SLOPE_SEVERE_MIN <- 0.02

# Percentile calculations (used for the p10-p90 CI range)
CI_PERCENTILE_LOW <- 0.10
CI_PERCENTILE_HIGH <- 0.90

# Phase definitions (used by get_phase_by_age).
# NOTE: age ranges overlap at the boundaries (e.g. weeks 4-6 fall in both
# Germination and Tillering, week 39 in both Grand Growth and Maturation);
# get_phase_by_age resolves ties by returning the FIRST matching row.
PHASE_DEFINITIONS <- data.frame(
  phase = c("Germination", "Tillering", "Grand Growth", "Maturation"),
  age_start = c(0, 4, 17, 39),
  age_end = c(6, 16, 39, 200),
  stringsAsFactors = FALSE
)
|
||
|
||
# ============================================================================
|
||
# AREA CALCULATION FUNCTIONS (Unified across all scripts)
|
||
# ============================================================================
|
||
|
||
#' Calculate field area from geometry in specified unit
#'
#' Unified function for calculating polygon area from sf or terra SpatVector
#' geometries. Geometries are reprojected to EPSG:6933 (an equal-area
#' projection) so areas are accurate regardless of the input CRS.
#'
#' @param geometry sf or terra SpatVector object containing field polygons
#'        If sf: must have a geometry column (auto-detected)
#'        If SpatVector: terra vector object
#' @param unit Character. Output unit: "hectare" (default) or "acre"
#'
#' @return Numeric vector of areas in the requested unit, one per feature.
#'   NA geometries propagate NA areas.
#'
#' @details
#' **Unit Conversion**:
#' - m² → hectares: divide by 10,000
#' - hectares → acres: divide by HECTARE_TO_ACRE_CONVERSION (0.404686
#'   hectares per acre, i.e. multiply by ~2.4711)
#'
#' **Bug fix**: the class check previously tested
#' `inherits(geometry, "SpatVect")`, but terra vector objects carry class
#' "SpatVector" (see get_tile_ids_for_field in this file), so every terra
#' input was incorrectly rejected with an error.
#'
#' @examples
#' # With sf object
#' library(sf)
#' fields_sf <- st_read("pivot.geojson")
#' areas_ha <- calculate_area_from_geometry(fields_sf, unit = "hectare")
#' areas_ac <- calculate_area_from_geometry(fields_sf, unit = "acre")
#'
#' # With terra SpatVector
#' library(terra)
#' fields_vect <- vect("pivot.geojson")
#' areas_ha <- calculate_area_from_geometry(fields_vect, unit = "hectare")
#'
#' @export
calculate_area_from_geometry <- function(geometry, unit = "hectare") {
  # Validate unit parameter
  unit_lower <- tolower(unit)
  if (!unit_lower %in% c("hectare", "acre")) {
    stop("Unit must be 'hectare' or 'acre'. Got: ", unit)
  }

  tryCatch({
    # Branch by geometry type
    if (inherits(geometry, "sf")) {
      geometry_proj <- sf::st_transform(geometry, 6933)
      areas_m2 <- as.numeric(sf::st_area(geometry_proj))
    } else if (inherits(geometry, "SpatVector")) {
      # terra's vector class is "SpatVector" (not "SpatVect")
      geometry_proj <- terra::project(geometry, "EPSG:6933")
      areas_m2 <- as.numeric(terra::expanse(geometry_proj))
    } else {
      stop("geometry must be an sf or terra SpatVector object. Got: ",
           paste(class(geometry), collapse = ", "))
    }

    # Convert units
    areas_ha <- areas_m2 / 10000  # m² → hectares

    if (unit_lower == "hectare") {
      areas_ha
    } else { # unit_lower == "acre"
      # Divide by hectares-per-acre to obtain acres
      areas_ha / HECTARE_TO_ACRE_CONVERSION
    }
  }, error = function(e) {
    stop("Error calculating area from geometry: ", e$message)
  })
}
|
||
|
||
# ============================================================================
|
||
# WEEK/YEAR CALCULATION HELPERS (Consistent across all scripts)
|
||
# ============================================================================
|
||
|
||
#' Calculate week and year for a given lookback offset
#'
#' Handles ISO 8601 week numbering with proper year wrapping in BOTH
#' directions: going back past week 1 wraps into the last ISO week of the
#' previous year, and (generalization) a negative offset running past the
#' final week of a year now wraps forward into week 1 of the next year
#' (previously week 53 could be returned for a 52-week year).
#'
#' @param current_week ISO week number (1-53)
#' @param current_year ISO week year (from format(..., "%G"))
#' @param offset_weeks Number of weeks to go back (0 = current week,
#'   1 = previous week, etc.). Negative values step forward.
#'
#' @return List with: week (ISO week number), year (ISO week year)
#'
#' @details
#' This is the authoritative week/year calculation function. Used by:
#' - load_historical_field_data() - to find RDS/CSV files for 4-week lookback
#' - Script 80 main - to calculate previous week with year wrapping
#' - Any other script needing to walk backwards through weeks
#'
#' The number of ISO weeks in a year (52 or 53) is derived from Dec 28,
#' which by construction always falls in the year's final ISO week.
#'
#' Example: Week 01/2026, offset=1 -> returns list(week=52, year=2025)
calculate_target_week_and_year <- function(current_week, current_year, offset_weeks = 0) {
  # Number of ISO weeks in `year` (52 or 53); Dec 28 is always in the last one
  last_iso_week <- function(year) {
    as.numeric(format(as.Date(paste0(year, "-12-28")), "%V"))
  }

  target_week <- current_week - offset_weeks
  target_year <- current_year

  # Wrap backwards: week 0, -1, ... land in the previous year's final weeks
  while (target_week < 1) {
    target_year <- target_year - 1
    target_week <- target_week + last_iso_week(target_year)
  }

  # Wrap forwards: supports negative offsets past the end of the year
  while (target_week > last_iso_week(target_year)) {
    target_week <- target_week - last_iso_week(target_year)
    target_year <- target_year + 1
  }

  list(week = target_week, year = target_year)
}
|
||
|
||
# ============================================================================
|
||
# TILE-AWARE HELPER FUNCTIONS
|
||
# ============================================================================
|
||
|
||
#' Get tile IDs that intersect with a field geometry
#'
#' Performs a bounding-box overlap test between the field geometry and each
#' tile in tile_grid (columns: id, xmin, xmax, ymin, ymax) and returns the
#' ids of all tiles whose boxes touch the field's box.
#'
#' @param field_geom sf object or terra SpatVector for one field
#' @param tile_grid data.frame of tile extents (id, xmin, xmax, ymin, ymax)
#' @param field_id Unused; kept for call-site compatibility
#' @return Numeric vector of intersecting tile ids (possibly empty)
get_tile_ids_for_field <- function(field_geom, tile_grid, field_id = NULL) {
  # Pull the field's bounding box out of either supported geometry class
  if (inherits(field_geom, "sf")) {
    bb <- sf::st_bbox(field_geom)
    fx_min <- bb["xmin"]
    fx_max <- bb["xmax"]
    fy_min <- bb["ymin"]
    fy_max <- bb["ymax"]
  } else if (inherits(field_geom, "SpatVector")) {
    bb <- terra::ext(field_geom)
    fx_min <- bb$xmin
    fx_max <- bb$xmax
    fy_min <- bb$ymin
    fy_max <- bb$ymax
  } else {
    stop("field_geom must be sf or terra::vect object")
  }

  # Two axis-aligned boxes overlap iff neither lies entirely to one side of
  # the other (De Morgan of the "separated" test)
  overlapping <- tile_grid$xmax >= fx_min &
    tile_grid$xmin <= fx_max &
    tile_grid$ymax >= fy_min &
    tile_grid$ymin <= fy_max

  as.numeric(tile_grid$id[overlapping])
}
|
||
|
||
#' Load and merge tiles for a specific field
#'
#' Reads band 5 (the CI layer) from each requested weekly tile file and
#' returns a single raster: NULL when no tile could be loaded, the lone
#' band when exactly one loaded, otherwise a max-mosaic of all loaded
#' bands (falling back to the first band if mosaicking fails).
#'
#' @param field_geom Field geometry (unused here; kept for interface parity)
#' @param tile_ids Numeric vector of tile ids to load
#' @param week_num ISO week number of the tiles
#' @param year Year of the tiles
#' @param mosaic_dir Directory containing the weekly tile files
#' @return terra SpatRaster (CI band) or NULL
load_tiles_for_field <- function(field_geom, tile_ids, week_num, year, mosaic_dir) {
  if (length(tile_ids) == 0) {
    return(NULL)
  }

  ci_bands <- list()
  for (tile_id in sort(tile_ids)) {
    tile_path <- file.path(
      mosaic_dir,
      sprintf("week_%02d_%d_%02d.tif", week_num, year, tile_id)
    )
    if (!file.exists(tile_path)) next

    tryCatch({
      # Band 5 of each weekly tile holds the CI layer
      ci_bands[[length(ci_bands) + 1]] <- terra::subset(terra::rast(tile_path), 5)
    }, error = function(e) {
      message(paste(" Warning: Could not load tile", tile_id, ":", e$message))
    })
  }

  if (length(ci_bands) == 0) {
    return(NULL)
  }
  if (length(ci_bands) == 1) {
    return(ci_bands[[1]])
  }

  # Multiple tiles: combine with a max-mosaic, falling back to the first
  # band if the merge fails
  tryCatch({
    terra::mosaic(terra::sprc(ci_bands), fun = "max")
  }, error = function(e) {
    message(paste(" Warning: Could not merge tiles:", e$message))
    ci_bands[[1]]
  })
}
|
||
|
||
#' Build tile grid from available tile files
#'
#' Scans mosaic_dir (descending into a grid-size subdirectory such as "5x5"
#' when one exists) for files named week_WW_YYYY_TT.tif, reads each tile's
#' spatial extent, and returns the lookup table consumed by
#' get_tile_ids_for_field().
#'
#' @param mosaic_dir Directory containing weekly tile GeoTIFFs
#' @param week_num ISO week number of the tiles to index
#' @param year Year of the tiles to index
#' @return List with: tile_grid (data.frame id/xmin/xmax/ymin/ymax),
#'   mosaic_dir (possibly updated to the grid subdirectory), grid_size
#'   (detected "NxN" subfolder name, or NA)
#'
#' @details
#' **Bug fix**: the tile id was previously extracted with
#' `sub("_|\\.tif", "", matches[1])`, but sub() replaces only the FIRST
#' match of the alternation, leaving e.g. "05.tif", which as.integer()
#' coerces to NA (with a warning) — so every tile id came out NA. A regex
#' capture group is used instead. Also replaces O(n²) rbind-in-loop growth
#' with a preallocated list bound once at the end.
build_tile_grid <- function(mosaic_dir, week_num, year) {
  # Handle grid-size subdirectories (e.g., weekly_tile_max/5x5/)
  detected_grid_size <- NA
  if (dir.exists(mosaic_dir)) {
    subfolders <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE)
    grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)

    if (length(grid_patterns) > 0) {
      detected_grid_size <- grid_patterns[1]
      mosaic_dir <- file.path(mosaic_dir, detected_grid_size)
      message(paste(" Using grid-size subdirectory:", detected_grid_size))
    }
  }

  tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
  tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)

  if (length(tile_files) == 0) {
    stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir))
  }

  # One extent row per tile, collected in a preallocated list and bound once
  rows <- vector("list", length(tile_files))
  for (i in seq_along(tile_files)) {
    tile_file <- tile_files[i]
    rows[[i]] <- tryCatch({
      # Capture the two-digit tile id just before ".tif"
      tile_id <- as.integer(sub("^.*_([0-9]{2})\\.tif$", "\\1", basename(tile_file)))
      tile_ext <- terra::ext(terra::rast(tile_file))
      data.frame(
        id = tile_id,
        xmin = tile_ext$xmin,
        xmax = tile_ext$xmax,
        ymin = tile_ext$ymin,
        ymax = tile_ext$ymax,
        stringsAsFactors = FALSE
      )
    }, error = function(e) {
      message(paste(" Warning: Could not process tile", basename(tile_file), ":", e$message))
      NULL
    })
  }

  keep <- !vapply(rows, is.null, logical(1))
  tile_grid <- do.call(rbind, rows[keep])

  if (is.null(tile_grid) || nrow(tile_grid) == 0) {
    stop("Could not extract extents from any tile files")
  }

  list(
    tile_grid = tile_grid,
    mosaic_dir = mosaic_dir,
    grid_size = detected_grid_size
  )
}
|
||
|
||
# ============================================================================
|
||
# STATISTICAL CATEGORIZATION FUNCTIONS
|
||
# ============================================================================
|
||
|
||
#' Categorize four-week CI trend
#'
#' Averages the week-to-week CI changes and maps the mean slope onto the
#' report categories (see FOUR_WEEK_TREND_* constants):
#'   >=  0.3        -> "strong growth"
#'   [ 0.1,  0.3)   -> "growth"
#'   [-0.1,  0.1]   -> "no growth"
#'   [-0.3, -0.1)   -> "decline"
#'   <  -0.3        -> "strong decline"
#'
#' @param ci_values_list Numeric vector of weekly mean CI values, oldest first
#' @return One of the category labels above, or NA when fewer than two
#'   non-NA values are available
#'
#' @details
#' **Bug fix**: a slope of exactly -0.3 previously fell through to
#' "no growth" because both decline branches excluded the boundary
#' (`> -0.3` and `< -0.3`). Per the report mapping ("Weak Decline:
#' -0.3 to -0.1"), -0.3 now maps to "decline".
categorize_four_week_trend <- function(ci_values_list) {
  if (is.null(ci_values_list) || length(ci_values_list) < 2) {
    return(NA_character_)
  }

  ci_values_list <- ci_values_list[!is.na(ci_values_list)]
  if (length(ci_values_list) < 2) {
    return(NA_character_)
  }

  avg_weekly_change <- mean(diff(ci_values_list), na.rm = TRUE)

  if (avg_weekly_change >= FOUR_WEEK_TREND_STRONG_GROWTH_MIN) {
    "strong growth"
  } else if (avg_weekly_change >= FOUR_WEEK_TREND_GROWTH_MIN) {
    # < STRONG_GROWTH_MIN is implied by the branch above
    "growth"
  } else if (abs(avg_weekly_change) <= FOUR_WEEK_TREND_STABLE_THRESHOLD) {
    "no growth"
  } else if (avg_weekly_change >= FOUR_WEEK_TREND_STRONG_DECLINE_MAX) {
    # Reached only when avg < -0.1; inclusive lower bound fixes the -0.3 gap
    "decline"
  } else {
    "strong decline"
  }
}
|
||
|
||
#' Bin a percentage value into labeled intervals
#'
#' Universal binning for percentages (0-100): decade-wide bins, with the
#' 90-100 decade split at 95 so excellent conditions are distinguished
#' from marginal ones.
#'
#' **Used for**:
#' - Cloud coverage binning: `bin_percentage(cloud_pct_clear)` → "0-10%", "90-95%", etc.
#' - Germination progress: `bin_percentage(pct_pixels_ci_gte_2)` → percentage bins
#' - Field acreage reporting: `bin_percentage(pct_area)` → standardized bins
#'
#' Values below 10 (including negatives) land in "0-10%"; 95 and above in
#' "95-100%" — identical to the original threshold chain.
#'
#' @param pct Numeric scalar (0-100, typically a percentage)
#' @return Character bin label (e.g., "80-90%", "95-100%") or NA
#'
bin_percentage <- function(pct) {
  if (is.na(pct)) {
    return(NA_character_)
  }
  # Lower edges of each bin; findInterval() returns 0 for pct < 10, so the
  # label lookup is offset by one.
  lower_edges <- c(10, 20, 30, 40, 50, 60, 70, 80, 90, 95)
  bin_labels <- c("0-10%", "10-20%", "20-30%", "30-40%", "40-50%",
                  "50-60%", "60-70%", "70-80%", "80-90%", "90-95%", "95-100%")
  bin_labels[findInterval(pct, lower_edges) + 1]
}
|
||
|
||
#' Get CI percentile range (10th to 90th)
#'
#' Formats the CI_PERCENTILE_LOW..CI_PERCENTILE_HIGH quantile span of the
#' supplied CI pixel values as a "lo-hi" string with one decimal place.
#'
#' @param ci_values Numeric vector of CI pixel values (NULL and NAs tolerated)
#' @return Character "p10-p90" string, or NA when no non-NA values remain
get_ci_percentiles <- function(ci_values) {
  # NULL input survives the subset (NULL stays NULL) and hits the length check
  valid <- ci_values[!is.na(ci_values)]
  if (length(valid) == 0) {
    return(NA_character_)
  }

  bounds <- quantile(valid, c(CI_PERCENTILE_LOW, CI_PERCENTILE_HIGH), na.rm = TRUE)
  sprintf("%.1f-%.1f", bounds[[1]], bounds[[2]])
}
|
||
|
||
#' Calculate short-term CV trend (current week vs previous week)
#'
#' @param cv_current Current-week coefficient of variation
#' @param cv_previous Previous-week coefficient of variation
#' @return Difference (current - previous) rounded to 4 decimals, or NA
#'   when either input is missing
calculate_cv_trend <- function(cv_current, cv_previous) {
  if (anyNA(c(cv_current, cv_previous))) {
    return(NA_real_)
  }
  round(cv_current - cv_previous, 4)
}
|
||
|
||
#' Calculate regression slope for temporal trend analysis
#'
#' Fits values ~ index with lm() after dropping NAs and returns the rounded
#' slope. Negative slopes indicate improvement (e.g. shrinking CV),
#' positive slopes worsening.
#'
#' **Used for**:
#' - 4-week CI trends (growth trajectories): `calculate_regression_slope(ci_values_4week, 2)`
#' - 8-week CV trends (field uniformity change): `calculate_regression_slope(cv_values_8week, 2)`
#' - Any other multi-week trend analysis
#'
#' **Pairing**: the slope is categorized by `categorize_cv_trend_long_term(slope)`
#' to produce labels: "More uniform" / "Stable uniformity" / "Less uniform"
#'
#' @param values Numeric vector (weekly series, oldest first; NAs dropped)
#' @param decimal_places Rounding precision (default: 2)
#' @return Rounded slope, or NA for NULL/empty input, fewer than two non-NA
#'   points, or an lm() failure
#'
calculate_regression_slope <- function(values, decimal_places = 2) {
  # NULL survives the subset as NULL and is caught by the length check
  observed <- values[!is.na(values)]
  if (length(observed) < 2) {
    return(NA_real_)
  }

  idx <- seq_along(observed)
  tryCatch(
    round(unname(coef(lm(observed ~ idx))[["idx"]]), decimal_places),
    error = function(e) NA_real_
  )
}
|
||
|
||
#' Calculate Gap Filling Score KPI (2σ method)
#'
#' For each field polygon, extracts CI pixel values (via extract_ci_values,
#' defined elsewhere in this file) and scores the percentage of pixels
#' falling below `median - 1*sd`. Fields with one or zero valid pixels get
#' an all-NA row.
#'
#' NOTE(review): the title/comments say "2σ" but the multiplier in the
#' threshold below is 1 (median - 1*sd) — confirm which is intended before
#' relying on the "2σ" label.
#'
#' @param ci_raster Current week CI raster (single band)
#' @param field_boundaries Field boundaries (sf or SpatVector); optional
#'   `field` / `sub_field` columns are used as labels when present
#' @return List with summary data frame (field counts and percent per
#'   gap_level) and field-level results data frame
calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {

  # Handle both sf and SpatVector inputs
  if (!inherits(field_boundaries, "SpatVector")) {
    field_boundaries_vect <- terra::vect(field_boundaries)
  } else {
    field_boundaries_vect <- field_boundaries
  }

  field_results <- data.frame()

  for (i in seq_len(nrow(field_boundaries))) {
    # Labels are optional columns on the boundaries object; NA when absent
    field_name <- if ("field" %in% names(field_boundaries)) field_boundaries$field[i] else NA_character_
    sub_field_name <- if ("sub_field" %in% names(field_boundaries)) field_boundaries$sub_field[i] else NA_character_
    field_vect <- field_boundaries_vect[i]

    # Extract CI values using helper function
    ci_values <- extract_ci_values(ci_raster, field_vect)
    valid_values <- ci_values[!is.na(ci_values) & is.finite(ci_values)]

    if (length(valid_values) > 1) {
      # Gap score: share of pixels more than 1 sd below the median
      # (NOTE(review): header says 2σ; the multiplier here is 1)
      median_ci <- median(valid_values)
      sd_ci <- sd(valid_values)
      outlier_threshold <- median_ci - (1 * sd_ci)
      low_ci_pixels <- sum(valid_values < outlier_threshold)
      total_pixels <- length(valid_values)
      gap_score <- round((low_ci_pixels / total_pixels) * 100, 2)

      # Classify gap severity
      gap_level <- dplyr::case_when(
        gap_score < 10 ~ "Minimal",
        gap_score < 25 ~ "Moderate",
        gap_score >= 25 ~ "Significant",
        TRUE ~ NA_character_
      )

      # NOTE(review): rbind-in-loop is O(n^2); fine for modest field counts,
      # a list + bind_rows would scale better
      field_results <- rbind(field_results, data.frame(
        field = field_name,
        sub_field = sub_field_name,
        gap_level = gap_level,
        gap_score = gap_score,
        mean_ci = mean(valid_values),
        outlier_threshold = outlier_threshold
      ))
    } else {
      # Not enough valid pixels to score this field
      field_results <- rbind(field_results, data.frame(
        field = field_name,
        sub_field = sub_field_name,
        gap_level = NA_character_,
        gap_score = NA_real_,
        mean_ci = NA_real_,
        outlier_threshold = NA_real_
      ))
    }
  }

  # Summarize results by severity class
  # (n() is unqualified — assumes dplyr is attached, not just ::-referenced)
  gap_summary <- field_results %>%
    dplyr::group_by(gap_level) %>%
    dplyr::summarise(field_count = n(), .groups = 'drop') %>%
    dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1))

  return(list(summary = gap_summary, field_results = field_results))
}
|
||
|
||
#' Calculate gap filling scores for all per-field mosaics (wrapper)
#'
#' Iterates over per-field mosaic TIFFs, runs calculate_gap_filling_kpi()
#' on each field's CI band, and aggregates to one mean gap_score per
#' Field_id. Fields whose file lacks a "CI" band, whose boundaries cannot
#' be found, or whose processing errors out get gap_score = NA.
#'
#' The field id is derived from each file's PARENT DIRECTORY name
#' (basename(dirname(file))) and matched against field_boundaries_sf$field.
#'
#' @param per_field_files Character vector of paths to per-field mosaic TIFFs
#' @param field_boundaries_sf sf object with field geometries (must carry a
#'   `field` column)
#' @return data.frame with Field_id and gap_score columns, or NULL when
#'   nothing could be scored
calculate_gap_scores <- function(per_field_files, field_boundaries_sf) {
  message("\nCalculating gap filling scores (2σ method)...")
  message(paste(" Using per-field mosaics for", length(per_field_files), "fields"))

  # Pre-split boundaries by field id for O(1) lookup inside the loop
  field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field)

  # Score one per-field mosaic; always yields at least a 1-row data.frame
  process_gap_for_field <- function(field_file) {
    # Field id = name of the file's parent folder
    field_id <- basename(dirname(field_file))
    field_bounds <- field_boundaries_by_id[[field_id]]

    if (is.null(field_bounds) || nrow(field_bounds) == 0) {
      return(data.frame(Field_id = field_id, gap_score = NA_real_))
    }

    tryCatch({
      field_raster <- terra::rast(field_file)
      ci_band_name <- "CI"
      if (!(ci_band_name %in% names(field_raster))) {
        # No CI band in this mosaic — cannot score
        return(data.frame(Field_id = field_id, gap_score = NA_real_))
      }
      field_ci_band <- field_raster[[ci_band_name]]
      names(field_ci_band) <- "CI"

      gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds)

      if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) {
        return(data.frame(Field_id = field_id, gap_score = NA_real_))
      }

      gap_scores <- gap_result$field_results
      gap_scores$Field_id <- gap_scores$field
      gap_scores <- gap_scores[, c("Field_id", "gap_score")]

      # Collapse sub-field rows to one mean score per field
      stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE))
    }, error = function(e) {
      message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message))
      data.frame(Field_id = field_id, gap_score = NA_real_)
    })
  }

  # Process fields sequentially with progress bar
  message(" Processing gap scores for ", length(per_field_files), " fields...")
  pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50)

  results_list <- lapply(seq_along(per_field_files), function(idx) {
    result <- process_gap_for_field(per_field_files[[idx]])
    utils::setTxtProgressBar(pb, idx)
    result
  })
  close(pb)

  gap_scores_df <- dplyr::bind_rows(results_list)

  if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
    # A field may contribute several rows (multiple mosaics); average again
    gap_scores_df <- gap_scores_df %>%
      dplyr::group_by(Field_id) %>%
      dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop")

    message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields"))

    # Guard against all-NA values which would produce Inf/-Inf warnings
    if (any(is.finite(gap_scores_df$gap_score))) {
      min_score <- round(min(gap_scores_df$gap_score, na.rm = TRUE), 2)
      max_score <- round(max(gap_scores_df$gap_score, na.rm = TRUE), 2)
      message(paste(" Gap score range:", min_score, "-", max_score, "%"))
    } else {
      message(" Gap score range: All values are NA (no valid gap scores)")
    }
  } else {
    message(" WARNING: No gap scores calculated from per-field mosaics")
    gap_scores_df <- NULL
  }

  return(gap_scores_df)
}
|
||
|
||
# ============================================================================
|
||
# HELPER FUNCTIONS
|
||
# ============================================================================
|
||
|
||
#' Get crop phase by age in weeks
#'
#' Looks the age up in PHASE_DEFINITIONS and returns the first phase whose
#' [age_start, age_end] range contains it. Ranges overlap at the
#' boundaries, so the earlier phase wins — matching the original loop's
#' first-match behavior.
#'
#' @param age_weeks Crop age in weeks (may be NA)
#' @return Phase name, "Unknown" when no range matches, or NA for NA input
get_phase_by_age <- function(age_weeks) {
  if (is.na(age_weeks)) {
    return(NA_character_)
  }

  in_range <- which(PHASE_DEFINITIONS$age_start <= age_weeks &
                      age_weeks <= PHASE_DEFINITIONS$age_end)
  if (length(in_range) == 0) {
    return("Unknown")
  }
  PHASE_DEFINITIONS$phase[in_range[1]]
}
|
||
|
||
#' Get status trigger based on CI values and field age
#'
#' Evaluates rules in priority order: germination (age 0-6 weeks), harvest
#' readiness (age >= 45), whole-field stress, strong recovery, growth on
#' track (age 4-39), and maturation progress (age 39-45). Returns the first
#' matching trigger name or NA.
#'
#' @param ci_values Numeric vector of CI pixel values for the field
#' @param ci_change Week-over-week change in mean CI (may be NA)
#' @param age_weeks Crop age in weeks (may be NA)
#' @return Trigger label (character) or NA_character_
#'
#' @details
#' **Bug fix**: with exactly one valid pixel, sd() returns NA, making
#' ci_cv NA; the old `ci_cv < 0.25` comparison then produced an NA if()
#' condition and crashed with "missing value where TRUE/FALSE needed".
#' The stress branch now guards against NA ci_cv (and simply skips).
get_status_trigger <- function(ci_values, ci_change, age_weeks) {
  if (is.na(age_weeks) || length(ci_values) == 0) return(NA_character_)

  ci_values <- ci_values[!is.na(ci_values)]
  if (length(ci_values) == 0) return(NA_character_)

  pct_above_2 <- sum(ci_values > 2) / length(ci_values) * 100
  pct_at_or_above_2 <- sum(ci_values >= 2) / length(ci_values) * 100
  mean_ci <- mean(ci_values, na.rm = TRUE)
  # CV is NA for a single pixel (sd of length-1 vector is NA); guarded below
  ci_cv <- if (mean_ci > 0) sd(ci_values) / mean_ci else 0

  # Germination window: complete when >=70% of pixels reach CI 2,
  # started when >10% exceed it
  if (age_weeks >= 0 && age_weeks <= 6) {
    if (pct_at_or_above_2 >= 70) {
      return("germination_complete")
    } else if (pct_above_2 > 10) {
      return("germination_started")
    }
  }

  if (age_weeks >= 45) {
    return("harvest_ready")
  }

  # Whole-field stress: large CI drop on a uniform field.
  # !is.na(ci_cv) guard prevents the single-pixel NA crash (see @details).
  if (age_weeks > 6 && !is.na(ci_change) && ci_change < -1.5 &&
      !is.na(ci_cv) && ci_cv < 0.25) {
    return("stress_detected_whole_field")
  }

  if (age_weeks > 6 && !is.na(ci_change) && ci_change > 1.5) {
    return("strong_recovery")
  }

  if (age_weeks >= 4 && age_weeks < 39 && !is.na(ci_change) && ci_change > 0.2) {
    return("growth_on_track")
  }

  if (age_weeks >= 39 && age_weeks < 45 && mean_ci > 3.5) {
    return("maturation_progressing")
  }

  NA_character_
}
|
||
|
||
#' Extract planting dates from harvesting data
#'
#' Takes the most recent season_start per field from the harvest records
#' and returns a field_id/planting_date data frame. When no harvest data is
#' available, returns NA planting dates for the supplied field boundaries
#' (or NULL when those are absent too).
#'
#' Consistency fix: dplyr verbs are now namespace-qualified (dplyr::) like
#' the rest of this file; previously this function relied on dplyr being
#' attached.
#'
#' @param harvesting_data data.frame with at least `field` and `season_start`
#' @param field_boundaries_sf Optional object with a `field` column, used to
#'   build an all-NA fallback frame when harvest data is missing
#' @return data.frame(field_id, planting_date) or NULL
extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) {
  if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
    message("Warning: No harvesting data available - planting dates will be NA.")
    if (!is.null(field_boundaries_sf)) {
      # One NA-dated row per known field so downstream joins still work
      return(data.frame(
        field_id = field_boundaries_sf$field,
        planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)),
        stringsAsFactors = FALSE
      ))
    }
    return(NULL)
  }

  tryCatch({
    # Most recent season per field: sort newest-first, keep first row per field
    planting_dates <- harvesting_data %>%
      dplyr::arrange(field, dplyr::desc(season_start)) %>%
      dplyr::distinct(field, .keep_all = TRUE) %>%
      dplyr::select(field, season_start) %>%
      dplyr::rename(field_id = field, planting_date = season_start) %>%
      dplyr::filter(!is.na(planting_date)) %>%
      as.data.frame()

    message(paste("Extracted planting dates for", nrow(planting_dates), "fields from harvest.xlsx"))
    return(planting_dates)
  }, error = function(e) {
    message(paste("Error extracting planting dates:", e$message))
    return(NULL)
  })
}
|
||
|
||
# ============================================================================
|
||
# DATA LOADING HELPERS
|
||
# ============================================================================
|
||
|
||
#' Load and validate harvest data from harvest.xlsx
#'
#' Reads `file.path(data_dir, "harvest.xlsx")`, checks for the required
#' columns (field, year, tonnage_ha), and coerces their types. Always
#' returns a data frame: on any failure (missing file, missing columns,
#' read error) an empty frame with the correct 3-column structure is
#' returned, so callers never see NULL.
#'
#' @param data_dir Path to data directory containing harvest.xlsx
#' @return data.frame with columns field (character, preserving leading
#'   zeros), year (numeric), tonnage_ha (numeric); 0 rows when loading
#'   failed
load_harvest_data <- function(data_dir) {
  # Shared failure path: warn once and hand back the canonical empty frame
  empty_result <- function() {
    message(" WARNING: No harvest data available. TCH yield prediction will use graceful fallback (NA values)")
    data.frame(
      field = character(),  # character so leading zeros survive (e.g. "01")
      year = numeric(),
      tonnage_ha = numeric(),
      stringsAsFactors = FALSE
    )
  }

  harvest_file <- file.path(data_dir, "harvest.xlsx")
  if (!file.exists(harvest_file)) {
    message(paste(" NOTE: harvest.xlsx not found at", harvest_file))
    return(empty_result())
  }

  loaded <- tryCatch(
    readxl::read_excel(harvest_file),
    error = function(e) {
      message(paste(" WARNING: Could not read harvest.xlsx:", e$message))
      NULL
    }
  )
  if (is.null(loaded)) {
    return(empty_result())
  }

  # Ensure required columns are present
  required_cols <- c("field", "year", "tonnage_ha")
  if (!all(required_cols %in% names(loaded))) {
    message(paste(" WARNING: harvest.xlsx missing required columns. Expected: field, year, tonnage_ha"))
    return(empty_result())
  }

  # Convert to plain data frame and pin column types.
  # CRITICAL: field stays character to preserve leading zeros (e.g. "01", "02")
  loaded <- as.data.frame(loaded)
  loaded$field <- as.character(loaded$field)
  loaded$year <- as.numeric(loaded$year)
  loaded$tonnage_ha <- as.numeric(loaded$tonnage_ha)

  message(paste(" ✓ Loaded harvest data:", nrow(loaded), "records from harvest.xlsx"))
  loaded
}
|
||
|
||
# ============================================================================
|
||
# FIELD STATISTICS EXTRACTION
|
||
# ============================================================================
|
||
|
||
#' Extract CI statistics for all fields from a single CI raster band
#'
#' Runs terra::extract() once for all field polygons, then summarises each
#' field's valid pixels (mean, CV, p10/p90, min/max, pixel counts). Fields
#' with no valid pixels get an all-NA row with zero counts.
#'
#' @param ci_band Single-band terra raster of CI values
#' @param field_boundaries_sf sf object of field polygons (one row each)
#' @return data.frame with one row per field, keyed by field_idx
extract_field_statistics_from_ci <- function(ci_band, field_boundaries_sf) {
  extract_result <- terra::extract(ci_band, field_boundaries_sf)

  n_fields <- nrow(field_boundaries_sf)
  stats_list <- vector("list", n_fields)

  for (i in seq_len(n_fields)) {
    # Column 2 of the extract result holds the CI band values for this field
    raw_pixels <- extract_result[extract_result$ID == i, 2]
    pixels <- as.numeric(raw_pixels[!is.na(raw_pixels)])

    if (length(pixels) == 0) {
      # No valid pixels: all-NA statistics, zero counts
      stats_list[[i]] <- data.frame(
        field_idx = i,
        mean_ci = NA_real_,
        cv = NA_real_,
        p10 = NA_real_,
        p90 = NA_real_,
        min_ci = NA_real_,
        max_ci = NA_real_,
        pixel_count_valid = 0,
        pixel_count_total = 0,
        stringsAsFactors = FALSE
      )
      next
    }

    avg <- mean(pixels, na.rm = TRUE)
    stats_list[[i]] <- data.frame(
      field_idx = i,
      mean_ci = avg,
      # CV is undefined for non-positive means
      cv = if (avg > 0) sd(pixels, na.rm = TRUE) / avg else NA_real_,
      p10 = quantile(pixels, probs = CI_PERCENTILE_LOW, na.rm = TRUE)[[1]],
      p90 = quantile(pixels, probs = CI_PERCENTILE_HIGH, na.rm = TRUE)[[1]],
      min_ci = min(pixels, na.rm = TRUE),
      max_ci = max(pixels, na.rm = TRUE),
      pixel_count_valid = length(pixels),
      pixel_count_total = nrow(extract_result[extract_result$ID == i, ]),
      stringsAsFactors = FALSE
    )
  }

  dplyr::bind_rows(stats_list)
}
|
||
|
||
# ============================================================================
|
||
# EXPORT FUNCTIONS (USED BY ALL CLIENTS)
|
||
# ============================================================================
|
||
|
||
#' Generate summary statistics from field analysis data
#'
#' Aggregates field acreage by growth phase, status trigger and cloud
#' coverage into a single two-column summary table. Field-count rows are
#' formatted as "<n> fields", so the Acreage column is coerced to character
#' whenever the table is built (same as the original behavior).
#'
#' @param field_df Data frame with columns Acreage (numeric), Phase,
#'   Status_trigger and Cloud_category (character; Status_trigger may be NA).
#' @return Data frame with Category and Acreage columns; section-separator
#'   rows ("--- ... ---") carry NA acreage.
generate_field_analysis_summary <- function(field_df) {
  message("Generating summary statistics...")

  # Sum acreage over rows matching a logical condition; NA conditions are
  # treated as non-matching (equivalent to the original NA-subset + na.rm).
  acreage_where <- function(cond) {
    sum(field_df$Acreage[!is.na(cond) & cond], na.rm = TRUE)
  }
  # Count fields in a given cloud-coverage category.
  fields_where <- function(category) {
    sum(field_df$Cloud_category == category, na.rm = TRUE)
  }

  phase_acres <- vapply(
    c("Germination", "Tillering", "Grand Growth", "Maturation", "Unknown"),
    function(p) acreage_where(field_df$Phase == p),
    numeric(1), USE.NAMES = FALSE
  )
  trigger_acres <- vapply(
    c("harvest_ready", "stress_detected_whole_field", "strong_recovery",
      "growth_on_track", "germination_complete", "germination_started"),
    function(s) acreage_where(field_df$Status_trigger == s),
    numeric(1), USE.NAMES = FALSE
  )
  no_trigger_acreage <- acreage_where(is.na(field_df$Status_trigger))
  cloud_categories <- c("Clear view", "Partial coverage", "No image available")
  cloud_acres <- vapply(
    cloud_categories,
    function(cc) acreage_where(field_df$Cloud_category == cc),
    numeric(1), USE.NAMES = FALSE
  )
  cloud_field_counts <- vapply(cloud_categories, fields_where, numeric(1), USE.NAMES = FALSE)
  total_acreage <- sum(field_df$Acreage, na.rm = TRUE)

  summary_df <- data.frame(
    Category = c(
      "--- PHASE DISTRIBUTION ---",
      "Germination",
      "Tillering",
      "Grand Growth",
      "Maturation",
      "Unknown phase",
      "--- STATUS TRIGGERS ---",
      "Harvest ready",
      "Stress detected",
      "Strong recovery",
      "Growth on track",
      "Germination complete",
      "Germination started",
      "No trigger",
      "--- CLOUD COVERAGE (FIELDS) ---",
      "Clear view",
      "Partial coverage",
      "No image available",
      "--- CLOUD COVERAGE (ACREAGE) ---",
      "Clear view",
      "Partial coverage",
      "No image available",
      "--- TOTAL ---",
      "Total Acreage"
    ),
    Acreage = c(
      NA,
      round(phase_acres, 2),
      NA,
      round(trigger_acres, 2),
      round(no_trigger_acreage, 2),
      NA,
      paste0(cloud_field_counts, " fields"),
      NA,
      round(cloud_acres, 2),
      NA,
      round(total_acreage, 2)
    ),
    stringsAsFactors = FALSE
  )

  return(summary_df)
}
|
||
|
||
#' Export field analysis to Excel, CSV, and RDS formats
#'
#' Writes three artifacts to `reports_dir` with a shared
#' "<project>_field_analysis_weekWW_YYYY" stem: an .xlsx workbook
#' ("Field Data" plus optional "Summary" sheet), an .rds bundle with
#' metadata, and a .csv of the per-field data. Numeric columns are rounded
#' to 2 decimals before export.
#'
#' @param field_df Per-field analysis data frame.
#' @param summary_df Optional summary data frame (NULL skips the sheet).
#' @param project_dir Project identifier used in the file names.
#' @param current_week,year ISO week and year used in the file names.
#' @param reports_dir Output directory (created if missing).
#' @return List with paths: excel, rds, csv.
export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, year, reports_dir) {
  message("Exporting per-field analysis to Excel, CSV, and RDS...")

  # Round every numeric column to 2 decimals for export.
  round_numerics <- function(df) {
    df %>% mutate(across(where(is.numeric), ~ round(., 2)))
  }

  rounded_fields <- round_numerics(field_df)
  rounded_summary <- if (is.null(summary_df)) NULL else round_numerics(summary_df)

  out_dir <- file.path(reports_dir)
  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE)
  }

  file_stem <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year))

  # --- Excel workbook ---
  excel_path <- normalizePath(file.path(out_dir, paste0(file_stem, ".xlsx")),
                              winslash = "\\", mustWork = FALSE)
  sheets <- list("Field Data" = rounded_fields)
  if (!is.null(rounded_summary)) {
    sheets[["Summary"]] <- rounded_summary
  }
  write_xlsx(sheets, excel_path)
  message(paste("✓ Field analysis Excel exported to:", excel_path))

  # --- RDS bundle (data + run metadata) ---
  rds_path <- file.path(out_dir, paste0(file_stem, ".rds"))
  saveRDS(
    list(
      field_analysis = rounded_fields,
      field_analysis_summary = rounded_summary,
      metadata = list(
        current_week = current_week,
        year = year,
        project = project_dir,
        created_at = Sys.time()
      )
    ),
    rds_path
  )
  message(paste("✓ Field analysis RDS exported to:", rds_path))

  # --- CSV (field data only) ---
  csv_path <- file.path(out_dir, paste0(file_stem, ".csv"))
  write_csv(rounded_fields, csv_path)
  message(paste("✓ Field analysis CSV exported to:", csv_path))

  return(list(excel = excel_path, rds = rds_path, csv = csv_path))
}
|
||
|
||
# ============================================================================
|
||
# ADDITIONAL CRITICAL FUNCTIONS FROM 80_weekly_stats_utils.R (REQUIRED BY 80_calculate_kpis.R)
|
||
# ============================================================================
|
||
|
||
#' Calculate statistics for all fields from weekly mosaics
#'
#' Scans per-field mosaic subdirectories under `mosaic_dir` for this week's
#' GeoTIFF ("week_WW_YYYY.tif"), extracts the "CI" band within each field's
#' boundary and computes per-field summary statistics plus a cloud-coverage
#' category. Fields with missing mosaics, missing boundaries, or no CI
#' values are skipped with a message.
#'
#' @param field_boundaries_sf sf object; must carry a `field` column whose
#'   values match the mosaic subdirectory names.
#' @param week_num,year ISO week number and year selecting the mosaic file.
#' @param mosaic_dir Root directory containing one subdirectory per field.
#' @param report_date Accepted for interface compatibility; not used in the
#'   body below.
#' @return Data frame with one row per processed field: Field_id, Mean_CI,
#'   CV (percent), CI_range, CI_Percentiles, Pct_pixels_CI_gte_2,
#'   Cloud_pct_clear, Cloud_category. Errors if no field produced a row.
calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
                                       mosaic_dir, report_date = Sys.Date()) {

  message(paste("Calculating statistics for all fields - Week", week_num, year))

  # Per-field mode: look in per-field subdirectories
  single_file_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year)

  # Find all field subdirectories with mosaics for this week
  field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE)
  field_dirs <- field_dirs[field_dirs != ""]

  per_field_files <- list()
  for (field in field_dirs) {
    field_mosaic_dir <- file.path(mosaic_dir, field)
    files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
    if (length(files) > 0) {
      per_field_files[[field]] <- files[1] # Take first match for this field
    }
  }

  if (length(per_field_files) == 0) {
    stop(paste("No per-field mosaic files found for week", week_num, year, "in", mosaic_dir))
  }

  message(paste(" Found", length(per_field_files), "per-field mosaic file(s) for week", week_num))
  results_list <- list()

  # Initialize progress bar
  pb <- progress::progress_bar$new(
    format = " [:bar] :percent | Field :current/:total",
    total = length(per_field_files),
    width = 60
  )

  # Process each field's mosaic. Any error in a single field is caught and
  # logged; the loop continues with the next field.
  for (field_idx in seq_along(per_field_files)) {
    pb$tick() # Update progress bar
    field_name <- names(per_field_files)[field_idx]
    field_file <- per_field_files[[field_name]]

    tryCatch({
      current_rast <- terra::rast(field_file)
      # The mosaic is expected to carry a band named "CI".
      ci_band <- current_rast[["CI"]]

      if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) {
        message(paste(" [SKIP] Field", field_name, "- CI band not found"))
        next
      }

      # Extract CI values for this field
      field_boundary <- field_boundaries_sf[field_boundaries_sf$field == field_name, ]

      if (nrow(field_boundary) == 0) {
        message(paste(" [SKIP] Field", field_name, "- not in field boundaries"))
        next
      }

      extracted <- terra::extract(ci_band, field_boundary, na.rm = FALSE)

      if (nrow(extracted) == 0 || all(is.na(extracted$CI))) {
        message(paste(" [SKIP] Field", field_name, "- no CI values found"))
        next
      }

      # Guard: detect cloud-masked dates (CI == 0 indicates no-data)
      # When any extracted value is 0, treat the entire date as cloud-masked
      has_zeros <- any(extracted$CI == 0, na.rm = TRUE)

      if (has_zeros) {
        # Cloud-masked date: skip temporal analysis, set stats to NA.
        # A row is still emitted so the field appears in downstream reports
        # with Cloud_category "No image available".
        message(paste(" [CLOUD] Field", field_name, "- entire date is cloud-masked (CI==0)"))

        results_list[[length(results_list) + 1]] <- data.frame(
          Field_id = field_name,
          Mean_CI = NA_real_,
          CV = NA_real_,
          CI_range = NA_character_,
          CI_Percentiles = NA_character_,
          Pct_pixels_CI_gte_2 = NA_real_,
          Cloud_pct_clear = 0,
          Cloud_category = "No image available",
          stringsAsFactors = FALSE
        )
        next
      }

      ci_vals <- extracted$CI[!is.na(extracted$CI)]

      if (length(ci_vals) == 0) {
        next
      }

      # Calculate statistics
      mean_ci <- mean(ci_vals, na.rm = TRUE)
      ci_std <- sd(ci_vals, na.rm = TRUE)
      # CV undefined for non-positive mean; see sibling helpers for the
      # same guard pattern.
      cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_
      range_min <- min(ci_vals, na.rm = TRUE)
      range_max <- max(ci_vals, na.rm = TRUE)
      range_str <- sprintf("%.1f-%.1f", range_min, range_max)
      # get_ci_percentiles() is defined elsewhere in this utils file.
      ci_percentiles_str <- get_ci_percentiles(ci_vals)

      num_pixels_total <- length(ci_vals)
      num_pixels_gte_2 <- sum(ci_vals >= 2)
      pct_pixels_gte_2 <- if (num_pixels_total > 0) round((num_pixels_gte_2 / num_pixels_total) * 100, 1) else 0

      # Cloud coverage: share of extracted cells that carry data.
      # >= 95% clear counts as a fully clear view.
      num_total <- nrow(extracted)
      num_data <- sum(!is.na(extracted$CI))
      pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0
      cloud_cat <- if (num_data == 0) "No image available"
      else if (pct_clear >= 95) "Clear view"
      else "Partial coverage"

      # Add to results
      results_list[[length(results_list) + 1]] <- data.frame(
        Field_id = field_name,
        Mean_CI = round(mean_ci, 2),
        CV = round(cv * 100, 2),
        CI_range = range_str,
        CI_Percentiles = ci_percentiles_str,
        Pct_pixels_CI_gte_2 = pct_pixels_gte_2,
        Cloud_pct_clear = pct_clear,
        Cloud_category = cloud_cat,
        stringsAsFactors = FALSE
      )

    }, error = function(e) {
      message(paste(" [ERROR] Field", field_name, ":", e$message))
    })
  }

  if (length(results_list) == 0) {
    stop(paste("No fields processed successfully for week", week_num))
  }

  stats_df <- dplyr::bind_rows(results_list)
  message(paste(" ✓ Successfully calculated statistics for", nrow(stats_df), "fields"))

  return(stats_df)
}
|
||
|
||
#' Load or calculate weekly statistics (with RDS caching)
#'
#' Returns cached per-field statistics from
#' `<reports_dir>/field_stats/<project>_field_stats_weekWW_YYYY.rds` when
#' present; otherwise computes them via calculate_field_statistics() and
#' persists both RDS and CSV copies.
#'
#' @param week_num,year ISO week number and year.
#' @param project_dir Project identifier used in the cache file names.
#' @param field_boundaries_sf Field boundaries passed to the calculator.
#' @param mosaic_dir Directory with per-field weekly mosaics.
#' @param reports_dir Root reports directory (cache lives in "field_stats").
#' @param report_date Forwarded to calculate_field_statistics().
#' @return Data frame of weekly per-field statistics.
load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_boundaries_sf,
                                           mosaic_dir, reports_dir, report_date = Sys.Date()) {

  stats_dir <- file.path(reports_dir, "field_stats")
  file_stem <- sprintf("%s_field_stats_week%02d_%d", project_dir, week_num, year)
  rds_path <- file.path(stats_dir, paste0(file_stem, ".rds"))

  # Fast path: reuse previously computed statistics.
  if (file.exists(rds_path)) {
    message(paste("Loading cached statistics from:", basename(rds_path)))
    return(readRDS(rds_path))
  }

  message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num))
  stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year, mosaic_dir, report_date)

  if (!dir.exists(stats_dir)) {
    dir.create(stats_dir, recursive = TRUE, showWarnings = FALSE)
  }

  saveRDS(stats_df, rds_path)
  message(paste("Saved weekly statistics RDS:", basename(rds_path)))

  csv_path <- file.path(stats_dir, paste0(file_stem, ".csv"))
  readr::write_csv(stats_df, csv_path)
  message(paste("Saved weekly statistics CSV:", basename(csv_path)))

  return(stats_df)
}
|
||
|
||
#' Load historical field data from CSV (4-week lookback)
#'
#' Walks back `num_weeks` ISO weeks from the current week (inclusive) and
#' loads each week's per-field analysis CSV. All columns are read as
#' character for schema stability. Missing or unreadable weeks are skipped
#' and reported.
#'
#' @param project_dir Project identifier used in the CSV file names.
#' @param current_week,current_year Starting ISO week and year.
#' @param kpi_reports_dir Directory containing the per-week analysis CSVs.
#' @param num_weeks Number of weeks to look back, including the current one.
#' @param auto_generate,field_boundaries_sf,daily_vals_dir Currently unused;
#'   kept for interface compatibility with callers.
#' @return List of list(week, year, data) entries (most recent first), or
#'   NULL when no week could be loaded.
load_historical_field_data <- function(project_dir, current_week, current_year, kpi_reports_dir,
                                       num_weeks = 4, auto_generate = TRUE,
                                       field_boundaries_sf = NULL, daily_vals_dir = NULL) {

  historical_data <- list()
  loaded_weeks <- character(0)
  missing_weeks <- character(0)

  for (lookback in 0:(num_weeks - 1)) {
    target <- calculate_target_week_and_year(current_week, current_year, offset_weeks = lookback)
    week_label <- paste0("week", sprintf("%02d_%d", target$week, target$year))

    csv_filename <- paste0(project_dir, "_field_analysis_", week_label, ".csv")
    csv_path <- file.path(kpi_reports_dir, csv_filename)

    week_data <- NULL
    if (file.exists(csv_path)) {
      week_data <- tryCatch(
        readr::read_csv(csv_path, show_col_types = FALSE,
                        col_types = readr::cols(.default = readr::col_character())),
        error = function(e) {
          message(paste(" Warning: Could not load week", target$week, "/", target$year, ":", e$message))
          NULL
        }
      )
    }

    if (!is.null(week_data)) {
      # FIX: append sequentially instead of indexing by lookback. The old
      # historical_data[[lookback + 1]] left NULL holes when a middle week
      # was missing, inflating length() and breaking downstream iteration.
      historical_data[[length(historical_data) + 1]] <- list(
        week = target$week,
        year = target$year,
        data = week_data
      )
      loaded_weeks <- c(loaded_weeks, week_label)
    } else {
      missing_weeks <- c(missing_weeks, week_label)
    }
  }

  if (length(historical_data) == 0) {
    message(paste("Error: No historical field data found"))
    return(NULL)
  }

  message(paste("✓ Loaded", length(historical_data), "weeks of historical data:",
                paste(loaded_weeks, collapse = ", ")))
  if (length(missing_weeks) > 0) {
    message(paste(" Missing weeks:", paste(missing_weeks, collapse = ", ")))
  }

  return(historical_data)
}
|
||
|
||
#' Load one week's cached field-analysis data frame from RDS (internal)
#'
#' Handles the current list format ('field_analysis' key), the legacy
#' 'field_data' key, and bare data frames.
#'
#' @param project_dir,reports_dir Locate the RDS file.
#' @param target_week,target_year Week the file was written for.
#' @param verbose Message on read failure (TRUE) or skip silently (FALSE).
#' @return The field-analysis data frame, or NULL when missing/unreadable.
.load_field_analysis_rds <- function(project_dir, reports_dir, target_week, target_year, verbose = TRUE) {
  rds_filename <- sprintf("%s_field_analysis_week%02d_%d.rds", project_dir, target_week, target_year)
  rds_path <- file.path(reports_dir, rds_filename)
  if (!file.exists(rds_path)) {
    return(NULL)
  }
  tryCatch({
    rds_content <- readRDS(rds_path)
    if (is.list(rds_content) && "field_analysis" %in% names(rds_content)) {
      rds_content$field_analysis
    } else if (is.list(rds_content) && "field_data" %in% names(rds_content)) {
      rds_content$field_data
    } else if (is.data.frame(rds_content)) {
      rds_content
    } else {
      stop("Unexpected RDS structure")
    }
  }, error = function(e) {
    if (verbose) {
      message(paste(" Warning: Could not load week", target_week, ":", e$message))
    }
    NULL
  })
}

#' Calculate KPI trends (CI change, CV trends, 4-week and 8-week trends)
#'
#' Adds trend columns to `current_stats`: Weekly_ci_change,
#' CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term and a running
#' nmr_of_weeks_analysed counter. Trends against history are only computed
#' for fields also present in `prev_stats`.
#'
#' @param current_stats Data frame with Field_id, Mean_CI, CV columns.
#' @param prev_stats Previous week's statistics (NULL/empty -> defaults).
#' @param project_dir,reports_dir,current_week,year Locate historical RDS
#'   files; when any is NULL, 4/8-week trends are skipped.
#' @return `current_stats` with the trend columns populated.
calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
                                 project_dir = NULL, reports_dir = NULL,
                                 current_week = NULL, year = NULL) {

  message("Calculating KPI trends from current and previous week data")

  # Defaults: no trend information, one week analysed.
  current_stats$Weekly_ci_change <- NA_real_
  current_stats$CV_Trend_Short_Term <- NA_real_
  current_stats$Four_week_trend <- NA_real_
  current_stats$CV_Trend_Long_Term <- NA_real_
  current_stats$nmr_of_weeks_analysed <- 1L

  if (is.null(prev_stats) || nrow(prev_stats) == 0) {
    message(" No previous week data available - using defaults")
    return(current_stats)
  }

  message(paste(" prev_stats has", nrow(prev_stats), "rows and", ncol(prev_stats), "columns"))

  # Field_id -> row index in prev_stats for O(1) lookup.
  prev_lookup <- setNames(seq_len(nrow(prev_stats)), prev_stats$Field_id)

  # The most recent field_analysis CSV carries the running
  # nmr_of_weeks_analysed counter that we increment per field.
  prev_field_analysis <- NULL
  tryCatch({
    analysis_dir <- file.path(reports_dir, "field_analysis")
    if (dir.exists(analysis_dir)) {
      analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE)
      if (length(analysis_files) > 0) {
        recent_file <- analysis_files[which.max(file.info(analysis_files)$mtime)]
        prev_field_analysis <- readr::read_csv(recent_file, show_col_types = FALSE,
                                               col_types = readr::cols(.default = readr::col_character()),
                                               col_select = c(Field_id, nmr_of_weeks_analysed, Phase))

        # Convert nmr_of_weeks_analysed back to integer (read as character
        # via the .default column spec); invalid values become NA.
        if (!is.null(prev_field_analysis) && "nmr_of_weeks_analysed" %in% names(prev_field_analysis)) {
          prev_field_analysis$nmr_of_weeks_analysed <- suppressWarnings(as.integer(prev_field_analysis$nmr_of_weeks_analysed))
        }
      }
    }
  }, error = function(e) {
    message(paste(" Note: Could not load previous field_analysis for nmr_weeks tracking:", e$message))
  })

  if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) {
    message(paste(" Using previous field_analysis to track nmr_of_weeks_analysed"))
  }

  historical_4weeks <- list()
  historical_8weeks <- list()

  if (!is.null(project_dir) && !is.null(reports_dir) && !is.null(current_week)) {
    message(" Loading historical field_stats for 4-week and 8-week trends...")

    # REFACTOR: the 4-week and 8-week loaders were verbatim duplicates that
    # read each RDS file twice; a single pass now feeds both windows.
    # NOTE: week arithmetic assumes 52-week years; 53-week ISO years are off
    # by one file at the year boundary (pre-existing behavior, unchanged).
    for (lookback in 1:8) {
      target_week <- current_week - lookback
      target_year <- year
      if (target_week < 1) {
        target_week <- target_week + 52
        target_year <- target_year - 1
      }

      stats_data <- .load_field_analysis_rds(project_dir, reports_dir, target_week, target_year,
                                             verbose = (lookback <= 4))
      if (!is.null(stats_data)) {
        entry <- list(week = target_week, stats = stats_data)
        if (lookback <= 4) {
          historical_4weeks[[length(historical_4weeks) + 1]] <- entry
        }
        historical_8weeks[[length(historical_8weeks) + 1]] <- entry
      }
    }

    if (length(historical_4weeks) > 0) {
      message(paste(" Loaded", length(historical_4weeks), "weeks for 4-week trend"))
    }
    if (length(historical_8weeks) > 0) {
      message(paste(" Loaded", length(historical_8weeks), "weeks for 8-week CV trend"))
    }
  }

  # Collect one column's historical values for a field, oldest-first
  # (history lists are stored newest-first, hence the rev()).
  collect_history <- function(hist_list, field_id, column) {
    vals <- numeric()
    for (hist_idx in rev(seq_along(hist_list))) {
      hist_data <- hist_list[[hist_idx]]$stats
      row_idx <- which(hist_data$Field_id == field_id)
      if (length(row_idx) > 0 && !is.na(hist_data[[column]][row_idx[1]])) {
        vals <- c(vals, hist_data[[column]][row_idx[1]])
      }
    }
    vals
  }

  cv_trends_calculated <- 0
  four_week_trends_calculated <- 0
  cv_long_term_calculated <- 0

  for (i in seq_len(nrow(current_stats))) {
    field_id <- current_stats$Field_id[i]
    prev_idx <- prev_lookup[field_id]

    if (!is.na(prev_idx) && prev_idx > 0 && prev_idx <= nrow(prev_stats)) {
      prev_row <- prev_stats[prev_idx, , drop = FALSE]

      # Week-over-week CI delta.
      prev_ci <- prev_row$Mean_CI[1]
      if (!is.na(prev_ci) && !is.na(current_stats$Mean_CI[i])) {
        current_stats$Weekly_ci_change[i] <- round(current_stats$Mean_CI[i] - prev_ci, 2)
      }

      # Short-term CV trend (this week vs last week).
      prev_cv <- prev_row$CV[1]
      if (!is.na(prev_cv) && !is.na(current_stats$CV[i])) {
        current_stats$CV_Trend_Short_Term[i] <- calculate_cv_trend(current_stats$CV[i], prev_cv)
        cv_trends_calculated <- cv_trends_calculated + 1
      }

      # 4-week CI regression slope (history + current value; the current
      # value is appended even if NA, matching the original behavior).
      if (length(historical_4weeks) > 0) {
        ci_values_4week <- c(collect_history(historical_4weeks, field_id, "Mean_CI"),
                             current_stats$Mean_CI[i])
        if (length(ci_values_4week) >= 2) {
          current_stats$Four_week_trend[i] <- calculate_regression_slope(ci_values_4week, 2)
          four_week_trends_calculated <- four_week_trends_calculated + 1
        }
      }

      # 8-week CV regression slope.
      if (length(historical_8weeks) > 0) {
        cv_values_8week <- c(collect_history(historical_8weeks, field_id, "CV"),
                             current_stats$CV[i])
        if (length(cv_values_8week) >= 2) {
          current_stats$CV_Trend_Long_Term[i] <- calculate_regression_slope(cv_values_8week, 2)
          cv_long_term_calculated <- cv_long_term_calculated + 1
        }
      }

      # Increment the running weeks-analysed counter where tracked.
      if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) {
        prev_analysis_row <- prev_field_analysis %>% dplyr::filter(Field_id == field_id)
        if (nrow(prev_analysis_row) > 0) {
          prev_nmr <- as.integer(prev_analysis_row$nmr_of_weeks_analysed[1])
          current_stats$nmr_of_weeks_analysed[i] <- if (!is.na(prev_nmr)) prev_nmr + 1L else 1L
        }
      }
    }
  }

  message(paste(" ✓ Calculated CV_Trend_Short_Term:", cv_trends_calculated, "fields"))
  message(paste(" ✓ Calculated Four_week_trend:", four_week_trends_calculated, "fields"))
  message(paste(" ✓ Calculated CV_Trend_Long_Term:", cv_long_term_calculated, "fields"))
  return(current_stats)
}
|
||
|
||
# ============================================================================
|
||
# INTERNAL HELPER FUNCTIONS (from 80_kpi_utils.R) - DO NOT DELETE
|
||
# ============================================================================
|
||
|
||
# Spatial autocorrelation thresholds for field pattern analysis.
# Moran's I cutoffs (unitless correlations in [-1, 1]) used by
# calculate_spatial_autocorrelation() below to label clustering strength.
MORAN_THRESHOLD_HIGH <- 0.95 # Very strong clustering (problematic patterns)
MORAN_THRESHOLD_MODERATE <- 0.85 # Moderate clustering
MORAN_THRESHOLD_LOW <- 0.7 # Normal field continuity
|
||
|
||
#' Calculate coefficient of variation for uniformity assessment
#'
#' @param values Numeric vector; NA and non-finite entries are dropped.
#' @return CV (sd/mean) as a double, or NA_real_ when fewer than 2 finite
#'   values remain or the mean is zero (CV undefined; previously this case
#'   produced Inf/NaN).
calculate_cv <- function(values) {
  values <- values[!is.na(values) & is.finite(values)]
  if (length(values) < 2) return(NA_real_)
  m <- mean(values)
  # Guard against division by zero; matches the `mean > 0` pattern used by
  # the other CV computations in this file.
  if (m == 0) return(NA_real_)
  sd(values) / m
}
|
||
|
||
#' Calculate percentage of field with positive vs negative change
#'
#' Compares two equal-length pixel vectors and reports what share of the
#' valid (finite, non-NA) differences increased, decreased or stayed equal.
#'
#' @param current_values,previous_values Numeric vectors of equal length.
#' @return List with positive_pct, negative_pct, stable_pct (each 0-100),
#'   or all-NA when lengths differ or fewer than 2 valid differences exist.
calculate_change_percentages <- function(current_values, previous_values) {
  na_result <- list(positive_pct = NA, negative_pct = NA, stable_pct = NA)

  if (length(current_values) != length(previous_values)) {
    return(na_result)
  }

  deltas <- current_values - previous_values
  deltas <- deltas[!is.na(deltas) & is.finite(deltas)]

  if (length(deltas) < 2) {
    return(na_result)
  }

  # mean() of a logical vector is the fraction of TRUEs.
  list(
    positive_pct = 100 * mean(deltas > 0),
    negative_pct = 100 * mean(deltas < 0),
    stable_pct = 100 * mean(deltas == 0)
  )
}
|
||
|
||
#' Calculate spatial autocorrelation (Moran's I) for a field
#'
#' Crops/masks the CI raster to the field boundary, converts the valid
#' pixels to points and runs a k-nearest-neighbour Moran's I test on the
#' CI values. The result is classified using the MORAN_THRESHOLD_*
#' constants defined above.
#'
#' @param ci_raster SpatRaster with CI values.
#' @param field_boundary Boundary geometry accepted by terra::crop/mask.
#' @return List with morans_i, p_value and a character interpretation;
#'   "insufficient_data" when fewer than 10 pixels, "error" on failure.
calculate_spatial_autocorrelation <- function(ci_raster, field_boundary) {
  tryCatch({
    field_raster <- terra::crop(ci_raster, field_boundary)
    field_raster <- terra::mask(field_raster, field_boundary)
    # One point per non-NA cell.
    raster_points <- terra::as.points(field_raster, na.rm = TRUE)

    # Moran's I is unstable for tiny samples.
    if (length(raster_points) < 10) {
      return(list(morans_i = NA, p_value = NA, interpretation = "insufficient_data"))
    }

    points_sf <- sf::st_as_sf(raster_points)
    coords <- sf::st_coordinates(points_sf)
    # Scale neighbourhood size with point count, clamped to [4, 8].
    k_neighbors <- min(8, max(4, floor(nrow(coords) / 10)))

    # Row-standardised ("W") spatial weights from k nearest neighbours.
    knn_nb <- spdep::knearneigh(coords, k = k_neighbors)
    knn_listw <- spdep::nb2listw(spdep::knn2nb(knn_nb), style = "W", zero.policy = TRUE)

    # First attribute column of the points layer holds the CI values.
    ci_values <- points_sf[[1]]
    moran_result <- spdep::moran.test(ci_values, knn_listw, zero.policy = TRUE)

    morans_i <- moran_result$estimate[1]
    p_value <- moran_result$p.value

    # Classify: non-significant results are "random"; significant results
    # are binned by effect size against the module-level thresholds.
    interpretation <- if (is.na(morans_i)) {
      "insufficient_data"
    } else if (p_value > 0.05) {
      "random"
    } else if (morans_i > MORAN_THRESHOLD_HIGH) {
      "very_strong_clustering"
    } else if (morans_i > MORAN_THRESHOLD_MODERATE) {
      "strong_clustering"
    } else if (morans_i > MORAN_THRESHOLD_LOW) {
      "normal_continuity"
    } else if (morans_i > 0.3) {
      "weak_clustering"
    } else if (morans_i < -0.3) {
      "dispersed"
    } else {
      "low_autocorrelation"
    }

    return(list(morans_i = morans_i, p_value = p_value, interpretation = interpretation))
  }, error = function(e) {
    # Never abort the caller's field loop on a single bad field.
    warning(paste("Error calculating spatial autocorrelation:", e$message))
    return(list(morans_i = NA, p_value = NA, interpretation = "error"))
  })
}
|
||
|
||
#' Extract CI band from multi-band raster
#'
#' Extracts per-pixel values for a field and returns the CI column when
#' present, otherwise the last value column; returns NA per row when the
#' extraction yields no value columns at all.
#'
#' @param ci_raster SpatRaster to extract from.
#' @param field_vect Geometry accepted by terra::extract.
#' @return Numeric vector of CI values (possibly all NA).
extract_ci_values <- function(ci_raster, field_vect) {
  vals <- terra::extract(ci_raster, field_vect, fun = NULL)

  # Preferred: a column explicitly named "CI".
  if ("CI" %in% names(vals)) {
    return(vals[, "CI"])
  }

  # Fallback: last column (first column is the extraction ID).
  if (ncol(vals) > 1) {
    return(vals[, ncol(vals)])
  }

  # Degenerate case: only the ID column came back -- no CI values.
  rep(NA_real_, nrow(vals))
}
|
||
|
||
#' Calculate current and previous week numbers using ISO 8601
#'
#' Uses strftime specifiers %V (ISO week) and %G (ISO year). When the
#' current week is week 1, the previous week is the last ISO week of the
#' prior year, computed dynamically since some years have 53 weeks.
#'
#' @param report_date Date to evaluate (defaults to today).
#' @return List: current_week, previous_week, current_year, previous_year.
calculate_week_numbers <- function(report_date = Sys.Date()) {
  iso_week <- function(d) as.numeric(format(d, "%V"))

  this_week <- iso_week(report_date)
  this_year <- as.numeric(format(report_date, "%G"))

  if (this_week > 1) {
    prior_week <- this_week - 1
    prior_year <- this_year
  } else {
    # Dec 28 always falls in the last ISO week of its year.
    prior_year <- this_year - 1
    prior_week <- iso_week(as.Date(paste0(prior_year, "-12-28")))
  }

  list(
    current_week = this_week,
    previous_week = prior_week,
    current_year = this_year,
    previous_year = prior_year
  )
}
|
||
|
||
#' Load field CI raster (handles single-file and per-field architectures)
#'
#' In per-field mode (marker object carrying is_per_field/per_field_dir/
#' week_file attributes, see load_weekly_ci_mosaic), loads the field's own
#' mosaic and returns band 5 (or band 1 for thinner stacks). In single-file
#' mode, optionally crops the supplied raster to the field boundary.
#'
#' @param ci_raster_or_obj SpatRaster or per-field marker object.
#' @param field_name Field subdirectory name (per-field mode only).
#' @param field_vect Optional boundary to crop to (single-file mode only).
#' @return SpatRaster with the field's CI data, or NULL when unavailable.
load_field_ci_raster <- function(ci_raster_or_obj, field_name, field_vect = NULL) {
  per_field_flag <- attr(ci_raster_or_obj, "is_per_field")

  if (is.null(per_field_flag) || !per_field_flag) {
    # Single-file architecture: crop to the field when a boundary is given.
    if (is.null(field_vect)) {
      return(ci_raster_or_obj)
    }
    return(terra::crop(ci_raster_or_obj, field_vect, mask = TRUE))
  }

  # Per-field architecture: resolve this field's mosaic on disk.
  mosaic_path <- file.path(
    attr(ci_raster_or_obj, "per_field_dir"),
    field_name,
    attr(ci_raster_or_obj, "week_file")
  )

  if (!file.exists(mosaic_path)) {
    return(NULL)
  }

  tryCatch({
    field_mosaic <- terra::rast(mosaic_path)
    # Band 5 holds CI in the standard stack; fall back to band 1 otherwise.
    if (terra::nlyr(field_mosaic) >= 5) {
      field_mosaic[[5]]
    } else {
      field_mosaic[[1]]
    }
  }, error = function(e) {
    NULL
  })
}
|
||
|
||
#' Load weekly CI mosaic (single-file or per-field)
#'
#' Tries the single-file mosaic `mosaic_dir/week_WW_YYYY.tif` first and
#' returns its 5th band renamed "CI". If that file is missing or unreadable,
#' falls back to the per-field layout: when at least one field subdirectory
#' holds this week's file, a 1x1 marker raster is returned whose attributes
#' (per_field_dir, week_file, is_per_field) tell load_field_ci_raster()
#' where to find each field's mosaic.
#'
#' @param week_num,year ISO week number and year.
#' @param mosaic_dir Mosaic root directory.
#' @return SpatRaster ("CI" band), per-field marker raster, or NULL.
load_weekly_ci_mosaic <- function(week_num, year, mosaic_dir) {
  week_file <- sprintf("week_%02d_%d.tif", week_num, year)
  week_path <- file.path(mosaic_dir, week_file)

  if (file.exists(week_path)) {
    ci_raster <- tryCatch({
      mosaic_raster <- terra::rast(week_path)
      band <- mosaic_raster[[5]]
      names(band) <- "CI"
      band
    }, error = function(e) {
      NULL
    })
    if (!is.null(ci_raster)) {
      return(ci_raster)
    }
    # A corrupt/short single-file mosaic deliberately falls through to the
    # per-field search below (this was implicit in the original code).
  }

  if (dir.exists(mosaic_dir)) {
    field_dirs <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE)
    field_dirs <- field_dirs[field_dirs != ""]

    # file.exists() is vectorized: check every field subdirectory at once
    # instead of looping (empty field_dirs yields FALSE).
    if (any(file.exists(file.path(mosaic_dir, field_dirs, week_file)))) {
      dummy_raster <- terra::rast(nrow = 1, ncol = 1, vals = NA)
      attr(dummy_raster, "per_field_dir") <- mosaic_dir
      attr(dummy_raster, "week_file") <- week_file
      attr(dummy_raster, "is_per_field") <- TRUE
      return(dummy_raster)
    }
  }

  return(NULL)
}
|
||
|
||
#' Prepare predictions with consistent naming
#'
#' Converts a raw prediction vector into a tidy data frame (field,
#' sub_field, Age_days, predicted_Tcha, season) and joins all remaining
#' `newdata` columns back on by field/sub_field/season.
#'
#' @param predictions Numeric vector of model predictions; assumed to be in
#'   the same row order as `newdata` -- TODO confirm at the call site.
#' @param newdata Data frame of prediction inputs; must contain sub_field,
#'   field, DAH, cumulative_CI and season columns.
#' @return Data frame with rounded predicted_Tcha plus all newdata columns.
prepare_predictions <- function(predictions, newdata) {
  # NOTE(review): inside a magrittr pipe the call is as.data.frame(.), so
  # the resulting single column is literally named "." -- the rename below
  # depends on that quirk and would break under the native |> pipe.
  return(predictions %>%
    as.data.frame() %>%
    dplyr::rename(predicted_Tcha = ".") %>%
    dplyr::mutate(
      sub_field = newdata$sub_field,
      field = newdata$field,
      Age_days = newdata$DAH,
      # total_CI is computed but dropped by the select() below.
      total_CI = round(newdata$cumulative_CI, 0),
      predicted_Tcha = round(predicted_Tcha, 0),
      season = newdata$season
    ) %>%
    dplyr::select(field, sub_field, Age_days, predicted_Tcha, season) %>%
    # Re-attach the remaining newdata columns (DAH, cumulative_CI, ...).
    dplyr::left_join(., newdata, by = c("field", "sub_field", "season"))
  )
}
|
||
|
||
# ============================================================================
|
||
# YIELD PREDICTION KPI (SHARED ML-BASED MODEL FOR ALL CLIENT TYPES)
|
||
# ============================================================================
|
||
|
||
#' Helper function for graceful fallback when training data unavailable
#'
#' Produces the same result shape as the yield-prediction KPI, but with NA
#' quartile values and an empty per-field table; only the total forecast
#' area (in hectares) is populated from the field geometries.
#'
#' @param field_boundaries Field boundaries (sf or SpatVector)
#' @return List with summary and field_results (both with NA values)
create_fallback_result <- function(field_boundaries) {
  # terra::project() needs a SpatVector; convert sf input on the fly.
  boundaries_vect <- if (inherits(field_boundaries, "SpatVector")) {
    field_boundaries
  } else {
    terra::vect(field_boundaries)
  }

  # Equal-area projection (EPSG:6933) so expanse() yields meaningful areas;
  # m^2 -> hectares.
  projected <- terra::project(boundaries_vect, "EPSG:6933")
  area_ha <- terra::expanse(projected) / 10000

  summary_result <- data.frame(
    field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"),
    count = c(0, 0, 0, nrow(boundaries_vect)),
    value = c(NA_real_, NA_real_, NA_real_, round(sum(area_ha), 1)),
    stringsAsFactors = FALSE
  )

  # Zero-row frame with the forecast schema expected downstream.
  field_results <- data.frame(
    field = character(0),
    sub_field = character(0),
    Age_days = numeric(0),
    yield_forecast_t_ha = numeric(0),
    season = numeric(0),
    stringsAsFactors = FALSE
  )

  return(list(summary = summary_result, field_results = field_results))
}
|
||
|
||
#' Calculate yield prediction KPI using Random Forest with Feature Selection
#'
#' Trains a Random Forest model on historical harvest data with cumulative CI,
#' days after harvest (DAH), and CI-per-day as predictors. Uses CAST::ffs() for
#' Forward Feature Selection. Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD).
#'
#' @param field_boundaries Field boundaries (sf or SpatVector)
#' @param harvesting_data Data frame with harvest data including tonnage_ha column
#'   and a `year` column (renamed to `season` internally)
#' @param cumulative_CI_vals_dir Directory containing "All_pivots_Cumulative_CI_quadrant_year_v2.rds"
#'
#' @return List with:
#'   - summary: Data frame with field_groups, count, value (quartiles and total area)
#'   - field_results: Data frame with field-level yield forecasts (yield_forecast_t_ha)
#'
#' @details
#' **Training Data Requirements:**
#' - cumulative_CI_vals_dir must contain "All_pivots_Cumulative_CI_quadrant_year_v2.rds"
#' - harvesting_data must have tonnage_ha column with numeric yield values
#' - Training stops gracefully if either is missing (returns NA values, not fake numbers)
#'
#' **Model Specifications:**
#' - Algorithm: Random Forest (caret + CAST)
#' - Feature Selection: Forward Feature Selection (CAST::ffs)
#' - Cross-validation: 5-fold CV
#' - Predictors: cumulative_CI, DAH, CI_per_day
#' - Mature field threshold: DAH >= DAH_MATURITY_THRESHOLD (8 months, ~240 days)
#' - Output: Field-level yield forecasts grouped by quartile
#'
#' **Error Handling:**
#' - Missing tonnage_ha: Returns graceful fallback with NA (not zero) values
#' - No training data: Logs WARNING, returns empty field_results
#' - RDS file missing: Returns graceful fallback
#' - Prediction errors: Wrapped in tryCatch, returns fallback on failure
#'
#' @seealso create_fallback_result(), prepare_predictions()
calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cumulative_CI_vals_dir) {
  safe_log("Calculating yield prediction KPI using Random Forest with Feature Selection")

  # Whole pipeline is wrapped in tryCatch: any failure falls back to an
  # NA-valued result rather than aborting the KPI run
  tryCatch({
    # Check if tonnage_ha column is present and has valid data
    if (is.null(harvesting_data) ||
        !("tonnage_ha" %in% names(harvesting_data)) ||
        all(is.na(harvesting_data$tonnage_ha))) {
      safe_log("No harvest data available: lacking tonnage_ha column or all values are NA", "WARNING")
      return(create_fallback_result(field_boundaries))
    }

    # Check if CI quadrant RDS file exists (precomputed cumulative CI per
    # quadrant/season, produced upstream in the pipeline)
    ci_quadrant_path <- file.path(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")
    if (!file.exists(ci_quadrant_path)) {
      safe_log(paste("CI quadrant file not found at:", ci_quadrant_path), "WARNING")
      return(create_fallback_result(field_boundaries))
    }

    # Load CI quadrant data and fill missing field/sub_field values within
    # each model group (both directions, so gaps at either end are covered)
    CI_quadrant <- readRDS(ci_quadrant_path) %>%
      dplyr::group_by(model) %>%
      tidyr::fill(field, sub_field, .direction = "downup") %>%
      dplyr::ungroup()

    # Rename year column to season for consistency with the CI data
    harvesting_data_renamed <- harvesting_data %>% dplyr::rename(season = year)

    # Join CI and yield data, then keep only the row with the maximum DAH
    # (latest observation) per sub_field/season.
    # NOTE(review): grouping is by sub_field + season only, not field —
    # if sub_field names repeat across fields this collapses them; confirm
    # sub_field identifiers are globally unique.
    CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed,
                                     by = c("field", "sub_field", "season")) %>%
      dplyr::group_by(sub_field, season) %>%
      dplyr::slice(which.max(DAH)) %>%
      dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DAH, season, sub_area) %>%
      dplyr::mutate(CI_per_day = ifelse(DAH > 0, cumulative_CI / DAH, NA_real_))  # guard div-by-zero

    # Define predictors and response variables
    predictors <- c("cumulative_CI", "DAH", "CI_per_day")
    response <- "tonnage_ha"

    # Prepare training dataset (fields with observed harvest data)
    CI_and_yield_test <- CI_and_yield %>%
      as.data.frame() %>%
      dplyr::filter(!is.na(tonnage_ha))

    # Check if we have minimum training data
    if (nrow(CI_and_yield_test) == 0) {
      safe_log("No training data available: no fields with tonnage_ha observations", "WARNING")
      return(create_fallback_result(field_boundaries))
    }

    # Pre-clean training data: remove rows with any NAs in predictors or response
    # This is required because CAST::ffs doesn't support na.rm parameter
    CI_and_yield_test <- CI_and_yield_test %>%
      dplyr::filter(!dplyr::if_any(dplyr::all_of(c(predictors, response)), is.na))

    if (nrow(CI_and_yield_test) == 0) {
      safe_log("No complete training data after removing NAs in predictors/response", "WARNING")
      return(create_fallback_result(field_boundaries))
    }

    # Prepare prediction dataset (fields without harvest data, mature fields only)
    prediction_yields <- CI_and_yield %>%
      as.data.frame() %>%
      dplyr::filter(is.na(tonnage_ha) & DAH >= DAH_MATURITY_THRESHOLD) # Mature fields only

    # Configure model training parameters: 5-fold CV, parallel-enabled
    ctrl <- caret::trainControl(
      method = "cv",
      savePredictions = TRUE,
      allowParallel = TRUE,
      number = 5,
      verboseIter = FALSE
    )

    # Train the model with forward feature selection
    set.seed(202) # For reproducibility (RF and CV fold assignment are stochastic)
    safe_log("Training Random Forest model with Forward Feature Selection...")
    model_ffs_rf <- CAST::ffs(
      CI_and_yield_test[, predictors],
      CI_and_yield_test[, response],
      method = "rf",
      trControl = ctrl,
      importance = TRUE,
      withinSE = TRUE,   # prefer simpler models within one SE of the best
      tuneLength = 5
    )

    # Predict yields on validation data (same as training data, used only for
    # the in-sample RMSE fallback below)
    pred_ffs_rf <- prepare_predictions(
      stats::predict(model_ffs_rf, newdata = CI_and_yield_test),
      CI_and_yield_test
    )

    # Extract cross-validated RMSE from the model object (more honest than in-sample RMSE)
    # The CAST::ffs model stores CV results in $results data frame
    # We extract the RMSE from the best model (lowest RMSE across folds)
    if (!is.null(model_ffs_rf$results) && "RMSE" %in% names(model_ffs_rf$results)) {
      # Get minimum RMSE from cross-validation results (best model from feature selection)
      rmse_value <- min(model_ffs_rf$results$RMSE, na.rm = TRUE)
      safe_log(paste("Yield prediction RMSE (cross-validated):", round(rmse_value, 2), "t/ha"))
    } else {
      # Fallback: compute in-sample RMSE if CV results unavailable, but label it clearly
      rmse_value <- sqrt(mean((pred_ffs_rf$predicted_Tcha - CI_and_yield_test$tonnage_ha)^2, na.rm = TRUE))
      safe_log(paste("Yield prediction RMSE (in-sample/training):", round(rmse_value, 2), "t/ha"))
    }

    # Predict yields for current season (mature fields >= DAH_MATURITY_THRESHOLD days)
    # The Age_days filter is redundant with the DAH filter above (Age_days is
    # copied from DAH in prepare_predictions) but kept as a belt-and-braces check
    if (nrow(prediction_yields) > 0) {
      pred_rf_current_season <- prepare_predictions(
        stats::predict(model_ffs_rf, newdata = prediction_yields),
        prediction_yields
      ) %>%
        dplyr::filter(Age_days >= DAH_MATURITY_THRESHOLD) %>%
        dplyr::select(c("field", "Age_days", "predicted_Tcha", "season"))
    } else {
      pred_rf_current_season <- data.frame()
    }

    # Calculate summary statistics for KPI
    if (nrow(pred_rf_current_season) > 0) {
      safe_log(paste("Number of fields with yield predictions:", nrow(pred_rf_current_season)))
      safe_log(paste("Predicted yield range:",
                     round(min(pred_rf_current_season$predicted_Tcha, na.rm = TRUE), 1),
                     "-",
                     round(max(pred_rf_current_season$predicted_Tcha, na.rm = TRUE), 1),
                     "t/ha"))

      # Calculate quartiles for grouping
      yield_quartiles <- quantile(pred_rf_current_season$predicted_Tcha,
                                  probs = c(0.25, 0.5, 0.75), na.rm = TRUE)

      # Count fields in each quartile band (>= Q3, [Q1, Q3), < Q1 — the three
      # bands partition all predictions)
      top_25_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[3], na.rm = TRUE)
      average_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[1] &
                             pred_rf_current_season$predicted_Tcha < yield_quartiles[3], na.rm = TRUE)
      lowest_25_count <- sum(pred_rf_current_season$predicted_Tcha < yield_quartiles[1], na.rm = TRUE)

      # Calculate total area (conversion only needed for the non-sf branch below)
      if (!inherits(field_boundaries, "SpatVector")) {
        field_boundaries_vect <- terra::vect(field_boundaries)
      } else {
        field_boundaries_vect <- field_boundaries
      }

      # Handle both sf and SpatVector inputs for area calculation.
      # NOTE(review): sf::st_area on a projected CRS is planar while
      # terra::expanse may compute geodesically — the two branches can differ
      # slightly for the same geometry; confirm this is acceptable.
      if (inherits(field_boundaries, "sf")) {
        field_boundaries_projected <- sf::st_transform(field_boundaries, "EPSG:6933")
        field_areas <- sf::st_area(field_boundaries_projected) / 10000 # m² to hectares
      } else {
        field_boundaries_projected <- terra::project(field_boundaries_vect, "EPSG:6933")
        field_areas <- terra::expanse(field_boundaries_projected) / 10000
      }
      total_area <- sum(as.numeric(field_areas))  # as.numeric drops sf's units class

      safe_log(paste("Total area:", round(total_area, 1), "hectares"))

      # Build summary result (same schema as create_fallback_result)
      result <- data.frame(
        field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"),
        count = c(top_25_count, average_count, lowest_25_count, nrow(field_boundaries)),
        value = c(round(yield_quartiles[3], 1), round(yield_quartiles[2], 1),
                  round(yield_quartiles[1], 1), round(total_area, 1)),
        stringsAsFactors = FALSE
      )

      # Prepare field-level results
      field_level_results <- pred_rf_current_season %>%
        dplyr::select(field, Age_days, predicted_Tcha, season) %>%
        dplyr::rename(yield_forecast_t_ha = predicted_Tcha)

      safe_log("✓ Yield prediction complete")
      return(list(summary = result, field_results = field_level_results))
    } else {
      # Model trained fine, but no unharvested fields are mature enough to
      # forecast — return NA summary with an empty (not fallback-schema)
      # field_results frame
      safe_log(paste("No fields meet maturity threshold (DAH >=", DAH_MATURITY_THRESHOLD, ") for prediction"), "WARNING")
      return(list(summary = create_fallback_result(field_boundaries)$summary,
                  field_results = data.frame()))
    }

  }, error = function(e) {
    safe_log(paste("Error in yield prediction:", e$message), "ERROR")
    return(create_fallback_result(field_boundaries))
  })
}
|