SmartCane/r_app/80_calculate_kpis.R

1078 lines
35 KiB
R

# 80_CALCULATE_KPIS.R (CONSOLIDATED KPI CALCULATION)
# ============================================================================
# UNIFIED KPI CALCULATION SCRIPT
#
# This script combines:
# 1. Per-field weekly analysis (from 09c: field-level trends, phases, statuses)
# 2. Farm-level KPI metrics (from old 09: 6 high-level indicators)
#
# FEATURES:
# - Per-field analysis with SC-64 enhancements (4-week trends, CI percentiles, etc.)
# - Farm-level KPI calculation (6 metrics for executive overview)
# - Parallel processing (tile-aware, 1000+ fields supported)
# - Comprehensive Excel + RDS + CSV exports
# - Test mode for development
#
# COMMAND-LINE USAGE:
# Option 1: Rscript 80_calculate_kpis.R 2026-01-14 angata
# Arguments: [end_date] [project_dir]
#
# Option 2: Rscript 80_calculate_kpis.R 2026-01-14 angata 7
# Arguments: [end_date] [project_dir] [offset_days]
#
# Usage in run_full_pipeline.R:
# source("r_app/80_calculate_kpis.R")
# main()
# ============================================================================
# *** CONFIGURATION SECTION - MANUALLY DEFINED THRESHOLDS ***
# ============================================================================
# TEST MODE (for development with limited historical data)
TEST_MODE <- TRUE
TEST_MODE_NUM_WEEKS <- 2
# FOUR-WEEK TREND THRESHOLDS
FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5
FOUR_WEEK_TREND_GROWTH_MIN <- 0.1
FOUR_WEEK_TREND_GROWTH_MAX <- 0.5
FOUR_WEEK_TREND_NO_GROWTH_RANGE <- 0.1
FOUR_WEEK_TREND_DECLINE_MAX <- -0.1
FOUR_WEEK_TREND_DECLINE_MIN <- -0.5
FOUR_WEEK_TREND_STRONG_DECLINE_MAX <- -0.5
# CV TREND THRESHOLDS
CV_TREND_THRESHOLD_SIGNIFICANT <- 0.05
# CLOUD COVER ROUNDING INTERVALS
CLOUD_INTERVALS <- c(0, 50, 60, 70, 80, 90, 100)
# PERCENTILE CALCULATIONS
CI_PERCENTILE_LOW <- 0.10
CI_PERCENTILE_HIGH <- 0.90
# HISTORICAL DATA LOOKBACK
WEEKS_FOR_FOUR_WEEK_TREND <- 4
WEEKS_FOR_CV_TREND_SHORT <- 2
WEEKS_FOR_CV_TREND_LONG <- 8
# ============================================================================
# 1. Load required libraries
# ============================================================================
suppressPackageStartupMessages({
library(here)
library(sf)
library(terra)
library(dplyr)
library(tidyr)
library(lubridate)
library(readr)
library(readxl)
library(writexl)
library(purrr)
library(furrr)
library(future)
library(caret)
library(CAST)
library(randomForest)
tryCatch({
library(torch)
}, error = function(e) {
message("Note: torch package not available - harvest model inference will be skipped")
})
})
# ============================================================================
# PHASE AND STATUS TRIGGER DEFINITIONS
# ============================================================================
PHASE_DEFINITIONS <- data.frame(
phase = c("Germination", "Tillering", "Grand Growth", "Maturation"),
age_start = c(0, 4, 17, 39),
age_end = c(6, 16, 39, 200),
stringsAsFactors = FALSE
)
STATUS_TRIGGERS <- data.frame(
trigger = c(
"germination_started",
"germination_complete",
"stress_detected_whole_field",
"strong_recovery",
"growth_on_track",
"maturation_progressing",
"harvest_ready"
),
age_min = c(0, 0, NA, NA, 4, 39, 45),
age_max = c(6, 6, NA, NA, 39, 200, 200),
description = c(
"10% of field CI > 2",
"70% of field CI >= 2",
"CI decline > -1.5 + low CV",
"CI increase > +1.5",
"CI increasing consistently",
"High CI, stable/declining",
"Age 45+ weeks (ready to harvest)"
),
stringsAsFactors = FALSE
)
# ============================================================================
# TILE-AWARE HELPER FUNCTIONS
# ============================================================================
get_tile_ids_for_field <- function(field_geom, tile_grid) {
if (inherits(field_geom, "sf")) {
field_bbox <- sf::st_bbox(field_geom)
field_xmin <- field_bbox["xmin"]
field_xmax <- field_bbox["xmax"]
field_ymin <- field_bbox["ymin"]
field_ymax <- field_bbox["ymax"]
} else if (inherits(field_geom, "SpatVector")) {
field_bbox <- terra::ext(field_geom)
field_xmin <- field_bbox$xmin
field_xmax <- field_bbox$xmax
field_ymin <- field_bbox$ymin
field_ymax <- field_bbox$ymax
} else {
stop("field_geom must be sf or terra::vect object")
}
intersecting_tiles <- tile_grid$id[
!(tile_grid$xmax < field_xmin |
tile_grid$xmin > field_xmax |
tile_grid$ymax < field_ymin |
tile_grid$ymin > field_ymax)
]
return(as.numeric(intersecting_tiles))
}
load_tiles_for_field <- function(field_geom, tile_ids, week_num, year, mosaic_dir) {
if (length(tile_ids) == 0) {
return(NULL)
}
tiles_list <- list()
for (tile_id in sort(tile_ids)) {
tile_filename <- sprintf("week_%02d_%d_%02d.tif", week_num, year, tile_id)
tile_path <- file.path(mosaic_dir, tile_filename)
if (file.exists(tile_path)) {
tryCatch({
tile_rast <- terra::rast(tile_path)
ci_band <- terra::subset(tile_rast, 5)
tiles_list[[length(tiles_list) + 1]] <- ci_band
}, error = function(e) {
message(paste(" Warning: Could not load tile", tile_id, ":", e$message))
})
}
}
if (length(tiles_list) == 0) {
return(NULL)
}
if (length(tiles_list) == 1) {
return(tiles_list[[1]])
} else {
tryCatch({
rsrc <- terra::sprc(tiles_list)
merged <- terra::mosaic(rsrc, fun = "max")
return(merged)
}, error = function(e) {
message(paste(" Warning: Could not merge tiles:", e$message))
return(tiles_list[[1]])
})
}
}
build_tile_grid <- function(mosaic_dir, week_num, year) {
tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
if (length(tile_files) == 0) {
stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir))
}
tile_grid <- data.frame(
id = integer(),
xmin = numeric(),
xmax = numeric(),
ymin = numeric(),
ymax = numeric(),
stringsAsFactors = FALSE
)
for (tile_file in tile_files) {
tryCatch({
matches <- regmatches(basename(tile_file), regexpr("_([0-9]{2})\\.tif$", basename(tile_file)))
if (length(matches) > 0) {
tile_id <- as.integer(sub("_|\\.tif", "", matches[1]))
tile_rast <- terra::rast(tile_file)
tile_ext <- terra::ext(tile_rast)
tile_grid <- rbind(tile_grid, data.frame(
id = tile_id,
xmin = tile_ext$xmin,
xmax = tile_ext$xmax,
ymin = tile_ext$ymin,
ymax = tile_ext$ymax,
stringsAsFactors = FALSE
))
}
}, error = function(e) {
message(paste(" Warning: Could not process tile", basename(tile_file), ":", e$message))
})
}
if (nrow(tile_grid) == 0) {
stop("Could not extract extents from any tile files")
}
return(tile_grid)
}
# ============================================================================
# SC-64 ENHANCEMENT FUNCTIONS
# ============================================================================
categorize_four_week_trend <- function(ci_values_list) {
if (is.null(ci_values_list) || length(ci_values_list) < 2) {
return(NA_character_)
}
ci_values_list <- ci_values_list[!is.na(ci_values_list)]
if (length(ci_values_list) < 2) {
return(NA_character_)
}
weekly_changes <- diff(ci_values_list)
avg_weekly_change <- mean(weekly_changes, na.rm = TRUE)
if (avg_weekly_change >= FOUR_WEEK_TREND_STRONG_GROWTH_MIN) {
return("strong growth")
} else if (avg_weekly_change >= FOUR_WEEK_TREND_GROWTH_MIN &&
avg_weekly_change < FOUR_WEEK_TREND_GROWTH_MAX) {
return("growth")
} else if (abs(avg_weekly_change) <= FOUR_WEEK_TREND_NO_GROWTH_RANGE) {
return("no growth")
} else if (avg_weekly_change <= FOUR_WEEK_TREND_DECLINE_MIN &&
avg_weekly_change > FOUR_WEEK_TREND_STRONG_DECLINE_MAX) {
return("decline")
} else if (avg_weekly_change < FOUR_WEEK_TREND_STRONG_DECLINE_MAX) {
return("strong decline")
} else {
return("no growth")
}
}
round_cloud_to_intervals <- function(cloud_pct_clear) {
if (is.na(cloud_pct_clear)) {
return(NA_character_)
}
if (cloud_pct_clear < 50) return("<50%")
if (cloud_pct_clear < 60) return("50-60%")
if (cloud_pct_clear < 70) return("60-70%")
if (cloud_pct_clear < 80) return("70-80%")
if (cloud_pct_clear < 90) return("80-90%")
return(">90%")
}
get_ci_percentiles <- function(ci_values) {
if (is.null(ci_values) || length(ci_values) == 0) {
return(NA_character_)
}
ci_values <- ci_values[!is.na(ci_values)]
if (length(ci_values) == 0) {
return(NA_character_)
}
p10 <- quantile(ci_values, CI_PERCENTILE_LOW, na.rm = TRUE)
p90 <- quantile(ci_values, CI_PERCENTILE_HIGH, na.rm = TRUE)
return(sprintf("%.1f-%.1f", p10, p90))
}
calculate_cv_trend <- function(cv_current, cv_previous) {
if (is.na(cv_current) || is.na(cv_previous)) {
return(NA_real_)
}
return(round(cv_current - cv_previous, 4))
}
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
get_phase_by_age <- function(age_weeks) {
if (is.na(age_weeks)) return(NA_character_)
for (i in seq_len(nrow(PHASE_DEFINITIONS))) {
if (age_weeks >= PHASE_DEFINITIONS$age_start[i] &&
age_weeks <= PHASE_DEFINITIONS$age_end[i]) {
return(PHASE_DEFINITIONS$phase[i])
}
}
return("Unknown")
}
get_status_trigger <- function(ci_values, ci_change, age_weeks) {
if (is.na(age_weeks) || length(ci_values) == 0) return(NA_character_)
ci_values <- ci_values[!is.na(ci_values)]
if (length(ci_values) == 0) return(NA_character_)
pct_above_2 <- sum(ci_values > 2) / length(ci_values) * 100
pct_at_or_above_2 <- sum(ci_values >= 2) / length(ci_values) * 100
ci_cv <- if (mean(ci_values, na.rm = TRUE) > 0) sd(ci_values) / mean(ci_values, na.rm = TRUE) else 0
mean_ci <- mean(ci_values, na.rm = TRUE)
if (age_weeks >= 0 && age_weeks <= 6) {
if (pct_at_or_above_2 >= 70) {
return("germination_complete")
} else if (pct_above_2 > 10) {
return("germination_started")
}
}
if (age_weeks >= 45) {
return("harvest_ready")
}
if (age_weeks > 6 && !is.na(ci_change) && ci_change < -1.5 && ci_cv < 0.25) {
return("stress_detected_whole_field")
}
if (age_weeks > 6 && !is.na(ci_change) && ci_change > 1.5) {
return("strong_recovery")
}
if (age_weeks >= 4 && age_weeks < 39 && !is.na(ci_change) && ci_change > 0.2) {
return("growth_on_track")
}
if (age_weeks >= 39 && age_weeks < 45 && mean_ci > 3.5) {
return("maturation_progressing")
}
return(NA_character_)
}
load_historical_field_data <- function(project_dir, current_week, reports_dir, num_weeks = 4) {
historical_data <- list()
loaded_weeks <- c()
for (lookback in 0:(num_weeks - 1)) {
target_week <- current_week - lookback
if (target_week < 1) target_week <- target_week + 52
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", target_week), ".csv")
csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename)
if (file.exists(csv_path)) {
tryCatch({
data <- read_csv(csv_path, show_col_types = FALSE)
historical_data[[lookback + 1]] <- list(
week = target_week,
data = data
)
loaded_weeks <- c(loaded_weeks, target_week)
}, error = function(e) {
message(paste(" Warning: Could not load week", target_week, ":", e$message))
})
}
}
if (length(historical_data) == 0) {
message(paste("Warning: No historical field data found for trend calculations"))
return(NULL)
}
message(paste("Loaded", length(historical_data), "weeks of historical data:",
paste(loaded_weeks, collapse = ", ")))
return(historical_data)
}
USE_UNIFORM_AGE <- TRUE
UNIFORM_PLANTING_DATE <- as.Date("2025-01-01")
extract_planting_dates <- function(harvesting_data) {
if (USE_UNIFORM_AGE) {
message(paste("Using uniform planting date for all fields:", UNIFORM_PLANTING_DATE))
return(data.frame(
field_id = character(),
planting_date = as.Date(character()),
stringsAsFactors = FALSE
))
}
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
message("Warning: No harvesting data available.")
return(NULL)
}
tryCatch({
planting_dates <- harvesting_data %>%
arrange(field, desc(season_start)) %>%
distinct(field, .keep_all = TRUE) %>%
select(field, season_start) %>%
rename(field_id = field, planting_date = season_start) %>%
filter(!is.na(planting_date)) %>%
as.data.frame()
message(paste("Extracted planting dates for", nrow(planting_dates), "fields"))
return(planting_dates)
}, error = function(e) {
message(paste("Error extracting planting dates:", e$message))
return(NULL)
})
}
# ============================================================================
# PARALLEL FIELD ANALYSIS FUNCTION
# ============================================================================
analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week_num, year,
mosaic_dir, historical_data = NULL, planting_dates = NULL,
report_date = Sys.Date(), harvest_imminence_data = NULL,
harvesting_data = NULL) {
tryCatch({
field_id <- field_boundaries_sf$field[field_idx]
farm_section <- if ("sub_area" %in% names(field_boundaries_sf)) {
field_boundaries_sf$sub_area[field_idx]
} else {
NA_character_
}
field_name <- field_id
field_sf <- field_boundaries_sf[field_idx, ]
if (sf::st_is_empty(field_sf) || any(is.na(sf::st_geometry(field_sf)))) {
return(data.frame(
Field_id = field_id,
error = "Empty or invalid geometry"
))
}
field_area_ha <- as.numeric(sf::st_area(field_sf)) / 10000
field_area_acres <- field_area_ha / 0.404686
tile_ids <- get_tile_ids_for_field(field_sf, tile_grid)
current_ci <- load_tiles_for_field(field_sf, tile_ids, week_num, year, mosaic_dir)
if (is.null(current_ci)) {
return(data.frame(
Field_id = field_id,
error = "No tile data available"
))
}
field_vect <- terra::vect(sf::as_Spatial(field_sf))
terra::crs(field_vect) <- terra::crs(current_ci)
all_extracted <- terra::extract(current_ci, field_vect)[, 2]
current_ci_vals <- all_extracted[!is.na(all_extracted)]
num_total <- length(all_extracted)
num_data <- sum(!is.na(all_extracted))
pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0
cloud_cat <- if (num_data == 0) "No image available"
else if (pct_clear >= 99.5) "Clear view"
else "Partial coverage"
cloud_pct <- 100 - pct_clear
cloud_interval <- round_cloud_to_intervals(pct_clear)
if (length(current_ci_vals) == 0) {
return(data.frame(
Field_id = field_id,
error = "No CI values extracted"
))
}
mean_ci_current <- mean(current_ci_vals, na.rm = TRUE)
ci_std <- sd(current_ci_vals, na.rm = TRUE)
cv_current <- ci_std / mean_ci_current
range_min <- min(current_ci_vals, na.rm = TRUE)
range_max <- max(current_ci_vals, na.rm = TRUE)
range_str <- sprintf("%.1f-%.1f", range_min, range_max)
ci_percentiles_str <- get_ci_percentiles(current_ci_vals)
weekly_ci_change <- NA
previous_ci_vals <- NULL
tryCatch({
previous_ci <- load_tiles_for_field(field_sf, tile_ids, week_num - 1, year, mosaic_dir)
if (!is.null(previous_ci)) {
prev_extracted <- terra::extract(previous_ci, field_vect)[, 2]
previous_ci_vals <- prev_extracted[!is.na(prev_extracted)]
if (length(previous_ci_vals) > 0) {
mean_ci_previous <- mean(previous_ci_vals, na.rm = TRUE)
weekly_ci_change <- mean_ci_current - mean_ci_previous
}
}
}, error = function(e) {
# Silent fail
})
if (is.na(weekly_ci_change)) {
weekly_ci_change_str <- sprintf("%.1f ± %.2f", mean_ci_current, ci_std)
} else {
weekly_ci_change_str <- sprintf("%.1f ± %.2f (Δ%.1f)", mean_ci_current, ci_std, weekly_ci_change)
}
age_weeks <- NA
if (!is.null(planting_dates) && nrow(planting_dates) > 0) {
field_planting <- planting_dates %>%
filter(field_id == !!field_id) %>%
pull(planting_date)
if (length(field_planting) > 0) {
age_weeks <- as.numeric(difftime(report_date, field_planting[1], units = "weeks"))
}
}
if (USE_UNIFORM_AGE) {
age_weeks <- as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks"))
}
pct_ci_above_2 <- sum(current_ci_vals > 2) / length(current_ci_vals) * 100
pct_ci_ge_2 <- sum(current_ci_vals >= 2) / length(current_ci_vals) * 100
germination_progress_str <- NA_character_
if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks <= 6) {
germination_progress_str <- sprintf("%.0f%%", pct_ci_ge_2)
}
phase <- "Unknown"
imminent_prob_val <- NA
if (!is.null(harvest_imminence_data) && nrow(harvest_imminence_data) > 0) {
imminence_row <- harvest_imminence_data %>%
filter(field_id == !!field_id)
if (nrow(imminence_row) > 0) {
imminent_prob_val <- imminence_row$probability[1]
if (imminent_prob_val > 0.5) {
phase <- "Harvest Imminent (Model)"
}
}
}
if (phase == "Unknown") {
phase <- get_phase_by_age(age_weeks)
}
status_trigger <- get_status_trigger(current_ci_vals, weekly_ci_change, age_weeks)
nmr_weeks_in_phase <- 1
four_week_trend <- NA_character_
ci_values_for_trend <- c(mean_ci_current)
if (!is.null(historical_data) && length(historical_data) > 0) {
for (hist in historical_data) {
hist_week <- hist$week
hist_data <- hist$data
field_row <- hist_data %>%
filter(Field_id == !!field_id)
if (nrow(field_row) > 0 && !is.na(field_row$Mean_CI[1])) {
ci_values_for_trend <- c(field_row$Mean_CI[1], ci_values_for_trend)
}
}
if (length(ci_values_for_trend) >= 2) {
four_week_trend <- categorize_four_week_trend(ci_values_for_trend)
}
}
cv_trend_short <- NA_real_
cv_trend_long <- NA_real_
if (!is.null(historical_data) && length(historical_data) > 0) {
if (length(historical_data) >= 2) {
cv_2w <- historical_data[[2]]$data %>%
filter(Field_id == !!field_id) %>%
pull(CV)
if (length(cv_2w) > 0 && !is.na(cv_2w[1])) {
cv_trend_short <- calculate_cv_trend(cv_current, cv_2w[1])
}
}
if (length(historical_data) >= 8) {
cv_8w <- historical_data[[8]]$data %>%
filter(Field_id == !!field_id) %>%
pull(CV)
if (length(cv_8w) > 0 && !is.na(cv_8w[1])) {
cv_trend_long <- calculate_cv_trend(cv_current, cv_8w[1])
}
}
}
last_harvest_date <- NA_character_
if (!is.null(harvesting_data) && nrow(harvesting_data) > 0) {
last_harvest_row <- harvesting_data %>%
filter(field == !!field_id) %>%
arrange(desc(season_start)) %>%
slice(1)
if (nrow(last_harvest_row) > 0 && !is.na(last_harvest_row$season_start[1])) {
last_harvest_date <- as.character(last_harvest_row$season_start[1])
}
}
result <- data.frame(
Field_id = field_id,
Farm_Section = farm_section,
Field_name = field_name,
Hectare = round(field_area_ha, 2),
Acreage = round(field_area_acres, 2),
Mean_CI = round(mean_ci_current, 2),
Weekly_ci_change = if (is.na(weekly_ci_change)) NA_real_ else round(weekly_ci_change, 2),
Weekly_ci_change_str = weekly_ci_change_str,
Four_week_trend = four_week_trend,
Last_harvest_or_planting_date = last_harvest_date,
Age_week = if (is.na(age_weeks)) NA_integer_ else as.integer(round(age_weeks)),
`Phase (age based)` = phase,
nmr_weeks_in_this_phase = nmr_weeks_in_phase,
Germination_progress = germination_progress_str,
Imminent_prob = imminent_prob_val,
Status_trigger = status_trigger,
CI_range = range_str,
CI_Percentiles = ci_percentiles_str,
CV = round(cv_current, 4),
CV_Trend_Short_Term = cv_trend_short,
CV_Trend_Long_Term = cv_trend_long,
Cloud_pct_clear = pct_clear,
Cloud_pct_clear_interval = cloud_interval,
Cloud_pct = cloud_pct,
Cloud_category = cloud_cat,
stringsAsFactors = FALSE
)
return(result)
}, error = function(e) {
message(paste("Error analyzing field", field_idx, ":", e$message))
return(data.frame(
Field_id = NA_character_,
error = e$message
))
})
}
# ============================================================================
# SUMMARY GENERATION
# ============================================================================
generate_field_analysis_summary <- function(field_df) {
message("Generating summary statistics...")
total_acreage <- sum(field_df$Acreage, na.rm = TRUE)
germination_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Germination"], na.rm = TRUE)
tillering_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Tillering"], na.rm = TRUE)
grand_growth_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Grand Growth"], na.rm = TRUE)
maturation_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Maturation"], na.rm = TRUE)
unknown_phase_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Unknown"], na.rm = TRUE)
harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE)
stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE)
recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE)
growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE)
germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE)
germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE)
no_trigger_acreage <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE)
clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE)
partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE)
no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE)
total_fields <- nrow(field_df)
clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE)
partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE)
no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE)
summary_df <- data.frame(
Category = c(
"--- PHASE DISTRIBUTION ---",
"Germination",
"Tillering",
"Grand Growth",
"Maturation",
"Unknown phase",
"--- STATUS TRIGGERS ---",
"Harvest ready",
"Stress detected",
"Strong recovery",
"Growth on track",
"Germination complete",
"Germination started",
"No trigger",
"--- CLOUD COVERAGE (FIELDS) ---",
"Clear view",
"Partial coverage",
"No image available",
"--- CLOUD COVERAGE (ACREAGE) ---",
"Clear view",
"Partial coverage",
"No image available",
"--- TOTAL ---",
"Total Acreage"
),
Acreage = c(
NA,
round(germination_acreage, 2),
round(tillering_acreage, 2),
round(grand_growth_acreage, 2),
round(maturation_acreage, 2),
round(unknown_phase_acreage, 2),
NA,
round(harvest_ready_acreage, 2),
round(stress_acreage, 2),
round(recovery_acreage, 2),
round(growth_on_track_acreage, 2),
round(germination_complete_acreage, 2),
round(germination_started_acreage, 2),
round(no_trigger_acreage, 2),
NA,
paste0(clear_fields, " fields"),
paste0(partial_fields, " fields"),
paste0(no_image_fields, " fields"),
NA,
round(clear_acreage, 2),
round(partial_acreage, 2),
round(no_image_acreage, 2),
NA,
round(total_acreage, 2)
),
stringsAsFactors = FALSE
)
return(summary_df)
}
# ============================================================================
# EXPORT FUNCTIONS
# ============================================================================
export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, reports_dir) {
message("Exporting per-field analysis to Excel, CSV, and RDS...")
output_subdir <- file.path(reports_dir, "kpis", "field_analysis")
if (!dir.exists(output_subdir)) {
dir.create(output_subdir, recursive = TRUE)
}
excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".xlsx")
excel_path <- file.path(output_subdir, excel_filename)
excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
sheets <- list(
"Field Data" = field_df,
"Summary" = summary_df
)
write_xlsx(sheets, excel_path)
message(paste("✓ Field analysis Excel exported to:", excel_path))
kpi_data <- list(
field_analysis = field_df,
field_analysis_summary = summary_df,
metadata = list(
current_week = current_week,
project = project_dir,
created_at = Sys.time()
)
)
rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d", current_week), ".rds")
rds_path <- file.path(reports_dir, "kpis", rds_filename)
saveRDS(kpi_data, rds_path)
message(paste("✓ Field analysis RDS exported to:", rds_path))
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".csv")
csv_path <- file.path(output_subdir, csv_filename)
write_csv(field_df, csv_path)
message(paste("✓ Field analysis CSV exported to:", csv_path))
return(list(excel = excel_path, rds = rds_path, csv = csv_path))
}
# ============================================================================
# FARM-LEVEL KPI CALCULATION (FROM OLD 09_CALCULATE_KPIS.R)
# ============================================================================
calculate_and_export_farm_kpis <- function(report_date, project_dir, field_boundaries_sf,
harvesting_data, cumulative_CI_vals_dir,
weekly_CI_mosaic, reports_dir) {
message("\n=== CALCULATING FARM-LEVEL KPIs ===")
message("(6 high-level KPI metrics)")
tryCatch({
source(here("r_app", "kpi_utils.R"))
}, error = function(e) {
message(paste("Warning: Could not load kpi_utils.R:", e$message))
message("Farm-level KPIs will be skipped")
return(NULL)
})
if (!exists("calculate_all_kpis")) {
message("Warning: calculate_all_kpis() function not found in kpi_utils.R")
return(NULL)
}
output_dir <- file.path(reports_dir, "kpis")
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
tryCatch({
kpi_results <- calculate_all_kpis(
report_date = report_date,
output_dir = output_dir,
field_boundaries_sf = field_boundaries_sf,
harvesting_data = harvesting_data,
cumulative_CI_vals_dir = cumulative_CI_vals_dir,
weekly_CI_mosaic = weekly_CI_mosaic,
reports_dir = reports_dir,
project_dir = project_dir
)
# Print KPI summary
cat("\n=== FARM-LEVEL KPI SUMMARY ===\n")
cat("Report Date:", as.character(kpi_results$metadata$report_date), "\n")
cat("Current Week:", kpi_results$metadata$current_week, "\n")
cat("Previous Week:", kpi_results$metadata$previous_week, "\n")
cat("Total Fields Analyzed:", kpi_results$metadata$total_fields, "\n")
cat("Calculation Time:", as.character(kpi_results$metadata$calculation_time), "\n")
cat("\n--- KPI Metrics ---\n")
cat("Field Uniformity Summary:\n")
print(kpi_results$field_uniformity_summary)
cat("\nArea Change Summary:\n")
print(kpi_results$area_change)
cat("\nTCH Forecasted:\n")
print(kpi_results$tch_forecasted)
cat("\nGrowth Decline Index:\n")
print(kpi_results$growth_decline)
cat("\nWeed Presence Score:\n")
print(kpi_results$weed_presence)
cat("\nGap Filling Score:\n")
print(kpi_results$gap_filling)
return(kpi_results)
}, error = function(e) {
message(paste("Error calculating farm-level KPIs:", e$message))
return(NULL)
})
}
# ============================================================================
# MAIN
# ============================================================================
main <- function() {
# Parse command-line arguments
args <- commandArgs(trailingOnly = TRUE)
# end_date (arg 1)
end_date <- if (length(args) >= 1 && !is.na(args[1])) {
as.Date(args[1])
} else if (exists("end_date_str", envir = .GlobalEnv)) {
as.Date(get("end_date_str", envir = .GlobalEnv))
} else {
Sys.Date()
}
# project_dir (arg 2)
project_dir <- if (length(args) >= 2 && !is.na(args[2])) {
as.character(args[2])
} else if (exists("project_dir", envir = .GlobalEnv)) {
get("project_dir", envir = .GlobalEnv)
} else {
"angata"
}
# offset (arg 3) - for backward compatibility with old 09
offset <- if (length(args) >= 3 && !is.na(args[3])) {
as.numeric(args[3])
} else {
7
}
assign("project_dir", project_dir, envir = .GlobalEnv)
assign("end_date_str", format(end_date, "%Y-%m-%d"), envir = .GlobalEnv)
message("\n" %+% strrep("=", 70))
message("80_CALCULATE_KPIs.R - CONSOLIDATED KPI CALCULATION")
message(strrep("=", 70))
message("Date:", format(end_date, "%Y-%m-%d"))
message("Project:", project_dir)
message("Mode: Per-field analysis (SC-64) + Farm-level KPIs")
message("")
# Load configuration and utilities
source(here("r_app", "crop_messaging_utils.R"))
tryCatch({
source(here("r_app", "parameters_project.R"))
}, error = function(e) {
stop("Error loading parameters_project.R: ", e$message)
})
tryCatch({
source(here("r_app", "30_growth_model_utils.R"))
}, error = function(e) {
warning("30_growth_model_utils.R not found - yield prediction KPI will use placeholder data")
})
# ========== PER-FIELD ANALYSIS (SC-64) ==========
message("\n" %+% strrep("-", 70))
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)")
message(strrep("-", 70))
current_week <- as.numeric(format(end_date, "%V"))
year <- as.numeric(format(end_date, "%Y"))
previous_week <- current_week - 1
if (previous_week < 1) previous_week <- 52
message(paste("Week:", current_week, "/ Year:", year))
message("Building tile grid from available weekly tiles...")
tile_grid <- build_tile_grid(weekly_tile_max, current_week, year)
message(paste(" Found", nrow(tile_grid), "tiles"))
tryCatch({
boundaries_result <- load_field_boundaries(data_dir)
if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
field_boundaries_sf <- boundaries_result$field_boundaries_sf
} else {
field_boundaries_sf <- boundaries_result
}
if (nrow(field_boundaries_sf) == 0) {
stop("No fields loaded from boundaries")
}
message(paste(" Loaded", nrow(field_boundaries_sf), "fields"))
}, error = function(e) {
stop("ERROR loading field boundaries: ", e$message)
})
message("Loading historical field data for trend calculations...")
num_weeks_to_load <- if (TEST_MODE) TEST_MODE_NUM_WEEKS else max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG)
if (TEST_MODE) {
message(paste(" TEST MODE: Loading only", num_weeks_to_load, "weeks"))
}
historical_data <- load_historical_field_data(project_dir, current_week, reports_dir, num_weeks = num_weeks_to_load)
planting_dates <- extract_planting_dates(harvesting_data)
message("Setting up parallel processing...")
current_plan <- class(future::plan())[1]
if (current_plan == "sequential") {
num_workers <- parallel::detectCores() - 1
message(paste(" Using", num_workers, "workers"))
future::plan(future::multisession, workers = num_workers)
}
message("Analyzing", nrow(field_boundaries_sf), "fields in parallel...")
field_analysis_list <- furrr::future_map(
seq_len(nrow(field_boundaries_sf)),
~ analyze_single_field(
field_idx = .,
field_boundaries_sf = field_boundaries_sf,
tile_grid = tile_grid,
week_num = current_week,
year = year,
mosaic_dir = weekly_tile_max,
historical_data = historical_data,
planting_dates = planting_dates,
report_date = end_date,
harvest_imminence_data = NULL,
harvesting_data = harvesting_data
),
.progress = TRUE,
.options = furrr::furrr_options(seed = TRUE)
)
field_analysis_df <- dplyr::bind_rows(field_analysis_list)
if (nrow(field_analysis_df) == 0) {
stop("No fields analyzed successfully!")
}
message(paste("✓ Analyzed", nrow(field_analysis_df), "fields"))
summary_statistics_df <- generate_field_analysis_summary(field_analysis_df)
export_paths <- export_field_analysis_excel(
field_analysis_df,
summary_statistics_df,
project_dir,
current_week,
reports_dir
)
cat("\n--- Per-field Results (first 10) ---\n")
available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_trigger", "Cloud_category")
available_cols <- available_cols[available_cols %in% names(field_analysis_df)]
if (length(available_cols) > 0) {
print(head(field_analysis_df[, available_cols], 10))
}
cat("\n--- Summary Statistics ---\n")
print(summary_statistics_df)
# ========== FARM-LEVEL KPI CALCULATION ==========
farm_kpi_results <- calculate_and_export_farm_kpis(
report_date = end_date,
project_dir = project_dir,
field_boundaries_sf = field_boundaries_sf,
harvesting_data = harvesting_data,
cumulative_CI_vals_dir = cumulative_CI_vals_dir,
weekly_CI_mosaic = weekly_CI_mosaic,
reports_dir = reports_dir
)
# ========== FINAL SUMMARY ==========
cat("\n" %+% strrep("=", 70) %+% "\n")
cat("80_CALCULATE_KPIs.R - COMPLETION SUMMARY\n")
cat(strrep("=", 70) %+% "\n")
cat("Per-field analysis fields analyzed:", nrow(field_analysis_df), "\n")
cat("Excel export:", export_paths$excel, "\n")
cat("RDS export:", export_paths$rds, "\n")
cat("CSV export:", export_paths$csv, "\n")
if (!is.null(farm_kpi_results)) {
cat("\nFarm-level KPIs: CALCULATED\n")
} else {
cat("\nFarm-level KPIs: SKIPPED (kpi_utils.R not available)\n")
}
cat("\n✓ Consolidated KPI calculation complete!\n")
cat(" - Per-field data exported\n")
cat(" - Farm-level KPIs calculated\n")
cat(" - All outputs in:", reports_dir, "\n\n")
}
if (sys.nframe() == 0) {
main()
}