- Updated `detect_mosaic_mode` function to check for grid-size subdirectories in addition to tile-named files. - Added comprehensive tests for DOY reset logic in `test_doy_logic.py`. - Implemented feature extraction tests in `test_feature_extraction.py`. - Created tests for growing window method in `test_growing_window_only.py`. - Developed a complete model inference test in `test_model_inference.py`. - Added a debug script for testing two-step refinement logic in `test_script22_debug.py`.
1595 lines
55 KiB
R
1595 lines
55 KiB
R
# 80_CALCULATE_KPIS.R (CONSOLIDATED KPI CALCULATION)
|
|
# ============================================================================
|
|
# UNIFIED KPI CALCULATION SCRIPT
|
|
#
|
|
# This script combines:
|
|
# 1. Per-field weekly analysis (from 09c: field-level trends, phases, statuses)
|
|
# 2. Farm-level KPI metrics (from old 09: 6 high-level indicators)
|
|
#
|
|
# FEATURES:
|
|
# - Per-field analysis with SC-64 enhancements (4-week trends, CI percentiles, etc.)
|
|
# - Farm-level KPI calculation (6 metrics for executive overview)
|
|
# - Parallel processing (tile-aware, 1000+ fields supported)
|
|
# - Comprehensive Excel + RDS + CSV exports
|
|
# - Test mode for development
|
|
#
|
|
# COMMAND-LINE USAGE:
|
|
# Option 1: Rscript 80_calculate_kpis.R 2026-01-14 angata
|
|
# Arguments: [end_date] [project_dir]
|
|
#
|
|
# Option 2: Rscript 80_calculate_kpis.R 2026-01-14 angata 7
|
|
# Arguments: [end_date] [project_dir] [offset_days]
|
|
#
|
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-01-12 angata 7
|
|
#
|
|
# Usage in run_full_pipeline.R:
|
|
# source("r_app/80_calculate_kpis.R")
|
|
# main()
|
|
|
|
# ============================================================================
|
|
# *** CONFIGURATION SECTION - MANUALLY DEFINED THRESHOLDS ***
|
|
# ============================================================================
|
|
|
|
# TEST MODE (for development with limited historical data)
|
|
TEST_MODE <- TRUE
|
|
TEST_MODE_NUM_WEEKS <- 2
|
|
|
|
# FOUR-WEEK TREND THRESHOLDS
|
|
FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5
|
|
FOUR_WEEK_TREND_GROWTH_MIN <- 0.1
|
|
FOUR_WEEK_TREND_GROWTH_MAX <- 0.5
|
|
FOUR_WEEK_TREND_NO_GROWTH_RANGE <- 0.1
|
|
FOUR_WEEK_TREND_DECLINE_MAX <- -0.1
|
|
FOUR_WEEK_TREND_DECLINE_MIN <- -0.5
|
|
FOUR_WEEK_TREND_STRONG_DECLINE_MAX <- -0.5
|
|
|
|
# CV TREND THRESHOLDS
|
|
CV_TREND_THRESHOLD_SIGNIFICANT <- 0.05
|
|
|
|
# CLOUD COVER ROUNDING INTERVALS
|
|
CLOUD_INTERVALS <- c(0, 50, 60, 70, 80, 90, 100)
|
|
|
|
# PERCENTILE CALCULATIONS
|
|
CI_PERCENTILE_LOW <- 0.10
|
|
CI_PERCENTILE_HIGH <- 0.90
|
|
|
|
# HISTORICAL DATA LOOKBACK
|
|
WEEKS_FOR_FOUR_WEEK_TREND <- 4
|
|
WEEKS_FOR_CV_TREND_SHORT <- 2
|
|
WEEKS_FOR_CV_TREND_LONG <- 8
|
|
|
|
# ============================================================================
|
|
# 1. Load required libraries
|
|
# ============================================================================
|
|
|
|
suppressPackageStartupMessages({
|
|
library(here)
|
|
library(sf)
|
|
library(terra)
|
|
library(dplyr)
|
|
library(tidyr)
|
|
library(lubridate)
|
|
library(readr)
|
|
library(readxl)
|
|
library(writexl)
|
|
library(purrr)
|
|
library(furrr)
|
|
library(future)
|
|
library(caret)
|
|
library(CAST)
|
|
library(randomForest)
|
|
tryCatch({
|
|
library(torch)
|
|
}, error = function(e) {
|
|
message("Note: torch package not available - harvest model inference will be skipped")
|
|
})
|
|
})
|
|
|
|
# ============================================================================
|
|
# PHASE AND STATUS TRIGGER DEFINITIONS
|
|
# ============================================================================
|
|
|
|
PHASE_DEFINITIONS <- data.frame(
|
|
phase = c("Germination", "Tillering", "Grand Growth", "Maturation"),
|
|
age_start = c(0, 4, 17, 39),
|
|
age_end = c(6, 16, 39, 200),
|
|
stringsAsFactors = FALSE
|
|
)
|
|
|
|
STATUS_TRIGGERS <- data.frame(
|
|
trigger = c(
|
|
"germination_started",
|
|
"germination_complete",
|
|
"stress_detected_whole_field",
|
|
"strong_recovery",
|
|
"growth_on_track",
|
|
"maturation_progressing",
|
|
"harvest_ready"
|
|
),
|
|
age_min = c(0, 0, NA, NA, 4, 39, 45),
|
|
age_max = c(6, 6, NA, NA, 39, 200, 200),
|
|
description = c(
|
|
"10% of field CI > 2",
|
|
"70% of field CI >= 2",
|
|
"CI decline > -1.5 + low CV",
|
|
"CI increase > +1.5",
|
|
"CI increasing consistently",
|
|
"High CI, stable/declining",
|
|
"Age 45+ weeks (ready to harvest)"
|
|
),
|
|
stringsAsFactors = FALSE
|
|
)
|
|
|
|
# ============================================================================
|
|
# TILE-AWARE HELPER FUNCTIONS
|
|
# ============================================================================
|
|
|
|
get_tile_ids_for_field <- function(field_geom, tile_grid, field_id = NULL) {
|
|
if (inherits(field_geom, "sf")) {
|
|
field_bbox <- sf::st_bbox(field_geom)
|
|
field_xmin <- field_bbox["xmin"]
|
|
field_xmax <- field_bbox["xmax"]
|
|
field_ymin <- field_bbox["ymin"]
|
|
field_ymax <- field_bbox["ymax"]
|
|
} else if (inherits(field_geom, "SpatVector")) {
|
|
field_bbox <- terra::ext(field_geom)
|
|
field_xmin <- field_bbox$xmin
|
|
field_xmax <- field_bbox$xmax
|
|
field_ymin <- field_bbox$ymin
|
|
field_ymax <- field_bbox$ymax
|
|
} else {
|
|
stop("field_geom must be sf or terra::vect object")
|
|
}
|
|
|
|
# DEBUG: Print bbox info for first field
|
|
if (!is.null(field_id) && field_id == "1391") {
|
|
message(paste("[DEBUG get_tile_ids] Field bbox - xmin:", field_xmin, "xmax:", field_xmax,
|
|
"ymin:", field_ymin, "ymax:", field_ymax))
|
|
message(paste("[DEBUG get_tile_ids] tile_grid sample: id=", tile_grid$id[1],
|
|
"xmin=", tile_grid$xmin[1], "xmax=", tile_grid$xmax[1],
|
|
"ymin=", tile_grid$ymin[1], "ymax=", tile_grid$ymax[1]))
|
|
message(paste("[DEBUG get_tile_ids] tile_grid CRS:", sf::st_crs(tile_grid)))
|
|
message(paste("[DEBUG get_tile_ids] field CRS:", sf::st_crs(field_geom)))
|
|
}
|
|
|
|
intersecting_tiles <- tile_grid$id[
|
|
!(tile_grid$xmax < field_xmin |
|
|
tile_grid$xmin > field_xmax |
|
|
tile_grid$ymax < field_ymin |
|
|
tile_grid$ymin > field_ymax)
|
|
]
|
|
|
|
return(as.numeric(intersecting_tiles))
|
|
}
|
|
|
|
load_tiles_for_field <- function(field_geom, tile_ids, week_num, year, mosaic_dir) {
|
|
if (length(tile_ids) == 0) {
|
|
return(NULL)
|
|
}
|
|
|
|
tiles_list <- list()
|
|
for (tile_id in sort(tile_ids)) {
|
|
tile_filename <- sprintf("week_%02d_%d_%02d.tif", week_num, year, tile_id)
|
|
tile_path <- file.path(mosaic_dir, tile_filename)
|
|
|
|
if (file.exists(tile_path)) {
|
|
tryCatch({
|
|
tile_rast <- terra::rast(tile_path)
|
|
ci_band <- terra::subset(tile_rast, 5)
|
|
tiles_list[[length(tiles_list) + 1]] <- ci_band
|
|
}, error = function(e) {
|
|
message(paste(" Warning: Could not load tile", tile_id, ":", e$message))
|
|
})
|
|
}
|
|
}
|
|
|
|
if (length(tiles_list) == 0) {
|
|
return(NULL)
|
|
}
|
|
|
|
if (length(tiles_list) == 1) {
|
|
return(tiles_list[[1]])
|
|
} else {
|
|
tryCatch({
|
|
rsrc <- terra::sprc(tiles_list)
|
|
merged <- terra::mosaic(rsrc, fun = "max")
|
|
return(merged)
|
|
}, error = function(e) {
|
|
message(paste(" Warning: Could not merge tiles:", e$message))
|
|
return(tiles_list[[1]])
|
|
})
|
|
}
|
|
}
|
|
|
|
build_tile_grid <- function(mosaic_dir, week_num, year) {
|
|
# Handle grid-size subdirectories (e.g., weekly_tile_max/5x5/)
|
|
# First check if mosaic_dir contains grid-size subdirectories
|
|
detected_grid_size <- NA
|
|
if (dir.exists(mosaic_dir)) {
|
|
subfolders <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE)
|
|
grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
|
|
|
|
if (length(grid_patterns) > 0) {
|
|
# Use the first grid-size subdirectory found
|
|
detected_grid_size <- grid_patterns[1]
|
|
mosaic_dir <- file.path(mosaic_dir, detected_grid_size)
|
|
message(paste(" Using grid-size subdirectory:", detected_grid_size))
|
|
}
|
|
}
|
|
|
|
tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
|
|
tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
|
|
|
|
if (length(tile_files) == 0) {
|
|
stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir))
|
|
}
|
|
|
|
tile_grid <- data.frame(
|
|
id = integer(),
|
|
xmin = numeric(),
|
|
xmax = numeric(),
|
|
ymin = numeric(),
|
|
ymax = numeric(),
|
|
stringsAsFactors = FALSE
|
|
)
|
|
|
|
for (tile_file in tile_files) {
|
|
tryCatch({
|
|
matches <- regmatches(basename(tile_file), regexpr("_([0-9]{2})\\.tif$", basename(tile_file)))
|
|
if (length(matches) > 0) {
|
|
tile_id <- as.integer(sub("_|\\.tif", "", matches[1]))
|
|
tile_rast <- terra::rast(tile_file)
|
|
tile_ext <- terra::ext(tile_rast)
|
|
tile_grid <- rbind(tile_grid, data.frame(
|
|
id = tile_id,
|
|
xmin = tile_ext$xmin,
|
|
xmax = tile_ext$xmax,
|
|
ymin = tile_ext$ymin,
|
|
ymax = tile_ext$ymax,
|
|
stringsAsFactors = FALSE
|
|
))
|
|
}
|
|
}, error = function(e) {
|
|
message(paste(" Warning: Could not process tile", basename(tile_file), ":", e$message))
|
|
})
|
|
}
|
|
|
|
if (nrow(tile_grid) == 0) {
|
|
stop("Could not extract extents from any tile files")
|
|
}
|
|
|
|
# RETURN BOTH the grid AND the corrected mosaic directory path
|
|
return(list(
|
|
tile_grid = tile_grid,
|
|
mosaic_dir = mosaic_dir,
|
|
grid_size = detected_grid_size
|
|
))
|
|
}
|
|
|
|
# ============================================================================
|
|
# SC-64 ENHANCEMENT FUNCTIONS
|
|
# ============================================================================
|
|
|
|
categorize_four_week_trend <- function(ci_values_list) {
|
|
if (is.null(ci_values_list) || length(ci_values_list) < 2) {
|
|
return(NA_character_)
|
|
}
|
|
|
|
ci_values_list <- ci_values_list[!is.na(ci_values_list)]
|
|
if (length(ci_values_list) < 2) {
|
|
return(NA_character_)
|
|
}
|
|
|
|
weekly_changes <- diff(ci_values_list)
|
|
avg_weekly_change <- mean(weekly_changes, na.rm = TRUE)
|
|
|
|
if (avg_weekly_change >= FOUR_WEEK_TREND_STRONG_GROWTH_MIN) {
|
|
return("strong growth")
|
|
} else if (avg_weekly_change >= FOUR_WEEK_TREND_GROWTH_MIN &&
|
|
avg_weekly_change < FOUR_WEEK_TREND_GROWTH_MAX) {
|
|
return("growth")
|
|
} else if (abs(avg_weekly_change) <= FOUR_WEEK_TREND_NO_GROWTH_RANGE) {
|
|
return("no growth")
|
|
} else if (avg_weekly_change <= FOUR_WEEK_TREND_DECLINE_MIN &&
|
|
avg_weekly_change > FOUR_WEEK_TREND_STRONG_DECLINE_MAX) {
|
|
return("decline")
|
|
} else if (avg_weekly_change < FOUR_WEEK_TREND_STRONG_DECLINE_MAX) {
|
|
return("strong decline")
|
|
} else {
|
|
return("no growth")
|
|
}
|
|
}
|
|
|
|
round_cloud_to_intervals <- function(cloud_pct_clear) {
|
|
if (is.na(cloud_pct_clear)) {
|
|
return(NA_character_)
|
|
}
|
|
|
|
if (cloud_pct_clear < 50) return("<50%")
|
|
if (cloud_pct_clear < 60) return("50-60%")
|
|
if (cloud_pct_clear < 70) return("60-70%")
|
|
if (cloud_pct_clear < 80) return("70-80%")
|
|
if (cloud_pct_clear < 90) return("80-90%")
|
|
return(">90%")
|
|
}
|
|
|
|
get_ci_percentiles <- function(ci_values) {
|
|
if (is.null(ci_values) || length(ci_values) == 0) {
|
|
return(NA_character_)
|
|
}
|
|
|
|
ci_values <- ci_values[!is.na(ci_values)]
|
|
if (length(ci_values) == 0) {
|
|
return(NA_character_)
|
|
}
|
|
|
|
p10 <- quantile(ci_values, CI_PERCENTILE_LOW, na.rm = TRUE)
|
|
p90 <- quantile(ci_values, CI_PERCENTILE_HIGH, na.rm = TRUE)
|
|
|
|
return(sprintf("%.1f-%.1f", p10, p90))
|
|
}
|
|
|
|
calculate_cv_trend <- function(cv_current, cv_previous) {
|
|
if (is.na(cv_current) || is.na(cv_previous)) {
|
|
return(NA_real_)
|
|
}
|
|
return(round(cv_current - cv_previous, 4))
|
|
}
|
|
|
|
# ============================================================================
|
|
# HELPER FUNCTIONS
|
|
# ============================================================================
|
|
|
|
get_phase_by_age <- function(age_weeks) {
|
|
if (is.na(age_weeks)) return(NA_character_)
|
|
for (i in seq_len(nrow(PHASE_DEFINITIONS))) {
|
|
if (age_weeks >= PHASE_DEFINITIONS$age_start[i] &&
|
|
age_weeks <= PHASE_DEFINITIONS$age_end[i]) {
|
|
return(PHASE_DEFINITIONS$phase[i])
|
|
}
|
|
}
|
|
return("Unknown")
|
|
}
|
|
|
|
get_status_trigger <- function(ci_values, ci_change, age_weeks) {
|
|
if (is.na(age_weeks) || length(ci_values) == 0) return(NA_character_)
|
|
|
|
ci_values <- ci_values[!is.na(ci_values)]
|
|
if (length(ci_values) == 0) return(NA_character_)
|
|
|
|
pct_above_2 <- sum(ci_values > 2) / length(ci_values) * 100
|
|
pct_at_or_above_2 <- sum(ci_values >= 2) / length(ci_values) * 100
|
|
ci_cv <- if (mean(ci_values, na.rm = TRUE) > 0) sd(ci_values) / mean(ci_values, na.rm = TRUE) else 0
|
|
mean_ci <- mean(ci_values, na.rm = TRUE)
|
|
|
|
if (age_weeks >= 0 && age_weeks <= 6) {
|
|
if (pct_at_or_above_2 >= 70) {
|
|
return("germination_complete")
|
|
} else if (pct_above_2 > 10) {
|
|
return("germination_started")
|
|
}
|
|
}
|
|
|
|
if (age_weeks >= 45) {
|
|
return("harvest_ready")
|
|
}
|
|
|
|
if (age_weeks > 6 && !is.na(ci_change) && ci_change < -1.5 && ci_cv < 0.25) {
|
|
return("stress_detected_whole_field")
|
|
}
|
|
|
|
if (age_weeks > 6 && !is.na(ci_change) && ci_change > 1.5) {
|
|
return("strong_recovery")
|
|
}
|
|
|
|
if (age_weeks >= 4 && age_weeks < 39 && !is.na(ci_change) && ci_change > 0.2) {
|
|
return("growth_on_track")
|
|
}
|
|
|
|
if (age_weeks >= 39 && age_weeks < 45 && mean_ci > 3.5) {
|
|
return("maturation_progressing")
|
|
}
|
|
|
|
return(NA_character_)
|
|
}
|
|
|
|
load_historical_field_data <- function(project_dir, current_week, reports_dir, num_weeks = 4) {
|
|
historical_data <- list()
|
|
loaded_weeks <- c()
|
|
|
|
for (lookback in 0:(num_weeks - 1)) {
|
|
target_week <- current_week - lookback
|
|
if (target_week < 1) target_week <- target_week + 52
|
|
|
|
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", target_week), ".csv")
|
|
csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename)
|
|
|
|
if (file.exists(csv_path)) {
|
|
tryCatch({
|
|
data <- read_csv(csv_path, show_col_types = FALSE)
|
|
historical_data[[lookback + 1]] <- list(
|
|
week = target_week,
|
|
data = data
|
|
)
|
|
loaded_weeks <- c(loaded_weeks, target_week)
|
|
}, error = function(e) {
|
|
message(paste(" Warning: Could not load week", target_week, ":", e$message))
|
|
})
|
|
}
|
|
}
|
|
|
|
if (length(historical_data) == 0) {
|
|
message(paste("Warning: No historical field data found for trend calculations"))
|
|
return(NULL)
|
|
}
|
|
|
|
message(paste("Loaded", length(historical_data), "weeks of historical data:",
|
|
paste(loaded_weeks, collapse = ", ")))
|
|
|
|
return(historical_data)
|
|
}
|
|
|
|
USE_UNIFORM_AGE <- TRUE
|
|
UNIFORM_PLANTING_DATE <- as.Date("2025-01-01")
|
|
|
|
extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) {
|
|
if (USE_UNIFORM_AGE) {
|
|
message(paste("Using uniform planting date for all fields:", UNIFORM_PLANTING_DATE))
|
|
# Return a data frame with all field IDs mapped to uniform planting date
|
|
if (!is.null(field_boundaries_sf)) {
|
|
return(data.frame(
|
|
field_id = field_boundaries_sf$field,
|
|
date = rep(UNIFORM_PLANTING_DATE, nrow(field_boundaries_sf)),
|
|
stringsAsFactors = FALSE
|
|
))
|
|
} else {
|
|
# Fallback if field_boundaries_sf not provided
|
|
return(NULL)
|
|
}
|
|
}
|
|
|
|
if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
|
|
message("Warning: No harvesting data available.")
|
|
return(NULL)
|
|
}
|
|
|
|
tryCatch({
|
|
planting_dates <- harvesting_data %>%
|
|
arrange(field, desc(season_start)) %>%
|
|
distinct(field, .keep_all = TRUE) %>%
|
|
select(field, season_start) %>%
|
|
rename(field_id = field, planting_date = season_start) %>%
|
|
filter(!is.na(planting_date)) %>%
|
|
as.data.frame()
|
|
|
|
message(paste("Extracted planting dates for", nrow(planting_dates), "fields"))
|
|
return(planting_dates)
|
|
}, error = function(e) {
|
|
message(paste("Error extracting planting dates:", e$message))
|
|
return(NULL)
|
|
})
|
|
}
|
|
|
|
# ============================================================================
|
|
# PARALLEL FIELD ANALYSIS FUNCTION
|
|
# ============================================================================
|
|
|
|
analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week_num, year,
|
|
mosaic_dir, historical_data = NULL, planting_dates = NULL,
|
|
report_date = Sys.Date(), harvest_imminence_data = NULL,
|
|
harvesting_data = NULL) {
|
|
|
|
tryCatch({
|
|
field_id <- field_boundaries_sf$field[field_idx]
|
|
farm_section <- if ("sub_area" %in% names(field_boundaries_sf)) {
|
|
field_boundaries_sf$sub_area[field_idx]
|
|
} else {
|
|
NA_character_
|
|
}
|
|
field_name <- field_id
|
|
|
|
# DEBUG: Print for first few fields
|
|
if (field_idx <= 3) {
|
|
message(paste("[DEBUG] Field", field_idx, ":", field_id))
|
|
}
|
|
|
|
field_sf <- field_boundaries_sf[field_idx, ]
|
|
if (sf::st_is_empty(field_sf) || any(is.na(sf::st_geometry(field_sf)))) {
|
|
return(data.frame(
|
|
Field_id = field_id,
|
|
error = "Empty or invalid geometry"
|
|
))
|
|
}
|
|
|
|
field_area_ha <- as.numeric(sf::st_area(field_sf)) / 10000
|
|
field_area_acres <- field_area_ha / 0.404686
|
|
|
|
tile_ids <- get_tile_ids_for_field(field_sf, tile_grid, field_id = field_id)
|
|
|
|
# DEBUG: Print tile IDs for first field
|
|
if (field_idx == 1) {
|
|
message(paste("[DEBUG] First field tile_ids:", paste(tile_ids, collapse=",")))
|
|
message(paste("[DEBUG] tile_grid nrows:", nrow(tile_grid), "ncols:", ncol(tile_grid)))
|
|
message(paste("[DEBUG] mosaic_dir:", mosaic_dir))
|
|
}
|
|
|
|
current_ci <- load_tiles_for_field(field_sf, tile_ids, week_num, year, mosaic_dir)
|
|
|
|
if (is.null(current_ci)) {
|
|
return(data.frame(
|
|
Field_id = field_id,
|
|
error = "No tile data available"
|
|
))
|
|
}
|
|
|
|
# Extract CI values: EXACTLY LIKE SCRIPT 20
|
|
# Crop to field bounding box first, then extract with sf directly (not terra::vect conversion)
|
|
field_bbox <- sf::st_bbox(field_sf)
|
|
ci_cropped <- terra::crop(current_ci, terra::ext(field_bbox), snap = "out")
|
|
extracted_vals <- terra::extract(ci_cropped, field_sf, fun = "mean", na.rm = TRUE)
|
|
|
|
# extracted_vals is a data.frame with ID column (field index) + mean value
|
|
mean_ci_current <- as.numeric(extracted_vals[1, 2])
|
|
|
|
if (is.na(mean_ci_current)) {
|
|
return(data.frame(
|
|
Field_id = field_id,
|
|
error = "No CI values extracted from tiles"
|
|
))
|
|
}
|
|
|
|
# For per-tile extraction, we only have mean from the aggregation function
|
|
# To get variance/CV, we need to extract all pixels without the fun parameter
|
|
# But for farm-level purposes, the mean CI is sufficient
|
|
all_extracted <- terra::extract(ci_cropped, field_sf)[, 2]
|
|
current_ci_vals <- all_extracted[!is.na(all_extracted)]
|
|
|
|
num_total <- length(all_extracted)
|
|
num_data <- sum(!is.na(all_extracted))
|
|
pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0
|
|
|
|
cloud_cat <- if (num_data == 0) "No image available"
|
|
else if (pct_clear >= 99.5) "Clear view"
|
|
else "Partial coverage"
|
|
cloud_pct <- 100 - pct_clear
|
|
cloud_interval <- round_cloud_to_intervals(pct_clear)
|
|
|
|
if (length(current_ci_vals) == 0) {
|
|
return(data.frame(
|
|
Field_id = field_id,
|
|
error = "No CI values extracted"
|
|
))
|
|
}
|
|
|
|
mean_ci_current <- mean(current_ci_vals, na.rm = TRUE)
|
|
ci_std <- sd(current_ci_vals, na.rm = TRUE)
|
|
cv_current <- ci_std / mean_ci_current
|
|
range_min <- min(current_ci_vals, na.rm = TRUE)
|
|
range_max <- max(current_ci_vals, na.rm = TRUE)
|
|
range_str <- sprintf("%.1f-%.1f", range_min, range_max)
|
|
|
|
ci_percentiles_str <- get_ci_percentiles(current_ci_vals)
|
|
|
|
weekly_ci_change <- NA
|
|
previous_ci_vals <- NULL
|
|
|
|
tryCatch({
|
|
previous_ci <- load_tiles_for_field(field_sf, tile_ids, week_num - 1, year, mosaic_dir)
|
|
if (!is.null(previous_ci)) {
|
|
prev_bbox <- sf::st_bbox(field_sf)
|
|
prev_ci_cropped <- terra::crop(previous_ci, terra::ext(prev_bbox), snap = "out")
|
|
prev_extracted <- terra::extract(prev_ci_cropped, field_sf)[, 2]
|
|
previous_ci_vals <- prev_extracted[!is.na(prev_extracted)]
|
|
if (length(previous_ci_vals) > 0) {
|
|
mean_ci_previous <- mean(previous_ci_vals, na.rm = TRUE)
|
|
weekly_ci_change <- mean_ci_current - mean_ci_previous
|
|
}
|
|
}
|
|
}, error = function(e) {
|
|
# Silent fail
|
|
})
|
|
|
|
if (is.na(weekly_ci_change)) {
|
|
weekly_ci_change_str <- sprintf("%.1f ± %.2f", mean_ci_current, ci_std)
|
|
} else {
|
|
weekly_ci_change_str <- sprintf("%.1f ± %.2f (Δ%.1f)", mean_ci_current, ci_std, weekly_ci_change)
|
|
}
|
|
|
|
age_weeks <- NA
|
|
if (!is.null(planting_dates) && nrow(planting_dates) > 0) {
|
|
field_planting <- planting_dates %>%
|
|
filter(field_id == !!field_id) %>%
|
|
pull(planting_date)
|
|
|
|
if (length(field_planting) > 0) {
|
|
age_weeks <- as.numeric(difftime(report_date, field_planting[1], units = "weeks"))
|
|
}
|
|
}
|
|
|
|
if (USE_UNIFORM_AGE) {
|
|
age_weeks <- as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks"))
|
|
}
|
|
|
|
pct_ci_above_2 <- sum(current_ci_vals > 2) / length(current_ci_vals) * 100
|
|
pct_ci_ge_2 <- sum(current_ci_vals >= 2) / length(current_ci_vals) * 100
|
|
germination_progress_str <- NA_character_
|
|
if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks <= 6) {
|
|
germination_progress_str <- sprintf("%.0f%%", pct_ci_ge_2)
|
|
}
|
|
|
|
phase <- "Unknown"
|
|
imminent_prob_val <- NA
|
|
if (!is.null(harvest_imminence_data) && nrow(harvest_imminence_data) > 0) {
|
|
imminence_row <- harvest_imminence_data %>%
|
|
filter(field_id == !!field_id)
|
|
if (nrow(imminence_row) > 0) {
|
|
imminent_prob_val <- imminence_row$probability[1]
|
|
if (imminent_prob_val > 0.5) {
|
|
phase <- "Harvest Imminent (Model)"
|
|
}
|
|
}
|
|
}
|
|
|
|
if (phase == "Unknown") {
|
|
phase <- get_phase_by_age(age_weeks)
|
|
}
|
|
|
|
status_trigger <- get_status_trigger(current_ci_vals, weekly_ci_change, age_weeks)
|
|
|
|
nmr_weeks_in_phase <- 1
|
|
|
|
four_week_trend <- NA_character_
|
|
ci_values_for_trend <- c(mean_ci_current)
|
|
|
|
if (!is.null(historical_data) && length(historical_data) > 0) {
|
|
for (hist in historical_data) {
|
|
hist_week <- hist$week
|
|
hist_data <- hist$data
|
|
|
|
field_row <- hist_data %>%
|
|
filter(Field_id == !!field_id)
|
|
|
|
if (nrow(field_row) > 0 && !is.na(field_row$Mean_CI[1])) {
|
|
ci_values_for_trend <- c(field_row$Mean_CI[1], ci_values_for_trend)
|
|
}
|
|
}
|
|
|
|
if (length(ci_values_for_trend) >= 2) {
|
|
four_week_trend <- categorize_four_week_trend(ci_values_for_trend)
|
|
}
|
|
}
|
|
|
|
cv_trend_short <- NA_real_
|
|
cv_trend_long <- NA_real_
|
|
|
|
if (!is.null(historical_data) && length(historical_data) > 0) {
|
|
if (length(historical_data) >= 2) {
|
|
cv_2w <- historical_data[[2]]$data %>%
|
|
filter(Field_id == !!field_id) %>%
|
|
pull(CV)
|
|
if (length(cv_2w) > 0 && !is.na(cv_2w[1])) {
|
|
cv_trend_short <- calculate_cv_trend(cv_current, cv_2w[1])
|
|
}
|
|
}
|
|
|
|
if (length(historical_data) >= 8) {
|
|
cv_8w <- historical_data[[8]]$data %>%
|
|
filter(Field_id == !!field_id) %>%
|
|
pull(CV)
|
|
if (length(cv_8w) > 0 && !is.na(cv_8w[1])) {
|
|
cv_trend_long <- calculate_cv_trend(cv_current, cv_8w[1])
|
|
}
|
|
}
|
|
}
|
|
|
|
last_harvest_date <- NA_character_
|
|
if (!is.null(harvesting_data) && nrow(harvesting_data) > 0) {
|
|
last_harvest_row <- harvesting_data %>%
|
|
filter(field == !!field_id) %>%
|
|
arrange(desc(season_start)) %>%
|
|
slice(1)
|
|
|
|
if (nrow(last_harvest_row) > 0 && !is.na(last_harvest_row$season_start[1])) {
|
|
last_harvest_date <- as.character(last_harvest_row$season_start[1])
|
|
}
|
|
}
|
|
|
|
result <- data.frame(
|
|
Field_id = field_id,
|
|
Farm_Section = farm_section,
|
|
Field_name = field_name,
|
|
Hectare = round(field_area_ha, 2),
|
|
Acreage = round(field_area_acres, 2),
|
|
Mean_CI = round(mean_ci_current, 2),
|
|
Weekly_ci_change = if (is.na(weekly_ci_change)) NA_real_ else round(weekly_ci_change, 2),
|
|
Weekly_ci_change_str = weekly_ci_change_str,
|
|
Four_week_trend = four_week_trend,
|
|
Last_harvest_or_planting_date = last_harvest_date,
|
|
Age_week = if (is.na(age_weeks)) NA_integer_ else as.integer(round(age_weeks)),
|
|
`Phase (age based)` = phase,
|
|
nmr_weeks_in_this_phase = nmr_weeks_in_phase,
|
|
Germination_progress = germination_progress_str,
|
|
Imminent_prob = imminent_prob_val,
|
|
Status_trigger = status_trigger,
|
|
CI_range = range_str,
|
|
CI_Percentiles = ci_percentiles_str,
|
|
CV = round(cv_current, 4),
|
|
CV_Trend_Short_Term = cv_trend_short,
|
|
CV_Trend_Long_Term = cv_trend_long,
|
|
Cloud_pct_clear = pct_clear,
|
|
Cloud_pct_clear_interval = cloud_interval,
|
|
Cloud_pct = cloud_pct,
|
|
Cloud_category = cloud_cat,
|
|
stringsAsFactors = FALSE
|
|
)
|
|
|
|
return(result)
|
|
|
|
}, error = function(e) {
|
|
message(paste("Error analyzing field", field_idx, ":", e$message))
|
|
return(data.frame(
|
|
Field_id = NA_character_,
|
|
error = e$message
|
|
))
|
|
})
|
|
}
|
|
|
|
# ============================================================================
|
|
# SUMMARY GENERATION
|
|
# ============================================================================
|
|
|
|
generate_field_analysis_summary <- function(field_df) {
|
|
message("Generating summary statistics...")
|
|
|
|
total_acreage <- sum(field_df$Acreage, na.rm = TRUE)
|
|
|
|
germination_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Germination"], na.rm = TRUE)
|
|
tillering_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Tillering"], na.rm = TRUE)
|
|
grand_growth_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Grand Growth"], na.rm = TRUE)
|
|
maturation_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Maturation"], na.rm = TRUE)
|
|
unknown_phase_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Unknown"], na.rm = TRUE)
|
|
|
|
harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE)
|
|
stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE)
|
|
recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE)
|
|
growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE)
|
|
germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE)
|
|
germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE)
|
|
no_trigger_acreage <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE)
|
|
|
|
clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE)
|
|
partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE)
|
|
no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE)
|
|
total_fields <- nrow(field_df)
|
|
|
|
clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE)
|
|
partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE)
|
|
no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE)
|
|
|
|
summary_df <- data.frame(
|
|
Category = c(
|
|
"--- PHASE DISTRIBUTION ---",
|
|
"Germination",
|
|
"Tillering",
|
|
"Grand Growth",
|
|
"Maturation",
|
|
"Unknown phase",
|
|
"--- STATUS TRIGGERS ---",
|
|
"Harvest ready",
|
|
"Stress detected",
|
|
"Strong recovery",
|
|
"Growth on track",
|
|
"Germination complete",
|
|
"Germination started",
|
|
"No trigger",
|
|
"--- CLOUD COVERAGE (FIELDS) ---",
|
|
"Clear view",
|
|
"Partial coverage",
|
|
"No image available",
|
|
"--- CLOUD COVERAGE (ACREAGE) ---",
|
|
"Clear view",
|
|
"Partial coverage",
|
|
"No image available",
|
|
"--- TOTAL ---",
|
|
"Total Acreage"
|
|
),
|
|
Acreage = c(
|
|
NA,
|
|
round(germination_acreage, 2),
|
|
round(tillering_acreage, 2),
|
|
round(grand_growth_acreage, 2),
|
|
round(maturation_acreage, 2),
|
|
round(unknown_phase_acreage, 2),
|
|
NA,
|
|
round(harvest_ready_acreage, 2),
|
|
round(stress_acreage, 2),
|
|
round(recovery_acreage, 2),
|
|
round(growth_on_track_acreage, 2),
|
|
round(germination_complete_acreage, 2),
|
|
round(germination_started_acreage, 2),
|
|
round(no_trigger_acreage, 2),
|
|
NA,
|
|
paste0(clear_fields, " fields"),
|
|
paste0(partial_fields, " fields"),
|
|
paste0(no_image_fields, " fields"),
|
|
NA,
|
|
round(clear_acreage, 2),
|
|
round(partial_acreage, 2),
|
|
round(no_image_acreage, 2),
|
|
NA,
|
|
round(total_acreage, 2)
|
|
),
|
|
stringsAsFactors = FALSE
|
|
)
|
|
|
|
return(summary_df)
|
|
}
|
|
|
|
# ============================================================================
|
|
# EXPORT FUNCTIONS
|
|
# ============================================================================
|
|
|
|
export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, reports_dir) {
|
|
message("Exporting per-field analysis to Excel, CSV, and RDS...")
|
|
|
|
# Round all numeric columns to 2 decimals
|
|
field_df_rounded <- field_df %>%
|
|
mutate(across(where(is.numeric), ~ round(., 2)))
|
|
|
|
summary_df_rounded <- summary_df %>%
|
|
mutate(across(where(is.numeric), ~ round(., 2)))
|
|
|
|
output_subdir <- file.path(reports_dir, "kpis", "field_analysis")
|
|
if (!dir.exists(output_subdir)) {
|
|
dir.create(output_subdir, recursive = TRUE)
|
|
}
|
|
|
|
excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".xlsx")
|
|
excel_path <- file.path(output_subdir, excel_filename)
|
|
excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
|
|
|
|
sheets <- list(
|
|
"Field Data" = field_df_rounded,
|
|
"Summary" = summary_df_rounded
|
|
)
|
|
|
|
write_xlsx(sheets, excel_path)
|
|
message(paste("✓ Field analysis Excel exported to:", excel_path))
|
|
|
|
kpi_data <- list(
|
|
field_analysis = field_df_rounded,
|
|
field_analysis_summary = summary_df_rounded,
|
|
metadata = list(
|
|
current_week = current_week,
|
|
project = project_dir,
|
|
created_at = Sys.time()
|
|
)
|
|
)
|
|
|
|
rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d", current_week), ".rds")
|
|
rds_path <- file.path(reports_dir, "kpis", rds_filename)
|
|
|
|
saveRDS(kpi_data, rds_path)
|
|
message(paste("✓ Field analysis RDS exported to:", rds_path))
|
|
|
|
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".csv")
|
|
csv_path <- file.path(output_subdir, csv_filename)
|
|
write_csv(field_df_rounded, csv_path)
|
|
message(paste("✓ Field analysis CSV exported to:", csv_path))
|
|
|
|
return(list(excel = excel_path, rds = rds_path, csv = csv_path))
|
|
}
|
|
|
|
# ============================================================================
|
|
# TILE-BASED KPI EXTRACTION FUNCTION
|
|
# ============================================================================
|
|
|
|
calculate_field_kpis_from_tiles <- function(tile_dir, week_num, year, field_boundaries_sf, tile_grid) {
|
|
# Loop through tiles, extract KPI statistics per field per tile
|
|
# Follows the same pattern as extract_ci_from_tiles in CI extraction
|
|
|
|
message("Calculating field-level KPI statistics from tiles...")
|
|
|
|
# Get all tile files for this week
|
|
tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
|
|
tile_files <- list.files(tile_dir, pattern = tile_pattern, full.names = TRUE)
|
|
|
|
if (length(tile_files) == 0) {
|
|
message("No tiles found for week", week_num, year)
|
|
return(NULL)
|
|
}
|
|
|
|
# Process tiles in parallel using furrr (same as CI extraction)
|
|
message(paste("Processing", length(tile_files), "tiles in parallel..."))
|
|
|
|
field_kpi_list <- furrr::future_map(
|
|
tile_files,
|
|
~ process_single_kpi_tile(
|
|
tile_file = .,
|
|
field_boundaries_sf = field_boundaries_sf,
|
|
tile_grid = tile_grid
|
|
),
|
|
.progress = TRUE,
|
|
.options = furrr::furrr_options(seed = TRUE)
|
|
)
|
|
|
|
# Combine results from all tiles
|
|
field_kpi_stats <- dplyr::bind_rows(field_kpi_list)
|
|
|
|
if (nrow(field_kpi_stats) == 0) {
|
|
message(" No KPI data extracted from tiles")
|
|
return(NULL)
|
|
}
|
|
|
|
message(paste(" Extracted KPI stats for", length(unique(field_kpi_stats$field)), "unique fields"))
|
|
return(field_kpi_stats)
|
|
}
|
|
|
|
# Helper function to process a single tile (like process_single_tile in CI extraction)
|
|
process_single_kpi_tile <- function(tile_file, field_boundaries_sf, tile_grid) {
|
|
tryCatch({
|
|
tile_basename <- basename(tile_file)
|
|
# Load tile raster
|
|
tile_raster <- terra::rast(tile_file)
|
|
|
|
# Get first band (CI band for weekly mosaics)
|
|
ci_band <- tile_raster[[1]]
|
|
|
|
# EXACTLY LIKE SCRIPT 20: Crop to field bounding box first, then extract with sf directly
|
|
field_bbox <- sf::st_bbox(field_boundaries_sf)
|
|
ci_cropped <- terra::crop(ci_band, terra::ext(field_bbox), snap = "out")
|
|
|
|
# Extract CI values for ALL fields at once using sf object directly (NOT terra::vect)
|
|
# terra::extract() works with sf objects and handles geometries properly
|
|
extracted_vals <- terra::extract(ci_cropped, field_boundaries_sf, fun = "mean", na.rm = TRUE)
|
|
|
|
# Initialize results for this tile
|
|
tile_results <- data.frame()
|
|
|
|
# Get tile ID from filename
|
|
tile_id_match <- as.numeric(sub(".*_(\\d{2})\\.tif$", "\\1", tile_basename))
|
|
|
|
# Process each field: extracted_vals is a data.frame with ID column (field indices) + extracted values
|
|
for (field_idx in seq_len(nrow(field_boundaries_sf))) {
|
|
field_id <- field_boundaries_sf$field[field_idx]
|
|
|
|
# extracted_vals columns: 1=ID, 2=mean_CI (since we used fun="mean")
|
|
mean_ci <- extracted_vals[field_idx, 2]
|
|
|
|
# Skip if no data for this field in this tile
|
|
if (is.na(mean_ci)) {
|
|
next
|
|
}
|
|
|
|
# For tile-level stats, we only have mean from extraction (no variance without all pixels)
|
|
# Add to results
|
|
tile_results <- rbind(tile_results, data.frame(
|
|
field = field_id,
|
|
tile_id = tile_id_match,
|
|
tile_file = tile_basename,
|
|
mean_ci = round(mean_ci, 4),
|
|
stringsAsFactors = FALSE
|
|
))
|
|
}
|
|
|
|
return(tile_results)
|
|
|
|
}, error = function(e) {
|
|
message(paste(" Warning: Error processing tile", basename(tile_file), ":", e$message))
|
|
return(data.frame())
|
|
})
|
|
}
|
|
|
|
calculate_and_export_farm_kpis <- function(report_date, project_dir, field_boundaries_sf,
|
|
harvesting_data, cumulative_CI_vals_dir,
|
|
weekly_CI_mosaic, reports_dir, current_week, year,
|
|
tile_grid, use_tile_mosaic = FALSE, tile_grid_size = "5x5") {
|
|
message("\n=== CALCULATING FARM-LEVEL KPIs ===")
|
|
message("(6 high-level KPI metrics with tile-based extraction)")
|
|
|
|
output_dir <- file.path(reports_dir, "kpis")
|
|
if (!dir.exists(output_dir)) {
|
|
dir.create(output_dir, recursive = TRUE)
|
|
}
|
|
|
|
# Get mosaic directory with grid size if using tiles
|
|
mosaic_dir <- if (use_tile_mosaic && !is.null(tile_grid_size)) {
|
|
file.path(weekly_CI_mosaic, tile_grid_size)
|
|
} else {
|
|
weekly_CI_mosaic
|
|
}
|
|
|
|
# Extract field-level KPI statistics from tiles
|
|
field_kpi_stats <- calculate_field_kpis_from_tiles(
|
|
tile_dir = mosaic_dir,
|
|
week_num = current_week,
|
|
year = year,
|
|
field_boundaries_sf = field_boundaries_sf,
|
|
tile_grid = tile_grid
|
|
)
|
|
|
|
if (is.null(field_kpi_stats) || nrow(field_kpi_stats) == 0) {
|
|
message("Warning: No field KPI statistics extracted from tiles")
|
|
return(NULL)
|
|
}
|
|
|
|
# Aggregate tile-based statistics by field (average across tiles for each field)
|
|
field_summary_stats <- field_kpi_stats %>%
|
|
dplyr::group_by(field) %>%
|
|
dplyr::summarise(
|
|
mean_ci = mean(mean_ci, na.rm = TRUE),
|
|
cv_ci = mean(cv_ci, na.rm = TRUE),
|
|
min_ci = min(min_ci, na.rm = TRUE),
|
|
max_ci = max(max_ci, na.rm = TRUE),
|
|
total_pixels = sum(n_pixels, na.rm = TRUE),
|
|
num_tiles = n_distinct(tile_id),
|
|
.groups = 'drop'
|
|
)
|
|
|
|
# Create results list
|
|
kpi_results <- list(
|
|
field_kpi_stats = field_kpi_stats,
|
|
field_summary_stats = field_summary_stats,
|
|
metadata = list(
|
|
report_date = report_date,
|
|
current_week = current_week,
|
|
year = year,
|
|
calculation_method = "tile_based_extraction",
|
|
num_fields_processed = length(unique(field_kpi_stats$field)),
|
|
num_tiles_processed = length(unique(field_kpi_stats$tile_id))
|
|
)
|
|
)
|
|
|
|
# Save results
|
|
rds_filename <- paste0(project_dir, "_farm_kpi_stats_week", sprintf("%02d", current_week), ".rds")
|
|
rds_path <- file.path(output_dir, rds_filename)
|
|
saveRDS(kpi_results, rds_path)
|
|
message(paste("✓ Farm-level KPI stats exported to:", rds_path))
|
|
|
|
# Print summary
|
|
cat("\n=== FARM-LEVEL KPI SUMMARY ===\n")
|
|
cat("Report Date:", as.character(report_date), "\n")
|
|
cat("Week:", current_week, "Year:", year, "\n")
|
|
cat("Fields Processed:", length(unique(field_kpi_stats$field)), "\n")
|
|
cat("Tiles Processed:", length(unique(field_kpi_stats$tile_id)), "\n")
|
|
cat("\n--- Field Summary Statistics (Mean across tiles) ---\n")
|
|
print(head(field_summary_stats, 20))
|
|
|
|
return(kpi_results)
|
|
}
|
|
|
|
# ============================================================================
|
|
# HELPER: Extract field-level statistics from CI raster (all pixels, single call)
|
|
# ============================================================================
|
|
|
|
extract_field_statistics_from_ci <- function(ci_band, field_boundaries_sf) {
|
|
#' Extract CI statistics for all fields from a single CI raster band
|
|
#'
|
|
#' This function extracts all pixel values for each field in one terra::extract call,
|
|
#' then calculates mean, CV, and percentiles from those pixels.
|
|
#'
|
|
#' @param ci_band Single CI band from terra raster
|
|
#' @param field_boundaries_sf SF object with field geometries
|
|
#' @return Data frame with columns: field_idx, mean_ci, cv, p10, p90, pixel_count
|
|
|
|
# Extract all pixels for all fields at once (more efficient than individual calls)
|
|
all_pixels <- terra::extract(ci_band, field_boundaries_sf)
|
|
|
|
# Calculate statistics for each field
|
|
stats_list <- list()
|
|
|
|
for (field_idx in seq_len(nrow(field_boundaries_sf))) {
|
|
# Extract pixel values for this field (skip ID column 1)
|
|
pixels <- all_pixels[field_idx, -1, drop = TRUE]
|
|
pixels <- as.numeric(pixels)
|
|
pixels <- pixels[!is.na(pixels)]
|
|
|
|
# Only calculate stats if we have pixels
|
|
if (length(pixels) > 0) {
|
|
mean_val <- mean(pixels, na.rm = TRUE)
|
|
|
|
# Only calculate CV if mean > 0 (avoid division by zero)
|
|
if (mean_val > 0) {
|
|
cv_val <- sd(pixels, na.rm = TRUE) / mean_val
|
|
} else {
|
|
cv_val <- NA
|
|
}
|
|
|
|
p10_val <- quantile(pixels, probs = CI_PERCENTILE_LOW, na.rm = TRUE)[[1]]
|
|
p90_val <- quantile(pixels, probs = CI_PERCENTILE_HIGH, na.rm = TRUE)[[1]]
|
|
|
|
stats_list[[field_idx]] <- data.frame(
|
|
field_idx = field_idx,
|
|
mean_ci = mean_val,
|
|
cv = cv_val,
|
|
p10 = p10_val,
|
|
p90 = p90_val,
|
|
pixel_count = length(pixels),
|
|
stringsAsFactors = FALSE
|
|
)
|
|
} else {
|
|
# No pixels for this field (doesn't intersect tile)
|
|
stats_list[[field_idx]] <- data.frame(
|
|
field_idx = field_idx,
|
|
mean_ci = NA_real_,
|
|
cv = NA_real_,
|
|
p10 = NA_real_,
|
|
p90 = NA_real_,
|
|
pixel_count = 0,
|
|
stringsAsFactors = FALSE
|
|
)
|
|
}
|
|
}
|
|
|
|
return(dplyr::bind_rows(stats_list))
|
|
}
|
|
|
|
# ============================================================================
|
|
# MAIN
|
|
# ============================================================================
|
|
|
|
main <- function() {
|
|
# Parse command-line arguments
|
|
args <- commandArgs(trailingOnly = TRUE)
|
|
|
|
# end_date (arg 1)
|
|
end_date <- if (length(args) >= 1 && !is.na(args[1])) {
|
|
as.Date(args[1])
|
|
} else if (exists("end_date_str", envir = .GlobalEnv)) {
|
|
as.Date(get("end_date_str", envir = .GlobalEnv))
|
|
} else {
|
|
Sys.Date()
|
|
}
|
|
|
|
# project_dir (arg 2)
|
|
project_dir <- if (length(args) >= 2 && !is.na(args[2])) {
|
|
as.character(args[2])
|
|
} else if (exists("project_dir", envir = .GlobalEnv)) {
|
|
get("project_dir", envir = .GlobalEnv)
|
|
} else {
|
|
"angata"
|
|
}
|
|
|
|
# offset (arg 3) - for backward compatibility with old 09
|
|
offset <- if (length(args) >= 3 && !is.na(args[3])) {
|
|
as.numeric(args[3])
|
|
} else {
|
|
7
|
|
}
|
|
|
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
|
assign("end_date_str", format(end_date, "%Y-%m-%d"), envir = .GlobalEnv)
|
|
|
|
message("\n" %+% strrep("=", 70))
|
|
message("80_CALCULATE_KPIs.R - CONSOLIDATED KPI CALCULATION")
|
|
message(strrep("=", 70))
|
|
message("Date:", format(end_date, "%Y-%m-%d"))
|
|
message("Project:", project_dir)
|
|
message("Mode: Per-field analysis (SC-64) + Farm-level KPIs")
|
|
message("")
|
|
|
|
# Load configuration and utilities
|
|
# source(here("r_app", "crop_messaging_utils.R"))
|
|
|
|
tryCatch({
|
|
source(here("r_app", "parameters_project.R"))
|
|
}, error = function(e) {
|
|
stop("Error loading parameters_project.R: ", e$message)
|
|
})
|
|
|
|
tryCatch({
|
|
source(here("r_app", "30_growth_model_utils.R"))
|
|
}, error = function(e) {
|
|
warning("30_growth_model_utils.R not found - yield prediction KPI will use placeholder data")
|
|
})
|
|
|
|
# ========== PER-FIELD ANALYSIS (SC-64) ==========
|
|
|
|
message("\n" %+% strrep("-", 70))
|
|
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)")
|
|
message(strrep("-", 70))
|
|
|
|
current_week <- as.numeric(format(end_date, "%V"))
|
|
year <- as.numeric(format(end_date, "%Y"))
|
|
previous_week <- current_week - 1
|
|
if (previous_week < 1) previous_week <- 52
|
|
|
|
message(paste("Week:", current_week, "/ Year:", year))
|
|
|
|
# Find tile files - approach from Script 20
|
|
message("Finding tile files...")
|
|
tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", current_week, year)
|
|
|
|
# Detect grid size subdirectory
|
|
detected_grid_size <- NA
|
|
if (dir.exists(weekly_tile_max)) {
|
|
subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE)
|
|
grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
|
|
if (length(grid_patterns) > 0) {
|
|
detected_grid_size <- grid_patterns[1]
|
|
mosaic_dir <- file.path(weekly_tile_max, detected_grid_size)
|
|
message(paste(" Using grid-size subdirectory:", detected_grid_size))
|
|
}
|
|
}
|
|
|
|
tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
|
|
if (length(tile_files) == 0) {
|
|
stop(paste("No tile files found for week", current_week, year, "in", mosaic_dir))
|
|
}
|
|
message(paste(" Found", length(tile_files), "tiles"))
|
|
|
|
# Load field boundaries
|
|
tryCatch({
|
|
boundaries_result <- load_field_boundaries(data_dir)
|
|
|
|
if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
|
|
field_boundaries_sf <- boundaries_result$field_boundaries_sf
|
|
} else {
|
|
field_boundaries_sf <- boundaries_result
|
|
}
|
|
|
|
if (nrow(field_boundaries_sf) == 0) {
|
|
stop("No fields loaded from boundaries")
|
|
}
|
|
|
|
message(paste(" Loaded", nrow(field_boundaries_sf), "fields"))
|
|
}, error = function(e) {
|
|
stop("ERROR loading field boundaries: ", e$message)
|
|
})
|
|
|
|
message("Loading historical field data for trend calculations...")
|
|
num_weeks_to_load <- if (TEST_MODE) TEST_MODE_NUM_WEEKS else max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG)
|
|
if (TEST_MODE) {
|
|
message(paste(" TEST MODE: Loading only", num_weeks_to_load, "weeks"))
|
|
}
|
|
historical_data <- load_historical_field_data(project_dir, current_week, reports_dir, num_weeks = num_weeks_to_load)
|
|
|
|
planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
|
|
|
|
# Validate planting_dates
|
|
if (is.null(planting_dates) || nrow(planting_dates) == 0) {
|
|
message("WARNING: No planting dates available. Using NA for all fields.")
|
|
planting_dates <- data.frame(
|
|
field_id = field_boundaries_sf$field,
|
|
date = rep(as.Date(NA), nrow(field_boundaries_sf)),
|
|
stringsAsFactors = FALSE
|
|
)
|
|
}
|
|
|
|
# SCRIPT 20 APPROACH: Loop through tiles, extract all fields from each tile
|
|
message("\nProcessing tiles and extracting field statistics...")
|
|
all_tile_results <- list()
|
|
|
|
for (i in seq_along(tile_files)) {
|
|
tile_file <- tile_files[i]
|
|
message(paste(" Processing tile", i, "of", length(tile_files), ":", basename(tile_file)))
|
|
|
|
tryCatch({
|
|
# Load current tile and previous week tile
|
|
current_rast <- terra::rast(tile_file)
|
|
|
|
# DEBUG: Check tile structure on first tile
|
|
if (i == 1) {
|
|
message(paste(" [DEBUG] Tile CRS:", terra::crs(current_rast)))
|
|
message(paste(" [DEBUG] Tile extent:", paste(terra::ext(current_rast))))
|
|
message(paste(" [DEBUG] Field boundaries CRS:", sf::st_crs(field_boundaries_sf)))
|
|
field_bbox <- sf::st_bbox(field_boundaries_sf)
|
|
message(paste(" [DEBUG] Field bbox:", paste(round(field_bbox, 2))))
|
|
message(paste(" [DEBUG] Band names:", paste(names(current_rast), collapse=", ")))
|
|
}
|
|
|
|
# Extract CI band by name
|
|
ci_band <- current_rast[["CI"]]
|
|
|
|
# Check if CI band exists - use proper logical checks
|
|
if (is.null(ci_band) || !inherits(ci_band, "SpatRaster")) {
|
|
message(paste(" ERROR: CI band not found. Available bands:", paste(names(current_rast), collapse=", ")))
|
|
next
|
|
}
|
|
|
|
# Check if CI band has any valid data
|
|
if (tryCatch(all(is.na(values(ci_band))), error = function(e) TRUE)) {
|
|
message(paste(" ERROR: CI band has no valid data"))
|
|
next
|
|
}
|
|
|
|
# Load previous week tile if available
|
|
previous_tile_file <- sub(sprintf("week_%02d", current_week),
|
|
sprintf("week_%02d", previous_week),
|
|
tile_file)
|
|
previous_ci <- NULL
|
|
if (file.exists(previous_tile_file)) {
|
|
previous_rast <- terra::rast(previous_tile_file)
|
|
previous_ci <- previous_rast[["CI"]]
|
|
}
|
|
|
|
# OPTION 1 + 2: Extract all CI statistics from one pixel extraction (single call)
|
|
current_stats <- extract_field_statistics_from_ci(ci_band, field_boundaries_sf)
|
|
|
|
# DEBUG: Check extraction result on first tile
|
|
if (i == 1) {
|
|
num_with_data <- sum(!is.na(current_stats$mean_ci))
|
|
message(paste(" [DEBUG] Extracted", nrow(current_stats), "fields, ", num_with_data, "with non-NA data"))
|
|
if (num_with_data > 0) {
|
|
message(paste(" [DEBUG] Sample mean CIs:", paste(head(current_stats$mean_ci[!is.na(current_stats$mean_ci)], 3), collapse=", ")))
|
|
}
|
|
}
|
|
|
|
# Extract previous week CI statistics if available
|
|
previous_stats <- NULL
|
|
if (!is.null(previous_ci)) {
|
|
previous_stats <- extract_field_statistics_from_ci(previous_ci, field_boundaries_sf)
|
|
}
|
|
|
|
# Process each field that was extracted
|
|
field_results_this_tile <- list()
|
|
fields_added <- 0
|
|
|
|
for (field_idx in seq_len(nrow(field_boundaries_sf))) {
|
|
tryCatch({
|
|
field_id <- field_boundaries_sf$field[field_idx]
|
|
field_sf <- field_boundaries_sf[field_idx, ]
|
|
|
|
# Get statistics from helper function results
|
|
# current_stats should have same number of rows as field_boundaries_sf
|
|
if (field_idx > nrow(current_stats)) {
|
|
message(paste(" [ERROR] field_idx", field_idx, "> nrow(current_stats)", nrow(current_stats)))
|
|
next
|
|
}
|
|
|
|
mean_ci_current <- current_stats$mean_ci[field_idx]
|
|
pixel_count <- current_stats$pixel_count[field_idx]
|
|
|
|
# SKIP fields with no data in this tile (they don't intersect this tile)
|
|
if (is.na(pixel_count) || pixel_count == 0) {
|
|
next
|
|
}
|
|
ci_cv_current <- current_stats$cv[field_idx]
|
|
ci_percentile_low <- current_stats$p10[field_idx]
|
|
ci_percentile_high <- current_stats$p90[field_idx]
|
|
|
|
# If field doesn't intersect this tile, mean_ci_current will be NA
|
|
if (is.na(mean_ci_current)) {
|
|
next # Skip this field - doesn't intersect this tile
|
|
}
|
|
|
|
field_area_ha <- as.numeric(sf::st_area(field_sf)) / 10000
|
|
field_area_acres <- field_area_ha / 0.404686
|
|
|
|
# Extract previous week CI if available
|
|
mean_ci_previous <- NA
|
|
ci_change <- NA
|
|
if (!is.null(previous_stats)) {
|
|
mean_ci_previous <- previous_stats$mean_ci[field_idx]
|
|
if (!is.na(mean_ci_previous)) {
|
|
ci_change <- mean_ci_current - mean_ci_previous
|
|
}
|
|
}
|
|
|
|
# Reconstruct pixel values for status trigger (we need the actual pixel array)
|
|
# Use the percentiles and mean to create a synthetic distribution for status_trigger
|
|
# For now, use mean CI repeated by pixel count for testing
|
|
# TODO: Consider extracting pixels directly if needed for more complex triggers
|
|
pixel_count <- current_stats$pixel_count[field_idx]
|
|
ci_vals_current <- if (pixel_count > 0) {
|
|
rep(mean_ci_current, pixel_count) # Simplified: use mean value repeated
|
|
} else {
|
|
numeric(0)
|
|
}
|
|
|
|
# Calculate age
|
|
age_weeks <- if (!is.null(planting_dates) && nrow(planting_dates) > 0 && field_idx <= nrow(planting_dates)) {
|
|
planting_date <- planting_dates$date[field_idx]
|
|
if (!is.na(planting_date)) {
|
|
as.numeric(difftime(end_date, planting_date, units = "weeks"))
|
|
} else {
|
|
0
|
|
}
|
|
} else {
|
|
0
|
|
}
|
|
|
|
# Get phase and status
|
|
phase <- get_phase_by_age(age_weeks)
|
|
status_trigger <- get_status_trigger(ci_vals_current, ci_change, age_weeks)
|
|
|
|
# Cloud coverage categorization based on CI value
|
|
# No data = No image available
|
|
# CI 0.01 to 95 = Partial coverage
|
|
# CI >= 95 = Clear view
|
|
if (is.na(mean_ci_current) || mean_ci_current == 0) {
|
|
cloud_category <- "No image available"
|
|
# Set all CI metrics to NA since no valid data
|
|
ci_change <- NA
|
|
ci_cv_current <- NA
|
|
ci_percentile_low <- NA
|
|
ci_percentile_high <- NA
|
|
} else if (mean_ci_current >= 95) {
|
|
cloud_category <- "Clear view"
|
|
} else {
|
|
cloud_category <- "Partial coverage"
|
|
}
|
|
|
|
# Build result row
|
|
result_row <- data.frame(
|
|
Field_id = field_id,
|
|
Acreage = field_area_acres,
|
|
Mean_CI = mean_ci_current,
|
|
Mean_CI_prev = mean_ci_previous,
|
|
CI_change = ci_change,
|
|
CI_CV = ci_cv_current,
|
|
CI_percentile_low = ci_percentile_low,
|
|
CI_percentile_high = ci_percentile_high,
|
|
Age_weeks = age_weeks,
|
|
Phase = phase,
|
|
Status_trigger = status_trigger,
|
|
Cloud_category = cloud_category,
|
|
stringsAsFactors = FALSE
|
|
)
|
|
|
|
field_results_this_tile[[as.character(field_id)]] <- result_row
|
|
fields_added <- fields_added + 1
|
|
|
|
}, error = function(e) {
|
|
# Show error for debugging
|
|
message(paste(" [FIELD ERROR] Field", field_idx, ":", e$message))
|
|
})
|
|
}
|
|
|
|
if (length(field_results_this_tile) > 0) {
|
|
all_tile_results[[basename(tile_file)]] <- dplyr::bind_rows(field_results_this_tile)
|
|
message(paste(" Extracted", length(field_results_this_tile), "fields from tile (processed", fields_added, "fields total)"))
|
|
} else {
|
|
message(paste(" WARNING: No fields extracted from this tile (processed", fields_added, "fields, all either NA or errored)"))
|
|
}
|
|
|
|
}, error = function(e) {
|
|
message(paste(" Error processing tile", basename(tile_file), ":", e$message))
|
|
})
|
|
}
|
|
|
|
# Combine all tile results, keeping unique fields (may appear in multiple tiles)
|
|
if (length(all_tile_results) == 0) {
|
|
stop("No fields extracted from any tiles!")
|
|
}
|
|
|
|
field_analysis_df <- dplyr::bind_rows(all_tile_results) %>%
|
|
distinct(Field_id, .keep_all = TRUE)
|
|
|
|
if (nrow(field_analysis_df) == 0) {
|
|
stop("No fields analyzed successfully!")
|
|
}
|
|
|
|
message(paste("✓ Analyzed", nrow(field_analysis_df), "fields"))
|
|
|
|
summary_statistics_df <- generate_field_analysis_summary(field_analysis_df)
|
|
|
|
export_paths <- export_field_analysis_excel(
|
|
field_analysis_df,
|
|
summary_statistics_df,
|
|
project_dir,
|
|
current_week,
|
|
reports_dir
|
|
)
|
|
|
|
cat("\n--- Per-field Results (first 10) ---\n")
|
|
available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_trigger", "Cloud_category")
|
|
available_cols <- available_cols[available_cols %in% names(field_analysis_df)]
|
|
if (length(available_cols) > 0) {
|
|
print(head(field_analysis_df[, available_cols], 10))
|
|
}
|
|
|
|
cat("\n--- Summary Statistics ---\n")
|
|
print(summary_statistics_df)
|
|
|
|
# ========== FARM-LEVEL KPI AGGREGATION ==========
|
|
# Aggregate the per-field analysis into farm-level summary statistics
|
|
|
|
cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n")
|
|
|
|
# Filter to only fields that have actual data (non-NA CI and valid acreage)
|
|
field_data <- field_analysis_df %>%
|
|
filter(!is.na(Mean_CI) & !is.na(Acreage)) %>%
|
|
filter(Acreage > 0)
|
|
|
|
if (nrow(field_data) > 0) {
|
|
|
|
if (nrow(field_data) > 0) {
|
|
# Create summary statistics
|
|
farm_summary <- list()
|
|
|
|
# 1. PHASE DISTRIBUTION
|
|
phase_dist <- field_data %>%
|
|
group_by(Phase) %>%
|
|
summarise(
|
|
num_fields = n(),
|
|
acreage = sum(Acreage, na.rm = TRUE),
|
|
.groups = 'drop'
|
|
) %>%
|
|
rename(Category = Phase)
|
|
|
|
farm_summary$phase_distribution <- phase_dist
|
|
|
|
# 2. STATUS TRIGGER DISTRIBUTION
|
|
status_dist <- field_data %>%
|
|
group_by(Status_trigger) %>%
|
|
summarise(
|
|
num_fields = n(),
|
|
acreage = sum(Acreage, na.rm = TRUE),
|
|
.groups = 'drop'
|
|
) %>%
|
|
rename(Category = Status_trigger)
|
|
|
|
farm_summary$status_distribution <- status_dist
|
|
|
|
# 3. CLOUD COVERAGE DISTRIBUTION
|
|
cloud_dist <- field_data %>%
|
|
group_by(Cloud_category) %>%
|
|
summarise(
|
|
num_fields = n(),
|
|
acreage = sum(Acreage, na.rm = TRUE),
|
|
.groups = 'drop'
|
|
) %>%
|
|
rename(Category = Cloud_category)
|
|
|
|
farm_summary$cloud_distribution <- cloud_dist
|
|
|
|
# 4. OVERALL STATISTICS
|
|
farm_summary$overall_stats <- data.frame(
|
|
total_fields = nrow(field_data),
|
|
total_acreage = sum(field_data$Acreage, na.rm = TRUE),
|
|
mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2),
|
|
median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2),
|
|
mean_cv = round(mean(field_data$CI_CV, na.rm = TRUE), 4),
|
|
week = current_week,
|
|
year = year,
|
|
date = as.character(end_date)
|
|
)
|
|
|
|
# Print summaries
|
|
cat("\n--- PHASE DISTRIBUTION ---\n")
|
|
print(phase_dist)
|
|
|
|
cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n")
|
|
print(status_dist)
|
|
|
|
cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n")
|
|
print(cloud_dist)
|
|
|
|
cat("\n--- OVERALL FARM STATISTICS ---\n")
|
|
print(farm_summary$overall_stats)
|
|
|
|
farm_kpi_results <- farm_summary
|
|
} else {
|
|
farm_kpi_results <- NULL
|
|
}
|
|
} else {
|
|
farm_kpi_results <- NULL
|
|
}
|
|
|
|
# ========== FINAL SUMMARY ==========
|
|
|
|
cat("\n" %+% strrep("=", 70) %+% "\n")
|
|
cat("80_CALCULATE_KPIs.R - COMPLETION SUMMARY\n")
|
|
cat(strrep("=", 70) %+% "\n")
|
|
cat("Per-field analysis fields analyzed:", nrow(field_analysis_df), "\n")
|
|
cat("Excel export:", export_paths$excel, "\n")
|
|
cat("RDS export:", export_paths$rds, "\n")
|
|
cat("CSV export:", export_paths$csv, "\n")
|
|
|
|
if (!is.null(farm_kpi_results)) {
|
|
cat("\nFarm-level KPIs: CALCULATED\n")
|
|
} else {
|
|
cat("\nFarm-level KPIs: SKIPPED (no valid tile data extracted)\n")
|
|
}
|
|
|
|
cat("\n✓ Consolidated KPI calculation complete!\n")
|
|
cat(" - Per-field data exported\n")
|
|
cat(" - Farm-level KPIs calculated\n")
|
|
cat(" - All outputs in:", reports_dir, "\n\n")
|
|
}
|
|
|
|
if (sys.nframe() == 0) {
|
|
main()
|
|
}
|