708 lines
26 KiB
R
708 lines
26 KiB
R
# 80_CALCULATE_KPIS.R (CONSOLIDATED KPI CALCULATION)
|
|
# ============================================================================
|
|
# UNIFIED KPI CALCULATION SCRIPT
|
|
#
|
|
# This script combines:
|
|
# 1. Per-field weekly analysis (from 09c: field-level trends, phases, statuses)
|
|
# 2. Farm-level KPI metrics (from old 09: 6 high-level indicators)
|
|
#
|
|
# FEATURES:
|
|
# - Per-field analysis with SC-64 enhancements (4-week trends, CI percentiles, etc.)
|
|
# - Farm-level KPI calculation (6 metrics for executive overview)
|
|
# - Parallel processing (tile-aware, 1000+ fields supported)
|
|
# - Comprehensive Excel + RDS + CSV exports (21 columns per spec)
|
|
# - Test mode for development
|
|
|
|
# CRITICAL INTEGRATIONS:
|
|
#
|
|
# 1. IMMINENT_PROB FROM HARVEST MODEL (MODEL_307)
|
|
# [✓] Load script 31 output: {project}_week_{WW}_{YYYY}.csv
|
|
# Columns: field, imminent_prob, detected_prob, week, year
|
|
# [✓] LEFT JOIN to field_analysis_df by field
|
|
# [✓] Use actual harvest probability data instead of placeholder
|
|
#
|
|
# 2. AGE FROM HARVEST.XLSX (SCRIPTS 22 & 23)
|
|
# [✓] Load harvest.xlsx with planting_date (season_start)
|
|
# [✓] Extract planting dates per field
|
|
# [✓] Calculate Age_week = difftime(report_date, planting_date, units="weeks")
|
|
#
|
|
# COMMAND-LINE USAGE:
|
|
# Option 1: Rscript 80_calculate_kpis.R 2026-01-14 angata
|
|
# Arguments: [end_date] [project_dir]
|
|
#
|
|
# Option 2: Rscript 80_calculate_kpis.R 2026-01-14 angata 7
|
|
# Arguments: [end_date] [project_dir] [offset_days]
|
|
#
|
|
# & "C:\Program Files\R\R-4.4.3\bin\x64\Rscript.exe" r_app/80_calculate_kpis.R 2026-01-12 angata 7
|
|
#
|
|
# Usage in run_full_pipeline.R:
|
|
# source("r_app/80_calculate_kpis.R")
|
|
# main()
|
|
|
|
# ============================================================================
|
|
# NEXT INTEGRATIONS (See Linear issues for detailed requirements)
|
|
# ============================================================================
|
|
# 1. [✓] Load imminent_prob from script 31 (week_WW_YYYY.csv)
|
|
# 2. [✓] Load planting_date from harvest.xlsx for field-specific age calculation
|
|
# 3. [ ] Improve Status_trigger logic to use actual imminent_prob values
|
|
# ============================================================================
|
|
|
|
# ============================================================================
|
|
# CONFIGURATION
|
|
# ============================================================================
|
|
FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5
|
|
FOUR_WEEK_TREND_GROWTH_MIN <- 0.1
|
|
FOUR_WEEK_TREND_GROWTH_MAX <- 0.5
|
|
FOUR_WEEK_TREND_NO_GROWTH_RANGE <- 0.1
|
|
FOUR_WEEK_TREND_DECLINE_MAX <- -0.1
|
|
FOUR_WEEK_TREND_DECLINE_MIN <- -0.5
|
|
FOUR_WEEK_TREND_STRONG_DECLINE_MAX <- -0.5
|
|
|
|
# CV TREND THRESHOLDS
|
|
CV_TREND_THRESHOLD_SIGNIFICANT <- 0.05
|
|
|
|
# CV_TREND_LONG_TERM (8-WEEK SLOPE) INTERPRETATION THRESHOLDS
|
|
# Interpretation: Slope of CV over 8 weeks indicates field uniformity trend
|
|
# Negative slope = CV decreasing = field becoming MORE uniform = GOOD
|
|
# Positive slope = CV increasing = field becoming MORE patchy = BAD
|
|
# Near zero = Homogenous growth (all crops progressing equally)
|
|
CV_SLOPE_STRONG_IMPROVEMENT_MIN <- -0.05 # CV decreasing rapidly
|
|
CV_SLOPE_IMPROVEMENT_MIN <- -0.02 # Gradual synchronization
|
|
CV_SLOPE_IMPROVEMENT_MAX <- -0.005 # Becoming uniform
|
|
CV_SLOPE_HOMOGENOUS_MIN <- -0.005 # Stable, uniform growth
|
|
CV_SLOPE_HOMOGENOUS_MAX <- 0.005 # No change in uniformity
|
|
CV_SLOPE_PATCHINESS_MIN <- 0.005 # Minor divergence
|
|
CV_SLOPE_PATCHINESS_MAX <- 0.02 # Growing patchiness
|
|
CV_SLOPE_SEVERE_MIN <- 0.02 # Field fragmentation beginning
|
|
|
|
# CLOUD COVER ROUNDING INTERVALS
|
|
CLOUD_INTERVALS <- c(0, 50, 60, 70, 80, 90, 100)
|
|
|
|
# PERCENTILE CALCULATIONS
|
|
CI_PERCENTILE_LOW <- 0.10
|
|
CI_PERCENTILE_HIGH <- 0.90
|
|
|
|
# GERMINATION THRESHOLD (for germination_progress calculation)
|
|
GERMINATION_CI_THRESHOLD <- 2.0
|
|
|
|
# PLANTING DATE & AGE CONFIGURATION
|
|
# Load from harvest.xlsx (scripts 22 & 23) - no fallback to uniform dates
|
|
|
|
# HISTORICAL DATA LOOKBACK
|
|
WEEKS_FOR_FOUR_WEEK_TREND <- 4
|
|
WEEKS_FOR_CV_TREND_SHORT <- 2
|
|
WEEKS_FOR_CV_TREND_LONG <- 8
|
|
|
|
# ============================================================================
|
|
# 1. Load required libraries
|
|
# ============================================================================
|
|
|
|
suppressPackageStartupMessages({
|
|
library(here)
|
|
library(sf)
|
|
library(terra)
|
|
library(dplyr)
|
|
library(tidyr)
|
|
library(lubridate)
|
|
library(readr)
|
|
library(readxl)
|
|
library(writexl)
|
|
library(purrr)
|
|
library(furrr)
|
|
library(future)
|
|
library(caret)
|
|
library(CAST)
|
|
library(randomForest)
|
|
tryCatch({
|
|
library(torch)
|
|
}, error = function(e) {
|
|
message("Note: torch package not available - harvest model inference will be skipped")
|
|
})
|
|
})
|
|
|
|
# ============================================================================
|
|
# LOAD UTILITY FUNCTIONS FROM SEPARATED MODULES
|
|
# ============================================================================
|
|
|
|
tryCatch({
|
|
source(here("r_app", "80_weekly_stats_utils.R"))
|
|
}, error = function(e) {
|
|
stop("Error loading 80_weekly_stats_utils.R: ", e$message)
|
|
})
|
|
|
|
tryCatch({
|
|
source(here("r_app", "80_report_building_utils.R"))
|
|
}, error = function(e) {
|
|
stop("Error loading 80_report_building_utils.R: ", e$message)
|
|
})
|
|
|
|
# ============================================================================
|
|
# PHASE AND STATUS TRIGGER DEFINITIONS
|
|
# ============================================================================
|
|
|
|
PHASE_DEFINITIONS <- data.frame(
|
|
phase = c("Germination", "Tillering", "Grand Growth", "Maturation"),
|
|
age_start = c(0, 4, 17, 39),
|
|
age_end = c(6, 16, 39, 200),
|
|
stringsAsFactors = FALSE
|
|
)
|
|
|
|
STATUS_TRIGGERS <- data.frame(
|
|
trigger = c(
|
|
"germination_started",
|
|
"germination_complete",
|
|
"stress_detected_whole_field",
|
|
"strong_recovery",
|
|
"growth_on_track",
|
|
"maturation_progressing",
|
|
"harvest_ready"
|
|
),
|
|
age_min = c(0, 0, NA, NA, 4, 39, 45),
|
|
age_max = c(6, 6, NA, NA, 39, 200, 200),
|
|
description = c(
|
|
"10% of field CI > 2",
|
|
"70% of field CI >= 2",
|
|
"CI decline > -1.5 + low CV",
|
|
"CI increase > +1.5",
|
|
"CI increasing consistently",
|
|
"High CI, stable/declining",
|
|
"Age 45+ weeks (ready to harvest)"
|
|
),
|
|
stringsAsFactors = FALSE
|
|
)
|
|
|
|
# ============================================================================
|
|
# MAIN
|
|
# ============================================================================
|
|
|
|
# ============================================================================
|
|
# MAIN
|
|
# ============================================================================
|
|
|
|
main <- function() {
|
|
# Parse command-line arguments
|
|
args <- commandArgs(trailingOnly = TRUE)
|
|
|
|
# end_date (arg 1)
|
|
# Priority: 1) Command-line arg, 2) Global end_date variable (for recursive calls), 3) Global end_date_str, 4) Sys.Date()
|
|
end_date <- if (length(args) >= 1 && !is.na(args[1])) {
|
|
as.Date(args[1])
|
|
} else if (exists("end_date", envir = .GlobalEnv)) {
|
|
# For recursive calls, use the end_date that was set in the global environment
|
|
get("end_date", envir = .GlobalEnv)
|
|
} else if (exists("end_date_str", envir = .GlobalEnv)) {
|
|
as.Date(get("end_date_str", envir = .GlobalEnv))
|
|
} else {
|
|
Sys.Date()
|
|
}
|
|
|
|
# project_dir (arg 2)
|
|
project_dir <- if (length(args) >= 2 && !is.na(args[2])) {
|
|
as.character(args[2])
|
|
} else if (exists("project_dir", envir = .GlobalEnv)) {
|
|
get("project_dir", envir = .GlobalEnv)
|
|
} else {
|
|
"angata"
|
|
}
|
|
|
|
# offset (arg 3) - for backward compatibility with old 09
|
|
offset <- if (length(args) >= 3 && !is.na(args[3])) {
|
|
as.numeric(args[3])
|
|
} else {
|
|
7
|
|
}
|
|
|
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
|
assign("end_date_str", format(end_date, "%Y-%m-%d"), envir = .GlobalEnv)
|
|
|
|
message("\n" %+% strrep("=", 70))
|
|
message("80_CALCULATE_KPIs.R - CONSOLIDATED KPI CALCULATION")
|
|
message(strrep("=", 70))
|
|
message("Date:", format(end_date, "%Y-%m-%d"))
|
|
message("Project:", project_dir)
|
|
message("Mode: Per-field analysis (SC-64) + Farm-level KPIs")
|
|
message("")
|
|
|
|
# Load configuration and utilities
|
|
# source(here("r_app", "crop_messaging_utils.R"))
|
|
|
|
tryCatch({
|
|
source(here("r_app", "parameters_project.R"))
|
|
}, error = function(e) {
|
|
stop("Error loading parameters_project.R: ", e$message)
|
|
})
|
|
|
|
tryCatch({
|
|
source(here("r_app", "30_growth_model_utils.R"))
|
|
}, error = function(e) {
|
|
warning("30_growth_model_utils.R not found - yield prediction KPI will use placeholder data")
|
|
})
|
|
|
|
# ========== PER-FIELD ANALYSIS (SC-64) ==========
|
|
|
|
message("\n" %+% strrep("-", 70))
|
|
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS (SC-64 ENHANCEMENTS)")
|
|
message(strrep("-", 70))
|
|
|
|
current_week <- as.numeric(format(end_date, "%V"))
|
|
year <- as.numeric(format(end_date, "%Y"))
|
|
previous_week <- current_week - 1
|
|
if (previous_week < 1) previous_week <- 52
|
|
|
|
message(paste("Week:", current_week, "/ Year:", year))
|
|
|
|
# Find tile files - approach from Script 20
|
|
message("Finding tile files...")
|
|
tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", current_week, year)
|
|
|
|
# Detect grid size subdirectory
|
|
detected_grid_size <- NA
|
|
if (dir.exists(weekly_tile_max)) {
|
|
subfolders <- list.dirs(weekly_tile_max, full.names = FALSE, recursive = FALSE)
|
|
grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
|
|
if (length(grid_patterns) > 0) {
|
|
detected_grid_size <- grid_patterns[1]
|
|
mosaic_dir <- file.path(weekly_tile_max, detected_grid_size)
|
|
message(paste(" Using grid-size subdirectory:", detected_grid_size))
|
|
}
|
|
}
|
|
|
|
tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
|
|
if (length(tile_files) == 0) {
|
|
stop(paste("No tile files found for week", current_week, year, "in", mosaic_dir))
|
|
}
|
|
message(paste(" Found", length(tile_files), "tiles"))
|
|
|
|
# Load field boundaries
|
|
tryCatch({
|
|
boundaries_result <- load_field_boundaries(data_dir)
|
|
|
|
if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
|
|
field_boundaries_sf <- boundaries_result$field_boundaries_sf
|
|
} else {
|
|
field_boundaries_sf <- boundaries_result
|
|
}
|
|
|
|
if (nrow(field_boundaries_sf) == 0) {
|
|
stop("No fields loaded from boundaries")
|
|
}
|
|
|
|
message(paste(" Loaded", nrow(field_boundaries_sf), "fields"))
|
|
}, error = function(e) {
|
|
stop("ERROR loading field boundaries: ", e$message)
|
|
})
|
|
|
|
message("Loading historical field data for trend calculations...")
|
|
# Load up to 8 weeks (max of 4-week and 8-week trend requirements)
|
|
# Function gracefully handles missing weeks and loads whatever exists
|
|
num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG) # Always 8
|
|
message(paste(" Attempting to load up to", num_weeks_to_load, "weeks of historical data..."))
|
|
|
|
# Only auto-generate on first call (not in recursive calls from within load_historical_field_data)
|
|
allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv)
|
|
|
|
historical_data <- load_historical_field_data(project_dir, current_week, reports_dir,
|
|
num_weeks = num_weeks_to_load,
|
|
auto_generate = allow_auto_gen,
|
|
field_boundaries_sf = field_boundaries_sf)
|
|
|
|
# Load harvest.xlsx for planting dates (season_start)
|
|
message("\nLoading harvest data from harvest.xlsx for planting dates...")
|
|
harvest_file_path <- file.path(data_dir, "harvest.xlsx")
|
|
|
|
harvesting_data <- tryCatch({
|
|
if (file.exists(harvest_file_path)) {
|
|
harvest_raw <- readxl::read_excel(harvest_file_path)
|
|
harvest_raw$season_start <- as.Date(harvest_raw$season_start)
|
|
harvest_raw$season_end <- as.Date(harvest_raw$season_end)
|
|
message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows"))
|
|
harvest_raw
|
|
} else {
|
|
message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path))
|
|
NULL
|
|
}
|
|
}, error = function(e) {
|
|
message(paste(" ERROR loading harvest.xlsx:", e$message))
|
|
NULL
|
|
})
|
|
|
|
planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
|
|
|
|
# Validate planting_dates
|
|
if (is.null(planting_dates) || nrow(planting_dates) == 0) {
|
|
message("WARNING: No planting dates available. Using NA for all fields.")
|
|
planting_dates <- data.frame(
|
|
field_id = field_boundaries_sf$field,
|
|
date = rep(as.Date(NA), nrow(field_boundaries_sf)),
|
|
stringsAsFactors = FALSE
|
|
)
|
|
}
|
|
|
|
# SCRIPT 20 APPROACH: Loop through tiles, extract all fields from each tile
|
|
# ============================================================================
|
|
# NEW MODULAR APPROACH: Load/Calculate weekly stats, apply trends
|
|
# ============================================================================
|
|
|
|
# Build tile grid (needed by calculate_field_statistics)
|
|
message("\nBuilding tile grid for current week...")
|
|
tile_grid <- build_tile_grid(mosaic_dir, current_week, year)
|
|
|
|
message("\nUsing modular RDS-based approach for weekly statistics...")
|
|
|
|
# Load/calculate CURRENT week stats (from tiles or RDS cache)
|
|
message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...")
|
|
current_stats <- load_or_calculate_weekly_stats(
|
|
week_num = current_week,
|
|
year = year,
|
|
project_dir = project_dir,
|
|
field_boundaries_sf = field_boundaries_sf,
|
|
mosaic_dir = tile_grid$mosaic_dir,
|
|
reports_dir = reports_dir,
|
|
report_date = end_date
|
|
)
|
|
|
|
message(paste(" ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week"))
|
|
|
|
# Load/calculate PREVIOUS week stats (from RDS cache or tiles)
|
|
message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...")
|
|
|
|
# Calculate report date for previous week (7 days before current)
|
|
prev_report_date <- end_date - 7
|
|
|
|
prev_stats <- load_or_calculate_weekly_stats(
|
|
week_num = previous_week,
|
|
year = year,
|
|
project_dir = project_dir,
|
|
field_boundaries_sf = field_boundaries_sf,
|
|
mosaic_dir = tile_grid$mosaic_dir,
|
|
reports_dir = reports_dir,
|
|
report_date = prev_report_date
|
|
)
|
|
|
|
message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week"))
|
|
message(paste(" Columns in prev_stats:", paste(names(prev_stats), collapse = ", ")))
|
|
message(paste(" CV column non-NA values in prev_stats:", sum(!is.na(prev_stats$CV))))
|
|
|
|
# Apply trend calculations (requires both weeks)
|
|
message("\n3. Calculating trend columns...")
|
|
current_stats <- calculate_kpi_trends(current_stats, prev_stats,
|
|
project_dir = project_dir,
|
|
reports_dir = reports_dir,
|
|
current_week = current_week,
|
|
year = year)
|
|
|
|
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_weeks_in_this_phase"))
|
|
|
|
# Load weekly harvest probabilities from script 31 (if available)
|
|
message("\n4. Loading harvest probabilities from script 31...")
|
|
harvest_prob_file <- file.path(reports_dir, "kpis", "field_stats",
|
|
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, year))
|
|
message(paste(" Looking for:", harvest_prob_file))
|
|
|
|
imminent_prob_data <- tryCatch({
|
|
if (file.exists(harvest_prob_file)) {
|
|
prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE)
|
|
message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields"))
|
|
prob_df %>%
|
|
select(field, imminent_prob, detected_prob) %>%
|
|
rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob)
|
|
} else {
|
|
message(paste(" INFO: Harvest probabilities not available (script 31 not run)"))
|
|
NULL
|
|
}
|
|
}, error = function(e) {
|
|
message(paste(" WARNING: Could not load harvest probabilities:", e$message))
|
|
NULL
|
|
})
|
|
|
|
# ============================================================================
|
|
# Build final output dataframe with all 21 columns
|
|
# ============================================================================
|
|
|
|
message("\nBuilding final field analysis output...")
|
|
|
|
# Pre-calculate acreages with geometry validation
|
|
# This avoids geometry errors during field_analysis construction
|
|
acreage_lookup <- tryCatch({
|
|
lookup_df <- field_boundaries_sf %>%
|
|
sf::st_drop_geometry() %>%
|
|
as.data.frame() %>%
|
|
mutate(
|
|
geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) {
|
|
tryCatch({
|
|
sf::st_is_valid(field_boundaries_sf[idx, ])
|
|
}, error = function(e) FALSE)
|
|
}),
|
|
area_ha = 0
|
|
)
|
|
|
|
# Calculate area for valid geometries
|
|
for (idx in which(lookup_df$geometry_valid)) {
|
|
tryCatch({
|
|
area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ]))
|
|
lookup_df$area_ha[idx] <- area_m2 / 10000
|
|
}, error = function(e) {
|
|
lookup_df$area_ha[idx] <<- NA_real_
|
|
})
|
|
}
|
|
|
|
# Convert hectares to acres
|
|
lookup_df %>%
|
|
mutate(acreage = area_ha / 0.404686) %>%
|
|
select(field, acreage)
|
|
}, error = function(e) {
|
|
message(paste("Warning: Could not calculate acreages from geometries -", e$message))
|
|
data.frame(field = character(0), acreage = numeric(0))
|
|
})
|
|
|
|
field_analysis_df <- current_stats %>%
|
|
mutate(
|
|
# Column 2: Farm_Section (user fills)
|
|
Farm_Section = NA_character_,
|
|
# Column 3: Field_name (from GeoJSON - already have Field_id, can look up)
|
|
Field_name = Field_id,
|
|
# Column 4: Acreage (calculate from geometry)
|
|
Acreage = {
|
|
acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)]
|
|
if_else(is.na(acreages_vec), 0, acreages_vec)
|
|
},
|
|
# Columns 5-6: Already in current_stats (Mean_CI, Weekly_ci_change)
|
|
# Column 7: Four_week_trend (from current_stats)
|
|
# Column 8: Last_harvest_or_planting_date (from harvest.xlsx - season_start)
|
|
Last_harvest_or_planting_date = {
|
|
planting_dates$planting_date[match(Field_id, planting_dates$field_id)]
|
|
},
|
|
# Column 9: Age_week (calculated from report date and planting date)
|
|
Age_week = {
|
|
sapply(seq_len(nrow(current_stats)), function(idx) {
|
|
planting_dt <- Last_harvest_or_planting_date[idx]
|
|
if (is.na(planting_dt)) {
|
|
return(NA_real_)
|
|
}
|
|
round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 1)
|
|
})
|
|
},
|
|
# Column 10: Phase (recalculate based on updated Age_week)
|
|
Phase = {
|
|
sapply(Age_week, function(age) {
|
|
if (is.na(age)) return(NA_character_)
|
|
if (age >= 0 & age < 4) return("Germination")
|
|
if (age >= 4 & age < 17) return("Tillering")
|
|
if (age >= 17 & age < 39) return("Grand Growth")
|
|
if (age >= 39) return("Maturation")
|
|
NA_character_
|
|
})
|
|
},
|
|
# Column 11: nmr_weeks_in_this_phase (already in current_stats from calculate_kpi_trends)
|
|
# Column 12: Germination_progress (calculated here from CI values)
|
|
Germination_progress = {
|
|
sapply(seq_len(nrow(current_stats)), function(idx) {
|
|
age_w <- Age_week[idx]
|
|
mean_ci_val <- Mean_CI[idx]
|
|
|
|
# Only relevant for germination phase (0-4 weeks)
|
|
if (is.na(age_w) || age_w < 0 || age_w >= 4) {
|
|
return(NA_character_)
|
|
}
|
|
|
|
# Estimate % of field with CI >= germination threshold
|
|
# Based on mean CI, estimate germination percentage
|
|
if (mean_ci_val >= 0.4) {
|
|
return(">80%")
|
|
} else if (mean_ci_val >= 0.25) {
|
|
return("50-80%")
|
|
} else if (mean_ci_val >= 0.1) {
|
|
return("20-50%")
|
|
} else {
|
|
return("<20%")
|
|
}
|
|
})
|
|
},
|
|
# Column 13: Imminent_prob (from script 31 or NA if not available)
|
|
Imminent_prob = {
|
|
if (!is.null(imminent_prob_data)) {
|
|
imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)]
|
|
} else {
|
|
rep(NA_real_, nrow(current_stats))
|
|
}
|
|
},
|
|
# Column 14: Status_trigger (based on harvest probability + growth status)
|
|
Status_trigger = {
|
|
triggers <- sapply(seq_len(nrow(current_stats)), function(idx) {
|
|
imminent_prob <- Imminent_prob[idx]
|
|
age_w <- Age_week[idx]
|
|
ci_change <- Weekly_ci_change[idx]
|
|
phase <- Phase[idx]
|
|
|
|
# Priority 1: Harvest imminent (high probability)
|
|
if (!is.na(imminent_prob) && imminent_prob > 0.5) {
|
|
return("harvest_imminent")
|
|
}
|
|
|
|
# Priority 2: Age-based triggers
|
|
if (!is.na(age_w)) {
|
|
if (age_w >= 45) return("harvest_ready")
|
|
if (age_w >= 39) return("maturation_progressing")
|
|
if (age_w >= 4 & age_w < 39) return("growth_on_track")
|
|
if (age_w < 4) return("germination_started")
|
|
}
|
|
|
|
# Fallback
|
|
NA_character_
|
|
})
|
|
triggers
|
|
},
|
|
# Columns 15-16: CI-based columns already in current_stats (CI_range, CI_Percentiles)
|
|
# Column 17: Already in current_stats (CV)
|
|
# Column 18: Already in current_stats (CV_Trend_Short_Term)
|
|
# Column 19: CV_Trend_Long_Term (from current_stats - raw slope value)
|
|
# Column 19b: CV_Trend_Long_Term_Category (categorical interpretation of slope)
|
|
CV_Trend_Long_Term_Category = {
|
|
sapply(current_stats$CV_Trend_Long_Term, categorize_cv_slope)
|
|
},
|
|
# Columns 20-21: Already in current_stats (Cloud_pct_clear, Cloud_category)
|
|
.keep = "all" # Keep all existing columns
|
|
) %>%
|
|
select(
|
|
all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Mean_CI", "Weekly_ci_change",
|
|
"Four_week_trend", "Last_harvest_or_planting_date", "Age_week", "Phase",
|
|
"nmr_weeks_in_this_phase", "Germination_progress", "Imminent_prob", "Status_trigger",
|
|
"CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
|
|
"Cloud_pct_clear", "Cloud_category")),
|
|
any_of(c("CI_range", "CI_Percentiles"))
|
|
)
|
|
|
|
message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 21 columns"))
|
|
|
|
summary_statistics_df <- generate_field_analysis_summary(field_analysis_df)
|
|
|
|
export_paths <- export_field_analysis_excel(
|
|
field_analysis_df,
|
|
summary_statistics_df,
|
|
project_dir,
|
|
current_week,
|
|
year,
|
|
reports_dir
|
|
)
|
|
|
|
cat("\n--- Per-field Results (first 10) ---\n")
|
|
available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_trigger", "Cloud_category")
|
|
available_cols <- available_cols[available_cols %in% names(field_analysis_df)]
|
|
if (length(available_cols) > 0) {
|
|
print(head(field_analysis_df[, available_cols], 10))
|
|
}
|
|
|
|
cat("\n--- Summary Statistics ---\n")
|
|
print(summary_statistics_df)
|
|
|
|
# ========== FARM-LEVEL KPI AGGREGATION ==========
|
|
# Aggregate the per-field analysis into farm-level summary statistics
|
|
|
|
cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n")
|
|
|
|
# Filter to only fields that have actual data (non-NA CI and valid acreage)
|
|
field_data <- field_analysis_df %>%
|
|
filter(!is.na(Mean_CI) & !is.na(Acreage)) %>%
|
|
filter(Acreage > 0)
|
|
|
|
if (nrow(field_data) > 0) {
|
|
|
|
if (nrow(field_data) > 0) {
|
|
# Create summary statistics
|
|
farm_summary <- list()
|
|
|
|
# 1. PHASE DISTRIBUTION
|
|
phase_dist <- field_data %>%
|
|
group_by(Phase) %>%
|
|
summarise(
|
|
num_fields = n(),
|
|
acreage = sum(Acreage, na.rm = TRUE),
|
|
.groups = 'drop'
|
|
) %>%
|
|
rename(Category = Phase)
|
|
|
|
farm_summary$phase_distribution <- phase_dist
|
|
|
|
# 2. STATUS TRIGGER DISTRIBUTION
|
|
status_dist <- field_data %>%
|
|
group_by(Status_trigger) %>%
|
|
summarise(
|
|
num_fields = n(),
|
|
acreage = sum(Acreage, na.rm = TRUE),
|
|
.groups = 'drop'
|
|
) %>%
|
|
rename(Category = Status_trigger)
|
|
|
|
farm_summary$status_distribution <- status_dist
|
|
|
|
# 3. CLOUD COVERAGE DISTRIBUTION
|
|
cloud_dist <- field_data %>%
|
|
group_by(Cloud_category) %>%
|
|
summarise(
|
|
num_fields = n(),
|
|
acreage = sum(Acreage, na.rm = TRUE),
|
|
.groups = 'drop'
|
|
) %>%
|
|
rename(Category = Cloud_category)
|
|
|
|
farm_summary$cloud_distribution <- cloud_dist
|
|
|
|
# 4. OVERALL STATISTICS
|
|
farm_summary$overall_stats <- data.frame(
|
|
total_fields = nrow(field_data),
|
|
total_acreage = sum(field_data$Acreage, na.rm = TRUE),
|
|
mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2),
|
|
median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2),
|
|
mean_cv = round(mean(field_data$CI_CV, na.rm = TRUE), 4),
|
|
week = current_week,
|
|
year = year,
|
|
date = as.character(end_date)
|
|
)
|
|
|
|
# Print summaries
|
|
cat("\n--- PHASE DISTRIBUTION ---\n")
|
|
print(phase_dist)
|
|
|
|
cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n")
|
|
print(status_dist)
|
|
|
|
cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n")
|
|
print(cloud_dist)
|
|
|
|
cat("\n--- OVERALL FARM STATISTICS ---\n")
|
|
print(farm_summary$overall_stats)
|
|
|
|
farm_kpi_results <- farm_summary
|
|
} else {
|
|
farm_kpi_results <- NULL
|
|
}
|
|
} else {
|
|
farm_kpi_results <- NULL
|
|
}
|
|
|
|
# ========== FINAL SUMMARY ==========
|
|
|
|
cat("\n" %+% strrep("=", 70) %+% "\n")
|
|
cat("80_CALCULATE_KPIs.R - COMPLETION SUMMARY\n")
|
|
cat(strrep("=", 70) %+% "\n")
|
|
cat("Per-field analysis fields analyzed:", nrow(field_analysis_df), "\n")
|
|
cat("Excel export:", export_paths$excel, "\n")
|
|
cat("RDS export:", export_paths$rds, "\n")
|
|
cat("CSV export:", export_paths$csv, "\n")
|
|
|
|
if (!is.null(farm_kpi_results)) {
|
|
cat("\nFarm-level KPIs: CALCULATED\n")
|
|
} else {
|
|
cat("\nFarm-level KPIs: SKIPPED (no valid tile data extracted)\n")
|
|
}
|
|
|
|
cat("\n✓ Consolidated KPI calculation complete!\n")
|
|
cat(" - Per-field data exported\n")
|
|
cat(" - Farm-level KPIs calculated\n")
|
|
cat(" - All outputs in:", reports_dir, "\n\n")
|
|
}
|
|
|
|
if (sys.nframe() == 0) {
|
|
main()
|
|
}
|