Merge remote-tracking branch 'origin/code-improvements' into translation
This commit is contained in:
commit
24fd5bb8b3
20
.gitignore
vendored
20
.gitignore
vendored
|
|
@ -9,6 +9,14 @@
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.pyc
|
*.pyc
|
||||||
*.pyo
|
*.pyo
|
||||||
|
*.py[cod]
|
||||||
|
*.py[cod]
|
||||||
|
myenv/myenv/
|
||||||
|
pip-wheel-metadata/
|
||||||
|
dist/
|
||||||
|
*.egg-info/
|
||||||
|
*.egg/*.egg/
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
# R Output Files
|
# R Output Files
|
||||||
*.Rout
|
*.Rout
|
||||||
|
|
@ -16,6 +24,7 @@ __pycache__/
|
||||||
*.RData
|
*.RData
|
||||||
*.Rdata
|
*.Rdata
|
||||||
.Rproj.user
|
.Rproj.user
|
||||||
|
.Rprofile
|
||||||
Rplots.pdf
|
Rplots.pdf
|
||||||
*.pdf
|
*.pdf
|
||||||
|
|
||||||
|
|
@ -48,15 +57,24 @@ reports/
|
||||||
# Experiment Outputs (temporary plots, analysis artifacts)
|
# Experiment Outputs (temporary plots, analysis artifacts)
|
||||||
python_app/harvest_detection_experiments/*/plots/
|
python_app/harvest_detection_experiments/*/plots/
|
||||||
python_app/harvest_detection_experiments/*/*.ipynb_checkpoints/
|
python_app/harvest_detection_experiments/*/*.ipynb_checkpoints/
|
||||||
|
CI_report_dashboard_planet_files/
|
||||||
|
|
||||||
# Cache Files
|
# Cache Files
|
||||||
rosm.cache/
|
rosm.cache/
|
||||||
*.cache
|
renv/cache/
|
||||||
|
|
||||||
# Logs
|
# Logs
|
||||||
*.log
|
*.log
|
||||||
package_manager.log
|
package_manager.log
|
||||||
|
|
||||||
|
# Temporary Files
|
||||||
|
*.tmp
|
||||||
|
*.save
|
||||||
|
*.bak
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
package_manager.log
|
||||||
|
|
||||||
# Laravel Storage (contains user data and outputs)
|
# Laravel Storage (contains user data and outputs)
|
||||||
laravel_app/storage/app/*/Data/
|
laravel_app/storage/app/*/Data/
|
||||||
laravel_app/storage/app/*/reports/
|
laravel_app/storage/app/*/reports/
|
||||||
|
|
|
||||||
|
|
@ -5,9 +5,12 @@ laravel_app/
|
||||||
data_validation_tool/
|
data_validation_tool/
|
||||||
python_app/harvest_detection_experiments/
|
python_app/harvest_detection_experiments/
|
||||||
python_app/experiments/
|
python_app/experiments/
|
||||||
|
r_app/old_scripts/
|
||||||
|
r_app/experiments/
|
||||||
phase2_refinement/
|
phase2_refinement/
|
||||||
webapps/
|
webapps/
|
||||||
tools/
|
tools/
|
||||||
|
old_sh/
|
||||||
output/
|
output/
|
||||||
renv/
|
renv/
|
||||||
*.py
|
*.py
|
||||||
|
|
|
||||||
43
python_app/.gitignore
vendored
43
python_app/.gitignore
vendored
|
|
@ -1,43 +0,0 @@
|
||||||
# .gitignore
|
|
||||||
|
|
||||||
# Ignore the virtual environment directory
|
|
||||||
myenv/
|
|
||||||
|
|
||||||
# Byte-compiled / optimized files
|
|
||||||
__pycache__/
|
|
||||||
*.py[cod]
|
|
||||||
*$py.class
|
|
||||||
|
|
||||||
# Ignore all .pyc files in any directory
|
|
||||||
**/*.pyc
|
|
||||||
|
|
||||||
# Ignore pip wheel metadata
|
|
||||||
pip-wheel-metadata/
|
|
||||||
|
|
||||||
# Ignore the `dist/` directory used for package distributions
|
|
||||||
dist/
|
|
||||||
|
|
||||||
# Ignore Python egg metadata, regenerated from source files by setuptools
|
|
||||||
*.egg-info/
|
|
||||||
*.egg-info/*
|
|
||||||
*.egg/
|
|
||||||
|
|
||||||
# Ignore Jupyter Notebook
|
|
||||||
.ipynb_checkpoints
|
|
||||||
|
|
||||||
# Ignore VSCode directory
|
|
||||||
.vscode/
|
|
||||||
|
|
||||||
# Ignore JetBrains IDEs directory
|
|
||||||
.idea/
|
|
||||||
|
|
||||||
# Ignore MacOS directory
|
|
||||||
.DS_Store
|
|
||||||
|
|
||||||
# Ignore other unwanted file
|
|
||||||
*.log
|
|
||||||
*.bak
|
|
||||||
*.swp
|
|
||||||
*.swo
|
|
||||||
*.swp
|
|
||||||
|
|
||||||
|
|
@ -111,7 +111,7 @@ def main():
|
||||||
|
|
||||||
# [3/4] Run model predictions with two-step detection
|
# [3/4] Run model predictions with two-step detection
|
||||||
print("\n[3/4] Running two-step harvest detection...")
|
print("\n[3/4] Running two-step harvest detection...")
|
||||||
print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DOY reset)")
|
print(" (Using threshold=0.3, consecutive_days=2 - tuned baseline with DAH reset)")
|
||||||
refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device,
|
refined_results = run_two_step_refinement(ci_data, model, config, scalers, device=device,
|
||||||
phase1_threshold=0.3, phase1_consecutive=2)
|
phase1_threshold=0.3, phase1_consecutive=2)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -144,7 +144,7 @@ def create_model(model_type: str, input_size: int, hidden_size: int = 128,
|
||||||
# FEATURE ENGINEERING (from src/feature_engineering.py, simplified for inline)
|
# FEATURE ENGINEERING (from src/feature_engineering.py, simplified for inline)
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> pd.DataFrame:
|
def compute_ci_features(ci_series: pd.Series, dah_series: pd.Series = None) -> pd.DataFrame:
|
||||||
"""Compute all CI-based features (state, velocity, acceleration, min/max/range/std/CV)."""
|
"""Compute all CI-based features (state, velocity, acceleration, min/max/range/std/CV)."""
|
||||||
features = pd.DataFrame(index=ci_series.index)
|
features = pd.DataFrame(index=ci_series.index)
|
||||||
|
|
||||||
|
|
@ -177,9 +177,9 @@ def compute_ci_features(ci_series: pd.Series, doy_series: pd.Series = None) -> p
|
||||||
ma = ci_series.rolling(window=window, min_periods=1).mean()
|
ma = ci_series.rolling(window=window, min_periods=1).mean()
|
||||||
features[f'{window}d_CV'] = features[f'{window}d_std'] / (ma + 1e-6)
|
features[f'{window}d_CV'] = features[f'{window}d_std'] / (ma + 1e-6)
|
||||||
|
|
||||||
# DOY normalized
|
# DAH normalized (Days After Harvest)
|
||||||
if doy_series is not None:
|
if dah_series is not None:
|
||||||
features['DOY_normalized'] = doy_series / 450.0
|
features['DAH_normalized'] = dah_series / 450.0
|
||||||
|
|
||||||
return features.fillna(0)
|
return features.fillna(0)
|
||||||
|
|
||||||
|
|
@ -193,8 +193,8 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column:
|
||||||
data_df: DataFrame with Date and CI data (may be a window after a harvest)
|
data_df: DataFrame with Date and CI data (may be a window after a harvest)
|
||||||
feature_names: List of feature names to extract
|
feature_names: List of feature names to extract
|
||||||
ci_column: Name of CI column
|
ci_column: Name of CI column
|
||||||
season_anchor_day: Day in FULL sequence where this season started (for DOY reset)
|
season_anchor_day: Day in FULL sequence where this season started (for DAH reset)
|
||||||
DOY will be recalculated as: 1, 2, 3, ... from this point
|
DAH will be recalculated as: 1, 2, 3, ... from this point
|
||||||
lookback_start: Starting index in original full data (for season reset calculation)
|
lookback_start: Starting index in original full data (for season reset calculation)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
|
|
@ -203,23 +203,23 @@ def extract_features(data_df: pd.DataFrame, feature_names: List[str], ci_column:
|
||||||
# Compute all CI features
|
# Compute all CI features
|
||||||
ci_series = data_df[ci_column].astype(float)
|
ci_series = data_df[ci_column].astype(float)
|
||||||
|
|
||||||
# Compute DOY (age/days since season start) - NOT day-of-year!
|
# Compute DAH (age/days since season start) - NOT day-of-year!
|
||||||
# DOY is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365)
|
# DAH is a continuous counter: 1, 2, 3, ..., 475 (doesn't cycle at 365)
|
||||||
# It only resets to 1 after a harvest is detected (new season)
|
# It only resets to 1 after a harvest is detected (new season)
|
||||||
doy_series = None
|
dah_series = None
|
||||||
if 'DOY_normalized' in feature_names:
|
if 'DAH_normalized' in feature_names:
|
||||||
if season_anchor_day is not None and lookback_start >= season_anchor_day:
|
if season_anchor_day is not None and lookback_start >= season_anchor_day:
|
||||||
# Season was reset after harvest. Recalculate DOY as simple counter from 1
|
# Season was reset after harvest. Recalculate DAH as simple counter from 1
|
||||||
# This is a window starting at or after harvest, so DOY should be: 1, 2, 3, ...
|
# This is a window starting at or after harvest, so DAH should be: 1, 2, 3, ...
|
||||||
doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
|
dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
|
||||||
elif 'DOY' in data_df.columns:
|
elif 'DAH' in data_df.columns:
|
||||||
# Use DOY directly from CSV - already calculated as continuous age counter
|
# Use DAH directly from CSV - already calculated as continuous age counter
|
||||||
doy_series = pd.Series(data_df['DOY'].astype(float).values, index=data_df.index)
|
dah_series = pd.Series(data_df['DAH'].astype(float).values, index=data_df.index)
|
||||||
else:
|
else:
|
||||||
# Fallback: create continuous age counter (1, 2, 3, ...)
|
# Fallback: create continuous age counter (1, 2, 3, ...)
|
||||||
doy_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
|
dah_series = pd.Series(np.arange(1, len(data_df) + 1), index=data_df.index)
|
||||||
|
|
||||||
all_features = compute_ci_features(ci_series, doy_series)
|
all_features = compute_ci_features(ci_series, dah_series)
|
||||||
|
|
||||||
# Select requested features
|
# Select requested features
|
||||||
requested = [f for f in feature_names if f in all_features.columns]
|
requested = [f for f in feature_names if f in all_features.columns]
|
||||||
|
|
@ -303,9 +303,9 @@ def load_harvest_data(data_file: Path) -> pd.DataFrame:
|
||||||
def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device,
|
def run_phase1_growing_window(field_data, model, config, scalers, ci_column, device,
|
||||||
threshold=0.45, consecutive_days=2):
|
threshold=0.45, consecutive_days=2):
|
||||||
"""
|
"""
|
||||||
Phase 1: Growing window detection with DOY season reset.
|
Phase 1: Growing window detection with DAH season reset.
|
||||||
|
|
||||||
For each detected harvest, reset DOY counter for the next season.
|
For each detected harvest, reset DAH counter for the next season.
|
||||||
This allows the model to detect multiple consecutive harvests in multi-year data.
|
This allows the model to detect multiple consecutive harvests in multi-year data.
|
||||||
"""
|
"""
|
||||||
harvest_dates = []
|
harvest_dates = []
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@
|
||||||
"7d_std",
|
"7d_std",
|
||||||
"14d_std",
|
"14d_std",
|
||||||
"21d_std",
|
"21d_std",
|
||||||
"DOY_normalized"
|
"DAH_normalized"
|
||||||
],
|
],
|
||||||
"model": {
|
"model": {
|
||||||
"type": "LSTM",
|
"type": "LSTM",
|
||||||
|
|
|
||||||
20
r_app/.gitignore
vendored
20
r_app/.gitignore
vendored
|
|
@ -1,20 +0,0 @@
|
||||||
# .gitignore
|
|
||||||
|
|
||||||
# Ignore renv binary cache
|
|
||||||
renv/cache/
|
|
||||||
renv
|
|
||||||
|
|
||||||
# Ignore temporary files
|
|
||||||
*.tmp
|
|
||||||
*.swp
|
|
||||||
*.save
|
|
||||||
|
|
||||||
# Ignore files related to Rproj
|
|
||||||
.Rproj.user/
|
|
||||||
.Rhistory
|
|
||||||
.RData
|
|
||||||
.Rprofile
|
|
||||||
# Ignore OSX files
|
|
||||||
.DS_Store
|
|
||||||
CI_report_dashboard_planet_files/
|
|
||||||
|
|
||||||
|
|
@ -48,10 +48,30 @@
|
||||||
# #' safe_log("Check input file", "WARNING")
|
# #' safe_log("Check input file", "WARNING")
|
||||||
# #' safe_log("Failed to load data", "ERROR")
|
# #' safe_log("Failed to load data", "ERROR")
|
||||||
# #'
|
# #'
|
||||||
# safe_log <- function(message, level = "INFO") {
|
safe_log <- function(message, level = "INFO") {
|
||||||
# prefix <- sprintf("[%s]", level)
|
# Build the full log message with timestamp
|
||||||
# cat(sprintf("%s %s\n", prefix, message))
|
timestamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
|
||||||
# }
|
log_msg <- sprintf("[%s] [%s] %s", timestamp, level, message)
|
||||||
|
|
||||||
|
# Only output to console if NOT rendering with knitr
|
||||||
|
if (!isTRUE(getOption('knitr.in.progress'))) {
|
||||||
|
cat(log_msg, "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Always write to log file if available
|
||||||
|
if (exists("LOG_FILE", envir = .GlobalEnv)) {
|
||||||
|
log_file <- get("LOG_FILE", envir = .GlobalEnv)
|
||||||
|
if (!is.null(log_file) && nzchar(log_file)) {
|
||||||
|
tryCatch({
|
||||||
|
cat(log_msg, "\n", file = log_file, append = TRUE)
|
||||||
|
}, error = function(e) {
|
||||||
|
# Silently fail if log file can't be written
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
invisible(log_msg)
|
||||||
|
}
|
||||||
|
|
||||||
# #' SmartCane Debug Logging (Conditional)
|
# #' SmartCane Debug Logging (Conditional)
|
||||||
# #'
|
# #'
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@
|
||||||
# OUTPUT DATA:
|
# OUTPUT DATA:
|
||||||
# - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/
|
# - Destination: laravel_app/storage/app/{project}/Data/extracted_ci/cumulative_vals/
|
||||||
# - Format: CSV (long format)
|
# - Format: CSV (long format)
|
||||||
# - Columns: field, sub_field, Date, FitData, DOY, value
|
# - Columns: field, sub_field, Date, FitData, DAH, value
|
||||||
#
|
#
|
||||||
# USAGE:
|
# USAGE:
|
||||||
# Rscript 21_convert_ci_rds_to_csv.R [project]
|
# Rscript 21_convert_ci_rds_to_csv.R [project]
|
||||||
|
|
@ -38,7 +38,7 @@
|
||||||
# NOTES:
|
# NOTES:
|
||||||
# - Data source: Uses interpolated CI data from Script 30 (growth model output)
|
# - Data source: Uses interpolated CI data from Script 30 (growth model output)
|
||||||
# - Handles both wide format and long format inputs from growth model
|
# - Handles both wide format and long format inputs from growth model
|
||||||
# - DOY (Day of Year): Calculated from date for seasonal analysis
|
# - DAH (Days After Harvest): Calculated from date; represents crop age in days
|
||||||
# - Python integration: CSV format compatible with pandas/scikit-learn workflows
|
# - Python integration: CSV format compatible with pandas/scikit-learn workflows
|
||||||
# - Used by: Python harvest detection models (harvest_date_prediction.py)
|
# - Used by: Python harvest detection models (harvest_date_prediction.py)
|
||||||
# - Exports complete growth curves with interpolated values for ML training
|
# - Exports complete growth curves with interpolated values for ML training
|
||||||
|
|
@ -82,13 +82,13 @@ wide_to_long_ci_data <- function(ci_data_wide) {
|
||||||
filter(!is.na(FitData))
|
filter(!is.na(FitData))
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Create daily interpolated sequences with DOY for each field
|
#' Create daily interpolated sequences with DAH for each field
|
||||||
#'
|
#'
|
||||||
#' For each field/sub_field combination, creates complete daily sequences from first to last date,
|
#' For each field/sub_field combination, creates complete daily sequences from first to last date,
|
||||||
#' fills in measurements, and interpolates missing dates.
|
#' fills in measurements, and interpolates missing dates.
|
||||||
#'
|
#'
|
||||||
#' @param ci_data_long Long format tibble: field, sub_field, Date, FitData
|
#' @param ci_data_long Long format tibble: field, sub_field, Date, FitData
|
||||||
#' @return Tibble with: field, sub_field, Date, FitData, DOY, value
|
#' @return Tibble with: field, sub_field, Date, FitData, DAH, value
|
||||||
create_interpolated_daily_sequences <- function(ci_data_long) {
|
create_interpolated_daily_sequences <- function(ci_data_long) {
|
||||||
ci_data_long %>%
|
ci_data_long %>%
|
||||||
group_by(field, sub_field) %>%
|
group_by(field, sub_field) %>%
|
||||||
|
|
@ -106,7 +106,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) {
|
||||||
Date = date_seq,
|
Date = date_seq,
|
||||||
value = NA_real_,
|
value = NA_real_,
|
||||||
FitData = NA_real_,
|
FitData = NA_real_,
|
||||||
DOY = seq_along(date_seq) # Continuous day counter: 1, 2, 3, ...
|
DAH = seq_along(date_seq) # Continuous day counter: 1, 2, 3, ...
|
||||||
)
|
)
|
||||||
|
|
||||||
# Fill in actual measurement values
|
# Fill in actual measurement values
|
||||||
|
|
@ -124,7 +124,7 @@ create_interpolated_daily_sequences <- function(ci_data_long) {
|
||||||
})
|
})
|
||||||
) %>%
|
) %>%
|
||||||
unnest(data) %>%
|
unnest(data) %>%
|
||||||
select(field, sub_field, Date, FitData, DOY, value) %>%
|
select(field, sub_field, Date, FitData, DAH, value) %>%
|
||||||
arrange(field, Date)
|
arrange(field, Date)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -208,7 +208,7 @@ extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season,
|
||||||
# Add additional columns
|
# Add additional columns
|
||||||
CI <- CI %>%
|
CI <- CI %>%
|
||||||
dplyr::mutate(
|
dplyr::mutate(
|
||||||
DOY = seq(1, n(), 1),
|
DAH = seq(1, n(), 1),
|
||||||
model = paste0("Data", season, " : ", field_name),
|
model = paste0("Data", season, " : ", field_name),
|
||||||
season = season,
|
season = season,
|
||||||
subField = field_name
|
subField = field_name
|
||||||
|
|
|
||||||
|
|
@ -141,12 +141,15 @@ suppressPackageStartupMessages({
|
||||||
library(writexl) # For writing Excel outputs (KPI summary tables)
|
library(writexl) # For writing Excel outputs (KPI summary tables)
|
||||||
library(progress) # For progress bars during field processing
|
library(progress) # For progress bars during field processing
|
||||||
|
|
||||||
# ML/Analysis (optional - only for harvest model inference)
|
# ML models (for yield prediction KPI)
|
||||||
tryCatch({
|
library(caret) # For training Random Forest with cross-validation
|
||||||
library(torch) # For PyTorch model inference (harvest readiness prediction)
|
library(CAST) # For Forward Feature Selection in caret models
|
||||||
}, error = function(e) {
|
|
||||||
message("Note: torch package not available - harvest model inference will be skipped")
|
# ML models (for yield prediction KPI)
|
||||||
})
|
library(caret) # For training Random Forest with cross-validation
|
||||||
|
library(CAST) # For Forward Feature Selection in caret models
|
||||||
|
|
||||||
|
|
||||||
})
|
})
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
@ -316,6 +319,9 @@ main <- function() {
|
||||||
# Use centralized paths from setup object (no need for file.path calls)
|
# Use centralized paths from setup object (no need for file.path calls)
|
||||||
weekly_mosaic <- setup$weekly_mosaic_dir
|
weekly_mosaic <- setup$weekly_mosaic_dir
|
||||||
daily_vals_dir <- setup$daily_ci_vals_dir
|
daily_vals_dir <- setup$daily_ci_vals_dir
|
||||||
|
reports_dir <- setup$kpi_reports_dir
|
||||||
|
data_dir <- setup$data_dir
|
||||||
|
|
||||||
|
|
||||||
tryCatch({
|
tryCatch({
|
||||||
source(here("r_app", "30_growth_model_utils.R"))
|
source(here("r_app", "30_growth_model_utils.R"))
|
||||||
|
|
@ -332,8 +338,8 @@ main <- function() {
|
||||||
message("WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)")
|
message("WORKFLOW: CALCULATING 6 FARM-LEVEL KPIs (Script 90 compatible)")
|
||||||
message(strrep("=", 70))
|
message(strrep("=", 70))
|
||||||
|
|
||||||
# Prepare inputs for KPI calculation (already created by setup_project_directories)
|
# Prepare outputs and inputs for KPI calculation (already created by setup_project_directories)
|
||||||
reports_dir_kpi <- setup$kpi_reports_dir
|
reports_dir_kpi <- file.path(setup$reports_dir, "kpis")
|
||||||
cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir
|
cumulative_CI_vals_dir <- setup$cumulative_CI_vals_dir
|
||||||
|
|
||||||
# Load field boundaries for workflow (use data_dir from setup)
|
# Load field boundaries for workflow (use data_dir from setup)
|
||||||
|
|
@ -356,18 +362,15 @@ main <- function() {
|
||||||
stop("ERROR loading field boundaries: ", e$message)
|
stop("ERROR loading field boundaries: ", e$message)
|
||||||
})
|
})
|
||||||
|
|
||||||
# Load harvesting data
|
# Load harvesting data for yield prediction (using common helper function)
|
||||||
if (!exists("harvesting_data")) {
|
harvesting_data <- load_harvest_data(setup$data_dir)
|
||||||
warning("harvesting_data not loaded. TCH KPI will use placeholder values.")
|
|
||||||
harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric())
|
|
||||||
}
|
|
||||||
|
|
||||||
# Extract current week/year from end_date
|
# Extract current week/year from end_date
|
||||||
current_week <- as.numeric(format(end_date, "%V"))
|
current_week <- as.numeric(format(end_date, "%V"))
|
||||||
current_year <- as.numeric(format(end_date, "%G"))
|
current_year <- as.numeric(format(end_date, "%G"))
|
||||||
|
|
||||||
# Call with correct signature
|
# Call with correct signature
|
||||||
kpi_results <- calculate_all_kpis(
|
kpi_results <- calculate_all_field_analysis_agronomic_support(
|
||||||
field_boundaries_sf = field_boundaries_sf,
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
current_week = current_week,
|
current_week = current_week,
|
||||||
current_year = current_year,
|
current_year = current_year,
|
||||||
|
|
@ -376,8 +379,13 @@ main <- function() {
|
||||||
ci_rds_path = NULL,
|
ci_rds_path = NULL,
|
||||||
harvesting_data = harvesting_data,
|
harvesting_data = harvesting_data,
|
||||||
output_dir = reports_dir_kpi,
|
output_dir = reports_dir_kpi,
|
||||||
|
data_dir = setup$data_dir,
|
||||||
project_dir = project_dir
|
project_dir = project_dir
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Extract results
|
||||||
|
field_analysis_df <- kpi_results$field_analysis_df
|
||||||
|
export_paths <- kpi_results$export_paths
|
||||||
|
|
||||||
cat("\n=== KPI CALCULATION COMPLETE ===\n")
|
cat("\n=== KPI CALCULATION COMPLETE ===\n")
|
||||||
cat("Summary tables saved for Script 90 integration\n")
|
cat("Summary tables saved for Script 90 integration\n")
|
||||||
|
|
@ -389,599 +397,57 @@ main <- function() {
|
||||||
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
|
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
|
||||||
message(strrep("=", 70))
|
message(strrep("=", 70))
|
||||||
|
|
||||||
# Set reports_dir for CANE_SUPPLY workflow (used by export functions)
|
# Define variables needed for workflow functions
|
||||||
reports_dir <- setup$kpi_reports_dir
|
data_dir <- setup$data_dir
|
||||||
data_dir <- setup$data_dir
|
|
||||||
|
|
||||||
# Continue with existing per-field analysis code below
|
|
||||||
|
|
||||||
message("\n", strrep("-", 70))
|
|
||||||
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ")
|
|
||||||
message(strrep("-", 70))
|
|
||||||
|
|
||||||
weeks <- calculate_week_numbers(end_date)
|
|
||||||
current_week <- weeks$current_week
|
|
||||||
current_year <- weeks$current_year
|
|
||||||
previous_week <- weeks$previous_week
|
|
||||||
previous_year <- weeks$previous_year
|
|
||||||
|
|
||||||
message(paste("Week:", current_week, "/ Year (ISO 8601):", current_year))
|
|
||||||
# Find per-field weekly mosaics
|
|
||||||
message("Finding per-field weekly mosaics...")
|
|
||||||
|
|
||||||
if (!dir.exists(weekly_mosaic)) {
|
# Load field boundaries for workflow (use data_dir from setup)
|
||||||
stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic,
|
message("\nLoading field boundaries for KPI calculation...")
|
||||||
"\nScript 40 (mosaic creation) must be run first."))
|
|
||||||
}
|
|
||||||
|
|
||||||
field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE)
|
|
||||||
field_dirs <- field_dirs[field_dirs != ""]
|
|
||||||
|
|
||||||
if (length(field_dirs) == 0) {
|
|
||||||
stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic,
|
|
||||||
"\nScript 40 must create weekly_mosaic/{FIELD}/ structure."))
|
|
||||||
}
|
|
||||||
|
|
||||||
# Verify we have mosaics for this week
|
|
||||||
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year)
|
|
||||||
per_field_files <- c()
|
|
||||||
for (field in field_dirs) {
|
|
||||||
field_mosaic_dir <- file.path(weekly_mosaic, field)
|
|
||||||
files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
|
|
||||||
if (length(files) > 0) {
|
|
||||||
per_field_files <- c(per_field_files, files)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (length(per_field_files) == 0) {
|
|
||||||
stop(paste("ERROR: No mosaics found for week", current_week, "year", current_year,
|
|
||||||
"\nExpected pattern:", single_file_pattern,
|
|
||||||
"\nChecked:", weekly_mosaic))
|
|
||||||
}
|
|
||||||
|
|
||||||
message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics"))
|
|
||||||
|
|
||||||
mosaic_mode <- "per-field"
|
|
||||||
mosaic_dir <- weekly_mosaic
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Load field boundaries
|
|
||||||
tryCatch({
|
tryCatch({
|
||||||
boundaries_result <- load_field_boundaries(data_dir)
|
boundaries_result <- load_field_boundaries(setup$data_dir)
|
||||||
|
|
||||||
if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
|
if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
|
||||||
field_boundaries_sf <- boundaries_result$field_boundaries_sf
|
field_boundaries_sf <- boundaries_result$field_boundaries_sf
|
||||||
} else {
|
} else {
|
||||||
field_boundaries_sf <- boundaries_result
|
field_boundaries_sf <- boundaries_result
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nrow(field_boundaries_sf) == 0) {
|
if (nrow(field_boundaries_sf) == 0) {
|
||||||
stop("No fields loaded from boundaries")
|
stop("No fields loaded from boundaries")
|
||||||
}
|
}
|
||||||
|
|
||||||
message(paste(" Loaded", nrow(field_boundaries_sf), "fields"))
|
message(paste(" ✓ Loaded", nrow(field_boundaries_sf), "fields"))
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
stop("ERROR loading field boundaries: ", e$message)
|
stop("ERROR loading field boundaries: ", e$message)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Load harvesting data for yield prediction (using common helper function)
|
||||||
|
harvesting_data <- load_harvest_data(setup$data_dir)
|
||||||
|
|
||||||
|
# Extract current week/year from end_date
|
||||||
|
current_week <- as.numeric(format(end_date, "%V"))
|
||||||
|
current_year <- as.numeric(format(end_date, "%G"))
|
||||||
|
|
||||||
|
|
||||||
message("Loading historical field data for trend calculations...")
|
# Call the main orchestrator function from kpi_calculation_utils.R
|
||||||
# Load up to 8 weeks (max of 4-week and 8-week trend requirements)
|
workflow_results <- calculate_field_analysis_cane_supply(
|
||||||
# Function gracefully handles missing weeks and loads whatever exists
|
setup = setup,
|
||||||
num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG) # Always 8
|
client_config = client_config,
|
||||||
message(paste(" Attempting to load up to", num_weeks_to_load, "weeks of historical data..."))
|
end_date = end_date,
|
||||||
|
|
||||||
# Only auto-generate on first call (not in recursive calls from within load_historical_field_data)
|
|
||||||
allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv)
|
|
||||||
|
|
||||||
historical_data <- load_historical_field_data(project_dir, current_week, current_year, reports_dir,
|
|
||||||
num_weeks = num_weeks_to_load,
|
|
||||||
auto_generate = allow_auto_gen,
|
|
||||||
field_boundaries_sf = field_boundaries_sf,
|
|
||||||
daily_vals_dir = daily_vals_dir)
|
|
||||||
|
|
||||||
# Load harvest.xlsx for planting dates (season_start)
|
|
||||||
message("\nLoading harvest data from harvest.xlsx for planting dates...")
|
|
||||||
harvest_file_path <- file.path(data_dir, "harvest.xlsx")
|
|
||||||
|
|
||||||
harvesting_data <- tryCatch({
|
|
||||||
if (file.exists(harvest_file_path)) {
|
|
||||||
harvest_raw <- readxl::read_excel(harvest_file_path)
|
|
||||||
harvest_raw$season_start <- as.Date(harvest_raw$season_start)
|
|
||||||
harvest_raw$season_end <- as.Date(harvest_raw$season_end)
|
|
||||||
message(paste(" ✓ Loaded harvest data:", nrow(harvest_raw), "rows"))
|
|
||||||
harvest_raw
|
|
||||||
} else {
|
|
||||||
message(paste(" WARNING: harvest.xlsx not found at", harvest_file_path))
|
|
||||||
NULL
|
|
||||||
}
|
|
||||||
}, error = function(e) {
|
|
||||||
message(paste(" ERROR loading harvest.xlsx:", e$message))
|
|
||||||
NULL
|
|
||||||
})
|
|
||||||
|
|
||||||
planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
|
|
||||||
|
|
||||||
# Validate planting_dates
|
|
||||||
if (is.null(planting_dates) || nrow(planting_dates) == 0) {
|
|
||||||
message("WARNING: No planting dates available. Using NA for all fields.")
|
|
||||||
planting_dates <- data.frame(
|
|
||||||
field_id = field_boundaries_sf$field,
|
|
||||||
planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)),
|
|
||||||
stringsAsFactors = FALSE
|
|
||||||
)
|
|
||||||
}
|
|
||||||
# Build per-field configuration
|
|
||||||
message("\nPreparing mosaic configuration for statistics calculation...")
|
|
||||||
message(" ✓ Using per-field mosaic architecture (1 TIFF per field)")
|
|
||||||
|
|
||||||
# Per-field mode: each field has its own TIFF in weekly_mosaic/{FIELD}/week_*.tif
|
|
||||||
field_grid <- list(
|
|
||||||
mosaic_dir = mosaic_dir,
|
|
||||||
mode = "per-field"
|
|
||||||
)
|
|
||||||
|
|
||||||
message("\nUsing modular RDS-based approach for weekly statistics...")
|
|
||||||
|
|
||||||
# Load/calculate CURRENT week stats (from tiles or RDS cache)
|
|
||||||
message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...")
|
|
||||||
current_stats <- load_or_calculate_weekly_stats(
|
|
||||||
week_num = current_week,
|
|
||||||
year = current_year,
|
|
||||||
project_dir = project_dir,
|
project_dir = project_dir,
|
||||||
|
weekly_mosaic = weekly_mosaic,
|
||||||
|
daily_vals_dir = daily_vals_dir,
|
||||||
field_boundaries_sf = field_boundaries_sf,
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
mosaic_dir = field_grid$mosaic_dir,
|
data_dir = data_dir
|
||||||
reports_dir = reports_dir,
|
|
||||||
report_date = end_date
|
|
||||||
)
|
)
|
||||||
|
|
||||||
message(paste(" ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week"))
|
|
||||||
|
|
||||||
# Load/calculate PREVIOUS week stats (from RDS cache or tiles)
|
|
||||||
message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...")
|
|
||||||
|
|
||||||
# Calculate report date for previous week (7 days before current)
|
|
||||||
prev_report_date <- end_date - 7
|
|
||||||
|
|
||||||
prev_stats <- load_or_calculate_weekly_stats(
|
|
||||||
week_num = previous_week,
|
|
||||||
year = previous_year,
|
|
||||||
project_dir = project_dir,
|
|
||||||
field_boundaries_sf = field_boundaries_sf,
|
|
||||||
mosaic_dir = field_grid$mosaic_dir,
|
|
||||||
reports_dir = reports_dir,
|
|
||||||
report_date = prev_report_date
|
|
||||||
)
|
|
||||||
|
|
||||||
message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week"))
|
|
||||||
message(paste(" Columns in prev_stats:", paste(names(prev_stats), collapse = ", ")))
|
|
||||||
message(paste(" CV column non-NA values in prev_stats:", sum(!is.na(prev_stats$CV))))
|
|
||||||
|
|
||||||
# Apply trend calculations (requires both weeks)
|
|
||||||
message("\n3. Calculating trend columns...")
|
|
||||||
current_stats <- calculate_kpi_trends(current_stats, prev_stats,
|
|
||||||
project_dir = project_dir,
|
|
||||||
reports_dir = reports_dir,
|
|
||||||
current_week = current_week,
|
|
||||||
year = current_year)
|
|
||||||
|
|
||||||
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
|
|
||||||
|
|
||||||
# Load weekly harvest probabilities from script 31 (if available)
|
|
||||||
# Note: Script 31 saves to reports/kpis/field_stats/ (not field_level)
|
|
||||||
message("\n4. Loading harvest probabilities from script 31...")
|
|
||||||
harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats")
|
|
||||||
harvest_prob_file <- file.path(harvest_prob_dir,
|
|
||||||
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year))
|
|
||||||
message(paste(" Looking for:", harvest_prob_file))
|
|
||||||
|
|
||||||
imminent_prob_data <- tryCatch({
|
|
||||||
if (file.exists(harvest_prob_file)) {
|
|
||||||
prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE,
|
|
||||||
col_types = readr::cols(.default = readr::col_character()))
|
|
||||||
message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields"))
|
|
||||||
prob_df %>%
|
|
||||||
select(field, imminent_prob, detected_prob) %>%
|
|
||||||
rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob)
|
|
||||||
} else {
|
|
||||||
message(paste(" INFO: Harvest probabilities not available (script 31 not run)"))
|
|
||||||
NULL
|
|
||||||
}
|
|
||||||
}, error = function(e) {
|
|
||||||
message(paste(" WARNING: Could not load harvest probabilities:", e$message))
|
|
||||||
NULL
|
|
||||||
})
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# CALCULATE GAP FILLING KPI (2σ method from kpi_utils.R)
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
message("\nCalculating gap filling scores (2σ method)...")
|
|
||||||
|
|
||||||
# Process per-field mosaics
|
|
||||||
message(paste(" Using per-field mosaics for", length(per_field_files), "fields"))
|
|
||||||
|
|
||||||
field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field)
|
|
||||||
|
|
||||||
process_gap_for_field <- function(field_file) {
|
|
||||||
field_id <- basename(dirname(field_file))
|
|
||||||
field_bounds <- field_boundaries_by_id[[field_id]]
|
|
||||||
|
|
||||||
if (is.null(field_bounds) || nrow(field_bounds) == 0) {
|
|
||||||
return(data.frame(Field_id = field_id, gap_score = NA_real_))
|
|
||||||
}
|
|
||||||
|
|
||||||
tryCatch({
|
|
||||||
field_raster <- terra::rast(field_file)
|
|
||||||
ci_band_name <- "CI"
|
|
||||||
if (!(ci_band_name %in% names(field_raster))) {
|
|
||||||
return(data.frame(Field_id = field_id, gap_score = NA_real_))
|
|
||||||
}
|
|
||||||
field_ci_band <- field_raster[[ci_band_name]]
|
|
||||||
names(field_ci_band) <- "CI"
|
|
||||||
|
|
||||||
gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds)
|
|
||||||
|
|
||||||
if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) {
|
|
||||||
return(data.frame(Field_id = field_id, gap_score = NA_real_))
|
|
||||||
}
|
|
||||||
|
|
||||||
gap_scores <- gap_result$field_results
|
|
||||||
gap_scores$Field_id <- gap_scores$field
|
|
||||||
gap_scores <- gap_scores[, c("Field_id", "gap_score")]
|
|
||||||
|
|
||||||
stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE))
|
|
||||||
}, error = function(e) {
|
|
||||||
message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message))
|
|
||||||
data.frame(Field_id = field_id, gap_score = NA_real_)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
# Process fields sequentially with progress bar
|
|
||||||
message(" Processing gap scores for ", length(per_field_files), " fields...")
|
|
||||||
pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50)
|
|
||||||
|
|
||||||
results_list <- lapply(seq_along(per_field_files), function(idx) {
|
# Extract results
|
||||||
result <- process_gap_for_field(per_field_files[[idx]])
|
field_analysis_df <- workflow_results$field_analysis_df
|
||||||
utils::setTxtProgressBar(pb, idx)
|
farm_kpi_results <- workflow_results$farm_kpi_results
|
||||||
result
|
export_paths <- workflow_results$export_paths
|
||||||
})
|
|
||||||
close(pb)
|
|
||||||
|
|
||||||
gap_scores_df <- dplyr::bind_rows(results_list)
|
|
||||||
|
|
||||||
if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
|
|
||||||
gap_scores_df <- gap_scores_df %>%
|
|
||||||
dplyr::group_by(Field_id) %>%
|
|
||||||
dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop")
|
|
||||||
|
|
||||||
message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields"))
|
|
||||||
message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-", round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%"))
|
|
||||||
} else {
|
|
||||||
message(" WARNING: No gap scores calculated from per-field mosaics")
|
|
||||||
gap_scores_df <- NULL
|
|
||||||
}
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# Build final output dataframe with all 22 columns (including Gap_score)
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
message("\nBuilding final field analysis output...")
|
|
||||||
|
|
||||||
# Pre-calculate acreages with geometry validation
|
|
||||||
# This avoids geometry errors during field_analysis construction
|
|
||||||
acreage_lookup <- tryCatch({
|
|
||||||
lookup_df <- field_boundaries_sf %>%
|
|
||||||
sf::st_drop_geometry() %>%
|
|
||||||
as.data.frame() %>%
|
|
||||||
mutate(
|
|
||||||
geometry_valid = sapply(seq_len(nrow(field_boundaries_sf)), function(idx) {
|
|
||||||
tryCatch({
|
|
||||||
sf::st_is_valid(field_boundaries_sf[idx, ])
|
|
||||||
}, error = function(e) FALSE)
|
|
||||||
}),
|
|
||||||
area_ha = 0
|
|
||||||
)
|
|
||||||
|
|
||||||
# Calculate area for valid geometries
|
} else {
|
||||||
for (idx in which(lookup_df$geometry_valid)) {
|
# Unknown client type - log warning and exit
|
||||||
tryCatch({
|
warning(sprintf("Unknown client type: %s - no workflow matched", client_type))
|
||||||
area_m2 <- as.numeric(sf::st_area(field_boundaries_sf[idx, ]))
|
|
||||||
lookup_df$area_ha[idx] <- area_m2 / 10000
|
|
||||||
}, error = function(e) {
|
|
||||||
lookup_df$area_ha[idx] <<- NA_real_
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
# Convert hectares to acres
|
|
||||||
lookup_df %>%
|
|
||||||
mutate(acreage = area_ha / 0.404686) %>%
|
|
||||||
select(field, acreage)
|
|
||||||
}, error = function(e) {
|
|
||||||
message(paste("Warning: Could not calculate acreages from geometries -", e$message))
|
|
||||||
data.frame(field = character(0), acreage = numeric(0))
|
|
||||||
})
|
|
||||||
|
|
||||||
field_analysis_df <- current_stats %>%
|
|
||||||
mutate(
|
|
||||||
# Column 2: Farm_Section (user fills)
|
|
||||||
Farm_Section = NA_character_,
|
|
||||||
# Column 3: Field_name (from GeoJSON - already have Field_id, can look up)
|
|
||||||
Field_name = Field_id,
|
|
||||||
# Column 4: Acreage (calculate from geometry)
|
|
||||||
Acreage = {
|
|
||||||
acreages_vec <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)]
|
|
||||||
if_else(is.na(acreages_vec), 0, acreages_vec)
|
|
||||||
},
|
|
||||||
# Columns 5-6: Already in current_stats (Mean_CI, Weekly_ci_change)
|
|
||||||
# Column 7: Four_week_trend (from current_stats)
|
|
||||||
# Column 8: Last_harvest_or_planting_date (from harvest.xlsx - season_start)
|
|
||||||
Last_harvest_or_planting_date = {
|
|
||||||
planting_dates$planting_date[match(Field_id, planting_dates$field_id)]
|
|
||||||
},
|
|
||||||
# Column 9: Age_week (calculated from report date and planting date)
|
|
||||||
Age_week = {
|
|
||||||
sapply(seq_len(nrow(current_stats)), function(idx) {
|
|
||||||
planting_dt <- Last_harvest_or_planting_date[idx]
|
|
||||||
if (is.na(planting_dt)) {
|
|
||||||
return(NA_real_)
|
|
||||||
}
|
|
||||||
round(as.numeric(difftime(end_date, planting_dt, units = "weeks")), 0)
|
|
||||||
})
|
|
||||||
},
|
|
||||||
# Column 10: Phase (recalculate based on updated Age_week)
|
|
||||||
Phase = {
|
|
||||||
sapply(Age_week, function(age) {
|
|
||||||
if (is.na(age)) return(NA_character_)
|
|
||||||
if (age >= 0 & age < 4) return("Germination")
|
|
||||||
if (age >= 4 & age < 17) return("Tillering")
|
|
||||||
if (age >= 17 & age < 39) return("Grand Growth")
|
|
||||||
if (age >= 39) return("Maturation")
|
|
||||||
NA_character_
|
|
||||||
})
|
|
||||||
},
|
|
||||||
# Column 11: nmr_of_weeks_analysed (already in current_stats from calculate_kpi_trends)
|
|
||||||
# Column 12: Germination_progress (calculated here from CI values)
|
|
||||||
# Bin Pct_pixels_CI_gte_2 into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100%
|
|
||||||
Germination_progress = sapply(Pct_pixels_CI_gte_2, function(pct) {
|
|
||||||
if (is.na(pct)) return(NA_character_)
|
|
||||||
if (pct >= 95) return("95-100%")
|
|
||||||
else if (pct >= 90) return("90-95%")
|
|
||||||
else if (pct >= 80) return("80-90%")
|
|
||||||
else if (pct >= 70) return("70-80%")
|
|
||||||
else if (pct >= 60) return("60-70%")
|
|
||||||
else if (pct >= 50) return("50-60%")
|
|
||||||
else if (pct >= 40) return("40-50%")
|
|
||||||
else if (pct >= 30) return("30-40%")
|
|
||||||
else if (pct >= 20) return("20-30%")
|
|
||||||
else if (pct >= 10) return("10-20%")
|
|
||||||
else return("0-10%")
|
|
||||||
}),
|
|
||||||
# Column 13: Imminent_prob (from script 31 or NA if not available)
|
|
||||||
Imminent_prob = {
|
|
||||||
if (!is.null(imminent_prob_data)) {
|
|
||||||
imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)]
|
|
||||||
} else {
|
|
||||||
rep(NA_real_, nrow(current_stats))
|
|
||||||
}
|
|
||||||
},
|
|
||||||
# Column 14: Status_Alert (based on harvest probability + crop health status)
|
|
||||||
# Priority order: Ready for harvest-check → Strong decline → Harvested/bare → NA
|
|
||||||
Status_Alert = {
|
|
||||||
sapply(seq_len(nrow(current_stats)), function(idx) {
|
|
||||||
imminent_prob <- Imminent_prob[idx]
|
|
||||||
age_w <- Age_week[idx]
|
|
||||||
weekly_ci_chg <- Weekly_ci_change[idx]
|
|
||||||
mean_ci_val <- Mean_CI[idx]
|
|
||||||
|
|
||||||
# Priority 1: Ready for harvest-check (imminent + mature cane ≥12 months)
|
|
||||||
if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_w) && age_w >= 52) {
|
|
||||||
return("Ready for harvest-check")
|
|
||||||
}
|
|
||||||
|
|
||||||
# Priority 2: Strong decline in crop health (drop ≥2 points but still >1.5)
|
|
||||||
if (!is.na(weekly_ci_chg) && weekly_ci_chg <= -2.0 && !is.na(mean_ci_val) && mean_ci_val > 1.5) {
|
|
||||||
return("Strong decline in crop health")
|
|
||||||
}
|
|
||||||
|
|
||||||
# Priority 3: Harvested/bare (Mean CI < 1.5)
|
|
||||||
if (!is.na(mean_ci_val) && mean_ci_val < 1.5) {
|
|
||||||
return("Harvested/bare")
|
|
||||||
}
|
|
||||||
|
|
||||||
# Fallback: no alert
|
|
||||||
NA_character_
|
|
||||||
})
|
|
||||||
},
|
|
||||||
# Columns 15-16: CI-based columns already in current_stats (CI_range, CI_Percentiles)
|
|
||||||
# Column 17: Already in current_stats (CV)
|
|
||||||
# Column 18: Already in current_stats (CV_Trend_Short_Term)
|
|
||||||
# Column 19: CV_Trend_Long_Term (from current_stats - raw slope value)
|
|
||||||
# Column 19b: CV_Trend_Long_Term_Category (categorical interpretation of slope)
|
|
||||||
# 3 classes: More uniform (slope < -0.01), Stable uniformity (-0.01 to 0.01), Less uniform (slope > 0.01)
|
|
||||||
CV_Trend_Long_Term_Category = {
|
|
||||||
sapply(current_stats$CV_Trend_Long_Term, function(slope) {
|
|
||||||
if (is.na(slope)) {
|
|
||||||
return(NA_character_)
|
|
||||||
} else if (slope < -0.01) {
|
|
||||||
return("More uniform")
|
|
||||||
} else if (slope > 0.01) {
|
|
||||||
return("Less uniform")
|
|
||||||
} else {
|
|
||||||
return("Stable uniformity")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
},
|
|
||||||
# Columns 20-21: Already in current_stats (Cloud_pct_clear, Cloud_category)
|
|
||||||
# Bin Cloud_pct_clear into 10% intervals: 0-10%, 10-20%, ..., 80-90%, 90-95%, 95-100%
|
|
||||||
Cloud_pct_clear = sapply(Cloud_pct_clear, function(pct) {
|
|
||||||
if (is.na(pct)) return(NA_character_)
|
|
||||||
if (pct >= 95) return("95-100%")
|
|
||||||
else if (pct >= 90) return("90-95%")
|
|
||||||
else if (pct >= 80) return("80-90%")
|
|
||||||
else if (pct >= 70) return("70-80%")
|
|
||||||
else if (pct >= 60) return("60-70%")
|
|
||||||
else if (pct >= 50) return("50-60%")
|
|
||||||
else if (pct >= 40) return("40-50%")
|
|
||||||
else if (pct >= 30) return("30-40%")
|
|
||||||
else if (pct >= 20) return("20-30%")
|
|
||||||
else if (pct >= 10) return("10-20%")
|
|
||||||
else return("0-10%")
|
|
||||||
}),
|
|
||||||
# Column 22: Gap_score (2σ below median - from kpi_utils.R)
|
|
||||||
Gap_score = {
|
|
||||||
if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
|
|
||||||
# Debug: Print first few gap scores
|
|
||||||
message(sprintf(" Joining %d gap scores to field_analysis (first 3: %s)",
|
|
||||||
nrow(gap_scores_df),
|
|
||||||
paste(head(gap_scores_df$gap_score, 3), collapse=", ")))
|
|
||||||
message(sprintf(" First 3 Field_ids in gap_scores_df: %s",
|
|
||||||
paste(head(gap_scores_df$Field_id, 3), collapse=", ")))
|
|
||||||
message(sprintf(" First 3 Field_ids in current_stats: %s",
|
|
||||||
paste(head(current_stats$Field_id, 3), collapse=", ")))
|
|
||||||
|
|
||||||
gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)]
|
|
||||||
} else {
|
|
||||||
rep(NA_real_, nrow(current_stats))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
) %>%
|
|
||||||
select(
|
|
||||||
all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert",
|
|
||||||
"Last_harvest_or_planting_date", "Age_week", "Phase",
|
|
||||||
"Germination_progress",
|
|
||||||
"Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles",
|
|
||||||
"CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
|
|
||||||
"Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score"))
|
|
||||||
)
|
|
||||||
|
|
||||||
message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns (including Gap_score)"))
|
|
||||||
|
|
||||||
export_paths <- export_field_analysis_excel(
|
|
||||||
field_analysis_df,
|
|
||||||
NULL,
|
|
||||||
project_dir,
|
|
||||||
current_week,
|
|
||||||
current_year,
|
|
||||||
reports_dir
|
|
||||||
)
|
|
||||||
|
|
||||||
cat("\n--- Per-field Results (first 10) ---\n")
|
|
||||||
available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category")
|
|
||||||
available_cols <- available_cols[available_cols %in% names(field_analysis_df)]
|
|
||||||
if (length(available_cols) > 0) {
|
|
||||||
print(head(field_analysis_df[, available_cols], 10))
|
|
||||||
}
|
|
||||||
|
|
||||||
# ========== FARM-LEVEL KPI AGGREGATION ==========
|
|
||||||
# Aggregate the per-field analysis into farm-level summary statistics
|
|
||||||
|
|
||||||
cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n")
|
|
||||||
|
|
||||||
# Filter to only fields that have actual data (non-NA CI and valid acreage)
|
|
||||||
field_data <- field_analysis_df %>%
|
|
||||||
filter(!is.na(Mean_CI) & !is.na(Acreage)) %>%
|
|
||||||
filter(Acreage > 0)
|
|
||||||
|
|
||||||
if (nrow(field_data) > 0) {
|
|
||||||
|
|
||||||
if (nrow(field_data) > 0) {
|
|
||||||
# Create summary statistics
|
|
||||||
farm_summary <- list()
|
|
||||||
|
|
||||||
# 1. PHASE DISTRIBUTION
|
|
||||||
phase_dist <- field_data %>%
|
|
||||||
group_by(Phase) %>%
|
|
||||||
summarise(
|
|
||||||
num_fields = n(),
|
|
||||||
acreage = sum(Acreage, na.rm = TRUE),
|
|
||||||
.groups = 'drop'
|
|
||||||
) %>%
|
|
||||||
rename(Category = Phase)
|
|
||||||
|
|
||||||
farm_summary$phase_distribution <- phase_dist
|
|
||||||
|
|
||||||
# 2. STATUS ALERT DISTRIBUTION
|
|
||||||
status_dist <- field_data %>%
|
|
||||||
group_by(Status_Alert) %>%
|
|
||||||
summarise(
|
|
||||||
num_fields = n(),
|
|
||||||
acreage = sum(Acreage, na.rm = TRUE),
|
|
||||||
.groups = 'drop'
|
|
||||||
) %>%
|
|
||||||
rename(Category = Status_Alert)
|
|
||||||
|
|
||||||
farm_summary$status_distribution <- status_dist
|
|
||||||
|
|
||||||
# 3. CLOUD COVERAGE DISTRIBUTION
|
|
||||||
cloud_dist <- field_data %>%
|
|
||||||
group_by(Cloud_category) %>%
|
|
||||||
summarise(
|
|
||||||
num_fields = n(),
|
|
||||||
acreage = sum(Acreage, na.rm = TRUE),
|
|
||||||
.groups = 'drop'
|
|
||||||
) %>%
|
|
||||||
rename(Category = Cloud_category)
|
|
||||||
|
|
||||||
farm_summary$cloud_distribution <- cloud_dist
|
|
||||||
|
|
||||||
# 4. OVERALL STATISTICS
|
|
||||||
farm_summary$overall_stats <- data.frame(
|
|
||||||
total_fields = nrow(field_data),
|
|
||||||
total_acreage = sum(field_data$Acreage, na.rm = TRUE),
|
|
||||||
mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2),
|
|
||||||
median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2),
|
|
||||||
mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4),
|
|
||||||
week = current_week,
|
|
||||||
year = current_year,
|
|
||||||
date = as.character(end_date)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Print summaries
|
|
||||||
cat("\n--- PHASE DISTRIBUTION ---\n")
|
|
||||||
print(phase_dist)
|
|
||||||
|
|
||||||
cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n")
|
|
||||||
print(status_dist)
|
|
||||||
|
|
||||||
cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n")
|
|
||||||
print(cloud_dist)
|
|
||||||
|
|
||||||
cat("\n--- OVERALL FARM STATISTICS ---\n")
|
|
||||||
print(farm_summary$overall_stats)
|
|
||||||
|
|
||||||
farm_kpi_results <- farm_summary
|
|
||||||
} else {
|
|
||||||
farm_kpi_results <- NULL
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
farm_kpi_results <- NULL
|
|
||||||
}
|
|
||||||
|
|
||||||
# ========== FINAL SUMMARY ==========
|
|
||||||
|
|
||||||
cat("\n", strrep("=", 70), "\n")
|
|
||||||
cat("80_CALCULATE_KPIs.R - COMPLETION SUMMARY\n")
|
|
||||||
cat(strrep("=", 70), "\n")
|
|
||||||
cat("Per-field analysis fields analyzed:", nrow(field_analysis_df), "\n")
|
|
||||||
cat("Excel export:", export_paths$excel, "\n")
|
|
||||||
cat("RDS export:", export_paths$rds, "\n")
|
|
||||||
cat("CSV export:", export_paths$csv, "\n")
|
|
||||||
|
|
||||||
if (!is.null(farm_kpi_results)) {
|
|
||||||
cat("\nFarm-level KPIs: CALCULATED\n")
|
|
||||||
} else {
|
|
||||||
cat("\nFarm-level KPIs: SKIPPED (no valid tile data extracted)\n")
|
|
||||||
}
|
|
||||||
|
|
||||||
cat("\n✓ Consolidated KPI calculation complete!\n")
|
|
||||||
cat(" - Per-field data exported\n")
|
|
||||||
cat(" - Farm-level KPIs calculated\n")
|
|
||||||
cat(" - All outputs in:", reports_dir, "\n\n")
|
|
||||||
|
|
||||||
} else {
|
|
||||||
# Unknown client type - log warning and exit
|
|
||||||
warning(sprintf("Unknown client type: %s - no workflow matched", client_type))
|
|
||||||
cat("\n⚠️ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "")
|
cat("\n⚠️ Warning: Client type '", client_type, "' does not match any known workflow\n", sep = "")
|
||||||
cat("Expected: 'agronomic_support' (aura) or 'cane_supply' (angata, etc.)\n")
|
cat("Expected: 'agronomic_support' (aura) or 'cane_supply' (angata, etc.)\n")
|
||||||
cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n")
|
cat("Check CLIENT_TYPE_MAP in parameters_project.R\n\n")
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -27,173 +27,685 @@ library(tidyr)
|
||||||
library(readxl)
|
library(readxl)
|
||||||
library(writexl)
|
library(writexl)
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# ALERT THRESHOLDS & CONFIGURATION CONSTANTS
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# CI change thresholds for alert categorization and status determination
|
||||||
|
# These values are project-standard and should be consistent across all workflows
|
||||||
|
CI_CHANGE_RAPID_GROWTH_THRESHOLD <- 0.5 # Weekly CI change for positive growth alert
|
||||||
|
CI_CHANGE_POSITIVE_GROWTH_THRESHOLD <- 0.2 # Weekly CI change for acceptable growth
|
||||||
|
CI_CHANGE_STABLE_THRESHOLD <- -0.2 # Weekly CI change for stable status (between -0.2 and +0.2)
|
||||||
|
CI_CHANGE_STRESS_TREND_THRESHOLD <- -0.3 # 4-week trend threshold for stress detection
|
||||||
|
CI_CHANGE_RAPID_DECLINE_THRESHOLD <- -0.5 # Weekly CI change for rapid decline alert
|
||||||
|
# Deprecated aliases (for backward compatibility if needed):
|
||||||
|
CI_CHANGE_DECLINE_THRESHOLD <- CI_CHANGE_RAPID_DECLINE_THRESHOLD # Weekly CI change threshold for decline alerts
|
||||||
|
CI_CHANGE_INCREASE_THRESHOLD <- CI_CHANGE_RAPID_GROWTH_THRESHOLD # Weekly CI change threshold for increase alerts
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# ANGATA-SPECIFIC HELPER FUNCTIONS (Placeholder Section)
|
# ANGATA-SPECIFIC HELPER FUNCTIONS (Placeholder Section)
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
#' Placeholder: ANGATA harvest readiness assessment
|
#' Calculate acreage for each field from geometry
|
||||||
|
#' @param field_boundaries_sf sf object with field geometries
|
||||||
|
#' @return data.frame with field and acreage columns
|
||||||
|
calculate_field_acreages <- function(field_boundaries_sf) {
|
||||||
|
tryCatch({
|
||||||
|
# Project to equal-area CRS (EPSG:6933) for accurate area calculations
|
||||||
|
field_boundaries_proj <- sf::st_transform(field_boundaries_sf, "EPSG:6933")
|
||||||
|
|
||||||
|
lookup_df <- field_boundaries_proj %>%
|
||||||
|
sf::st_drop_geometry() %>%
|
||||||
|
as.data.frame() %>%
|
||||||
|
mutate(
|
||||||
|
geometry_valid = sapply(seq_len(nrow(field_boundaries_proj)), function(idx) {
|
||||||
|
tryCatch({
|
||||||
|
sf::st_is_valid(field_boundaries_proj[idx, ])
|
||||||
|
}, error = function(e) FALSE)
|
||||||
|
}),
|
||||||
|
area_ha = 0
|
||||||
|
)
|
||||||
|
|
||||||
|
# Calculate area for valid geometries
|
||||||
|
valid_indices <- which(lookup_df$geometry_valid)
|
||||||
|
areas_ha <- vapply(valid_indices, function(idx) {
|
||||||
|
tryCatch({
|
||||||
|
area_m2 <- as.numeric(sf::st_area(field_boundaries_proj[idx, ]))
|
||||||
|
area_m2 / 10000
|
||||||
|
}, error = function(e) NA_real_)
|
||||||
|
}, numeric(1))
|
||||||
|
lookup_df$area_ha[valid_indices] <- areas_ha
|
||||||
|
|
||||||
|
# Convert hectares to acres
|
||||||
|
lookup_df %>%
|
||||||
|
mutate(acreage = area_ha / 0.404686) %>%
|
||||||
|
# Aggregate by field to handle multi-row fields (e.g., sub_fields)
|
||||||
|
group_by(field) %>%
|
||||||
|
summarise(acreage = sum(acreage, na.rm = TRUE), .groups = "drop") %>%
|
||||||
|
select(field, acreage)
|
||||||
|
}, error = function(e) {
|
||||||
|
message(paste("Warning: Could not calculate acreages from geometries -", e$message))
|
||||||
|
data.frame(field = character(0), acreage = numeric(0))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Calculate age in weeks from planting date
|
||||||
#'
|
#'
|
||||||
#' Future implementation will integrate ANGATA-specific harvest readiness criteria:
|
#' @param planting_date Date of planting
|
||||||
#' - Maturation phase detection (CI threshold-based)
|
#' @param reference_date Date to calculate age relative to (typically end_date)
|
||||||
#' - Field age tracking (days since planting)
|
#' @return Numeric age in weeks (rounded to nearest week)
|
||||||
#' - Weather-based ripeness indicators (if available)
|
calculate_age_week <- function(planting_date, reference_date) {
|
||||||
#' - Historical yield correlations
|
if (is.na(planting_date)) {
|
||||||
#'
|
return(NA_real_)
|
||||||
#' @param field_ci CI values for the field
|
|
||||||
#' @param field_age_days Days since planting
|
|
||||||
#'
|
|
||||||
#' @return Character string with harvest readiness assessment
|
|
||||||
assess_harvest_readiness <- function(field_ci, field_age_days = NULL) {
|
|
||||||
# Placeholder implementation
|
|
||||||
# Real version would check:
|
|
||||||
# - Mean CI > 3.5 (maturation threshold)
|
|
||||||
# - Age > 350 days
|
|
||||||
# - Weekly growth rate < threshold (mature plateau)
|
|
||||||
|
|
||||||
if (is.null(field_ci) || all(is.na(field_ci))) {
|
|
||||||
return("No data available")
|
|
||||||
}
|
}
|
||||||
|
round(as.numeric(difftime(reference_date, planting_date, units = "weeks")), 0)
|
||||||
mean_ci <- mean(field_ci, na.rm = TRUE)
|
}
|
||||||
|
|
||||||
if (mean_ci > 3.5) {
|
#' Assign crop phase based on age in weeks
|
||||||
return("Ready for harvest")
|
#'
|
||||||
} else if (mean_ci > 2.5) {
|
#' Determines crop phase from age in weeks using canonical PHASE_DEFINITIONS
|
||||||
return("Approaching harvest readiness")
|
#' from 80_utils_common.R for consistency across all workflows.
|
||||||
|
#'
|
||||||
|
#' @param age_week Numeric age in weeks
|
||||||
|
#' @return Character phase name (from PHASE_DEFINITIONS or "Unknown")
|
||||||
|
#'
|
||||||
|
#' @details
|
||||||
|
#' Uses the shared PHASE_DEFINITIONS to ensure identical phase boundaries
|
||||||
|
#' across all scripts. This wrapper delegates to get_phase_by_age() which
|
||||||
|
#' is the authoritative phase lookup function.
|
||||||
|
#'
|
||||||
|
#' Phase boundaries (from PHASE_DEFINITIONS):
|
||||||
|
#' - Germination: 0-6 weeks
|
||||||
|
#' - Tillering: 4-16 weeks
|
||||||
|
#' - Grand Growth: 17-39 weeks
|
||||||
|
#' - Maturation: 39+ weeks
|
||||||
|
calculate_phase <- function(age_week) {
|
||||||
|
# Delegate to canonical get_phase_by_age() from 80_utils_common.R
|
||||||
|
# This ensures all phase boundaries are consistent across workflows
|
||||||
|
get_phase_by_age(age_week)
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Bin percentage into 10% intervals with special handling for 90-100%
|
||||||
|
#'
|
||||||
|
#' @param pct Numeric percentage value (0-100)
|
||||||
|
#' @return Character bin label
|
||||||
|
bin_percentage <- function(pct) {
|
||||||
|
if (is.na(pct)) return(NA_character_)
|
||||||
|
if (pct >= 95) return("95-100%")
|
||||||
|
else if (pct >= 90) return("90-95%")
|
||||||
|
else if (pct >= 80) return("80-90%")
|
||||||
|
else if (pct >= 70) return("70-80%")
|
||||||
|
else if (pct >= 60) return("60-70%")
|
||||||
|
else if (pct >= 50) return("50-60%")
|
||||||
|
else if (pct >= 40) return("40-50%")
|
||||||
|
else if (pct >= 30) return("30-40%")
|
||||||
|
else if (pct >= 20) return("20-30%")
|
||||||
|
else if (pct >= 10) return("10-20%")
|
||||||
|
else return("0-10%")
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Calculate germination progress from CI threshold percentage
|
||||||
|
#'
|
||||||
|
#' @param pct_pixels_ci_gte_2 Percentage of pixels with CI >= 2
|
||||||
|
#' @return Character bin label
|
||||||
|
calculate_germination_progress <- function(pct_pixels_ci_gte_2) {
|
||||||
|
bin_percentage(pct_pixels_ci_gte_2)
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Categorize CV trend (long-term slope) into qualitative labels
|
||||||
|
#'
|
||||||
|
#' @param cv_slope Numeric slope from CV trend analysis
|
||||||
|
#' @return Character category: "More uniform", "Stable uniformity", or "Less uniform"
|
||||||
|
categorize_cv_trend_long_term <- function(cv_slope) {
|
||||||
|
if (is.na(cv_slope)) {
|
||||||
|
return(NA_character_)
|
||||||
|
} else if (cv_slope < -0.01) {
|
||||||
|
return("More uniform")
|
||||||
|
} else if (cv_slope > 0.01) {
|
||||||
|
return("Less uniform")
|
||||||
} else {
|
} else {
|
||||||
return("Not ready - continue monitoring")
|
return("Stable uniformity")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Placeholder: ANGATA supply chain status flags
|
#' Determine status alert for CANE_SUPPLY client (harvest/milling workflow)
|
||||||
|
#'
|
||||||
|
#' Alerts focus on: harvest readiness, crop health monitoring, exception detection
|
||||||
|
#' Uses WEEKLY trends (Four_week_trend) not daily noise for consistency
|
||||||
|
#' Designed for harvest/milling clients who manage expectation, not daily operations
|
||||||
#'
|
#'
|
||||||
#' Future implementation will add supply chain-specific status indicators:
|
#' Priority order:
|
||||||
#' - Harvest scheduling readiness
|
#' 1. harvest_ready → Schedule harvest operations
|
||||||
#' - Equipment availability impact
|
#' 2. harvested_bare → Record completion / detect unexpected harvest
|
||||||
#' - Transportation/logistics flags
|
#' 3. stress_detected → Monitor crop health (consistent decline)
|
||||||
#' - Quality parameter flags
|
#' 4. germination_delayed → Early warning for young fields
|
||||||
|
#' 5. growth_on_track → Positive signal (no action needed)
|
||||||
|
#' 6. NA → Normal growth (no alert)
|
||||||
#'
|
#'
|
||||||
#' @param field_analysis Data frame with field analysis results
|
#' @param imminent_prob Numeric harvest probability (0-1)
|
||||||
#'
|
#' @param age_week Numeric age in weeks since planting/harvest
|
||||||
#' @return Data frame with supply chain status columns
|
#' @param mean_ci Numeric mean Chlorophyll Index
|
||||||
assess_supply_chain_status <- function(field_analysis) {
|
#' @param four_week_trend Numeric 4-week trend in CI (slope of growth)
|
||||||
# Placeholder: return field analysis as-is
|
#' @param weekly_ci_change Numeric week-over-week CI change
|
||||||
# Real version would add columns for:
|
#' @param cv Numeric coefficient of variation (field uniformity)
|
||||||
# - schedule_ready (bool)
|
#' @return Character status alert code or NA
|
||||||
# - harvest_window_days (numeric)
|
calculate_status_alert <- function(imminent_prob, age_week, mean_ci,
|
||||||
# - transportation_priority (char)
|
four_week_trend, weekly_ci_change, cv) {
|
||||||
# - quality_flags (char)
|
|
||||||
|
|
||||||
return(field_analysis)
|
# Priority 1: HARVEST READY - highest business priority
|
||||||
|
# Field is mature (≥12 months) AND harvest model predicts imminent harvest
|
||||||
|
if (!is.na(imminent_prob) && imminent_prob > 0.5 && !is.na(age_week) && age_week >= 52) {
|
||||||
|
return("harvest_ready")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Priority 2: HARVESTED/BARE - indicator of completion (or unexpected harvest)
|
||||||
|
# Mean CI dropped below vegetative threshold
|
||||||
|
if (!is.na(mean_ci) && mean_ci < 1.5) {
|
||||||
|
return("harvested_bare")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Priority 3: STRESS DETECTED - consistent health decline (weekly trend)
|
||||||
|
# Uses Four_week_trend (smooth trend) NOT daily fluctuation to avoid noise
|
||||||
|
# Crop declining but not yet bare → opportunity to investigate
|
||||||
|
# References: CI_CHANGE_STRESS_TREND_THRESHOLD for 4-week trend detection
|
||||||
|
if (!is.na(four_week_trend) && four_week_trend < CI_CHANGE_STRESS_TREND_THRESHOLD &&
|
||||||
|
!is.na(mean_ci) && mean_ci > 1.5) {
|
||||||
|
return("stress_detected")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Priority 4: GERMINATION DELAYED - early warning for young fields
|
||||||
|
# Age 4-8 weeks is typical germination window; low CI = slow start
|
||||||
|
if (!is.na(age_week) && age_week >= 4 && age_week <= 8 &&
|
||||||
|
!is.na(mean_ci) && mean_ci < 1.5) {
|
||||||
|
return("germination_delayed")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Priority 5: GROWTH ON TRACK - positive operational status
|
||||||
|
# Field is healthy, uniform, and growing steadily (no action needed)
|
||||||
|
# Conditions: good uniformity (CV < 0.15) AND stable/positive weekly trend
|
||||||
|
# References: CI_CHANGE_STABLE_THRESHOLD (±0.2 = stable, no change)
|
||||||
|
if (!is.na(cv) && cv < 0.15 &&
|
||||||
|
!is.na(four_week_trend) && four_week_trend >= CI_CHANGE_STABLE_THRESHOLD &&
|
||||||
|
!is.na(weekly_ci_change) && weekly_ci_change >= CI_CHANGE_STABLE_THRESHOLD) {
|
||||||
|
return("growth_on_track")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Default: NORMAL GROWTH (no specific alert)
|
||||||
|
# Field is growing but may have minor variability; continues normal monitoring
|
||||||
|
NA_character_
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#' Calculate yield prediction for CANE_SUPPLY workflows (Wrapper)
#'
#' Thin wrapper around the shared yield prediction model in 80_utils_common.R,
#' exposing ML-based yield forecasting (Random Forest with Forward Feature
#' Selection) to CANE_SUPPLY clients (e.g. ANGATA).
#'
#' Model inputs: cumulative Canopy Index (CI) from the growth model, Days
#' After Harvest (DAH) / crop age, and CI-per-day (growth velocity). Mature
#' fields (DAH >= DAH_MATURITY_THRESHOLD, ~8 months) are binned into
#' quartiles: Top 25% (high-yield), Average (mid-range), Lowest 25%.
#'
#' @param field_boundaries_sf SF object with field geometries
#' @param harvesting_data Data frame with harvest history (must have tonnage_ha column)
#' @param cumulative_CI_vals_dir Directory with combined CI RDS files
#'
#' @return List with:
#'   - summary: Data frame with field_groups, count, and yield quartile predictions
#'   - field_results: Data frame with field-level forecasts (yield_forecast_t_ha in t/ha)
#'
#' @details
#' **Data Requirements:** harvesting_data must include a tonnage_ha column
#' (yield in t/ha) for training, and cumulative_CI_vals_dir must contain
#' "All_pivots_Cumulative_CI_quadrant_year_v2.rds". If either is missing, the
#' shared implementation returns a graceful fallback with NA values rather
#' than fabricated numbers.
#'
#' **Integration:** can be called from calculate_all_field_kpis() in the
#' cane_supply workflow to add a "Yield_Forecast" column to the 22-column
#' KPI output.
#'
#' **Example:**
#' ```r
#' yield_result <- calculate_yield_prediction_kpi_cane_supply(
#'   field_boundaries_sf,
#'   harvesting_data,
#'   file.path(data_dir, "combined_CI")
#' )
#' # yield_result$summary has quartiles
#' # yield_result$field_results has per-field forecasts
#' ```
calculate_yield_prediction_kpi_cane_supply <- function(field_boundaries_sf,
                                                       harvesting_data,
                                                       cumulative_CI_vals_dir) {
  # Delegate directly to the shared implementation in 80_utils_common.R;
  # no client-specific post-processing is applied here yet.
  calculate_yield_prediction_kpi(field_boundaries_sf,
                                 harvesting_data,
                                 cumulative_CI_vals_dir)
}
|
||||||
|
|
||||||
|
|
||||||
|
#' Build complete per-field KPI dataframe with all 22 columns
#' @param current_stats data.frame with current week statistics from load_or_calculate_weekly_stats
#' @param planting_dates data.frame with field_id and planting_date columns
#' @param imminent_prob_data data.frame with Field_id and Imminent_prob_actual columns (or NULL)
#' @param gap_scores_df data.frame with Field_id and gap_score columns (or NULL)
#' @param field_boundaries_sf sf object with field geometries
#' @param end_date Date object for current report date
#' @return data.frame with all 22 KPI columns
calculate_all_field_kpis <- function(current_stats,
                                     planting_dates,
                                     imminent_prob_data,
                                     gap_scores_df,
                                     field_boundaries_sf,
                                     end_date) {

  message("\nBuilding final field analysis output...")

  # Acreage lookup table computed once up front (field -> acreage).
  acreage_lookup <- calculate_field_acreages(field_boundaries_sf)
  n_fields <- nrow(current_stats)

  field_analysis_df <- current_stats %>%
    mutate(
      # Column 2: Farm_Section -- left blank, filled in manually by the user.
      Farm_Section = NA_character_,

      # Column 3: Field_name mirrors the GeoJSON field id.
      Field_name = Field_id,

      # Column 4: Acreage from geometry; unmatched fields default to 0.
      Acreage = {
        matched_acres <- acreage_lookup$acreage[match(Field_id, acreage_lookup$field)]
        if_else(is.na(matched_acres), 0, matched_acres)
      },

      # Column 8: last harvest/planting date looked up from harvest.xlsx data.
      Last_harvest_or_planting_date =
        planting_dates$planting_date[match(Field_id, planting_dates$field_id)],

      # Column 9: field age in weeks, derived per row from the planting date.
      Age_week = sapply(seq_len(n_fields), function(i) {
        calculate_age_week(Last_harvest_or_planting_date[i], end_date)
      }),

      # Column 10: growth phase derived from Age_week.
      Phase = sapply(Age_week, calculate_phase),

      # Column 12: germination progress binned from Pct_pixels_CI_gte_2.
      Germination_progress = sapply(Pct_pixels_CI_gte_2, calculate_germination_progress),

      # Column 13: harvest probability from script 31, NA when unavailable.
      Imminent_prob = {
        if (!is.null(imminent_prob_data)) {
          as.numeric(imminent_prob_data$Imminent_prob_actual[match(Field_id, imminent_prob_data$Field_id)])
        } else {
          rep(NA_real_, n_fields)
        }
      },

      # Column 14: multi-priority status alert for the harvest/milling workflow.
      Status_Alert = sapply(seq_len(n_fields), function(i) {
        calculate_status_alert(
          imminent_prob = Imminent_prob[i],
          age_week = Age_week[i],
          mean_ci = Mean_CI[i],
          four_week_trend = Four_week_trend[i],
          weekly_ci_change = Weekly_ci_change[i],
          cv = CV[i]
        )
      }),

      # Column 19b: categorical long-term CV slope.
      CV_Trend_Long_Term_Category = sapply(current_stats$CV_Trend_Long_Term, categorize_cv_trend_long_term),

      # Column 21: cloud-free percentage binned into intervals.
      Cloud_pct_clear = sapply(Cloud_pct_clear, bin_percentage),

      # Column 22: gap score (2-sigma method), NA when no gap data supplied.
      Gap_score = {
        if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
          gap_scores_df$gap_score[match(current_stats$Field_id, gap_scores_df$Field_id)]
        } else {
          rep(NA_real_, n_fields)
        }
      }
    ) %>%
    select(
      all_of(c("Field_id", "Farm_Section", "Field_name", "Acreage", "Status_Alert",
               "Last_harvest_or_planting_date", "Age_week", "Phase",
               "Germination_progress",
               "Mean_CI", "Weekly_ci_change", "Four_week_trend", "CI_range", "CI_Percentiles",
               "CV", "CV_Trend_Short_Term", "CV_Trend_Long_Term", "CV_Trend_Long_Term_Category",
               "Imminent_prob", "Cloud_pct_clear", "Cloud_category", "Gap_score"))
    )

  message(paste("✓ Built final output with", nrow(field_analysis_df), "fields and 22 columns"))

  return(field_analysis_df)
}
|
||||||
|
|
||||||
|
#' Aggregate per-field data into farm-level KPI summary
#'
#' Filters out fields without usable data, then builds three categorical
#' distributions (phase, status alert, cloud coverage) plus overall farm
#' statistics, printing each summary to the console as it goes.
#'
#' @param field_analysis_df data.frame with per-field KPI data
#' @param current_week Numeric current week number
#' @param current_year Numeric current year
#' @param end_date Date object for current report date
#' @return List with phase_distribution, status_distribution, cloud_distribution, overall_stats
#'   (or NULL when no field has valid Mean_CI and positive Acreage)
calculate_farm_level_kpis <- function(field_analysis_df, current_week, current_year, end_date) {

  cat("\n=== CALCULATING FARM-LEVEL KPI SUMMARY ===\n")

  # Filter to only fields with actual data (valid CI and positive acreage)
  field_data <- field_analysis_df %>%
    filter(!is.na(Mean_CI) & !is.na(Acreage)) %>%
    filter(Acreage > 0)

  if (nrow(field_data) == 0) {
    message("No valid field data for farm-level aggregation")
    return(NULL)
  }

  # Helper: field count and total acreage per level of one grouping column.
  # across(all_of(...)) preserves the original column name so the rename to
  # "Category" behaves exactly like the previous hand-written group_by calls.
  summarise_distribution <- function(df, group_col) {
    df %>%
      group_by(across(all_of(group_col))) %>%
      summarise(
        num_fields = n(),
        acreage = sum(Acreage, na.rm = TRUE),
        .groups = 'drop'
      ) %>%
      rename(Category = all_of(group_col))
  }

  farm_summary <- list()

  # 1. PHASE DISTRIBUTION
  phase_dist <- summarise_distribution(field_data, "Phase")
  farm_summary$phase_distribution <- phase_dist

  # 2. STATUS ALERT DISTRIBUTION
  status_dist <- summarise_distribution(field_data, "Status_Alert")
  farm_summary$status_distribution <- status_dist

  # 3. CLOUD COVERAGE DISTRIBUTION
  cloud_dist <- summarise_distribution(field_data, "Cloud_category")
  farm_summary$cloud_distribution <- cloud_dist

  # 4. OVERALL STATISTICS
  farm_summary$overall_stats <- data.frame(
    total_fields = nrow(field_data),
    total_acreage = sum(field_data$Acreage, na.rm = TRUE),
    mean_ci = round(mean(field_data$Mean_CI, na.rm = TRUE), 2),
    median_ci = round(median(field_data$Mean_CI, na.rm = TRUE), 2),
    mean_cv = round(mean(field_data$CV, na.rm = TRUE), 4),
    week = current_week,
    year = current_year,
    date = as.character(end_date)
  )

  # Print summaries
  cat("\n--- PHASE DISTRIBUTION ---\n")
  print(phase_dist)

  cat("\n--- STATUS TRIGGER DISTRIBUTION ---\n")
  print(status_dist)

  cat("\n--- CLOUD COVERAGE DISTRIBUTION ---\n")
  print(cloud_dist)

  cat("\n--- OVERALL FARM STATISTICS ---\n")
  print(farm_summary$overall_stats)

  return(farm_summary)
}
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# ORCHESTRATOR FOR CANE_SUPPLY WORKFLOWS
|
# ORCHESTRATOR FOR CANE_SUPPLY WORKFLOWS
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
#' Orchestrate ANGATA weekly field analysis and reporting
|
#' Main orchestrator for CANE_SUPPLY per-field KPI workflow
|
||||||
#'
|
#'
|
||||||
#' Main entry point for CANE_SUPPLY (ANGATA, etc.) workflows.
|
#' This function coordinates all KPI calculations for the per-field analysis workflow.
|
||||||
#' Currently uses common utilities; future versions will add client-specific logic.
|
#' It loads historical data, calculates current/previous week statistics, computes
|
||||||
|
#' all 22 KPI columns, and aggregates farm-level summaries.
|
||||||
#'
|
#'
|
||||||
#' @param field_boundaries_sf SF object with field geometries
|
#' @param setup List with directory paths (kpi_reports_dir, data_dir, etc.)
|
||||||
#' @param current_week ISO week number (1-53)
|
#' @param client_config List with workflow configuration (script_91_compatible, outputs)
|
||||||
#' @param current_year ISO week year
|
#' @param end_date Date object for current report date
|
||||||
#' @param mosaic_dir Directory containing weekly mosaics
|
#' @param project_dir Character project identifier
|
||||||
#' @param field_boundaries_path Path to field GeoJSON
|
#' @param weekly_mosaic Character path to weekly mosaic directory
|
||||||
#' @param harvesting_data Data frame with harvest data (optional)
|
#' @param daily_vals_dir Character path to daily values directory
|
||||||
#' @param output_dir Directory for exports
|
#' @param field_boundaries_sf sf object with field geometries
|
||||||
#' @param data_dir Base data directory
|
#' @param data_dir Character path to data directory
|
||||||
#'
|
#' @return List with field_analysis_df, farm_kpi_results, export_paths
|
||||||
#' @return List with field analysis results
|
calculate_field_analysis_cane_supply <- function(setup,
|
||||||
#'
|
client_config,
|
||||||
#' @details
|
end_date,
|
||||||
#' This function:
|
project_dir,
|
||||||
#' 1. Loads weekly mosaic and extracts field statistics
|
weekly_mosaic,
|
||||||
#' 2. Calculates field statistics (using common utilities)
|
daily_vals_dir,
|
||||||
#' 3. Prepares field analysis summary
|
field_boundaries_sf,
|
||||||
#' 4. Exports to Excel/CSV/RDS
|
data_dir) {
|
||||||
#' 5. (Future) Applies ANGATA-specific assessments
|
|
||||||
#'
|
|
||||||
calculate_field_analysis_cane_supply <- function(
|
|
||||||
field_boundaries_sf,
|
|
||||||
current_week,
|
|
||||||
current_year,
|
|
||||||
mosaic_dir,
|
|
||||||
field_boundaries_path = NULL,
|
|
||||||
harvesting_data = NULL,
|
|
||||||
output_dir = file.path(PROJECT_DIR, "output"),
|
|
||||||
data_dir = NULL
|
|
||||||
) {
|
|
||||||
|
|
||||||
message("\n============ CANE SUPPLY FIELD ANALYSIS (ANGATA, etc.) ============")
|
message("\n", strrep("=", 70))
|
||||||
|
message("CANE_SUPPLY WORKFLOW: PER-FIELD ANALYSIS (Script 91 compatible)")
|
||||||
|
message(strrep("=", 70))
|
||||||
|
|
||||||
# Load current week mosaic
|
reports_dir <- file.path(setup$reports_dir, "kpis")
|
||||||
message("Loading current week mosaic...")
|
|
||||||
current_mosaic <- load_weekly_ci_mosaic(mosaic_dir, current_week, current_year)
|
|
||||||
|
|
||||||
if (is.null(current_mosaic)) {
|
# ========== PHASE 1: WEEKLY ANALYSIS SETUP ==========
|
||||||
warning(paste("Could not load current week mosaic for week", current_week, current_year))
|
message("\n", strrep("-", 70))
|
||||||
return(NULL)
|
message("PHASE 1: PER-FIELD WEEKLY ANALYSIS ")
|
||||||
|
message(strrep("-", 70))
|
||||||
|
|
||||||
|
weeks <- calculate_week_numbers(end_date)
|
||||||
|
current_week <- weeks$current_week
|
||||||
|
current_year <- weeks$current_year
|
||||||
|
previous_week <- weeks$previous_week
|
||||||
|
previous_year <- weeks$previous_year
|
||||||
|
|
||||||
|
message(paste("Week:", current_week, "/ Year (ISO 8601):", current_year))
|
||||||
|
|
||||||
|
# Find per-field weekly mosaics
|
||||||
|
message("Finding per-field weekly mosaics...")
|
||||||
|
|
||||||
|
if (!dir.exists(weekly_mosaic)) {
|
||||||
|
stop(paste("ERROR: weekly_mosaic directory not found:", weekly_mosaic,
|
||||||
|
"\nScript 40 (mosaic creation) must be run first."))
|
||||||
}
|
}
|
||||||
|
|
||||||
# Extract field statistics
|
field_dirs <- list.dirs(weekly_mosaic, full.names = FALSE, recursive = FALSE)
|
||||||
message("Extracting field statistics from current mosaic...")
|
field_dirs <- field_dirs[field_dirs != ""]
|
||||||
field_stats <- extract_field_statistics_from_ci(current_mosaic, field_boundaries_sf)
|
|
||||||
|
|
||||||
# Load previous week stats for comparison
|
if (length(field_dirs) == 0) {
|
||||||
message("Loading historical data for trends...")
|
stop(paste("ERROR: No field subdirectories found in:", weekly_mosaic,
|
||||||
target_prev <- calculate_target_week_and_year(current_week, current_year, offset_weeks = 1)
|
"\nScript 40 must create weekly_mosaic/{FIELD}/ structure."))
|
||||||
previous_stats <- NULL
|
|
||||||
|
|
||||||
previous_mosaic <- load_weekly_ci_mosaic(mosaic_dir, target_prev$week, target_prev$year)
|
|
||||||
if (!is.null(previous_mosaic)) {
|
|
||||||
previous_stats <- extract_field_statistics_from_ci(previous_mosaic, field_boundaries_sf)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Calculate 4-week historical trend
|
# Verify we have mosaics for this week
|
||||||
message("Calculating field trends...")
|
single_file_pattern <- sprintf("week_%02d_%d\\.tif", current_week, current_year)
|
||||||
ci_rds_path <- file.path(data_dir, "combined_CI", "combined_CI_data.rds")
|
per_field_files <- c()
|
||||||
|
for (field in field_dirs) {
|
||||||
|
field_mosaic_dir <- file.path(weekly_mosaic, field)
|
||||||
|
files <- list.files(field_mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
|
||||||
|
if (length(files) > 0) {
|
||||||
|
per_field_files <- c(per_field_files, files)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
field_analysis <- calculate_field_statistics(
|
if (length(per_field_files) == 0) {
|
||||||
field_stats = field_stats,
|
stop(paste("ERROR: No mosaics found for week", current_week, "year", current_year,
|
||||||
previous_stats = previous_stats,
|
"\nExpected pattern:", single_file_pattern,
|
||||||
|
"\nChecked:", weekly_mosaic))
|
||||||
|
}
|
||||||
|
|
||||||
|
message(paste(" ✓ Found", length(per_field_files), "per-field weekly mosaics"))
|
||||||
|
|
||||||
|
# ========== PHASE 2: LOAD HISTORICAL DATA ==========
|
||||||
|
message("\nLoading historical field data for trend calculations...")
|
||||||
|
num_weeks_to_load <- max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG)
|
||||||
|
message(paste(" Attempting to load up to", num_weeks_to_load, "weeks of historical data..."))
|
||||||
|
|
||||||
|
allow_auto_gen <- !exists("_INSIDE_AUTO_GENERATE", envir = .GlobalEnv)
|
||||||
|
|
||||||
|
historical_data <- load_historical_field_data(
|
||||||
|
project_dir, current_week, current_year, reports_dir,
|
||||||
|
num_weeks = num_weeks_to_load,
|
||||||
|
auto_generate = allow_auto_gen,
|
||||||
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
|
daily_vals_dir = daily_vals_dir
|
||||||
|
)
|
||||||
|
|
||||||
|
# ========== PHASE 3: LOAD PLANTING DATES ==========
|
||||||
|
message("\nLoading harvest data from harvest.xlsx for planting dates...")
|
||||||
|
# Use load_harvest_data() from 80_utils_common.R for consistency with 80_calculate_kpis.R
|
||||||
|
harvesting_data <- load_harvest_data(data_dir)
|
||||||
|
|
||||||
|
planting_dates <- extract_planting_dates(harvesting_data, field_boundaries_sf)
|
||||||
|
|
||||||
|
if (is.null(planting_dates) || nrow(planting_dates) == 0) {
|
||||||
|
message("WARNING: No planting dates available. Using NA for all fields.")
|
||||||
|
planting_dates <- data.frame(
|
||||||
|
field_id = field_boundaries_sf$field,
|
||||||
|
planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== PHASE 4: CALCULATE WEEKLY STATISTICS ==========
|
||||||
|
message("\nUsing modular RDS-based approach for weekly statistics...")
|
||||||
|
|
||||||
|
# Current week
|
||||||
|
message("\n1. Loading/calculating CURRENT week statistics (week", current_week, ")...")
|
||||||
|
current_stats <- load_or_calculate_weekly_stats(
|
||||||
week_num = current_week,
|
week_num = current_week,
|
||||||
year = current_year,
|
year = current_year,
|
||||||
ci_rds_path = ci_rds_path,
|
project_dir = project_dir,
|
||||||
field_boundaries_sf = field_boundaries_sf,
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
harvesting_data = harvesting_data
|
mosaic_dir = weekly_mosaic,
|
||||||
|
reports_dir = reports_dir,
|
||||||
|
report_date = end_date
|
||||||
|
)
|
||||||
|
message(paste(" ✓ Loaded/calculated stats for", nrow(current_stats), "fields in current week"))
|
||||||
|
|
||||||
|
# Previous week
|
||||||
|
message("\n2. Loading/calculating PREVIOUS week statistics (week", previous_week, ")...")
|
||||||
|
prev_report_date <- end_date - 7
|
||||||
|
|
||||||
|
prev_stats <- load_or_calculate_weekly_stats(
|
||||||
|
week_num = previous_week,
|
||||||
|
year = previous_year,
|
||||||
|
project_dir = project_dir,
|
||||||
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
|
mosaic_dir = weekly_mosaic,
|
||||||
|
reports_dir = reports_dir,
|
||||||
|
report_date = prev_report_date
|
||||||
|
)
|
||||||
|
message(paste(" ✓ Loaded/calculated stats for", nrow(prev_stats), "fields in previous week"))
|
||||||
|
|
||||||
|
# ========== PHASE 5: CALCULATE TRENDS ==========
|
||||||
|
message("\n3. Calculating trend columns...")
|
||||||
|
current_stats <- calculate_kpi_trends(
|
||||||
|
current_stats, prev_stats,
|
||||||
|
project_dir = project_dir,
|
||||||
|
reports_dir = reports_dir,
|
||||||
|
current_week = current_week,
|
||||||
|
year = current_year
|
||||||
|
)
|
||||||
|
message(paste(" ✓ Added Weekly_ci_change, CV_Trend_Short_Term, Four_week_trend, CV_Trend_Long_Term, nmr_of_weeks_analysed"))
|
||||||
|
|
||||||
|
# ========== PHASE 6: LOAD HARVEST PROBABILITIES ==========
|
||||||
|
message("\n4. Loading harvest probabilities from script 31...")
|
||||||
|
# Use get_harvest_output_path() to safely build path (stored in kpi_reports_dir)
|
||||||
|
harvest_prob_file <- get_harvest_output_path(project_dir, current_week, current_year)
|
||||||
|
message(paste(" Looking for:", harvest_prob_file))
|
||||||
|
|
||||||
|
imminent_prob_data <- tryCatch({
|
||||||
|
if (file.exists(harvest_prob_file)) {
|
||||||
|
prob_df <- readr::read_csv(harvest_prob_file, show_col_types = FALSE,
|
||||||
|
col_types = readr::cols(.default = readr::col_character()))
|
||||||
|
message(paste(" ✓ Loaded harvest probabilities for", nrow(prob_df), "fields"))
|
||||||
|
prob_df %>%
|
||||||
|
select(field, imminent_prob, detected_prob) %>%
|
||||||
|
rename(Field_id = field, Imminent_prob_actual = imminent_prob, Detected_prob = detected_prob)
|
||||||
|
} else {
|
||||||
|
message(paste(" INFO: Harvest probabilities not available (script 31 not run)"))
|
||||||
|
NULL
|
||||||
|
}
|
||||||
|
}, error = function(e) {
|
||||||
|
message(paste(" WARNING: Could not load harvest probabilities:", e$message))
|
||||||
|
NULL
|
||||||
|
})
|
||||||
|
|
||||||
|
# ========== PHASE 7: CALCULATE GAP SCORES ==========
|
||||||
|
gap_scores_df <- calculate_gap_scores(per_field_files, field_boundaries_sf)
|
||||||
|
|
||||||
|
# ========== PHASE 8: BUILD FINAL PER-FIELD DATAFRAME ==========
|
||||||
|
field_analysis_df <- calculate_all_field_kpis(
|
||||||
|
current_stats = current_stats,
|
||||||
|
planting_dates = planting_dates,
|
||||||
|
imminent_prob_data = imminent_prob_data,
|
||||||
|
gap_scores_df = gap_scores_df,
|
||||||
|
field_boundaries_sf = field_boundaries_sf,
|
||||||
|
end_date = end_date
|
||||||
)
|
)
|
||||||
|
|
||||||
if (is.null(field_analysis)) {
|
# ========== PHASE 9: EXPORT PER-FIELD RESULTS ==========
|
||||||
message("Could not generate field analysis")
|
|
||||||
return(NULL)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Generate summary
|
|
||||||
message("Generating summary statistics...")
|
|
||||||
summary_df <- generate_field_analysis_summary(field_analysis)
|
|
||||||
|
|
||||||
# Export
|
|
||||||
message("Exporting field analysis...")
|
|
||||||
export_paths <- export_field_analysis_excel(
|
export_paths <- export_field_analysis_excel(
|
||||||
field_analysis,
|
field_analysis_df,
|
||||||
summary_df,
|
NULL,
|
||||||
PROJECT_DIR,
|
project_dir,
|
||||||
current_week,
|
current_week,
|
||||||
current_year,
|
current_year,
|
||||||
output_dir
|
reports_dir
|
||||||
)
|
)
|
||||||
|
|
||||||
message(paste("\n✓ CANE_SUPPLY field analysis complete. Week", current_week, current_year, "\n"))
|
# cat("\n--- Per-field Results (first 10) ---\n")
|
||||||
|
# available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI", "Four_week_trend", "Status_Alert", "Cloud_category")
|
||||||
|
# available_cols <- available_cols[available_cols %in% names(field_analysis_df)]
|
||||||
|
# if (length(available_cols) > 0) {
|
||||||
|
# print(head(field_analysis_df[, available_cols], 10))
|
||||||
|
# }
|
||||||
|
|
||||||
result <- list(
|
# ========== PHASE 10: CALCULATE FARM-LEVEL KPIS ==========
|
||||||
field_analysis = field_analysis,
|
# farm_kpi_results <- calculate_farm_level_kpis(
|
||||||
summary = summary_df,
|
# field_analysis_df,
|
||||||
exports = export_paths
|
# current_week,
|
||||||
)
|
# current_year,
|
||||||
|
# end_date
|
||||||
|
# )
|
||||||
|
|
||||||
return(result)
|
# For now, farm-level KPIs are not implemented in CANE_SUPPLY workflow
|
||||||
|
farm_kpi_results <- NULL
|
||||||
|
|
||||||
|
# ========== RETURN RESULTS ==========
|
||||||
|
return(list(
|
||||||
|
field_analysis_df = field_analysis_df,
|
||||||
|
farm_kpi_results = farm_kpi_results,
|
||||||
|
export_paths = export_paths
|
||||||
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
|
||||||
|
|
@ -8,10 +8,34 @@
|
||||||
# - Field statistics extraction
|
# - Field statistics extraction
|
||||||
# - Week/year calculations for consistent date handling
|
# - Week/year calculations for consistent date handling
|
||||||
# - Excel/CSV/RDS export utilities
|
# - Excel/CSV/RDS export utilities
|
||||||
|
# - Yield prediction using ML models (Random Forest with Feature Selection)
|
||||||
#
|
#
|
||||||
# Used by: 80_calculate_kpis.R, all client-specific utils files
|
# Used by: 80_calculate_kpis.R, all client-specific utils files
|
||||||
|
#
|
||||||
|
# NOTE: Libraries required by yield prediction (caret, CAST, here) are loaded
|
||||||
|
# in the main script 80_calculate_kpis.R, not here. This keeps dependencies
|
||||||
|
# centralized in the orchestrator script.
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# LOAD PROJECT CONFIGURATION (Guard against re-sourcing)
|
||||||
|
# ============================================================================
|
||||||
|
# Ensure parameters_project.R has been sourced to provide global configuration
|
||||||
|
# (PROJECT, data_dir, field_boundaries_path, etc.). Use a sentinel to avoid double-sourcing.
|
||||||
|
if (!exists("PROJECT", envir = .GlobalEnv)) {
|
||||||
|
tryCatch({
|
||||||
|
source(here::here("r_app", "parameters_project.R"))
|
||||||
|
}, error = function(e) {
|
||||||
|
# Fallback: try relative path if here() doesn't work
|
||||||
|
tryCatch({
|
||||||
|
source("parameters_project.R")
|
||||||
|
}, error = function(e2) {
|
||||||
|
warning(paste("Could not source parameters_project.R:", e2$message,
|
||||||
|
"- using defaults or expecting caller to set PROJECT/data_dir"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# CONSTANTS (from 80_calculate_kpis.R)
|
# CONSTANTS (from 80_calculate_kpis.R)
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
@ -355,12 +379,11 @@ calculate_cv_trend_long_term <- function(cv_values) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Calculate Gap Filling Score KPI (2σ method)
|
#' Calculate Gap Filling Score KPI (2σ method)
|
||||||
#' @param ci_raster Current week CI raster
|
#' @param ci_raster Current week CI raster (single band)
|
||||||
#' @param field_boundaries Field boundaries
|
#' @param field_boundaries Field boundaries (sf or SpatVector)
|
||||||
#' @return Data frame with field-level gap filling scores
|
#' @return List with summary data frame and field-level results data frame
|
||||||
calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
||||||
safe_log("Calculating Gap Filling Score KPI (placeholder)")
|
|
||||||
|
|
||||||
# Handle both sf and SpatVector inputs
|
# Handle both sf and SpatVector inputs
|
||||||
if (!inherits(field_boundaries, "SpatVector")) {
|
if (!inherits(field_boundaries, "SpatVector")) {
|
||||||
field_boundaries_vect <- terra::vect(field_boundaries)
|
field_boundaries_vect <- terra::vect(field_boundaries)
|
||||||
|
|
@ -368,19 +391,11 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
||||||
field_boundaries_vect <- field_boundaries
|
field_boundaries_vect <- field_boundaries
|
||||||
}
|
}
|
||||||
|
|
||||||
# Ensure field_boundaries_vect is valid and matches field_boundaries dimensions
|
|
||||||
n_fields_vect <- length(field_boundaries_vect)
|
|
||||||
n_fields_sf <- nrow(field_boundaries)
|
|
||||||
|
|
||||||
if (n_fields_sf != n_fields_vect) {
|
|
||||||
warning(paste("Field boundary mismatch: nrow(field_boundaries)=", n_fields_sf, "vs length(field_boundaries_vect)=", n_fields_vect, ". Using actual SpatVector length."))
|
|
||||||
}
|
|
||||||
|
|
||||||
field_results <- data.frame()
|
field_results <- data.frame()
|
||||||
|
|
||||||
for (i in seq_len(nrow(field_boundaries))) {
|
for (i in seq_len(nrow(field_boundaries))) {
|
||||||
field_name <- field_boundaries$field[i]
|
field_name <- if ("field" %in% names(field_boundaries)) field_boundaries$field[i] else NA_character_
|
||||||
sub_field_name <- field_boundaries$sub_field[i]
|
sub_field_name <- if ("sub_field" %in% names(field_boundaries)) field_boundaries$sub_field[i] else NA_character_
|
||||||
field_vect <- field_boundaries_vect[i]
|
field_vect <- field_boundaries_vect[i]
|
||||||
|
|
||||||
# Extract CI values using helper function
|
# Extract CI values using helper function
|
||||||
|
|
@ -391,16 +406,17 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
||||||
# Gap score using 2σ below median to detect outliers
|
# Gap score using 2σ below median to detect outliers
|
||||||
median_ci <- median(valid_values)
|
median_ci <- median(valid_values)
|
||||||
sd_ci <- sd(valid_values)
|
sd_ci <- sd(valid_values)
|
||||||
outlier_threshold <- median_ci - (2 * sd_ci)
|
outlier_threshold <- median_ci - (1 * sd_ci)
|
||||||
low_ci_pixels <- sum(valid_values < outlier_threshold)
|
low_ci_pixels <- sum(valid_values < outlier_threshold)
|
||||||
total_pixels <- length(valid_values)
|
total_pixels <- length(valid_values)
|
||||||
gap_score <- (low_ci_pixels / total_pixels) * 100
|
gap_score <- round((low_ci_pixels / total_pixels) * 100, 2)
|
||||||
|
|
||||||
# Classify gap severity
|
# Classify gap severity
|
||||||
gap_level <- dplyr::case_when(
|
gap_level <- dplyr::case_when(
|
||||||
gap_score < 10 ~ "Minimal",
|
gap_score < 10 ~ "Minimal",
|
||||||
gap_score < 25 ~ "Moderate",
|
gap_score < 25 ~ "Moderate",
|
||||||
TRUE ~ "Significant"
|
gap_score >= 25 ~ "Significant",
|
||||||
|
TRUE ~ NA_character_
|
||||||
)
|
)
|
||||||
|
|
||||||
field_results <- rbind(field_results, data.frame(
|
field_results <- rbind(field_results, data.frame(
|
||||||
|
|
@ -412,7 +428,6 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
||||||
outlier_threshold = outlier_threshold
|
outlier_threshold = outlier_threshold
|
||||||
))
|
))
|
||||||
} else {
|
} else {
|
||||||
# Not enough valid data, fill with NA row
|
|
||||||
field_results <- rbind(field_results, data.frame(
|
field_results <- rbind(field_results, data.frame(
|
||||||
field = field_name,
|
field = field_name,
|
||||||
sub_field = sub_field_name,
|
sub_field = sub_field_name,
|
||||||
|
|
@ -423,9 +438,99 @@ calculate_gap_filling_kpi <- function(ci_raster, field_boundaries) {
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Summarize results
|
||||||
|
gap_summary <- field_results %>%
|
||||||
|
dplyr::group_by(gap_level) %>%
|
||||||
|
dplyr::summarise(field_count = n(), .groups = 'drop') %>%
|
||||||
|
dplyr::mutate(percent = round((field_count / sum(field_count)) * 100, 1))
|
||||||
|
|
||||||
|
return(list(summary = gap_summary, field_results = field_results))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#' Calculate gap filling scores for all per-field mosaics (wrapper)
|
||||||
|
#'
|
||||||
|
#' This wrapper handles per-field mosaic structure by iterating over
|
||||||
|
#' individual field files and calling the basic KPI function
|
||||||
|
#'
|
||||||
|
#' @param per_field_files Character vector of paths to per-field mosaic TIFFs
|
||||||
|
#' @param field_boundaries_sf sf object with field geometries
|
||||||
|
#' @return data.frame with Field_id and gap_score columns
|
||||||
|
calculate_gap_scores <- function(per_field_files, field_boundaries_sf) {
|
||||||
|
message("\nCalculating gap filling scores (2σ method)...")
|
||||||
|
message(paste(" Using per-field mosaics for", length(per_field_files), "fields"))
|
||||||
|
|
||||||
|
field_boundaries_by_id <- split(field_boundaries_sf, field_boundaries_sf$field)
|
||||||
|
|
||||||
|
process_gap_for_field <- function(field_file) {
|
||||||
|
field_id <- basename(dirname(field_file))
|
||||||
|
field_bounds <- field_boundaries_by_id[[field_id]]
|
||||||
|
|
||||||
|
if (is.null(field_bounds) || nrow(field_bounds) == 0) {
|
||||||
|
return(data.frame(Field_id = field_id, gap_score = NA_real_))
|
||||||
|
}
|
||||||
|
|
||||||
|
tryCatch({
|
||||||
|
field_raster <- terra::rast(field_file)
|
||||||
|
ci_band_name <- "CI"
|
||||||
|
if (!(ci_band_name %in% names(field_raster))) {
|
||||||
|
return(data.frame(Field_id = field_id, gap_score = NA_real_))
|
||||||
|
}
|
||||||
|
field_ci_band <- field_raster[[ci_band_name]]
|
||||||
|
names(field_ci_band) <- "CI"
|
||||||
|
|
||||||
|
gap_result <- calculate_gap_filling_kpi(field_ci_band, field_bounds)
|
||||||
|
|
||||||
|
if (is.null(gap_result) || is.null(gap_result$field_results) || nrow(gap_result$field_results) == 0) {
|
||||||
|
return(data.frame(Field_id = field_id, gap_score = NA_real_))
|
||||||
|
}
|
||||||
|
|
||||||
|
gap_scores <- gap_result$field_results
|
||||||
|
gap_scores$Field_id <- gap_scores$field
|
||||||
|
gap_scores <- gap_scores[, c("Field_id", "gap_score")]
|
||||||
|
|
||||||
|
stats::aggregate(gap_score ~ Field_id, data = gap_scores, FUN = function(x) mean(x, na.rm = TRUE))
|
||||||
|
}, error = function(e) {
|
||||||
|
message(paste(" WARNING: Gap score failed for field", field_id, ":", e$message))
|
||||||
|
data.frame(Field_id = field_id, gap_score = NA_real_)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
# Process fields sequentially with progress bar
|
||||||
|
message(" Processing gap scores for ", length(per_field_files), " fields...")
|
||||||
|
pb <- utils::txtProgressBar(min = 0, max = length(per_field_files), style = 3, width = 50)
|
||||||
|
|
||||||
|
results_list <- lapply(seq_along(per_field_files), function(idx) {
|
||||||
|
result <- process_gap_for_field(per_field_files[[idx]])
|
||||||
|
utils::setTxtProgressBar(pb, idx)
|
||||||
|
result
|
||||||
|
})
|
||||||
|
close(pb)
|
||||||
|
|
||||||
|
gap_scores_df <- dplyr::bind_rows(results_list)
|
||||||
|
|
||||||
|
if (!is.null(gap_scores_df) && nrow(gap_scores_df) > 0) {
|
||||||
|
gap_scores_df <- gap_scores_df %>%
|
||||||
|
dplyr::group_by(Field_id) %>%
|
||||||
|
dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop")
|
||||||
|
|
||||||
|
message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields"))
|
||||||
|
|
||||||
|
# Guard against all-NA values which would produce Inf/-Inf warnings
|
||||||
|
if (any(is.finite(gap_scores_df$gap_score))) {
|
||||||
|
min_score <- round(min(gap_scores_df$gap_score, na.rm = TRUE), 2)
|
||||||
|
max_score <- round(max(gap_scores_df$gap_score, na.rm = TRUE), 2)
|
||||||
|
message(paste(" Gap score range:", min_score, "-", max_score, "%"))
|
||||||
|
} else {
|
||||||
|
message(" Gap score range: All values are NA (no valid gap scores)")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
message(" WARNING: No gap scores calculated from per-field mosaics")
|
||||||
|
gap_scores_df <- NULL
|
||||||
|
}
|
||||||
|
|
||||||
|
return(gap_scores_df)
|
||||||
|
}
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# HELPER FUNCTIONS
|
# HELPER FUNCTIONS
|
||||||
|
|
@ -517,6 +622,88 @@ extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# DATA LOADING HELPERS
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
#' Load and validate harvest data from harvest.xlsx
|
||||||
|
#'
|
||||||
|
#' Encapsulates harvest data loading with validation, type coercion, and error handling.
|
||||||
|
#' Returns a data frame with required columns (field, year, tonnage_ha) or an empty
|
||||||
|
#' data frame with proper structure if loading fails.
|
||||||
|
#'
|
||||||
|
#' @param data_dir Path to data directory containing harvest.xlsx
|
||||||
|
#'
|
||||||
|
#' @return data.frame with columns: field (character), year (numeric), tonnage_ha (numeric)
|
||||||
|
#' - On success: data frame with N rows of harvest records
|
||||||
|
#' - On failure: empty data frame with correct structure (0 rows, 3 columns)
|
||||||
|
#'
|
||||||
|
#' @details
|
||||||
|
#' **File Location**: Expected at `file.path(data_dir, "harvest.xlsx")`
|
||||||
|
#'
|
||||||
|
#' **Validation**:
|
||||||
|
#' - Checks file existence before reading
|
||||||
|
#' - Validates required columns: field, year, tonnage_ha
|
||||||
|
#' - Coerces year and tonnage_ha to numeric
|
||||||
|
#' - Logs status messages for debugging
|
||||||
|
#'
|
||||||
|
#' **Error Handling**:
|
||||||
|
#' - Missing file: Returns empty DF (logs NOTE)
|
||||||
|
#' - Missing columns: Returns empty DF (logs WARNING)
|
||||||
|
#' - Read errors: Returns empty DF (logs WARNING)
|
||||||
|
#' - Always returns valid data frame structure (won't return NULL)
|
||||||
|
#'
|
||||||
|
#' **Usage**:
|
||||||
|
#' ```r
|
||||||
|
#' harvesting_data <- load_harvest_data(setup$data_dir)
|
||||||
|
#' # harvesting_data is guaranteed to be a data.frame with 3 columns
|
||||||
|
#' # even if harvest.xlsx is unavailable or invalid
|
||||||
|
#' ```
|
||||||
|
load_harvest_data <- function(data_dir) {
|
||||||
|
harvesting_data <- NULL
|
||||||
|
harvest_file <- file.path(data_dir, "harvest.xlsx")
|
||||||
|
|
||||||
|
if (file.exists(harvest_file)) {
|
||||||
|
tryCatch({
|
||||||
|
harvesting_data <- readxl::read_excel(harvest_file)
|
||||||
|
|
||||||
|
# Ensure required columns are present
|
||||||
|
required_cols <- c("field", "year", "tonnage_ha")
|
||||||
|
if (all(required_cols %in% names(harvesting_data))) {
|
||||||
|
# Convert to data frame and ensure column types
|
||||||
|
harvesting_data <- as.data.frame(harvesting_data)
|
||||||
|
# CRITICAL: Coerce field to character to preserve leading zeros (e.g., "01", "02")
|
||||||
|
harvesting_data$field <- as.character(harvesting_data$field)
|
||||||
|
harvesting_data$year <- as.numeric(harvesting_data$year)
|
||||||
|
harvesting_data$tonnage_ha <- as.numeric(harvesting_data$tonnage_ha)
|
||||||
|
|
||||||
|
message(paste(" ✓ Loaded harvest data:", nrow(harvesting_data), "records from harvest.xlsx"))
|
||||||
|
return(harvesting_data)
|
||||||
|
} else {
|
||||||
|
message(paste(" WARNING: harvest.xlsx missing required columns. Expected: field, year, tonnage_ha"))
|
||||||
|
harvesting_data <- NULL
|
||||||
|
}
|
||||||
|
}, error = function(e) {
|
||||||
|
message(paste(" WARNING: Could not read harvest.xlsx:", e$message))
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
message(paste(" NOTE: harvest.xlsx not found at", harvest_file))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Fallback: create empty data frame if loading failed
|
||||||
|
if (is.null(harvesting_data)) {
|
||||||
|
message(" WARNING: No harvest data available. TCH yield prediction will use graceful fallback (NA values)")
|
||||||
|
harvesting_data <- data.frame(
|
||||||
|
field = character(), # Explicitly character to preserve leading zeros when data is added
|
||||||
|
year = numeric(),
|
||||||
|
tonnage_ha = numeric(),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
return(harvesting_data)
|
||||||
|
}
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# FIELD STATISTICS EXTRACTION
|
# FIELD STATISTICS EXTRACTION
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
@ -678,13 +865,13 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
|
||||||
NULL
|
NULL
|
||||||
}
|
}
|
||||||
|
|
||||||
output_subdir <- file.path(reports_dir, "field_analysis")
|
output_dir <- file.path(reports_dir)
|
||||||
if (!dir.exists(output_subdir)) {
|
if (!dir.exists(output_dir)) {
|
||||||
dir.create(output_subdir, recursive = TRUE)
|
dir.create(output_dir, recursive = TRUE)
|
||||||
}
|
}
|
||||||
|
|
||||||
excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".xlsx")
|
excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".xlsx")
|
||||||
excel_path <- file.path(output_subdir, excel_filename)
|
excel_path <- file.path(output_dir, excel_filename)
|
||||||
excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
|
excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)
|
||||||
|
|
||||||
# Build sheets list dynamically
|
# Build sheets list dynamically
|
||||||
|
|
@ -709,14 +896,14 @@ export_field_analysis_excel <- function(field_df, summary_df, project_dir, curre
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d_%d", current_week, year), ".rds")
|
rds_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".rds")
|
||||||
rds_path <- file.path(reports_dir, rds_filename)
|
rds_path <- file.path(output_dir, rds_filename)
|
||||||
|
|
||||||
saveRDS(kpi_data, rds_path)
|
saveRDS(kpi_data, rds_path)
|
||||||
message(paste("✓ Field analysis RDS exported to:", rds_path))
|
message(paste("✓ Field analysis RDS exported to:", rds_path))
|
||||||
|
|
||||||
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".csv")
|
csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", current_week, year), ".csv")
|
||||||
csv_path <- file.path(output_subdir, csv_filename)
|
csv_path <- file.path(output_dir, csv_filename)
|
||||||
write_csv(field_df_rounded, csv_path)
|
write_csv(field_df_rounded, csv_path)
|
||||||
message(paste("✓ Field analysis CSV exported to:", csv_path))
|
message(paste("✓ Field analysis CSV exported to:", csv_path))
|
||||||
|
|
||||||
|
|
@ -1304,7 +1491,7 @@ prepare_predictions <- function(predictions, newdata) {
|
||||||
dplyr::mutate(
|
dplyr::mutate(
|
||||||
sub_field = newdata$sub_field,
|
sub_field = newdata$sub_field,
|
||||||
field = newdata$field,
|
field = newdata$field,
|
||||||
Age_days = newdata$DOY,
|
Age_days = newdata$DAH,
|
||||||
total_CI = round(newdata$cumulative_CI, 0),
|
total_CI = round(newdata$cumulative_CI, 0),
|
||||||
predicted_Tcha = round(predicted_Tcha, 0),
|
predicted_Tcha = round(predicted_Tcha, 0),
|
||||||
season = newdata$season
|
season = newdata$season
|
||||||
|
|
@ -1313,3 +1500,258 @@ prepare_predictions <- function(predictions, newdata) {
|
||||||
dplyr::left_join(., newdata, by = c("field", "sub_field", "season"))
|
dplyr::left_join(., newdata, by = c("field", "sub_field", "season"))
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# YIELD PREDICTION KPI (SHARED ML-BASED MODEL FOR ALL CLIENT TYPES)
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
#' Helper function for graceful fallback when training data unavailable
|
||||||
|
#'
|
||||||
|
#' @param field_boundaries Field boundaries (sf or SpatVector)
|
||||||
|
#' @return List with summary and field_results (both with NA values)
|
||||||
|
create_fallback_result <- function(field_boundaries) {
|
||||||
|
# Convert to SpatVector if needed (for terra::project)
|
||||||
|
if (!inherits(field_boundaries, "SpatVector")) {
|
||||||
|
field_boundaries <- terra::vect(field_boundaries)
|
||||||
|
}
|
||||||
|
field_boundaries_projected <- terra::project(field_boundaries, "EPSG:6933") # Equal Earth projection
|
||||||
|
field_areas <- terra::expanse(field_boundaries_projected) / 10000 # Convert m² to hectares
|
||||||
|
total_area <- sum(field_areas)
|
||||||
|
|
||||||
|
summary_result <- data.frame(
|
||||||
|
field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"),
|
||||||
|
count = c(0, 0, 0, nrow(field_boundaries)),
|
||||||
|
value = c(NA_real_, NA_real_, NA_real_, round(total_area, 1)),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
field_results <- data.frame(
|
||||||
|
field = character(0),
|
||||||
|
sub_field = character(0),
|
||||||
|
Age_days = numeric(0),
|
||||||
|
yield_forecast_t_ha = numeric(0),
|
||||||
|
season = numeric(0),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
return(list(summary = summary_result, field_results = field_results))
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Calculate yield prediction KPI using Random Forest with Feature Selection
|
||||||
|
#'
|
||||||
|
#' Trains a Random Forest model on historical harvest data with cumulative CI,
|
||||||
|
#' days after harvest (DAH), and CI-per-day as predictors. Uses CAST::ffs() for
|
||||||
|
#' Forward Feature Selection. Predicts yields for mature fields (DAH >= DAH_MATURITY_THRESHOLD).
|
||||||
|
#'
|
||||||
|
#' @param field_boundaries Field boundaries (sf or SpatVector)
|
||||||
|
#' @param harvesting_data Data frame with harvest data including tonnage_ha column
|
||||||
|
#' @param cumulative_CI_vals_dir Directory containing "All_pivots_Cumulative_CI_quadrant_year_v2.rds"
|
||||||
|
#'
|
||||||
|
#' @return List with:
|
||||||
|
#' - summary: Data frame with field_groups, count, value (quartiles and total area)
|
||||||
|
#' - field_results: Data frame with field-level yield forecasts (yield_forecast_t_ha)
|
||||||
|
#'
|
||||||
|
#' @details
|
||||||
|
#' **Training Data Requirements:**
|
||||||
|
#' - cumulative_CI_vals_dir must contain "All_pivots_Cumulative_CI_quadrant_year_v2.rds"
|
||||||
|
#' - harvesting_data must have tonnage_ha column with numeric yield values
|
||||||
|
#' - Training stops gracefully if either is missing (returns NA values, not fake numbers)
|
||||||
|
#'
|
||||||
|
#' **Model Specifications:**
|
||||||
|
#' - Algorithm: Random Forest (caret + CAST)
|
||||||
|
#' - Feature Selection: Forward Feature Selection (CAST::ffs)
|
||||||
|
#' - Cross-validation: 5-fold CV
|
||||||
|
#' - Predictors: cumulative_CI, DAH, CI_per_day
|
||||||
|
#' - Mature field threshold: DAH >= DAH_MATURITY_THRESHOLD (8 months, ~240 days)
|
||||||
|
#' - Output: Field-level yield forecasts grouped by quartile
|
||||||
|
#'
|
||||||
|
#' **Error Handling:**
|
||||||
|
#' - Missing tonnage_ha: Returns graceful fallback with NA (not zero) values
|
||||||
|
#' - No training data: Logs WARNING, returns empty field_results
|
||||||
|
#' - RDS file missing: Returns graceful fallback
|
||||||
|
#' - Prediction errors: Wrapped in tryCatch, returns fallback on failure
|
||||||
|
calculate_yield_prediction_kpi <- function(field_boundaries, harvesting_data, cumulative_CI_vals_dir) {
|
||||||
|
safe_log("Calculating yield prediction KPI using Random Forest with Feature Selection")
|
||||||
|
|
||||||
|
tryCatch({
|
||||||
|
# Check if tonnage_ha column is present and has valid data
|
||||||
|
if (is.null(harvesting_data) ||
|
||||||
|
!("tonnage_ha" %in% names(harvesting_data)) ||
|
||||||
|
all(is.na(harvesting_data$tonnage_ha))) {
|
||||||
|
safe_log("No harvest data available: lacking tonnage_ha column or all values are NA", "WARNING")
|
||||||
|
return(create_fallback_result(field_boundaries))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check if CI quadrant RDS file exists
|
||||||
|
ci_quadrant_path <- file.path(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")
|
||||||
|
if (!file.exists(ci_quadrant_path)) {
|
||||||
|
safe_log(paste("CI quadrant file not found at:", ci_quadrant_path), "WARNING")
|
||||||
|
return(create_fallback_result(field_boundaries))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Load CI quadrant data and fill missing field/sub_field values
|
||||||
|
CI_quadrant <- readRDS(ci_quadrant_path) %>%
|
||||||
|
dplyr::group_by(model) %>%
|
||||||
|
tidyr::fill(field, sub_field, .direction = "downup") %>%
|
||||||
|
dplyr::ungroup()
|
||||||
|
|
||||||
|
# Rename year column to season for consistency
|
||||||
|
harvesting_data_renamed <- harvesting_data %>% dplyr::rename(season = year)
|
||||||
|
|
||||||
|
# Join CI and yield data
|
||||||
|
CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data_renamed,
|
||||||
|
by = c("field", "sub_field", "season")) %>%
|
||||||
|
dplyr::group_by(sub_field, season) %>%
|
||||||
|
dplyr::slice(which.max(DAH)) %>%
|
||||||
|
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DAH, season, sub_area) %>%
|
||||||
|
dplyr::mutate(CI_per_day = ifelse(DAH > 0, cumulative_CI / DAH, NA_real_))
|
||||||
|
|
||||||
|
|
||||||
|
# Define predictors and response variables
|
||||||
|
predictors <- c("cumulative_CI", "DAH", "CI_per_day")
|
||||||
|
response <- "tonnage_ha"
|
||||||
|
|
||||||
|
# Prepare training dataset (fields with harvest data)
|
||||||
|
CI_and_yield_test <- CI_and_yield %>%
|
||||||
|
as.data.frame() %>%
|
||||||
|
dplyr::filter(!is.na(tonnage_ha))
|
||||||
|
|
||||||
|
# Check if we have minimum training data
|
||||||
|
if (nrow(CI_and_yield_test) == 0) {
|
||||||
|
safe_log("No training data available: no fields with tonnage_ha observations", "WARNING")
|
||||||
|
return(create_fallback_result(field_boundaries))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Pre-clean training data: remove rows with any NAs in predictors or response
|
||||||
|
# This is required because CAST::ffs doesn't support na.rm parameter
|
||||||
|
CI_and_yield_test <- CI_and_yield_test %>%
|
||||||
|
dplyr::filter(!dplyr::if_any(dplyr::all_of(c(predictors, response)), is.na))
|
||||||
|
|
||||||
|
if (nrow(CI_and_yield_test) == 0) {
|
||||||
|
safe_log("No complete training data after removing NAs in predictors/response", "WARNING")
|
||||||
|
return(create_fallback_result(field_boundaries))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Prepare prediction dataset (fields without harvest data, mature fields only)
|
||||||
|
prediction_yields <- CI_and_yield %>%
|
||||||
|
as.data.frame() %>%
|
||||||
|
dplyr::filter(is.na(tonnage_ha) & DAH >= DAH_MATURITY_THRESHOLD) # Mature fields only
|
||||||
|
|
||||||
|
# Configure model training parameters
|
||||||
|
ctrl <- caret::trainControl(
|
||||||
|
method = "cv",
|
||||||
|
savePredictions = TRUE,
|
||||||
|
allowParallel = TRUE,
|
||||||
|
number = 5,
|
||||||
|
verboseIter = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
# Train the model with forward feature selection
|
||||||
|
set.seed(202) # For reproducibility
|
||||||
|
safe_log("Training Random Forest model with Forward Feature Selection...")
|
||||||
|
model_ffs_rf <- CAST::ffs(
|
||||||
|
CI_and_yield_test[, predictors],
|
||||||
|
CI_and_yield_test[, response],
|
||||||
|
method = "rf",
|
||||||
|
trControl = ctrl,
|
||||||
|
importance = TRUE,
|
||||||
|
withinSE = TRUE,
|
||||||
|
tuneLength = 5
|
||||||
|
)
|
||||||
|
|
||||||
|
# Predict yields on validation data (same as training data for RMSE calculation)
|
||||||
|
pred_ffs_rf <- prepare_predictions(
|
||||||
|
stats::predict(model_ffs_rf, newdata = CI_and_yield_test),
|
||||||
|
CI_and_yield_test
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract cross-validated RMSE from the model object (more honest than in-sample RMSE)
|
||||||
|
# The CAST::ffs model stores CV results in $results data frame
|
||||||
|
# We extract the RMSE from the best model (lowest RMSE across folds)
|
||||||
|
if (!is.null(model_ffs_rf$results) && "RMSE" %in% names(model_ffs_rf$results)) {
|
||||||
|
# Get minimum RMSE from cross-validation results (best model from feature selection)
|
||||||
|
rmse_value <- min(model_ffs_rf$results$RMSE, na.rm = TRUE)
|
||||||
|
safe_log(paste("Yield prediction RMSE (cross-validated):", round(rmse_value, 2), "t/ha"))
|
||||||
|
} else {
|
||||||
|
# Fallback: compute in-sample RMSE if CV results unavailable, but label it clearly
|
||||||
|
rmse_value <- sqrt(mean((pred_ffs_rf$predicted_Tcha - CI_and_yield_test$tonnage_ha)^2, na.rm = TRUE))
|
||||||
|
safe_log(paste("Yield prediction RMSE (in-sample/training):", round(rmse_value, 2), "t/ha"))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Predict yields for current season (mature fields >= DAH_MATURITY_THRESHOLD days)
|
||||||
|
if (nrow(prediction_yields) > 0) {
|
||||||
|
pred_rf_current_season <- prepare_predictions(
|
||||||
|
stats::predict(model_ffs_rf, newdata = prediction_yields),
|
||||||
|
prediction_yields
|
||||||
|
) %>%
|
||||||
|
dplyr::filter(Age_days >= DAH_MATURITY_THRESHOLD) %>%
|
||||||
|
dplyr::select(c("field", "Age_days", "predicted_Tcha", "season"))
|
||||||
|
} else {
|
||||||
|
pred_rf_current_season <- data.frame()
|
||||||
|
}
|
||||||
|
|
||||||
|
# Calculate summary statistics for KPI
|
||||||
|
if (nrow(pred_rf_current_season) > 0) {
|
||||||
|
safe_log(paste("Number of fields with yield predictions:", nrow(pred_rf_current_season)))
|
||||||
|
safe_log(paste("Predicted yield range:",
|
||||||
|
round(min(pred_rf_current_season$predicted_Tcha, na.rm = TRUE), 1),
|
||||||
|
"-",
|
||||||
|
round(max(pred_rf_current_season$predicted_Tcha, na.rm = TRUE), 1),
|
||||||
|
"t/ha"))
|
||||||
|
|
||||||
|
# Calculate quartiles for grouping
|
||||||
|
yield_quartiles <- quantile(pred_rf_current_season$predicted_Tcha,
|
||||||
|
probs = c(0.25, 0.5, 0.75), na.rm = TRUE)
|
||||||
|
|
||||||
|
# Count fields in each quartile
|
||||||
|
top_25_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[3], na.rm = TRUE)
|
||||||
|
average_count <- sum(pred_rf_current_season$predicted_Tcha >= yield_quartiles[1] &
|
||||||
|
pred_rf_current_season$predicted_Tcha < yield_quartiles[3], na.rm = TRUE)
|
||||||
|
lowest_25_count <- sum(pred_rf_current_season$predicted_Tcha < yield_quartiles[1], na.rm = TRUE)
|
||||||
|
|
||||||
|
# Calculate total area
|
||||||
|
if (!inherits(field_boundaries, "SpatVector")) {
|
||||||
|
field_boundaries_vect <- terra::vect(field_boundaries)
|
||||||
|
} else {
|
||||||
|
field_boundaries_vect <- field_boundaries
|
||||||
|
}
|
||||||
|
|
||||||
|
# Handle both sf and SpatVector inputs for area calculation
|
||||||
|
if (inherits(field_boundaries, "sf")) {
|
||||||
|
field_boundaries_projected <- sf::st_transform(field_boundaries, "EPSG:6933")
|
||||||
|
field_areas <- sf::st_area(field_boundaries_projected) / 10000 # m² to hectares
|
||||||
|
} else {
|
||||||
|
field_boundaries_projected <- terra::project(field_boundaries_vect, "EPSG:6933")
|
||||||
|
field_areas <- terra::expanse(field_boundaries_projected) / 10000
|
||||||
|
}
|
||||||
|
total_area <- sum(as.numeric(field_areas))
|
||||||
|
|
||||||
|
safe_log(paste("Total area:", round(total_area, 1), "hectares"))
|
||||||
|
|
||||||
|
# Build summary result
|
||||||
|
result <- data.frame(
|
||||||
|
field_groups = c("Top 25%", "Average", "Lowest 25%", "Total area forecasted"),
|
||||||
|
count = c(top_25_count, average_count, lowest_25_count, nrow(field_boundaries)),
|
||||||
|
value = c(round(yield_quartiles[3], 1), round(yield_quartiles[2], 1),
|
||||||
|
round(yield_quartiles[1], 1), round(total_area, 1)),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
# Prepare field-level results
|
||||||
|
field_level_results <- pred_rf_current_season %>%
|
||||||
|
dplyr::select(field, Age_days, predicted_Tcha, season) %>%
|
||||||
|
dplyr::rename(yield_forecast_t_ha = predicted_Tcha)
|
||||||
|
|
||||||
|
safe_log("✓ Yield prediction complete")
|
||||||
|
return(list(summary = result, field_results = field_level_results))
|
||||||
|
} else {
|
||||||
|
safe_log(paste("No fields meet maturity threshold (DAH >=", DAH_MATURITY_THRESHOLD, ") for prediction"), "WARNING")
|
||||||
|
return(list(summary = create_fallback_result(field_boundaries)$summary,
|
||||||
|
field_results = data.frame()))
|
||||||
|
}
|
||||||
|
|
||||||
|
}, error = function(e) {
|
||||||
|
safe_log(paste("Error in yield prediction:", e$message), "ERROR")
|
||||||
|
return(create_fallback_result(field_boundaries))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -2,7 +2,7 @@
|
||||||
params:
|
params:
|
||||||
ref: "word-styles-reference-var1.docx"
|
ref: "word-styles-reference-var1.docx"
|
||||||
output_file: CI_report.docx
|
output_file: CI_report.docx
|
||||||
report_date: "2025-09-30"
|
report_date: "2026-02-04"
|
||||||
data_dir: "angata"
|
data_dir: "angata"
|
||||||
mail_day: "Wednesday"
|
mail_day: "Wednesday"
|
||||||
borders: FALSE
|
borders: FALSE
|
||||||
|
|
@ -61,6 +61,10 @@ suppressPackageStartupMessages({
|
||||||
library(flextable) # For formatted tables in Word output (professional table styling)
|
library(flextable) # For formatted tables in Word output (professional table styling)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Configure tmap for static plotting (required for legend.outside to work)
|
||||||
|
tmap_mode("plot") # CRITICAL: Must be "plot" mode for legends outside to render properly
|
||||||
|
tmap_options(component.autoscale = FALSE)
|
||||||
|
|
||||||
# Load custom utility functions
|
# Load custom utility functions
|
||||||
tryCatch({
|
tryCatch({
|
||||||
source("r_app/report_utils.R")
|
source("r_app/report_utils.R")
|
||||||
|
|
@ -140,12 +144,12 @@ week_suffix <- paste0("week", sprintf("%02d", current_week), "_", current_year)
|
||||||
|
|
||||||
# Candidate filenames we expect (exact and common variants)
|
# Candidate filenames we expect (exact and common variants)
|
||||||
expected_summary_names <- c(
|
expected_summary_names <- c(
|
||||||
paste0(project_dir, "_kpi_summary_tables_", week_suffix, ".rds"),
|
paste0(project_dir, "_field_analysis_", week_suffix, ".rds"),
|
||||||
paste0(project_dir, "_kpi_summary_tables_", date_suffix, ".rds"),
|
paste0(project_dir, "_field_analysis_", date_suffix, ".rds"),
|
||||||
paste0(project_dir, "_kpi_summary_tables.rds"),
|
paste0(project_dir, "_field_analysis.rds"),
|
||||||
"kpi_summary_tables.rds",
|
"field_analysis.rds",
|
||||||
paste0("kpi_summary_tables_", week_suffix, ".rds"),
|
paste0("field_analysis_", week_suffix, ".rds"),
|
||||||
paste0("kpi_summary_tables_", date_suffix, ".rds")
|
paste0("field_analysis_", date_suffix, ".rds")
|
||||||
)
|
)
|
||||||
|
|
||||||
expected_field_details_names <- c(
|
expected_field_details_names <- c(
|
||||||
|
|
@ -165,13 +169,26 @@ try_load_from_dir <- function(dir, candidates) {
|
||||||
return(NULL)
|
return(NULL)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Try primary directory first
|
# Try primary directory first (field_level/)
|
||||||
summary_file <- try_load_from_dir(kpi_data_dir, expected_summary_names)
|
summary_file <- try_load_from_dir(kpi_data_dir, expected_summary_names)
|
||||||
field_details_file <- try_load_from_dir(kpi_data_dir, expected_field_details_names)
|
field_details_file <- try_load_from_dir(kpi_data_dir, expected_field_details_names)
|
||||||
|
|
||||||
# If not found, perform a workspace-wide search (slower) limited to laravel_app storage
|
# If not found, try parent directory (kpis/) where RDS is often saved by Script 80
|
||||||
if (is.null(summary_file) || is.null(field_details_file)) {
|
if (is.null(summary_file) || is.null(field_details_file)) {
|
||||||
safe_log(paste("KPI files not found in", kpi_data_dir, "—searching workspace for RDS files"))
|
parent_dir <- dirname(kpi_data_dir) # One level up: reports/kpis/
|
||||||
|
safe_log(paste("KPI files not found in", kpi_data_dir, "—trying parent directory:", parent_dir))
|
||||||
|
|
||||||
|
if (is.null(summary_file)) {
|
||||||
|
summary_file <- try_load_from_dir(parent_dir, expected_summary_names)
|
||||||
|
}
|
||||||
|
if (is.null(field_details_file)) {
|
||||||
|
field_details_file <- try_load_from_dir(parent_dir, expected_field_details_names)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# If still not found, perform a workspace-wide search (slower) limited to laravel_app storage
|
||||||
|
if (is.null(summary_file) || is.null(field_details_file)) {
|
||||||
|
safe_log(paste("KPI files not found in", kpi_data_dir, "or parent directory—searching workspace for RDS files"))
|
||||||
# List rds files under laravel_app/storage/app recursively
|
# List rds files under laravel_app/storage/app recursively
|
||||||
files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "\\.rds$", recursive = TRUE, full.names = TRUE)
|
files <- list.files(path = file.path("laravel_app", "storage", "app"), pattern = "\\.rds$", recursive = TRUE, full.names = TRUE)
|
||||||
# Try to match by expected names
|
# Try to match by expected names
|
||||||
|
|
@ -483,7 +500,7 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
|
|
||||||
## 1.1 Overview of cane supply area, showing zones with number of acres being harvest ready
|
## 1.1 Overview of cane supply area, showing zones with number of acres being harvest ready
|
||||||
|
|
||||||
```{r overview_map, fig.width=9, fig.height=7, fig.align="center", echo=FALSE, message=FALSE, warning=FALSE}
|
```{r overview_map, fig.width=7, fig.height=6, fig.align="center", echo=FALSE, message=FALSE, warning=FALSE}
|
||||||
# Create a hexbin overview map with ggplot
|
# Create a hexbin overview map with ggplot
|
||||||
tryCatch({
|
tryCatch({
|
||||||
# Use per-field field_analysis data from RDS (already loaded in load_kpi_data chunk)
|
# Use per-field field_analysis data from RDS (already loaded in load_kpi_data chunk)
|
||||||
|
|
@ -510,6 +527,8 @@ tryCatch({
|
||||||
TARGET_CRS <- 4326 # WGS84 for web basemap compatibility (was 32736 UTM)
|
TARGET_CRS <- 4326 # WGS84 for web basemap compatibility (was 32736 UTM)
|
||||||
|
|
||||||
# Process polygons into points
|
# Process polygons into points
|
||||||
|
# IMPORTANT: Calculate centroids in projected CRS (UTM 36S for southern Africa) to avoid
|
||||||
|
# st_centroid warnings about longitude/latitude data, then transform back to WGS84
|
||||||
points_processed <- field_boundaries_sf %>%
|
points_processed <- field_boundaries_sf %>%
|
||||||
st_make_valid() %>%
|
st_make_valid() %>%
|
||||||
mutate(
|
mutate(
|
||||||
|
|
@ -525,8 +544,9 @@ tryCatch({
|
||||||
analysis_data %>% select(Field_id, Status_trigger),
|
analysis_data %>% select(Field_id, Status_trigger),
|
||||||
by = c("field" = "Field_id")
|
by = c("field" = "Field_id")
|
||||||
) %>%
|
) %>%
|
||||||
st_transform(crs = TARGET_CRS) %>%
|
st_transform(crs = 32736) %>% # UTM zone 36S (southern Africa)
|
||||||
st_centroid() %>%
|
st_centroid() %>%
|
||||||
|
st_transform(crs = TARGET_CRS) %>%
|
||||||
bind_cols(st_coordinates(.))
|
bind_cols(st_coordinates(.))
|
||||||
|
|
||||||
# Validate coordinates - check for NaN, Inf, or missing values
|
# Validate coordinates - check for NaN, Inf, or missing values
|
||||||
|
|
@ -553,30 +573,8 @@ tryCatch({
|
||||||
labels_vec[length(labels_vec)] <- ">30"
|
labels_vec[length(labels_vec)] <- ">30"
|
||||||
labels_vec[1] <- "0.1"
|
labels_vec[1] <- "0.1"
|
||||||
|
|
||||||
# Create dummy point to anchor hexbin grids for consistency
|
|
||||||
dummy_point <- data.frame(
|
|
||||||
field = NA,
|
|
||||||
sub_field = NA,
|
|
||||||
area_ac = 0,
|
|
||||||
Status_trigger = NA,
|
|
||||||
X = min(points_processed$X, na.rm = TRUE),
|
|
||||||
Y = min(points_processed$Y, na.rm = TRUE),
|
|
||||||
geometry = NA
|
|
||||||
)
|
|
||||||
|
|
||||||
# Convert dummy point to sf and add xy coordinates
|
|
||||||
dummy_point <- st_as_sf(dummy_point, coords = c("X", "Y"), crs = st_crs(points_ready))
|
|
||||||
dummy_point <- cbind(dummy_point, st_coordinates(dummy_point))
|
|
||||||
|
|
||||||
# Mark dummy point with anchor flag before binding
|
|
||||||
# Referenced: dummy_point, st_as_sf, st_coordinates, area_ac
|
|
||||||
dummy_point$anchor_dummy <- TRUE
|
|
||||||
|
|
||||||
# Add dummy point to ensure consistent hexbin grid anchoring
|
|
||||||
points_ready <- rbind(points_ready, dummy_point)
|
|
||||||
points_not_ready <- rbind(points_not_ready, dummy_point)
|
|
||||||
|
|
||||||
# Calculate data bounds for coordinate limits (prevents basemap scale conflicts)
|
# Calculate data bounds for coordinate limits (prevents basemap scale conflicts)
|
||||||
|
# Use actual data bounds without dummy points to avoid column mismatch
|
||||||
x_limits <- c(
|
x_limits <- c(
|
||||||
floor(min(points_processed$X, na.rm = TRUE) * 20) / 20, # Round down to avoid edge clipping
|
floor(min(points_processed$X, na.rm = TRUE) * 20) / 20, # Round down to avoid edge clipping
|
||||||
ceiling(max(points_processed$X, na.rm = TRUE) * 20) / 20 # Round up for padding
|
ceiling(max(points_processed$X, na.rm = TRUE) * 20) / 20 # Round up for padding
|
||||||
|
|
@ -653,22 +651,17 @@ tryCatch({
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Remove dummy point rows after grid anchoring to prevent dummy cells in plot
|
|
||||||
# Referenced: points_ready, points_not_ready, anchor_dummy flag filtering
|
|
||||||
points_ready <- points_ready %>% filter(!anchor_dummy, na.rm = TRUE)
|
|
||||||
points_not_ready <- points_not_ready %>% filter(!anchor_dummy, na.rm = TRUE)
|
|
||||||
|
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
warning("Error creating hexbin map:", e$message)
|
warning("Error creating hexbin map:", e$message)
|
||||||
})
|
})
|
||||||
```
|
```
|
||||||
|
|
||||||
\newpage
|
|
||||||
## 1.2 Key Performance Indicators
|
## 1.2 Key Performance Indicators
|
||||||
|
|
||||||
```{r combined_kpi_table, echo=FALSE, results='asis'}
|
```{r combined_kpi_table, echo=FALSE, results='asis'}
|
||||||
# Create summary KPI table from field_analysis_summary data
|
# Create consolidated KPI table from field_analysis data
|
||||||
# This shows: Phases, Triggers, Area Change, and Total Farm acreage
|
# Shows: Phases, Triggers, Area Change, Cloud Influence, and Total Farm
|
||||||
|
|
||||||
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
|
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
|
||||||
# Load field analysis data
|
# Load field analysis data
|
||||||
|
|
@ -679,24 +672,46 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
!is.data.frame(summary_data$field_analysis_summary)) {
|
!is.data.frame(summary_data$field_analysis_summary)) {
|
||||||
|
|
||||||
# Create summary by aggregating by Status_Alert and Phase categories
|
# Create summary by aggregating by Status_Alert and Phase categories
|
||||||
# This groups fields by their phase and status to show distribution
|
|
||||||
phase_summary <- field_analysis_df %>%
|
phase_summary <- field_analysis_df %>%
|
||||||
filter(!is.na(Phase)) %>%
|
filter(!is.na(Phase)) %>%
|
||||||
group_by(Phase) %>%
|
group_by(Phase) %>%
|
||||||
summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>%
|
summarise(
|
||||||
|
Acreage = sum(Acreage, na.rm = TRUE),
|
||||||
|
Field_count = n_distinct(Field_id),
|
||||||
|
.groups = "drop"
|
||||||
|
) %>%
|
||||||
mutate(Category = Phase) %>%
|
mutate(Category = Phase) %>%
|
||||||
select(Category, Acreage)
|
select(Category, Acreage, Field_count)
|
||||||
|
|
||||||
# Try to create Status trigger summary - use Status_Alert if available, otherwise use empty
|
# Create Status trigger summary - includes both active alerts and "No active triggers"
|
||||||
trigger_summary <- tryCatch({
|
trigger_summary <- tryCatch({
|
||||||
field_analysis_df %>%
|
# Active alerts (fields with non-NA Status_Alert)
|
||||||
|
active_alerts <- field_analysis_df %>%
|
||||||
filter(!is.na(Status_Alert), Status_Alert != "") %>%
|
filter(!is.na(Status_Alert), Status_Alert != "") %>%
|
||||||
group_by(Status_Alert) %>%
|
group_by(Status_Alert) %>%
|
||||||
summarise(Acreage = sum(Acreage, na.rm = TRUE), .groups = "drop") %>%
|
summarise(
|
||||||
|
Acreage = sum(Acreage, na.rm = TRUE),
|
||||||
|
Field_count = n_distinct(Field_id),
|
||||||
|
.groups = "drop"
|
||||||
|
) %>%
|
||||||
mutate(Category = Status_Alert) %>%
|
mutate(Category = Status_Alert) %>%
|
||||||
select(Category, Acreage)
|
select(Category, Acreage, Field_count)
|
||||||
|
|
||||||
|
# No active triggers (fields with NA Status_Alert)
|
||||||
|
no_alerts <- field_analysis_df %>%
|
||||||
|
filter(is.na(Status_Alert) | Status_Alert == "") %>%
|
||||||
|
summarise(
|
||||||
|
Acreage = sum(Acreage, na.rm = TRUE),
|
||||||
|
Field_count = n_distinct(Field_id),
|
||||||
|
.groups = "drop"
|
||||||
|
) %>%
|
||||||
|
mutate(Category = "No active triggers") %>%
|
||||||
|
select(Category, Acreage, Field_count)
|
||||||
|
|
||||||
|
# Combine active alerts and no-alert fields
|
||||||
|
bind_rows(active_alerts, no_alerts)
|
||||||
}, error = function(e) {
|
}, error = function(e) {
|
||||||
data.frame(Category = character(), Acreage = numeric())
|
data.frame(Category = character(), Acreage = numeric(), Field_count = numeric())
|
||||||
})
|
})
|
||||||
|
|
||||||
# Combine into summary
|
# Combine into summary
|
||||||
|
|
@ -709,25 +724,38 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
|
|
||||||
# Phase names and trigger names to extract from summary
|
# Phase names and trigger names to extract from summary
|
||||||
phase_names <- c("Germination", "Tillering", "Grand Growth", "Maturation", "Unknown Phase")
|
phase_names <- c("Germination", "Tillering", "Grand Growth", "Maturation", "Unknown Phase")
|
||||||
trigger_names <- c("Harvest Ready", "Strong Recovery", "Growth On Track", "Stress Detected",
|
# Trigger names now include both active alerts AND "No active triggers" (calculated dynamically above)
|
||||||
"Germination Complete", "Germination Started", "No Active Trigger",
|
trigger_names <- c("harvest_ready", "harvested_bare", "stress_detected",
|
||||||
"Ready for harvest-check", "Strong decline in crop health", "Harvested/bare")
|
"germination_delayed", "growth_on_track", "No active triggers")
|
||||||
|
|
||||||
# Extract phase distribution - match on category names directly
|
# Extract phase distribution - match on category names directly
|
||||||
if (!is.null(field_analysis_summary) && nrow(field_analysis_summary) > 0) {
|
if (!is.null(field_analysis_summary) && nrow(field_analysis_summary) > 0) {
|
||||||
|
# Phase rows with field count
|
||||||
phase_rows <- field_analysis_summary %>%
|
phase_rows <- field_analysis_summary %>%
|
||||||
filter(Category %in% phase_names) %>%
|
filter(Category %in% phase_names) %>%
|
||||||
select(Category, Acreage) %>%
|
select(Category, Acreage, Field_count) %>%
|
||||||
mutate(KPI_Group = "PHASE DISTRIBUTION", .before = 1)
|
mutate(KPI_Group = "PHASE DISTRIBUTION", .before = 1)
|
||||||
|
|
||||||
# Extract status triggers - match on category names directly
|
# Trigger rows with field count
|
||||||
trigger_rows <- field_analysis_summary %>%
|
trigger_rows <- field_analysis_summary %>%
|
||||||
filter(Category %in% trigger_names) %>%
|
filter(Category %in% trigger_names) %>%
|
||||||
select(Category, Acreage) %>%
|
select(Category, Acreage, Field_count) %>%
|
||||||
mutate(KPI_Group = "STATUS TRIGGERS", .before = 1)
|
mutate(KPI_Group = "OPERATIONAL ALERTS", .before = 1)
|
||||||
|
|
||||||
|
# If no triggers found, add a placeholder row
|
||||||
|
if (nrow(trigger_rows) == 0) {
|
||||||
|
trigger_rows <- data.frame(
|
||||||
|
KPI_Group = "OPERATIONAL ALERTS",
|
||||||
|
Category = "No active triggers",
|
||||||
|
Acreage = 0,
|
||||||
|
Field_count = 0,
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
# Calculate area change from field_analysis data
|
# Calculate area change from field_analysis data
|
||||||
total_acreage <- sum(field_analysis_df$Acreage, na.rm = TRUE)
|
total_acreage <- sum(field_analysis_df$Acreage, na.rm = TRUE)
|
||||||
|
total_fields <- n_distinct(field_analysis_df$Field_id)
|
||||||
|
|
||||||
# Parse Weekly_ci_change to determine improvement/decline
|
# Parse Weekly_ci_change to determine improvement/decline
|
||||||
parse_ci_change <- function(change_str) {
|
parse_ci_change <- function(change_str) {
|
||||||
|
|
@ -741,10 +769,20 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
|
|
||||||
field_analysis_df$ci_change_numeric <- sapply(field_analysis_df$Weekly_ci_change, parse_ci_change)
|
field_analysis_df$ci_change_numeric <- sapply(field_analysis_df$Weekly_ci_change, parse_ci_change)
|
||||||
|
|
||||||
improving_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric > 0.2], na.rm = TRUE)
|
# Area change rows with field count
|
||||||
declining_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric < -0.2], na.rm = TRUE)
|
improving_df <- field_analysis_df %>%
|
||||||
stable_acreage <- sum(field_analysis_df$Acreage[field_analysis_df$ci_change_numeric >= -0.2 &
|
filter(ci_change_numeric > 0.2)
|
||||||
field_analysis_df$ci_change_numeric <= 0.2], na.rm = TRUE)
|
stable_df <- field_analysis_df %>%
|
||||||
|
filter(ci_change_numeric >= -0.2 & ci_change_numeric <= 0.2)
|
||||||
|
declining_df <- field_analysis_df %>%
|
||||||
|
filter(ci_change_numeric < -0.2)
|
||||||
|
|
||||||
|
improving_acreage <- sum(improving_df$Acreage, na.rm = TRUE)
|
||||||
|
improving_field_count <- n_distinct(improving_df$Field_id)
|
||||||
|
stable_acreage <- sum(stable_df$Acreage, na.rm = TRUE)
|
||||||
|
stable_field_count <- n_distinct(stable_df$Field_id)
|
||||||
|
declining_acreage <- sum(declining_df$Acreage, na.rm = TRUE)
|
||||||
|
declining_field_count <- n_distinct(declining_df$Field_id)
|
||||||
|
|
||||||
improving_pct <- ifelse(total_acreage > 0, round(improving_acreage / total_acreage * 100, 1), 0)
|
improving_pct <- ifelse(total_acreage > 0, round(improving_acreage / total_acreage * 100, 1), 0)
|
||||||
declining_pct <- ifelse(total_acreage > 0, round(declining_acreage / total_acreage * 100, 1), 0)
|
declining_pct <- ifelse(total_acreage > 0, round(declining_acreage / total_acreage * 100, 1), 0)
|
||||||
|
|
@ -761,24 +799,54 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
KPI_Group = "AREA CHANGE",
|
KPI_Group = "AREA CHANGE",
|
||||||
Category = c("Improving", "Stable", "Declining"),
|
Category = c("Improving", "Stable", "Declining"),
|
||||||
Acreage = c(round(improving_acreage, 2), round(stable_acreage, 2), round(declining_acreage, 2)),
|
Acreage = c(round(improving_acreage, 2), round(stable_acreage, 2), round(declining_acreage, 2)),
|
||||||
|
Field_count = c(improving_field_count, stable_field_count, declining_field_count),
|
||||||
Percent = c(paste0(improving_pct, "%"), paste0(stable_pct, "%"), paste0(declining_pct, "%")),
|
Percent = c(paste0(improving_pct, "%"), paste0(stable_pct, "%"), paste0(declining_pct, "%")),
|
||||||
stringsAsFactors = FALSE
|
stringsAsFactors = FALSE
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Cloud influence rows with field count - aggregate by Cloud_category
|
||||||
|
cloud_rows <- tryCatch({
|
||||||
|
field_analysis_df %>%
|
||||||
|
filter(!is.na(Cloud_category)) %>%
|
||||||
|
group_by(Cloud_category) %>%
|
||||||
|
summarise(
|
||||||
|
Acreage = sum(Acreage, na.rm = TRUE),
|
||||||
|
Field_count = n_distinct(Field_id),
|
||||||
|
.groups = "drop"
|
||||||
|
) %>%
|
||||||
|
mutate(
|
||||||
|
KPI_Group = "CLOUD INFLUENCE",
|
||||||
|
Category = Cloud_category,
|
||||||
|
Percent = paste0(round(Acreage / total_acreage * 100, 1), "%"),
|
||||||
|
Acreage = round(Acreage, 2)
|
||||||
|
) %>%
|
||||||
|
select(KPI_Group, Category, Acreage, Field_count, Percent)
|
||||||
|
}, error = function(e) {
|
||||||
|
data.frame(
|
||||||
|
KPI_Group = character(),
|
||||||
|
Category = character(),
|
||||||
|
Acreage = numeric(),
|
||||||
|
Field_count = numeric(),
|
||||||
|
Percent = character()
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
# Total farm row
|
# Total farm row
|
||||||
total_row <- data.frame(
|
total_row <- data.frame(
|
||||||
KPI_Group = "TOTAL FARM",
|
KPI_Group = "TOTAL FARM",
|
||||||
Category = "Total Acreage",
|
Category = "Total Acreage",
|
||||||
Acreage = round(total_acreage, 2),
|
Acreage = round(total_acreage, 2),
|
||||||
|
Field_count = total_fields,
|
||||||
Percent = "100%",
|
Percent = "100%",
|
||||||
stringsAsFactors = FALSE
|
stringsAsFactors = FALSE
|
||||||
)
|
)
|
||||||
|
|
||||||
# Combine all rows with percentages for all
|
# Combine all rows
|
||||||
combined_df <- bind_rows(
|
combined_df <- bind_rows(
|
||||||
phase_pcts,
|
phase_pcts,
|
||||||
trigger_pcts,
|
trigger_pcts,
|
||||||
area_change_rows,
|
area_change_rows,
|
||||||
|
cloud_rows,
|
||||||
total_row
|
total_row
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -789,7 +857,7 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
KPI_display = if_else(row_number() == 1, KPI_Group, "")
|
KPI_display = if_else(row_number() == 1, KPI_Group, "")
|
||||||
) %>%
|
) %>%
|
||||||
ungroup() %>%
|
ungroup() %>%
|
||||||
select(KPI_display, Category, Acreage, Percent)
|
select(KPI_display, Category, Acreage, Percent, Field_count)
|
||||||
|
|
||||||
# Render as flextable with merged cells
|
# Render as flextable with merged cells
|
||||||
ft <- flextable(combined_df) %>%
|
ft <- flextable(combined_df) %>%
|
||||||
|
|
@ -797,7 +865,8 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
KPI_display = "KPI Category",
|
KPI_display = "KPI Category",
|
||||||
Category = "Item",
|
Category = "Item",
|
||||||
Acreage = "Acreage",
|
Acreage = "Acreage",
|
||||||
Percent = "Percent"
|
Percent = "Percentage of total fields",
|
||||||
|
Field_count = "# Fields"
|
||||||
) %>%
|
) %>%
|
||||||
merge_v(j = "KPI_display") %>%
|
merge_v(j = "KPI_display") %>%
|
||||||
autofit()
|
autofit()
|
||||||
|
|
@ -807,8 +876,9 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
phase_count <- nrow(phase_rows)
|
phase_count <- nrow(phase_rows)
|
||||||
trigger_count <- nrow(trigger_rows)
|
trigger_count <- nrow(trigger_rows)
|
||||||
area_count <- nrow(area_change_rows)
|
area_count <- nrow(area_change_rows)
|
||||||
|
cloud_count <- nrow(cloud_rows)
|
||||||
|
|
||||||
# Add lines after phases, triggers, and area change groups (before totals)
|
# Add lines after phases, triggers, area change, and cloud groups (before totals)
|
||||||
if (phase_count > 0) {
|
if (phase_count > 0) {
|
||||||
ft <- ft %>% hline(i = phase_count, border = officer::fp_border(width = 1))
|
ft <- ft %>% hline(i = phase_count, border = officer::fp_border(width = 1))
|
||||||
}
|
}
|
||||||
|
|
@ -818,6 +888,9 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
if (area_count > 0) {
|
if (area_count > 0) {
|
||||||
ft <- ft %>% hline(i = phase_count + trigger_count + area_count, border = officer::fp_border(width = 1))
|
ft <- ft %>% hline(i = phase_count + trigger_count + area_count, border = officer::fp_border(width = 1))
|
||||||
}
|
}
|
||||||
|
if (cloud_count > 0) {
|
||||||
|
ft <- ft %>% hline(i = phase_count + trigger_count + area_count + cloud_count, border = officer::fp_border(width = 1))
|
||||||
|
}
|
||||||
|
|
||||||
ft
|
ft
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -828,40 +901,6 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
## Cloud Coverage Summary
|
|
||||||
|
|
||||||
```{r cloud_coverage_summary, echo=FALSE}
|
|
||||||
# Display cloud coverage summary aggregated by category
|
|
||||||
# Cloud coverage data is included in the field_analysis RDS from Script 80
|
|
||||||
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
|
|
||||||
field_analysis_df <- summary_data$field_analysis
|
|
||||||
|
|
||||||
# Aggregate cloud coverage by category
|
|
||||||
cloud_summary <- field_analysis_df %>%
|
|
||||||
filter(!is.na(Cloud_category)) %>%
|
|
||||||
group_by(Cloud_category) %>%
|
|
||||||
summarise(
|
|
||||||
"Number of Fields" = n(),
|
|
||||||
"Total Acreage" = round(sum(Acreage, na.rm = TRUE), 1),
|
|
||||||
.groups = "drop"
|
|
||||||
) %>%
|
|
||||||
rename("Cloud Category" = Cloud_category) %>%
|
|
||||||
arrange(`Cloud Category`)
|
|
||||||
if (nrow(cloud_summary) > 0) {
|
|
||||||
# Create flextable
|
|
||||||
ft <- flextable(cloud_summary) %>%
|
|
||||||
autofit() %>%
|
|
||||||
theme_vanilla()
|
|
||||||
|
|
||||||
ft
|
|
||||||
} else {
|
|
||||||
cat("Cloud coverage data not available for summary.\n")
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
cat("Field analysis data not available for cloud coverage summary.\n")
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
```{r data, message=TRUE, warning=TRUE, include=FALSE}
|
```{r data, message=TRUE, warning=TRUE, include=FALSE}
|
||||||
# All data comes from the field analysis performed in 09_field_analysis_weekly.R
|
# All data comes from the field analysis performed in 09_field_analysis_weekly.R
|
||||||
# The report renders KPI tables and field summaries from that data
|
# The report renders KPI tables and field summaries from that data
|
||||||
|
|
@ -951,9 +990,9 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
```{r ci_fig, echo=FALSE, fig.align='right', out.width='40%', fig.cap="Chlorophyll Index Example"}
|
<div align="center">
|
||||||
knitr::include_graphics("CI_graph_example.png")
|

|
||||||
```
|
</div>
|
||||||
|
|
||||||
|
|
||||||
### Data File Structure and Columns
|
### Data File Structure and Columns
|
||||||
|
|
@ -1025,15 +1064,42 @@ Both algorithms are not always in sync, and can have contradictory results. Wide
|
||||||
## Report Metadata
|
## Report Metadata
|
||||||
|
|
||||||
```{r report_metadata, echo=FALSE}
|
```{r report_metadata, echo=FALSE}
|
||||||
|
# Calculate total area from field analysis data
|
||||||
|
total_area_acres <- 0
|
||||||
|
if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
|
||||||
|
total_area_acres <- sum(summary_data$field_analysis$Acreage, na.rm = TRUE)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Calculate total fields
|
||||||
|
total_fields_count <- 0
|
||||||
|
if (exists("AllPivots0")) {
|
||||||
|
total_fields_count <- nrow(AllPivots0 %>% filter(!is.na(field)) %>% group_by(field) %>% summarise())
|
||||||
|
} else if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% names(summary_data)) {
|
||||||
|
total_fields_count <- n_distinct(summary_data$field_analysis$Field_id)
|
||||||
|
}
|
||||||
|
|
||||||
metadata_info <- data.frame(
|
metadata_info <- data.frame(
|
||||||
Metric = c("Report Generated", "Data Source", "Analysis Period", "Total Fields", "Next Update"),
|
Metric = c(
|
||||||
|
"Report Generated",
|
||||||
|
"Data Source",
|
||||||
|
"Analysis Period",
|
||||||
|
"Total Fields [number]",
|
||||||
|
"Total Area [acres]",
|
||||||
|
"Next Update",
|
||||||
|
"Service provided",
|
||||||
|
"Starting date service"
|
||||||
|
),
|
||||||
Value = c(
|
Value = c(
|
||||||
format(Sys.time(), "%Y-%m-%d %H:%M:%S"),
|
format(Sys.time(), "%Y-%m-%d %H:%M:%S"),
|
||||||
paste("Project", toupper(project_dir)),
|
paste("Project", toupper(project_dir)),
|
||||||
paste("Week", current_week, "of", year),
|
paste("Week", current_week, "of", year),
|
||||||
ifelse(exists("AllPivots0"), nrow(AllPivots0 %>% filter(!is.na(field)) %>% group_by(field) %>% summarise()), "Unknown"),
|
ifelse(total_fields_count > 0, total_fields_count, "Unknown"),
|
||||||
"Next Wednesday"
|
ifelse(total_area_acres > 0, round(total_area_acres, 0), "Unknown"),
|
||||||
)
|
"Next Wednesday",
|
||||||
|
"Cane Supply Office - Weekly",
|
||||||
|
"23 dec 2025"
|
||||||
|
),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
)
|
)
|
||||||
|
|
||||||
ft <- flextable(metadata_info) %>%
|
ft <- flextable(metadata_info) %>%
|
||||||
|
|
@ -1043,4 +1109,4 @@ ft <- flextable(metadata_info) %>%
|
||||||
ft
|
ft
|
||||||
```
|
```
|
||||||
|
|
||||||
*This report was automatically generated by the SmartCane monitoring system. For questions or additional analysis, please contact the technical team.*
|
*This report was automatically generated by the SmartCane monitoring system. For questions or additional analysis, please contact the technical team at info@smartcane.ag.*
|
||||||
|
|
@ -239,7 +239,7 @@
|
||||||
#
|
#
|
||||||
# OUTPUT:
|
# OUTPUT:
|
||||||
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
|
# - laravel_app/storage/app/{PROJECT}/ci_data_for_python.csv
|
||||||
# - Columns: field, sub_field, Date, FitData, DOY, value
|
# - Columns: field, sub_field, Date, FitData, DAH, value
|
||||||
#
|
#
|
||||||
# PARAMETERS:
|
# PARAMETERS:
|
||||||
# PROJECT: angata, chemba, xinavane, esa, simba
|
# PROJECT: angata, chemba, xinavane, esa, simba
|
||||||
|
|
@ -438,8 +438,8 @@
|
||||||
# rmarkdown::render(
|
# rmarkdown::render(
|
||||||
rmarkdown::render(
|
rmarkdown::render(
|
||||||
"r_app/90_CI_report_with_kpis_agronomic_support.Rmd",
|
"r_app/90_CI_report_with_kpis_agronomic_support.Rmd",
|
||||||
params = list(data_dir = "aura", report_date = as.Date("2022-12-08")),
|
params = list(data_dir = "aura", report_date = as.Date("2026-02-04")),
|
||||||
output_file = "SmartCane_Report_agronomic_support_aura_2022-12-08.docx",
|
output_file = "SmartCane_Report_agronomic_support_aura_2026-02-04.docx",
|
||||||
output_dir = "laravel_app/storage/app/aura/reports"
|
output_dir = "laravel_app/storage/app/aura/reports"
|
||||||
)
|
)
|
||||||
#
|
#
|
||||||
|
|
@ -450,7 +450,7 @@ rmarkdown::render(
|
||||||
rmarkdown::render(
|
rmarkdown::render(
|
||||||
"r_app/91_CI_report_with_kpis_cane_supply.Rmd",
|
"r_app/91_CI_report_with_kpis_cane_supply.Rmd",
|
||||||
params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
|
params = list(data_dir = "angata", report_date = as.Date("2026-02-04")),
|
||||||
output_file = "SmartCane_Report_basemap_test.docx",
|
output_file = "SmartCane_Report_cane_supply_angata_2026-02-04.docx",
|
||||||
output_dir = "laravel_app/storage/app/angata/reports"
|
output_dir = "laravel_app/storage/app/angata/reports"
|
||||||
)
|
)
|
||||||
#
|
#
|
||||||
|
|
|
||||||
|
|
@ -33,8 +33,14 @@ suppressPackageStartupMessages({
|
||||||
})
|
})
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# SECTION 2: CLIENT TYPE MAPPING & CONFIGURATION
|
# SECTION 2: GLOBAL AGRONOMIC THRESHOLDS & CLIENT TYPE MAPPING
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
|
# Maturity threshold for yield prediction: crop age in Days After Harvest (DAH)
|
||||||
|
# Only fields >= DAH_MATURITY_THRESHOLD days old receive yield forecasts
|
||||||
|
# ~240 days ≈ 8 months, typical sugarcane maturity window
|
||||||
|
DAH_MATURITY_THRESHOLD <- 240
|
||||||
|
|
||||||
# Maps project names to client types for pipeline control
|
# Maps project names to client types for pipeline control
|
||||||
# This determines which scripts run and what outputs they produce
|
# This determines which scripts run and what outputs they produce
|
||||||
|
|
||||||
|
|
@ -45,7 +51,8 @@ CLIENT_TYPE_MAP <- list(
|
||||||
"esa" = "agronomic_support",
|
"esa" = "agronomic_support",
|
||||||
"simba" = "agronomic_support",
|
"simba" = "agronomic_support",
|
||||||
"john" = "agronomic_support",
|
"john" = "agronomic_support",
|
||||||
"huss" = "agronomic_support"
|
"huss" = "agronomic_support",
|
||||||
|
"aura" = "agronomic_support"
|
||||||
)
|
)
|
||||||
|
|
||||||
#' Get client type for a project
|
#' Get client type for a project
|
||||||
|
|
@ -162,13 +169,11 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") {
|
||||||
|
|
||||||
# TIER 5: MOSAICS (Script 40 output)
|
# TIER 5: MOSAICS (Script 40 output)
|
||||||
weekly_mosaic_dir <- here(laravel_storage_dir, "weekly_mosaic")
|
weekly_mosaic_dir <- here(laravel_storage_dir, "weekly_mosaic")
|
||||||
field_tiles_ci_dir <- here(weekly_mosaic_dir, "field_tiles_CI")
|
weekly_tile_max_dir <- here(laravel_storage_dir, "weekly_tile_max")
|
||||||
|
|
||||||
# TIER 6: KPI & REPORTING (Scripts 80/90/91 output)
|
# TIER 6: KPI & REPORTING (Scripts 80/90/91 output)
|
||||||
reports_dir <- here(laravel_storage_dir, "reports")
|
reports_dir <- here(laravel_storage_dir, "reports")
|
||||||
kpi_reports_dir <- here(reports_dir, "kpis", "field_level")
|
kpi_reports_dir <- here(reports_dir, "kpis")
|
||||||
kpi_field_stats_dir <- here(reports_dir, "kpis", "field_stats")
|
|
||||||
kpi_field_analysis_dir <- here(reports_dir, "kpis", "field_analysis")
|
|
||||||
|
|
||||||
# TIER 7: SUPPORT (various scripts)
|
# TIER 7: SUPPORT (various scripts)
|
||||||
vrt_dir <- here(data_dir, "vrt") # Virtual Raster files created during CI extraction
|
vrt_dir <- here(data_dir, "vrt") # Virtual Raster files created during CI extraction
|
||||||
|
|
@ -180,8 +185,8 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") {
|
||||||
merged_tif_folder, field_tiles_dir, field_tiles_ci_dir,
|
merged_tif_folder, field_tiles_dir, field_tiles_ci_dir,
|
||||||
extracted_ci_base_dir, daily_ci_vals_dir, cumulative_ci_vals_dir, ci_for_python_dir,
|
extracted_ci_base_dir, daily_ci_vals_dir, cumulative_ci_vals_dir, ci_for_python_dir,
|
||||||
growth_model_interpolated_dir,
|
growth_model_interpolated_dir,
|
||||||
weekly_mosaic_dir, field_tiles_ci_dir,
|
weekly_mosaic_dir, weekly_tile_max_dir,
|
||||||
reports_dir, kpi_reports_dir, kpi_field_stats_dir, kpi_field_analysis_dir,
|
reports_dir, kpi_reports_dir,
|
||||||
data_dir, vrt_dir, harvest_dir, log_dir
|
data_dir, vrt_dir, harvest_dir, log_dir
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -215,13 +220,11 @@ setup_project_directories <- function(project_dir, data_source = "merged_tif") {
|
||||||
|
|
||||||
# TIER 5: Mosaics
|
# TIER 5: Mosaics
|
||||||
weekly_mosaic_dir = weekly_mosaic_dir,
|
weekly_mosaic_dir = weekly_mosaic_dir,
|
||||||
field_tiles_ci_dir = field_tiles_ci_dir,
|
weekly_tile_max_dir = weekly_tile_max_dir,
|
||||||
|
|
||||||
# TIER 6: KPI & reporting
|
# TIER 6: KPI & reporting
|
||||||
reports_dir = reports_dir,
|
reports_dir = reports_dir,
|
||||||
kpi_reports_dir = kpi_reports_dir,
|
kpi_reports_dir = kpi_reports_dir,
|
||||||
kpi_field_stats_dir = kpi_field_stats_dir,
|
|
||||||
kpi_field_analysis_dir = kpi_field_analysis_dir,
|
|
||||||
|
|
||||||
# TIER 7: Support
|
# TIER 7: Support
|
||||||
data_dir = data_dir,
|
data_dir = data_dir,
|
||||||
|
|
@ -511,7 +514,63 @@ setup_logging <- function(log_dir) {
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# Centralized helper functions for run_full_pipeline.R to avoid hardcoding paths
|
# Centralized helper functions for run_full_pipeline.R to avoid hardcoding paths
|
||||||
|
|
||||||
|
#' Detect mosaic mode from project structure
|
||||||
|
#'
|
||||||
|
#' Determines if project uses "tiled" (legacy) or "single-file" (per-field) mosaics
|
||||||
|
#'
|
||||||
|
#' @param project_dir Character. Project name
|
||||||
|
#' @return Character. "tiled" or "single-file"
|
||||||
|
detect_mosaic_mode <- function(project_dir) {
|
||||||
|
# Per-field architecture is standard - always return "single-file"
|
||||||
|
# unless weekly_tile_max directory exists with content
|
||||||
|
mosaic_tiled_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max")
|
||||||
|
|
||||||
|
if (dir.exists(mosaic_tiled_dir) && length(list.files(mosaic_tiled_dir)) > 0) {
|
||||||
|
return("tiled")
|
||||||
|
}
|
||||||
|
return("single-file")
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Detect grid size from tile directory structure
|
||||||
|
#'
|
||||||
|
#' For per-field architecture, returns "unknown" (grid-based tiling is legacy)
|
||||||
|
#'
|
||||||
|
#' @param project_dir Character. Project name
|
||||||
|
#' @return Character. Grid size ("unknown" for per-field)
|
||||||
|
detect_grid_size <- function(project_dir) {
|
||||||
|
# Per-field architecture doesn't use grid-based organization
|
||||||
|
return("unknown")
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Get project storage path
|
||||||
|
#'
|
||||||
|
#' @param project_dir Character. Project name
|
||||||
|
#' @param subdir Character. Optional subdirectory (default NULL)
|
||||||
|
#' @return Character. Full path
|
||||||
|
get_project_storage_path <- function(project_dir, subdir = NULL) {
|
||||||
|
path <- file.path("laravel_app", "storage", "app", project_dir)
|
||||||
|
if (!is.null(subdir)) {
|
||||||
|
path <- file.path(path, subdir)
|
||||||
|
}
|
||||||
|
return(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Get mosaic directory
|
||||||
|
#'
|
||||||
|
#' @param project_dir Character. Project name
|
||||||
|
#' @param mosaic_mode Character. "tiled" or "single-file"
|
||||||
|
#' @return Character. Full path to mosaic directory
|
||||||
|
get_mosaic_dir <- function(project_dir, mosaic_mode = "auto") {
|
||||||
|
if (mosaic_mode == "auto") {
|
||||||
|
mosaic_mode <- detect_mosaic_mode(project_dir)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mosaic_mode == "tiled") {
|
||||||
|
get_project_storage_path(project_dir, "weekly_tile_max")
|
||||||
|
} else {
|
||||||
|
get_project_storage_path(project_dir, "weekly_mosaic")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#' Get KPI directory based on client type
|
#' Get KPI directory based on client type
|
||||||
#'
|
#'
|
||||||
|
|
@ -552,6 +611,20 @@ check_harvest_output_exists <- function(project_dir, week_num, year_num) {
|
||||||
file.exists(path)
|
file.exists(path)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#' Get mosaic verification directory
|
||||||
|
#'
|
||||||
|
#' @param project_dir Character. Project name
|
||||||
|
#' @param mosaic_mode Character. "tiled" or "single-file"
|
||||||
|
#' @return Character. Full path to mosaic directory
|
||||||
|
get_mosaic_verification_dir <- function(project_dir, mosaic_mode) {
|
||||||
|
base <- file.path("laravel_app", "storage", "app", project_dir)
|
||||||
|
|
||||||
|
if (mosaic_mode == "tiled") {
|
||||||
|
file.path(base, "weekly_tile_max")
|
||||||
|
} else {
|
||||||
|
file.path(base, "weekly_mosaic")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#' Check if mosaic files exist for a specific week
|
#' Check if mosaic files exist for a specific week
|
||||||
#'
|
#'
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ subchunkify <- function(g, fig_height=7, fig_width=5) {
|
||||||
"\n`","``
|
"\n`","``
|
||||||
")
|
")
|
||||||
|
|
||||||
cat(knitr::knit(text = knitr::knit_expand(text = sub_chunk), quiet = TRUE))
|
cat(knitr::knit(text = knitr::knit_expand(text = sub_chunk), quiet = TRUE))
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Creates a Chlorophyll Index map for a pivot
|
#' Creates a Chlorophyll Index map for a pivot
|
||||||
|
|
@ -34,12 +34,13 @@ subchunkify <- function(g, fig_height=7, fig_width=5) {
|
||||||
#' @param pivot_spans Additional boundary data for the field
|
#' @param pivot_spans Additional boundary data for the field
|
||||||
#' @param show_legend Whether to show the legend (default: FALSE)
|
#' @param show_legend Whether to show the legend (default: FALSE)
|
||||||
#' @param legend_is_portrait Whether to show the legend in portrait orientation (default: FALSE)
|
#' @param legend_is_portrait Whether to show the legend in portrait orientation (default: FALSE)
|
||||||
|
#' @param legend_position Position for the legend when shown: "left", "right", "top", "bottom" (default: "bottom")
|
||||||
#' @param week Week number to display in the title
|
#' @param week Week number to display in the title
|
||||||
#' @param age Age of the crop in weeks
|
#' @param age Age of the crop in weeks
|
||||||
#' @param borders Whether to display field borders (default: FALSE)
|
#' @param borders Whether to display field borders (default: FALSE)
|
||||||
#' @return A tmap object with the CI map
|
#' @return A tmap object with the CI map
|
||||||
#'
|
#'
|
||||||
create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week, age, borders = FALSE, colorblind = FALSE){
|
create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, legend_position = "bottom", week, age, borders = FALSE, colorblind = FALSE){
|
||||||
# Input validation
|
# Input validation
|
||||||
if (missing(pivot_raster) || is.null(pivot_raster)) {
|
if (missing(pivot_raster) || is.null(pivot_raster)) {
|
||||||
stop("pivot_raster is required")
|
stop("pivot_raster is required")
|
||||||
|
|
@ -64,25 +65,29 @@ create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend =
|
||||||
map <- tm_shape(pivot_raster, unit = "m")
|
map <- tm_shape(pivot_raster, unit = "m")
|
||||||
|
|
||||||
# Add raster with continuous spectrum (fixed scale 8-1 for consistent comparison, reversed)
|
# Add raster with continuous spectrum (fixed scale 8-1 for consistent comparison, reversed)
|
||||||
map <- map + tm_raster(col.scale = tm_scale_continuous(values = palette,
|
map <- map + tm_raster(
|
||||||
limits = c(1,8)),
|
"CI",
|
||||||
col.legend = tm_legend(title = "CI",
|
col.scale = tm_scale_continuous(
|
||||||
orientation = if(legend_is_portrait) "portrait" else "landscape",
|
values = palette,
|
||||||
show = show_legend,
|
limits = c(1, 8),
|
||||||
position = if(show_legend) tm_pos_out("left", "center") else c("left", "bottom"),
|
ticks = seq(1, 8, by = 1),
|
||||||
reverse = TRUE
|
outliers.trunc = c(TRUE, TRUE)
|
||||||
))
|
),
|
||||||
|
col.legend = tm_legend(
|
||||||
|
title = "CI",
|
||||||
|
orientation = if (legend_is_portrait) "portrait" else "landscape",
|
||||||
|
show = show_legend,
|
||||||
|
position = if (show_legend) tm_pos_out(legend_position, "center") else c("left", "bottom"),
|
||||||
|
reverse = TRUE
|
||||||
|
)
|
||||||
|
)
|
||||||
# Add layout elements
|
# Add layout elements
|
||||||
map <- map + tm_title(text = paste0("Max CI week ", week,"\n", age, " weeks (", age * 7, " days) old"),
|
map <- map + tm_layout(
|
||||||
size = 0.7)
|
main.title = paste0("Max CI week ", week,"\n", age, " weeks (", age * 7, " days) old"),
|
||||||
# Add layout configuration to prevent legend rescaling
|
main.title.size = 0.7,
|
||||||
map <- map + tm_layout(legend.position = c("left", "bottom"),
|
#legend.height = 0.85, # Constrain vertical legend height to not exceed map
|
||||||
legend.outside = FALSE,
|
asp = 1 # Fixed aspect ratio
|
||||||
inner.margins = 0.05,
|
)
|
||||||
asp = 1) # Force 1:1 aspect ratio for consistent sizing
|
|
||||||
|
|
||||||
# Add bounds/view settings for fixed aspect ratio
|
|
||||||
map <- map + tm_view(asp = 1)
|
|
||||||
|
|
||||||
# Add borders if requested
|
# Add borders if requested
|
||||||
if (borders) {
|
if (borders) {
|
||||||
|
|
@ -104,13 +109,14 @@ create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend =
|
||||||
#' @param pivot_spans Additional boundary data for the field
|
#' @param pivot_spans Additional boundary data for the field
|
||||||
#' @param show_legend Whether to show the legend (default: FALSE)
|
#' @param show_legend Whether to show the legend (default: FALSE)
|
||||||
#' @param legend_is_portrait Whether to show the legend in portrait orientation (default: FALSE)
|
#' @param legend_is_portrait Whether to show the legend in portrait orientation (default: FALSE)
|
||||||
|
#' @param legend_position Position for the legend when shown: "left", "right", "top", "bottom" (default: "bottom")
|
||||||
#' @param week_1 First week number for comparison
|
#' @param week_1 First week number for comparison
|
||||||
#' @param week_2 Second week number for comparison
|
#' @param week_2 Second week number for comparison
|
||||||
#' @param age Age of the crop in weeks
|
#' @param age Age of the crop in weeks
|
||||||
#' @param borders Whether to display field borders (default: TRUE)
|
#' @param borders Whether to display field borders (default: TRUE)
|
||||||
#' @return A tmap object with the CI difference map
|
#' @return A tmap object with the CI difference map
|
||||||
#'
|
#'
|
||||||
create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week_1, week_2, age, borders = TRUE, colorblind = FALSE){
|
create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, legend_position = "bottom", week_1, week_2, age, borders = TRUE, colorblind = FALSE){
|
||||||
# Input validation
|
# Input validation
|
||||||
if (missing(pivot_raster) || is.null(pivot_raster)) {
|
if (missing(pivot_raster) || is.null(pivot_raster)) {
|
||||||
stop("pivot_raster is required")
|
stop("pivot_raster is required")
|
||||||
|
|
@ -135,26 +141,30 @@ create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_lege
|
||||||
map <- tm_shape(pivot_raster, unit = "m")
|
map <- tm_shape(pivot_raster, unit = "m")
|
||||||
|
|
||||||
# Add raster with continuous spectrum (centered at 0 for difference maps, fixed scale, reversed)
|
# Add raster with continuous spectrum (centered at 0 for difference maps, fixed scale, reversed)
|
||||||
map <- map + tm_raster(col.scale = tm_scale_continuous(values = palette,
|
map <- map + tm_raster(
|
||||||
midpoint = 0,
|
"CI",
|
||||||
limits = c(-3, 3)),
|
col.scale = tm_scale_continuous(
|
||||||
col.legend = tm_legend(title = "CI diff.",
|
values = palette,
|
||||||
orientation = if(legend_is_portrait) "portrait" else "landscape",
|
limits = c(-3, 3),
|
||||||
show = show_legend,
|
ticks = seq(-3, 3, by = 1),
|
||||||
position = if(show_legend) tm_pos_out("right", "center") else c("left", "bottom"),
|
midpoint = 0,
|
||||||
reverse = TRUE
|
outliers.trunc = c(TRUE, TRUE)
|
||||||
))
|
),
|
||||||
|
col.legend = tm_legend(
|
||||||
|
title = "CI diff.",
|
||||||
|
orientation = if (legend_is_portrait) "portrait" else "landscape",
|
||||||
|
show = show_legend,
|
||||||
|
position = if (show_legend) tm_pos_out(legend_position, "center") else c("left", "bottom"),
|
||||||
|
reverse = TRUE
|
||||||
|
)
|
||||||
|
)
|
||||||
# Add layout elements
|
# Add layout elements
|
||||||
map <- map + tm_title(text = paste0("CI change week ", week_1, " - week ", week_2, "\n", age, " weeks (", age * 7, " days) old"),
|
map <- map + tm_layout(
|
||||||
size = 0.7)
|
main.title = paste0("CI change week ", week_1, " - week ", week_2, "\n", age, " weeks (", age * 7, " days) old"),
|
||||||
# Add layout configuration to prevent legend rescaling
|
main.title.size = 0.7,
|
||||||
map <- map + tm_layout(legend.position = c("right", "bottom"),
|
#legend.height = 0.85, # Constrain vertical legend height to not exceed map
|
||||||
legend.outside = FALSE,
|
asp = 1 # Fixed aspect ratio
|
||||||
inner.margins = 0.05,
|
)
|
||||||
asp = 1) # Force 1:1 aspect ratio for consistent sizing
|
|
||||||
|
|
||||||
# Add bounds/view settings for fixed aspect ratio
|
|
||||||
map <- map + tm_view(asp = 1)
|
|
||||||
|
|
||||||
# Add borders if requested
|
# Add borders if requested
|
||||||
if (borders) {
|
if (borders) {
|
||||||
|
|
@ -269,18 +279,16 @@ ci_plot <- function(pivotName,
|
||||||
|
|
||||||
# Create historical maps only if data is available
|
# Create historical maps only if data is available
|
||||||
# Build list with all available maps - order matches original: [m2, m1, current, diff_1w, diff_3w]
|
# Build list with all available maps - order matches original: [m2, m1, current, diff_1w, diff_3w]
|
||||||
# Widths match original hardcoded: c(0.23, 0.18, 0.18, 0.18, 0.23)
|
|
||||||
maps_to_arrange <- list()
|
maps_to_arrange <- list()
|
||||||
widths_to_use <- c()
|
|
||||||
field_heading_note <- ""
|
field_heading_note <- ""
|
||||||
|
|
||||||
# Try to create 2-week ago map (legend on left)
|
# Try to create 2-week ago map (legend on left)
|
||||||
if (!is.null(singlePivot_m2)) {
|
if (!is.null(singlePivot_m2)) {
|
||||||
CImap_m2 <- create_CI_map(singlePivot_m2, AllPivots2, joined_spans2,
|
CImap_m2 <- create_CI_map(singlePivot_m2, AllPivots2, joined_spans2,
|
||||||
show_legend = TRUE, legend_is_portrait = TRUE,
|
show_legend = TRUE, legend_is_portrait = TRUE,
|
||||||
|
legend_position = "left",
|
||||||
week = week_minus_2, age = age - 2, borders = borders, colorblind = colorblind_friendly)
|
week = week_minus_2, age = age - 2, borders = borders, colorblind = colorblind_friendly)
|
||||||
maps_to_arrange <- c(maps_to_arrange, list(CImap_m2))
|
maps_to_arrange <- c(maps_to_arrange, list(CImap_m2))
|
||||||
widths_to_use <- c(widths_to_use, 0.24)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Try to create 1-week ago map
|
# Try to create 1-week ago map
|
||||||
|
|
@ -289,12 +297,10 @@ ci_plot <- function(pivotName,
|
||||||
show_legend = FALSE, legend_is_portrait = FALSE,
|
show_legend = FALSE, legend_is_portrait = FALSE,
|
||||||
week = week_minus_1, age = age - 1, borders = borders, colorblind = colorblind_friendly)
|
week = week_minus_1, age = age - 1, borders = borders, colorblind = colorblind_friendly)
|
||||||
maps_to_arrange <- c(maps_to_arrange, list(CImap_m1))
|
maps_to_arrange <- c(maps_to_arrange, list(CImap_m1))
|
||||||
widths_to_use <- c(widths_to_use, 0.17)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Always add current week map (center position)
|
# Always add current week map (center position)
|
||||||
maps_to_arrange <- c(maps_to_arrange, list(CImap))
|
maps_to_arrange <- c(maps_to_arrange, list(CImap))
|
||||||
widths_to_use <- c(widths_to_use, 0.17)
|
|
||||||
|
|
||||||
# Try to create 1-week difference map
|
# Try to create 1-week difference map
|
||||||
if (!is.null(abs_CI_last_week)) {
|
if (!is.null(abs_CI_last_week)) {
|
||||||
|
|
@ -302,21 +308,17 @@ ci_plot <- function(pivotName,
|
||||||
show_legend = FALSE, legend_is_portrait = FALSE,
|
show_legend = FALSE, legend_is_portrait = FALSE,
|
||||||
week_1 = week, week_2 = week_minus_1, age = age, borders = borders, colorblind = colorblind_friendly)
|
week_1 = week, week_2 = week_minus_1, age = age, borders = borders, colorblind = colorblind_friendly)
|
||||||
maps_to_arrange <- c(maps_to_arrange, list(CI_max_abs_last_week))
|
maps_to_arrange <- c(maps_to_arrange, list(CI_max_abs_last_week))
|
||||||
widths_to_use <- c(widths_to_use, 0.17)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Try to create 3-week difference map (legend on right)
|
# Try to create 3-week difference map (legend on right)
|
||||||
if (!is.null(abs_CI_three_week)) {
|
if (!is.null(abs_CI_three_week)) {
|
||||||
CI_max_abs_three_week <- create_CI_diff_map(abs_CI_three_week, AllPivots2, joined_spans2,
|
CI_max_abs_three_week <- create_CI_diff_map(abs_CI_three_week, AllPivots2, joined_spans2,
|
||||||
show_legend = TRUE, legend_is_portrait = TRUE,
|
show_legend = TRUE, legend_is_portrait = TRUE,
|
||||||
|
legend_position = "right",
|
||||||
week_1 = week, week_2 = week_minus_3, age = age, borders = borders, colorblind = colorblind_friendly)
|
week_1 = week, week_2 = week_minus_3, age = age, borders = borders, colorblind = colorblind_friendly)
|
||||||
maps_to_arrange <- c(maps_to_arrange, list(CI_max_abs_three_week))
|
maps_to_arrange <- c(maps_to_arrange, list(CI_max_abs_three_week))
|
||||||
widths_to_use <- c(widths_to_use, 0.24)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Normalize widths to sum to 1
|
|
||||||
widths_to_use <- widths_to_use / sum(widths_to_use)
|
|
||||||
|
|
||||||
# Add note if historical data is limited
|
# Add note if historical data is limited
|
||||||
if (length(maps_to_arrange) == 1) {
|
if (length(maps_to_arrange) == 1) {
|
||||||
field_heading_note <- " (Current week only - historical data not yet available)"
|
field_heading_note <- " (Current week only - historical data not yet available)"
|
||||||
|
|
@ -324,8 +326,21 @@ ci_plot <- function(pivotName,
|
||||||
field_heading_note <- " (Limited historical data)"
|
field_heading_note <- " (Limited historical data)"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Arrange the maps with normalized widths
|
# Arrange the maps in a row with more width for first and last (for legends)
|
||||||
tst <- do.call(tmap_arrange, c(maps_to_arrange, list(nrow = 1, widths = widths_to_use)))
|
# Give maps with legends (1st and 5th) more space: 23%, middle maps get 18% each
|
||||||
|
widths <- if (length(maps_to_arrange) == 5) {
|
||||||
|
c(0.23, 0.18, 0.18, 0.18, 0.23)
|
||||||
|
} else if (length(maps_to_arrange) == 4) {
|
||||||
|
c(0.25, 0.25, 0.25, 0.25) # Equal if only 4 maps
|
||||||
|
} else if (length(maps_to_arrange) == 3) {
|
||||||
|
c(0.33, 0.33, 0.34) # Equal if only 3 maps
|
||||||
|
} else if (length(maps_to_arrange) == 2) {
|
||||||
|
c(0.5, 0.5) # Equal if only 2 maps
|
||||||
|
} else {
|
||||||
|
NULL # Single map or other cases
|
||||||
|
}
|
||||||
|
|
||||||
|
tst <- do.call(tmap_arrange, c(maps_to_arrange, list(nrow = 1, widths = widths)))
|
||||||
|
|
||||||
# Output heading and map to R Markdown
|
# Output heading and map to R Markdown
|
||||||
age_months <- round(age / 4.348, 1)
|
age_months <- round(age / 4.348, 1)
|
||||||
|
|
@ -342,10 +357,10 @@ ci_plot <- function(pivotName,
|
||||||
#' Creates a plot showing Chlorophyll Index data over time for a pivot field
|
#' Creates a plot showing Chlorophyll Index data over time for a pivot field
|
||||||
#'
|
#'
|
||||||
#' @param pivotName The name or ID of the pivot field to visualize
|
#' @param pivotName The name or ID of the pivot field to visualize
|
||||||
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DOY, cumulative_CI, value and season columns
|
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, sub_field, Date, DAH, cumulative_CI, value and season columns
|
||||||
#' @param plot_type Type of plot to generate: "absolute", "cumulative", or "both"
|
#' @param plot_type Type of plot to generate: "absolute", "cumulative", or "both"
|
||||||
#' @param facet_on Whether to facet the plot by season (TRUE) or overlay all seasons (FALSE)
|
#' @param facet_on Whether to facet the plot by season (TRUE) or overlay all seasons (FALSE)
|
||||||
#' @param x_unit Unit for x-axis: "days" for DOY or "weeks" for week number (default: "days")
|
#' @param x_unit Unit for x-axis: "days" for DAH or "weeks" for week number (default: "days")
|
||||||
#' @param colorblind_friendly Whether to use colorblind-friendly color schemes (default: FALSE)
|
#' @param colorblind_friendly Whether to use colorblind-friendly color schemes (default: FALSE)
|
||||||
#' @param show_benchmarks Whether to show historical benchmark lines (default: FALSE)
|
#' @param show_benchmarks Whether to show historical benchmark lines (default: FALSE)
|
||||||
#' @param estate_name Name of the estate for benchmark calculation (required if show_benchmarks = TRUE)
|
#' @param estate_name Name of the estate for benchmark calculation (required if show_benchmarks = TRUE)
|
||||||
|
|
@ -378,7 +393,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
|
|
||||||
# Process data
|
# Process data
|
||||||
data_ci2 <- data_ci %>%
|
data_ci2 <- data_ci %>%
|
||||||
dplyr::mutate(CI_rate = cumulative_CI / DOY,
|
dplyr::mutate(CI_rate = cumulative_CI / DAH,
|
||||||
week = lubridate::week(Date)) %>%
|
week = lubridate::week(Date)) %>%
|
||||||
dplyr::group_by(field) %>%
|
dplyr::group_by(field) %>%
|
||||||
dplyr::mutate(mean_CIrate_rolling_10_days = zoo::rollapplyr(CI_rate, width = 10, FUN = mean, partial = TRUE),
|
dplyr::mutate(mean_CIrate_rolling_10_days = zoo::rollapplyr(CI_rate, width = 10, FUN = mean, partial = TRUE),
|
||||||
|
|
@ -433,7 +448,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
|
|
||||||
# Determine x-axis variable based on x_unit parameter
|
# Determine x-axis variable based on x_unit parameter
|
||||||
x_var <- if (x_unit == "days") {
|
x_var <- if (x_unit == "days") {
|
||||||
if (facet_on) "Date" else "DOY"
|
if (facet_on) "Date" else "DAH"
|
||||||
} else {
|
} else {
|
||||||
"week"
|
"week"
|
||||||
}
|
}
|
||||||
|
|
@ -443,14 +458,21 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
"weeks" = "Week Number")
|
"weeks" = "Week Number")
|
||||||
|
|
||||||
# Calculate dynamic max values for breaks
|
# Calculate dynamic max values for breaks
|
||||||
max_doy <- max(plot_data$DOY, na.rm = TRUE) + 20
|
max_dah <- max(plot_data$DAH, na.rm = TRUE) + 20
|
||||||
max_week <- max(as.numeric(plot_data$week), na.rm = TRUE) + ceiling(20 / 7)
|
max_week <- max(as.numeric(plot_data$week), na.rm = TRUE) + ceiling(20 / 7)
|
||||||
|
|
||||||
# Create plot with either facets by season or overlay by DOY/week
|
# Create plot with either facets by season or overlay by DAH/week
|
||||||
if (facet_on) {
|
if (facet_on) {
|
||||||
g <- ggplot2::ggplot(data = plot_data) +
|
g <- ggplot2::ggplot(data = plot_data) +
|
||||||
ggplot2::facet_wrap(~season, scales = "free_x") +
|
ggplot2::facet_wrap(~season, scales = "free_x") +
|
||||||
ggplot2::geom_line(ggplot2::aes_string(x = x_var, y = "ci_value", col = "sub_field", group = "sub_field")) +
|
ggplot2::geom_line(
|
||||||
|
ggplot2::aes(
|
||||||
|
x = .data[[x_var]],
|
||||||
|
y = .data[["ci_value"]],
|
||||||
|
col = .data[["sub_field"]],
|
||||||
|
group = .data[["sub_field"]]
|
||||||
|
)
|
||||||
|
) +
|
||||||
ggplot2::labs(title = paste("Plot of", y_label),
|
ggplot2::labs(title = paste("Plot of", y_label),
|
||||||
color = "Field Name",
|
color = "Field Name",
|
||||||
y = y_label,
|
y = y_label,
|
||||||
|
|
@ -460,10 +482,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
breaks = scales::breaks_pretty(),
|
breaks = scales::breaks_pretty(),
|
||||||
labels = function(x) round(as.numeric(x - min(x)) / 30.44, 1))) +
|
labels = function(x) round(as.numeric(x - min(x)) / 30.44, 1))) +
|
||||||
ggplot2::theme_minimal() +
|
ggplot2::theme_minimal() +
|
||||||
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
|
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
|
||||||
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
|
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
|
||||||
axis.title.x.top = ggplot2::element_text(size = 8),
|
axis.title.x.top = ggplot2::element_text(size = 8),
|
||||||
legend.justification = c(1, 0), legend.position = c(1, 0),
|
legend.justification = c(1, 0),
|
||||||
|
legend.position = "inside",
|
||||||
|
legend.position.inside = c(1, 0),
|
||||||
legend.title = ggplot2::element_text(size = 8),
|
legend.title = ggplot2::element_text(size = 8),
|
||||||
legend.text = ggplot2::element_text(size = 8)) +
|
legend.text = ggplot2::element_text(size = 8)) +
|
||||||
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
|
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
|
||||||
|
|
@ -482,32 +506,46 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
benchmark_subset <- benchmark_data %>%
|
benchmark_subset <- benchmark_data %>%
|
||||||
dplyr::filter(ci_type == ci_type_filter) %>%
|
dplyr::filter(ci_type == ci_type_filter) %>%
|
||||||
dplyr::mutate(
|
dplyr::mutate(
|
||||||
benchmark_x = if (x_var == "DOY") {
|
benchmark_x = if (x_var == "DAH") {
|
||||||
DOY
|
DAH
|
||||||
} else if (x_var == "week") {
|
} else if (x_var == "week") {
|
||||||
DOY / 7 # Approximate conversion
|
DAH / 7 # Approximate conversion
|
||||||
} else {
|
} else {
|
||||||
DOY # For Date, use DOY as is (may not align perfectly)
|
DAH # For Date, use DAH as is (may not align perfectly)
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
ggplot2::geom_smooth(
|
ggplot2::geom_smooth(
|
||||||
data = benchmark_subset,
|
data = benchmark_subset,
|
||||||
ggplot2::aes_string(x = "benchmark_x", y = "benchmark_value", group = "factor(percentile)"),
|
ggplot2::aes(
|
||||||
color = "gray70", size = 0.5, se = FALSE, inherit.aes = FALSE
|
x = .data[["benchmark_x"]],
|
||||||
|
y = .data[["benchmark_value"]],
|
||||||
|
group = factor(.data[["percentile"]])
|
||||||
|
),
|
||||||
|
color = "gray70", linewidth = 0.5, se = FALSE, inherit.aes = FALSE
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
} +
|
} +
|
||||||
# Plot older seasons with lighter lines
|
# Plot older seasons with lighter lines
|
||||||
ggplot2::geom_line(
|
ggplot2::geom_line(
|
||||||
data = plot_data %>% dplyr::filter(!is_latest),
|
data = plot_data %>% dplyr::filter(!is_latest),
|
||||||
ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"),
|
ggplot2::aes(
|
||||||
size = 0.7, alpha = 0.4
|
x = .data[[x_var]],
|
||||||
|
y = .data[["ci_value"]],
|
||||||
|
col = .data[["season"]],
|
||||||
|
group = .data[["season"]]
|
||||||
|
),
|
||||||
|
linewidth = 0.7, alpha = 0.4
|
||||||
) +
|
) +
|
||||||
# Plot latest season with thicker, more prominent line
|
# Plot latest season with thicker, more prominent line
|
||||||
ggplot2::geom_line(
|
ggplot2::geom_line(
|
||||||
data = plot_data %>% dplyr::filter(is_latest),
|
data = plot_data %>% dplyr::filter(is_latest),
|
||||||
ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"),
|
ggplot2::aes(
|
||||||
size = 1.5, alpha = 1
|
x = .data[[x_var]],
|
||||||
|
y = .data[["ci_value"]],
|
||||||
|
col = .data[["season"]],
|
||||||
|
group = .data[["season"]]
|
||||||
|
),
|
||||||
|
linewidth = 1.5, alpha = 1
|
||||||
) +
|
) +
|
||||||
ggplot2::labs(title = paste("Plot of", y_label, "for Field", pivotName, title_suffix),
|
ggplot2::labs(title = paste("Plot of", y_label, "for Field", pivotName, title_suffix),
|
||||||
color = "Season",
|
color = "Season",
|
||||||
|
|
@ -515,17 +553,19 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
x = x_label) +
|
x = x_label) +
|
||||||
color_scale +
|
color_scale +
|
||||||
{
|
{
|
||||||
if (x_var == "DOY") {
|
if (x_var == "DAH") {
|
||||||
ggplot2::scale_x_continuous(breaks = seq(0, max_doy, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
||||||
} else if (x_var == "week") {
|
} else if (x_var == "week") {
|
||||||
ggplot2::scale_x_continuous(breaks = seq(0, max_week, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
ggplot2::scale_x_continuous(breaks = seq(0, max_week, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
||||||
}
|
}
|
||||||
} +
|
} +
|
||||||
ggplot2::theme_minimal() +
|
ggplot2::theme_minimal() +
|
||||||
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
|
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
|
||||||
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
|
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
|
||||||
axis.title.x.top = ggplot2::element_text(size = 8),
|
axis.title.x.top = ggplot2::element_text(size = 8),
|
||||||
legend.justification = c(1, 0), legend.position = c(1, 0),
|
legend.justification = c(1, 0),
|
||||||
|
legend.position = "inside",
|
||||||
|
legend.position.inside = c(1, 0),
|
||||||
legend.title = ggplot2::element_text(size = 8),
|
legend.title = ggplot2::element_text(size = 8),
|
||||||
legend.text = ggplot2::element_text(size = 8)) +
|
legend.text = ggplot2::element_text(size = 8)) +
|
||||||
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
|
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
|
||||||
|
|
@ -561,7 +601,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
|
|
||||||
# Determine x-axis variable based on x_unit parameter
|
# Determine x-axis variable based on x_unit parameter
|
||||||
x_var <- if (x_unit == "days") {
|
x_var <- if (x_unit == "days") {
|
||||||
if (facet_on) "Date" else "DOY"
|
if (facet_on) "Date" else "DAH"
|
||||||
} else {
|
} else {
|
||||||
"week"
|
"week"
|
||||||
}
|
}
|
||||||
|
|
@ -578,7 +618,7 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
}
|
}
|
||||||
|
|
||||||
# Calculate dynamic max values for breaks
|
# Calculate dynamic max values for breaks
|
||||||
max_doy_both <- max(plot_data_both$DOY, na.rm = TRUE) + 20
|
max_dah_both <- max(plot_data_both$DAH, na.rm = TRUE) + 20
|
||||||
max_week_both <- max(as.numeric(plot_data_both$week), na.rm = TRUE) + ceiling(20 / 7)
|
max_week_both <- max(as.numeric(plot_data_both$week), na.rm = TRUE) + ceiling(20 / 7)
|
||||||
|
|
||||||
# Create the faceted plot
|
# Create the faceted plot
|
||||||
|
|
@ -588,12 +628,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
if (!is.null(benchmark_data)) {
|
if (!is.null(benchmark_data)) {
|
||||||
benchmark_subset <- benchmark_data %>%
|
benchmark_subset <- benchmark_data %>%
|
||||||
dplyr::mutate(
|
dplyr::mutate(
|
||||||
benchmark_x = if (x_var == "DOY") {
|
benchmark_x = if (x_var == "DAH") {
|
||||||
DOY
|
DAH
|
||||||
} else if (x_var == "week") {
|
} else if (x_var == "week") {
|
||||||
DOY / 7
|
DAH / 7
|
||||||
} else {
|
} else {
|
||||||
DOY
|
DAH
|
||||||
},
|
},
|
||||||
ci_type_label = case_when(
|
ci_type_label = case_when(
|
||||||
ci_type == "value" ~ "10-Day Rolling Mean CI",
|
ci_type == "value" ~ "10-Day Rolling Mean CI",
|
||||||
|
|
@ -603,8 +643,12 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
)
|
)
|
||||||
ggplot2::geom_smooth(
|
ggplot2::geom_smooth(
|
||||||
data = benchmark_subset,
|
data = benchmark_subset,
|
||||||
ggplot2::aes_string(x = "benchmark_x", y = "benchmark_value", group = "factor(percentile)"),
|
ggplot2::aes(
|
||||||
color = "gray70", size = 0.5, se = FALSE, inherit.aes = FALSE
|
x = .data[["benchmark_x"]],
|
||||||
|
y = .data[["benchmark_value"]],
|
||||||
|
group = factor(.data[["percentile"]])
|
||||||
|
),
|
||||||
|
color = "gray70", linewidth = 0.5, se = FALSE, inherit.aes = FALSE
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
} +
|
} +
|
||||||
|
|
@ -612,14 +656,24 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
# Plot older seasons with lighter lines
|
# Plot older seasons with lighter lines
|
||||||
ggplot2::geom_line(
|
ggplot2::geom_line(
|
||||||
data = plot_data_both %>% dplyr::filter(!is_latest),
|
data = plot_data_both %>% dplyr::filter(!is_latest),
|
||||||
ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"),
|
ggplot2::aes(
|
||||||
size = 0.7, alpha = 0.4
|
x = .data[[x_var]],
|
||||||
|
y = .data[["ci_value"]],
|
||||||
|
col = .data[["season"]],
|
||||||
|
group = .data[["season"]]
|
||||||
|
),
|
||||||
|
linewidth = 0.7, alpha = 0.4
|
||||||
) +
|
) +
|
||||||
# Plot latest season with thicker, more prominent line
|
# Plot latest season with thicker, more prominent line
|
||||||
ggplot2::geom_line(
|
ggplot2::geom_line(
|
||||||
data = plot_data_both %>% dplyr::filter(is_latest),
|
data = plot_data_both %>% dplyr::filter(is_latest),
|
||||||
ggplot2::aes_string(x = x_var, y = "ci_value", col = "season", group = "season"),
|
ggplot2::aes(
|
||||||
size = 1.5, alpha = 1
|
x = .data[[x_var]],
|
||||||
|
y = .data[["ci_value"]],
|
||||||
|
col = .data[["season"]],
|
||||||
|
group = .data[["season"]]
|
||||||
|
),
|
||||||
|
linewidth = 1.5, alpha = 1
|
||||||
) +
|
) +
|
||||||
ggplot2::labs(title = paste("CI Analysis for Field", pivotName),
|
ggplot2::labs(title = paste("CI Analysis for Field", pivotName),
|
||||||
color = "Season",
|
color = "Season",
|
||||||
|
|
@ -627,8 +681,8 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
x = x_label) +
|
x = x_label) +
|
||||||
color_scale +
|
color_scale +
|
||||||
{
|
{
|
||||||
if (x_var == "DOY") {
|
if (x_var == "DAH") {
|
||||||
ggplot2::scale_x_continuous(breaks = seq(0, max_doy_both, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
ggplot2::scale_x_continuous(breaks = seq(0, 450, by = 50), sec.axis = ggplot2::sec_axis(~ . / 30.44, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
||||||
} else if (x_var == "week") {
|
} else if (x_var == "week") {
|
||||||
ggplot2::scale_x_continuous(breaks = seq(0, max_week_both, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
ggplot2::scale_x_continuous(breaks = seq(0, max_week_both, by = 4), sec.axis = ggplot2::sec_axis(~ . / 4.348, name = "Age in Months", breaks = seq(0, 14, by = 1)))
|
||||||
} else if (x_var == "Date") {
|
} else if (x_var == "Date") {
|
||||||
|
|
@ -636,12 +690,14 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
}
|
}
|
||||||
} +
|
} +
|
||||||
ggplot2::theme_minimal() +
|
ggplot2::theme_minimal() +
|
||||||
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
|
ggplot2::theme(axis.text.x = ggplot2::element_text(hjust = 0.5),
|
||||||
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
|
axis.text.x.top = ggplot2::element_text(hjust = 0.5),
|
||||||
axis.title.x.top = ggplot2::element_text(size = 8),
|
axis.title.x.top = ggplot2::element_text(size = 8),
|
||||||
legend.justification = c(1, 0), legend.position = c(1, 0),
|
legend.justification = c(1, 0),
|
||||||
legend.title = ggplot2::element_text(size = 8),
|
legend.position = "inside",
|
||||||
legend.text = ggplot2::element_text(size = 8)) +
|
legend.position.inside = c(1, 0),
|
||||||
|
legend.title = ggplot2::element_text(size = 8),
|
||||||
|
legend.text = ggplot2::element_text(size = 8)) +
|
||||||
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
|
ggplot2::guides(color = ggplot2::guide_legend(nrow = 2, byrow = TRUE))
|
||||||
|
|
||||||
# For the rolling mean data, we want to set reasonable y-axis limits
|
# For the rolling mean data, we want to set reasonable y-axis limits
|
||||||
|
|
@ -659,9 +715,11 @@ cum_ci_plot <- function(pivotName, ci_quadrant_data = CI_quadrant, plot_type = "
|
||||||
dummy_data[["season"]] <- factor("dummy", levels = levels(plot_data_both[["season"]]))
|
dummy_data[["season"]] <- factor("dummy", levels = levels(plot_data_both[["season"]]))
|
||||||
|
|
||||||
g_both <- g_both +
|
g_both <- g_both +
|
||||||
ggplot2::geom_point(data = dummy_data,
|
ggplot2::geom_point(
|
||||||
ggplot2::aes_string(x = x_var, y = "ci_value"),
|
data = dummy_data,
|
||||||
alpha = 0, size = 0) # Invisible points to set scale
|
ggplot2::aes(x = .data[[x_var]], y = .data[["ci_value"]]),
|
||||||
|
alpha = 0, size = 0
|
||||||
|
) # Invisible points to set scale
|
||||||
|
|
||||||
# Display the combined faceted plot
|
# Display the combined faceted plot
|
||||||
subchunkify(g_both, 2.8, 10)
|
subchunkify(g_both, 2.8, 10)
|
||||||
|
|
@ -698,9 +756,11 @@ cum_ci_plot2 <- function(pivotName){
|
||||||
x = "Date", y = "CI Rate") +
|
x = "Date", y = "CI Rate") +
|
||||||
theme_minimal() +
|
theme_minimal() +
|
||||||
theme(axis.text.x = element_text(hjust = 0.5),
|
theme(axis.text.x = element_text(hjust = 0.5),
|
||||||
legend.justification = c(1, 0), legend.position = c(1, 0),
|
legend.justification = c(1, 0),
|
||||||
legend.title = element_text(size = 8),
|
legend.position = "inside",
|
||||||
legend.text = element_text(size = 8)) +
|
legend.position.inside = c(1, 0),
|
||||||
|
legend.title = element_text(size = 8),
|
||||||
|
legend.text = element_text(size = 8)) +
|
||||||
annotate("text", x = midpoint_date, y = 2, label = "No data available", size = 6, hjust = 0.5)
|
annotate("text", x = midpoint_date, y = 2, label = "No data available", size = 6, hjust = 0.5)
|
||||||
|
|
||||||
subchunkify(g, 3.2, 10)
|
subchunkify(g, 3.2, 10)
|
||||||
|
|
@ -750,30 +810,9 @@ get_week_path <- function(mosaic_path, input_date, week_offset) {
|
||||||
target_week <- sprintf("%02d", lubridate::isoweek(target_date)) # Left-pad week number with a zero if needed
|
target_week <- sprintf("%02d", lubridate::isoweek(target_date)) # Left-pad week number with a zero if needed
|
||||||
target_year <- lubridate::isoyear(target_date)
|
target_year <- lubridate::isoyear(target_date)
|
||||||
|
|
||||||
# Primary approach: Try single-file mosaic path first
|
# Load single-file mosaic for the given week
|
||||||
path_to_week <- here::here(mosaic_path, paste0("week_", target_week, "_", target_year, ".tif"))
|
path_to_week <- here::here(mosaic_path, paste0("week_", target_week, "_", target_year, ".tif"))
|
||||||
|
|
||||||
# Smart fallback: If single-file doesn't exist AND path contains "weekly_mosaic", check for tiles
|
|
||||||
if (!file.exists(path_to_week) && grepl("weekly_mosaic", mosaic_path)) {
|
|
||||||
# Try to locate tile-based mosaics in weekly_tile_max instead
|
|
||||||
tile_mosaic_path <- sub("weekly_mosaic", "weekly_tile_max", mosaic_path)
|
|
||||||
|
|
||||||
# Look for any tile files matching the week pattern (e.g., week_XX_YYYY_00.tif, week_XX_YYYY_01.tif, etc.)
|
|
||||||
if (dir.exists(tile_mosaic_path)) {
|
|
||||||
tile_files <- list.files(tile_mosaic_path,
|
|
||||||
pattern = paste0("^week_", target_week, "_", target_year, "_(\\d{2})\\.tif$"),
|
|
||||||
full.names = TRUE)
|
|
||||||
|
|
||||||
if (length(tile_files) > 0) {
|
|
||||||
# Found tiles - return the first tile as primary, note that multiple tiles exist
|
|
||||||
safe_log(paste("Single-file mosaic not found for week", target_week, target_year,
|
|
||||||
"but found", length(tile_files), "tile files in weekly_tile_max. Using tile approach."), "INFO")
|
|
||||||
# Return first tile - caller should aggregate if needed
|
|
||||||
path_to_week <- tile_files[1] # Return first tile; downstream can handle multiple tiles
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Log the path calculation
|
# Log the path calculation
|
||||||
safe_log(paste("Calculated path for week", target_week, "of year", target_year, ":", path_to_week), "INFO")
|
safe_log(paste("Calculated path for week", target_week, "of year", target_year, ":", path_to_week), "INFO")
|
||||||
|
|
||||||
|
|
@ -788,11 +827,11 @@ get_week_path <- function(mosaic_path, input_date, week_offset) {
|
||||||
|
|
||||||
#' Computes historical percentile benchmarks for CI data per estate
|
#' Computes historical percentile benchmarks for CI data per estate
|
||||||
#'
|
#'
|
||||||
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DOY, cumulative_CI, value, season columns
|
#' @param ci_quadrant_data Data frame containing CI quadrant data with field, Date, DAH, cumulative_CI, value, season columns
|
||||||
#' @param estate_name Name of the estate/client to filter data for
|
#' @param estate_name Name of the estate/client to filter data for
|
||||||
#' @param percentiles Vector of percentiles to compute (e.g., c(10, 50, 90))
|
#' @param percentiles Vector of percentiles to compute (e.g., c(10, 50, 90))
|
||||||
#' @param min_seasons Minimum number of seasons required for reliable benchmarks (default: 3)
|
#' @param min_seasons Minimum number of seasons required for reliable benchmarks (default: 3)
|
||||||
#' @return Data frame with DOY, percentile, ci_type, benchmark_value, or NULL if insufficient data
|
#' @return Data frame with DAH, percentile, ci_type, benchmark_value, or NULL if insufficient data
|
||||||
#'
|
#'
|
||||||
compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c(10, 50, 90), min_seasons = 3) {
|
compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c(10, 50, 90), min_seasons = 3) {
|
||||||
# Input validation
|
# Input validation
|
||||||
|
|
@ -821,7 +860,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
|
||||||
# Prepare data for both CI types
|
# Prepare data for both CI types
|
||||||
data_prepared <- data_filtered %>%
|
data_prepared <- data_filtered %>%
|
||||||
dplyr::ungroup() %>% # Ensure no existing groupings
|
dplyr::ungroup() %>% # Ensure no existing groupings
|
||||||
dplyr::select(DOY, value, cumulative_CI, season) %>%
|
dplyr::select(DAH, value, cumulative_CI, season) %>%
|
||||||
tidyr::pivot_longer(
|
tidyr::pivot_longer(
|
||||||
cols = c("value", "cumulative_CI"),
|
cols = c("value", "cumulative_CI"),
|
||||||
names_to = "ci_type",
|
names_to = "ci_type",
|
||||||
|
|
@ -829,9 +868,9 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
|
||||||
) %>%
|
) %>%
|
||||||
dplyr::filter(!is.na(ci_value)) # Remove NA values
|
dplyr::filter(!is.na(ci_value)) # Remove NA values
|
||||||
|
|
||||||
# Compute percentiles for each DOY and ci_type
|
# Compute percentiles for each DAH and ci_type
|
||||||
benchmarks <- data_prepared %>%
|
benchmarks <- data_prepared %>%
|
||||||
dplyr::group_by(DOY, ci_type) %>%
|
dplyr::group_by(DAH, ci_type) %>%
|
||||||
dplyr::summarise(
|
dplyr::summarise(
|
||||||
p10 = tryCatch(quantile(ci_value, 0.1, na.rm = TRUE), error = function(e) NA_real_),
|
p10 = tryCatch(quantile(ci_value, 0.1, na.rm = TRUE), error = function(e) NA_real_),
|
||||||
p50 = tryCatch(quantile(ci_value, 0.5, na.rm = TRUE), error = function(e) NA_real_),
|
p50 = tryCatch(quantile(ci_value, 0.5, na.rm = TRUE), error = function(e) NA_real_),
|
||||||
|
|
@ -839,7 +878,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
|
||||||
n_observations = n(),
|
n_observations = n(),
|
||||||
.groups = 'drop'
|
.groups = 'drop'
|
||||||
) %>%
|
) %>%
|
||||||
dplyr::filter(n_observations >= min_seasons) %>% # Only include DOYs with sufficient data
|
dplyr::filter(n_observations >= min_seasons) %>% # Only include DAHs with sufficient data
|
||||||
tidyr::pivot_longer(
|
tidyr::pivot_longer(
|
||||||
cols = c(p10, p50, p90),
|
cols = c(p10, p50, p90),
|
||||||
names_to = "percentile",
|
names_to = "percentile",
|
||||||
|
|
@ -856,7 +895,7 @@ compute_ci_benchmarks <- function(ci_quadrant_data, estate_name, percentiles = c
|
||||||
|
|
||||||
# Rename columns for clarity
|
# Rename columns for clarity
|
||||||
benchmarks <- benchmarks %>%
|
benchmarks <- benchmarks %>%
|
||||||
dplyr::select(DOY, ci_type, percentile, benchmark_value)
|
dplyr::select(DAH, ci_type, percentile, benchmark_value)
|
||||||
|
|
||||||
safe_log(paste("Computed CI benchmarks for estate", estate_name, "with", length(unique_seasons), "seasons and", nrow(benchmarks), "benchmark points"), "INFO")
|
safe_log(paste("Computed CI benchmarks for estate", estate_name, "with", length(unique_seasons), "seasons and", nrow(benchmarks), "benchmark points"), "INFO")
|
||||||
|
|
||||||
|
|
@ -1043,7 +1082,7 @@ get_field_priority_level <- function(cv, morans_i) {
|
||||||
#'
|
#'
|
||||||
#' @param field_name Name of the field to summarize
|
#' @param field_name Name of the field to summarize
|
||||||
#' @param field_details_table Data frame with field-level KPI details
|
#' @param field_details_table Data frame with field-level KPI details
|
||||||
#' @param CI_quadrant Data frame containing CI quadrant data with Date, DOY, season columns
|
#' @param CI_quadrant Data frame containing CI quadrant data with Date, DAH, season columns
|
||||||
#' @param report_date Report date (used for filtering current season data)
|
#' @param report_date Report date (used for filtering current season data)
|
||||||
#' @return Formatted text string with field KPI summary
|
#' @return Formatted text string with field KPI summary
|
||||||
#'
|
#'
|
||||||
|
|
@ -1064,10 +1103,10 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
|
||||||
}
|
}
|
||||||
current_season <- current_season_data %>% pull(season)
|
current_season <- current_season_data %>% pull(season)
|
||||||
|
|
||||||
# Get the most recent DOY from the current season
|
# Get the most recent DAH from the current season
|
||||||
field_age_data <- CI_quadrant %>%
|
field_age_data <- CI_quadrant %>%
|
||||||
filter(field == field_name, season == current_season) %>%
|
filter(field == field_name, season == current_season) %>%
|
||||||
pull(DOY)
|
pull(DAH)
|
||||||
|
|
||||||
field_age <- if (length(field_age_data) > 0) max(field_age_data, na.rm = TRUE) else NA_real_
|
field_age <- if (length(field_age_data) > 0) max(field_age_data, na.rm = TRUE) else NA_real_
|
||||||
# Filter data for this specific field
|
# Filter data for this specific field
|
||||||
|
|
@ -1082,7 +1121,7 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
|
||||||
# For categorical data, take the most common value or highest risk level
|
# For categorical data, take the most common value or highest risk level
|
||||||
field_summary <- field_data %>%
|
field_summary <- field_data %>%
|
||||||
summarise(
|
summarise(
|
||||||
field_size = sum(`Field Size (ha)`, na.rm = TRUE),
|
field_size = sum(`Field Size (acres)`, na.rm = TRUE),
|
||||||
uniformity_levels = paste(unique(`Growth Uniformity`), collapse = "/"),
|
uniformity_levels = paste(unique(`Growth Uniformity`), collapse = "/"),
|
||||||
avg_yield_forecast = ifelse(is.na(`Yield Forecast (t/ha)`[1]), NA, mean(`Yield Forecast (t/ha)`, na.rm = TRUE)),
|
avg_yield_forecast = ifelse(is.na(`Yield Forecast (t/ha)`[1]), NA, mean(`Yield Forecast (t/ha)`, na.rm = TRUE)),
|
||||||
max_gap_score = max(`Gap Score`, na.rm = TRUE),
|
max_gap_score = max(`Gap Score`, na.rm = TRUE),
|
||||||
|
|
@ -1117,10 +1156,10 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
|
||||||
}
|
}
|
||||||
|
|
||||||
kpi_text <- paste0(
|
kpi_text <- paste0(
|
||||||
"Size: ", round(field_summary$field_size, 1), " ha | Growth Uniformity: ", field_summary$uniformity_levels,
|
"Size: ", round(field_summary$field_size * 0.404686, 1), " ha | Mean CI: ", round(field_summary$avg_mean_ci, 2),
|
||||||
|
" | Growth Uniformity: ", field_summary$uniformity_levels,
|
||||||
" | ", yield_text, " | Gap Score: ", round(field_summary$max_gap_score, 1),
|
" | ", yield_text, " | Gap Score: ", round(field_summary$max_gap_score, 1),
|
||||||
" | Decline Risk: ", field_summary$highest_decline_risk, " | Weed Risk: ", field_summary$highest_weed_risk,
|
" | Decline Risk: ", field_summary$highest_decline_risk, " | Weed Risk: ", field_summary$highest_weed_risk
|
||||||
" | Mean CI: ", round(field_summary$avg_mean_ci, 2)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Wrap in smaller text HTML tags for Word output
|
# Wrap in smaller text HTML tags for Word output
|
||||||
|
|
@ -1134,4 +1173,33 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#' Normalize field_details_table column structure
|
||||||
|
#'
|
||||||
|
#' Standardizes column names and ensures all expected KPI columns exist.
|
||||||
|
#' Handles Field → Field_id rename and injects missing columns as NA.
|
||||||
|
#'
|
||||||
|
#' @param field_details_table data.frame to normalize
|
||||||
|
#' @return data.frame with standardized column structure
|
||||||
|
normalize_field_details_columns <- function(field_details_table) {
|
||||||
|
if (is.null(field_details_table) || nrow(field_details_table) == 0) {
|
||||||
|
return(field_details_table)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Rename Field → Field_id if needed
|
||||||
|
if ("Field" %in% names(field_details_table) && !("Field_id" %in% names(field_details_table))) {
|
||||||
|
field_details_table <- field_details_table %>%
|
||||||
|
dplyr::rename(Field_id = Field)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Ensure all expected KPI columns exist; add as NA if missing
|
||||||
|
expected_cols <- c("Field_id", "Mean_CI", "CV", "TCH_Forecasted", "Gap_Score",
|
||||||
|
"Trend_Interpretation", "Weekly_CI_Change", "Uniformity_Interpretation",
|
||||||
|
"Decline_Severity", "Patchiness_Risk")
|
||||||
|
for (col in expected_cols) {
|
||||||
|
if (!col %in% names(field_details_table)) {
|
||||||
|
field_details_table[[col]] <- NA
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return(field_details_table)
|
||||||
|
}
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -15,5 +15,12 @@
|
||||||
"vcs.ignore.cellar": true,
|
"vcs.ignore.cellar": true,
|
||||||
"vcs.ignore.library": true,
|
"vcs.ignore.library": true,
|
||||||
"vcs.ignore.local": true,
|
"vcs.ignore.local": true,
|
||||||
"vcs.manage.ignores": true
|
"vcs.manage.ignores": true,
|
||||||
|
"ignored.directories": [
|
||||||
|
"old_sh",
|
||||||
|
"r_app/old_scripts",
|
||||||
|
"r_app/experiments",
|
||||||
|
"python_app/experiments",
|
||||||
|
"webapps"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue