Enhance project configuration and reporting utilities; update .gitignore for PNG exceptions, add CI change thresholds, and improve historical data handling in KPI calculations.

This commit is contained in:
Timon 2026-02-18 10:54:42 +01:00
parent af5c53e084
commit f1821dab59
8 changed files with 224 additions and 61 deletions

6
r_app/.gitignore vendored
View file

@ -8,8 +8,14 @@ renv
*.tmp
*.swp
*.save
# Ignore ALL PNG files by default (generated outputs, analysis plots, etc.)
*.png
# EXCEPTIONS: Explicitly track intentional PNG assets
# Uncomment or add lines below for PNG files that should be committed to git
!r_app/CI_graph_example.png
# Ignore files related to Rproj
.Rproj.user/
.Rhistory

View file

@ -24,8 +24,6 @@ library(tidyr)
library(readxl)
library(writexl)
library(spdep)
library(caret)
library(CAST)
# ============================================================================
# SHARED HELPER FUNCTIONS (NOW IN 80_UTILS_COMMON.R)
@ -588,7 +586,7 @@ create_summary_tables <- function(all_kpis) {
#' @param current_year Current year
#'
#' @return Data frame with one row per field, all KPI columns
create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_week, current_year) {
create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_week, current_year, current_stats = NULL) {
# Start with field identifiers AND field_idx for joining
result <- field_boundaries_sf %>%
@ -602,6 +600,20 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee
) %>%
select(field_idx, Field_id, Field_name, Week, Year)
# ============================================
# GROUP 0: MEAN CI (from field statistics)
# ============================================
if (!is.null(current_stats)) {
result <- result %>%
left_join(
current_stats %>%
select(Field_id, Mean_CI),
by = "Field_id"
)
} else {
result$Mean_CI <- NA_real_
}
# ============================================
# GROUP 1: FIELD UNIFORMITY (KPI 1)
# ============================================
@ -609,7 +621,8 @@ create_field_detail_table <- function(field_boundaries_sf, all_kpis, current_wee
left_join(
all_kpis$uniformity %>%
select(field_idx, CV = cv_value,
Uniformity_Category = uniformity_category),
Uniformity_Category = uniformity_category,
Uniformity_Interpretation = interpretation),
by = "field_idx"
)
@ -915,9 +928,95 @@ calculate_all_field_analysis_agronomic_support <- function(
data_dir = data_dir, project_dir = project_dir)
message("Calculating KPI 4: Growth Decline...")
growth_decline_kpi <- calculate_growth_decline_kpi(
ci_pixels_by_field
)
# Load historical field statistics to build weekly mean CI time series per field
# (growth_decline_kpi expects temporal series, not spatial pixel arrays)
weekly_mean_ci_by_field <- list()
# Build list of weekly mean CI values for each field (4-week lookback)
for (field_idx in seq_len(nrow(field_boundaries_sf))) {
weekly_ci_values <- c()
}
# Try to load historical data for trend calculation
if (!is.null(output_dir) && !is.null(project_dir)) {
tryCatch({
historical_data <- load_historical_field_data(
project_dir = project_dir,
current_week = current_week,
current_year = current_year,
reports_dir = output_dir,
num_weeks = 4,
auto_generate = FALSE,
field_boundaries_sf = field_boundaries_sf
)
if (!is.null(historical_data) && length(historical_data) > 0) {
message(" Building weekly mean CI time series from historical data...")
# Initialize list with empty vectors for each field
for (field_idx in seq_len(nrow(field_boundaries_sf))) {
weekly_mean_ci_by_field[[field_idx]] <- c()
}
# Extract Mean_CI from each historical week (reverse order to go chronologically)
for (hist_idx in rev(seq_along(historical_data))) {
hist_week <- historical_data[[hist_idx]]
hist_data <- hist_week$data
# Extract Mean_CI column if available
if ("Mean_CI" %in% names(hist_data)) {
# Match fields between historical data and field_boundaries
for (field_idx in seq_len(nrow(field_boundaries_sf))) {
field_name <- field_boundaries_sf$field[field_idx]
# Find matching row in historical data by field name/ID
field_row <- which(
(hist_data$Field_id == field_name | hist_data$Field_name == field_name) &
!is.na(hist_data$Mean_CI)
)
if (length(field_row) > 0) {
mean_ci_val <- as.numeric(hist_data$Mean_CI[field_row[1]])
if (!is.na(mean_ci_val)) {
weekly_mean_ci_by_field[[field_idx]] <- c(weekly_mean_ci_by_field[[field_idx]], mean_ci_val)
}
}
}
}
}
message(paste(" ✓ Loaded weekly Mean_CI for", sum(sapply(weekly_mean_ci_by_field, length) > 0), "fields"))
}
}, error = function(e) {
message(paste(" Note: Could not load historical field data for trend analysis:", e$message))
})
}
# If no historical data available, create empty vectors (will result in "Insufficient data")
if (length(weekly_mean_ci_by_field) == 0 || all(sapply(weekly_mean_ci_by_field, length) == 0)) {
message(" Warning: No historical weekly CI data available - using current week only")
for (field_idx in seq_len(nrow(field_boundaries_sf))) {
# Use current week mean CI as single-point series (insufficient for trend)
if (!is.null(current_stats) && nrow(current_stats) > 0) {
field_name <- field_boundaries_sf$field[field_idx]
matching_row <- which(
(current_stats$Field_id == field_name | current_stats$Field_name == field_name) &
!is.na(current_stats$Mean_CI)
)
if (length(matching_row) > 0) {
weekly_mean_ci_by_field[[field_idx]] <- c(as.numeric(current_stats$Mean_CI[matching_row[1]]))
} else {
weekly_mean_ci_by_field[[field_idx]] <- NA_real_
}
} else {
weekly_mean_ci_by_field[[field_idx]] <- NA_real_
}
}
}
# Calculate growth decline using weekly time series (not spatial pixel arrays)
growth_decline_kpi <- calculate_growth_decline_kpi(weekly_mean_ci_by_field)
message("Calculating KPI 5: Field Patchiness...")
# Calculate patchiness using both Gini coefficient and Moran's I spatial clustering
@ -943,6 +1042,16 @@ calculate_all_field_analysis_agronomic_support <- function(
# Use the common wrapper function (same as cane supply)
gap_scores_result <- calculate_gap_scores(per_field_files, field_boundaries_sf)
# Guard against NULL or empty result from calculate_gap_scores
if (is.null(gap_scores_result) || nrow(gap_scores_result) == 0) {
message(" Warning: calculate_gap_scores returned NULL/empty - creating fallback")
gap_scores_result <- data.frame(
Field_id = field_boundaries_sf$field,
gap_score = NA_real_,
stringsAsFactors = FALSE
)
}
# Convert to the format expected by orchestrator
gap_filling_kpi <- gap_scores_result %>%
mutate(field_idx = match(Field_id, field_boundaries_sf$field)) %>%
@ -999,7 +1108,8 @@ calculate_all_field_analysis_agronomic_support <- function(
field_boundaries_sf = field_boundaries_sf,
all_kpis = all_kpis,
current_week = current_week,
current_year = current_year
current_year = current_year,
current_stats = current_stats
)
# Create summary tables

View file

@ -27,6 +27,15 @@ library(tidyr)
library(readxl)
library(writexl)
# ============================================================================
# ALERT THRESHOLDS & CONFIGURATION CONSTANTS
# ============================================================================
# CI change thresholds for alert categorization
# These values are project-standard and should be consistent across all workflows
CI_CHANGE_DECLINE_THRESHOLD <- -0.5 # Weekly CI change threshold for decline alerts
CI_CHANGE_INCREASE_THRESHOLD <- 0.5 # Weekly CI change threshold for increase alerts
# ============================================================================
# ANGATA-SPECIFIC HELPER FUNCTIONS (Placeholder Section)
# ============================================================================
@ -139,7 +148,7 @@ categorize_cv_trend_long_term <- function(cv_slope) {
#' Determine status alert based on harvest probability and crop health
#' Priority order:
#' 1. harvest_ready (imminent + mature ≥12 months)
#' 2. decline_stress (drop ≥2 points but still >1.5)
#' 2. decline_stress (drop ≥CI_CHANGE_DECLINE_THRESHOLD but still >1.5)
#' 3. harvested_bare (Mean CI < 1.5)
#' @param imminent_prob Numeric harvest probability
#' @param age_week Numeric age in weeks
@ -152,8 +161,8 @@ calculate_status_alert <- function(imminent_prob, age_week, weekly_ci_change, me
return("harvest_ready")
}
# Priority 2: Strong decline
if (!is.na(weekly_ci_change) && weekly_ci_change <= -2.0 && !is.na(mean_ci) && mean_ci > 1.5) {
# Priority 2: Strong decline (using configurable threshold)
if (!is.na(weekly_ci_change) && weekly_ci_change <= CI_CHANGE_DECLINE_THRESHOLD && !is.na(mean_ci) && mean_ci > 1.5) {
return("decline_stress")
}
@ -582,7 +591,7 @@ calculate_field_analysis_cane_supply <- function(setup,
# ========== PHASE 6: LOAD HARVEST PROBABILITIES ==========
message("\n4. Loading harvest probabilities from script 31...")
harvest_prob_dir <- file.path(data_dir, "..", "reports", "kpis", "field_stats")
harvest_prob_dir <- setup$kpi_field_stats_dir
harvest_prob_file <- file.path(harvest_prob_dir,
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, current_week, current_year))
message(paste(" Looking for:", harvest_prob_file))
@ -634,13 +643,23 @@ calculate_field_analysis_cane_supply <- function(setup,
# print(head(field_analysis_df[, available_cols], 10))
# }
# # ========== PHASE 10: CALCULATE FARM-LEVEL KPIS ==========
# ========== PHASE 10: CALCULATE FARM-LEVEL KPIS ==========
# farm_kpi_results <- calculate_farm_level_kpis(
# field_analysis_df,
# current_week,
# current_year,
# end_date
# )
# For now, farm-level KPIs are not implemented in CANE_SUPPLY workflow
farm_kpi_results <- NULL
# ========== RETURN RESULTS ==========
return(list(
field_analysis_df = field_analysis_df,
farm_kpi_results = farm_kpi_results,
export_paths = export_paths
))
}
# ============================================================================

View file

@ -17,6 +17,25 @@
# centralized in the orchestrator script.
# ============================================================================
# ============================================================================
# LOAD PROJECT CONFIGURATION (Guard against re-sourcing)
# ============================================================================
# Ensure parameters_project.R has been sourced to provide global configuration
# (PROJECT, data_dir, field_boundaries_path, etc.). Use a sentinel to avoid double-sourcing.
if (!exists("PROJECT", envir = .GlobalEnv)) {
tryCatch({
source(here::here("r_app", "parameters_project.R"))
}, error = function(e) {
# Fallback: try relative path if here() doesn't work
tryCatch({
source("parameters_project.R")
}, error = function(e2) {
warning(paste("Could not source parameters_project.R:", e2$message,
"- using defaults or expecting caller to set PROJECT/data_dir"))
})
})
}
# ============================================================================
# CONSTANTS (from 80_calculate_kpis.R)
# ============================================================================
@ -495,8 +514,15 @@ calculate_gap_scores <- function(per_field_files, field_boundaries_sf) {
dplyr::summarise(gap_score = mean(gap_score, na.rm = TRUE), .groups = "drop")
message(paste(" ✓ Calculated gap scores for", nrow(gap_scores_df), "fields"))
message(paste(" Gap score range:", round(min(gap_scores_df$gap_score, na.rm=TRUE), 2), "-",
round(max(gap_scores_df$gap_score, na.rm=TRUE), 2), "%"))
# Guard against all-NA values which would produce Inf/-Inf warnings
if (any(is.finite(gap_scores_df$gap_score))) {
min_score <- round(min(gap_scores_df$gap_score, na.rm = TRUE), 2)
max_score <- round(max(gap_scores_df$gap_score, na.rm = TRUE), 2)
message(paste(" Gap score range:", min_score, "-", max_score, "%"))
} else {
message(" Gap score range: All values are NA (no valid gap scores)")
}
} else {
message(" WARNING: No gap scores calculated from per-field mosaics")
gap_scores_df <- NULL
@ -645,6 +671,8 @@ load_harvest_data <- function(data_dir) {
if (all(required_cols %in% names(harvesting_data))) {
# Convert to data frame and ensure column types
harvesting_data <- as.data.frame(harvesting_data)
# CRITICAL: Coerce field to character to preserve leading zeros (e.g., "01", "02")
harvesting_data$field <- as.character(harvesting_data$field)
harvesting_data$year <- as.numeric(harvesting_data$year)
harvesting_data$tonnage_ha <- as.numeric(harvesting_data$tonnage_ha)
@ -664,7 +692,12 @@ load_harvest_data <- function(data_dir) {
# Fallback: create empty data frame if loading failed
if (is.null(harvesting_data)) {
message(" WARNING: No harvest data available. TCH yield prediction will use graceful fallback (NA values)")
harvesting_data <- data.frame(field = character(), year = numeric(), tonnage_ha = numeric())
harvesting_data <- data.frame(
field = character(), # Explicitly character to preserve leading zeros when data is added
year = numeric(),
tonnage_ha = numeric(),
stringsAsFactors = FALSE
)
}
return(harvesting_data)

View file

@ -376,13 +376,13 @@ prev_week_1_date <- report_date_obj - 7
prev_week_2_date <- report_date_obj - 14
prev_week_3_date <- report_date_obj - 21
week_minus_1 <- lubridate::isoweek(prev_week_1_date)
week_minus_1 <- sprintf("%02d", lubridate::isoweek(prev_week_1_date))
week_minus_1_year <- lubridate::isoyear(prev_week_1_date)
week_minus_2 <- lubridate::isoweek(prev_week_2_date)
week_minus_2 <- sprintf("%02d", lubridate::isoweek(prev_week_2_date))
week_minus_2_year <- lubridate::isoyear(prev_week_2_date)
week_minus_3 <- lubridate::isoweek(prev_week_3_date)
week_minus_3 <- sprintf("%02d", lubridate::isoweek(prev_week_3_date))
week_minus_3_year <- lubridate::isoyear(prev_week_3_date)
# Format current week with leading zeros
@ -468,10 +468,14 @@ if (!is.null(CI_quadrant) && nrow(CI_quadrant) > 0) {
<span style="font-size:100pt; line-height:1.0; font-weight:700;">Satellite Based Field Reporting</span>
:::
::: {custom-style="Cover_subtitle" style="text-align:center; margin-top:18px;"}
<span style="font-size:20pt; font-weight:600;">Chlorophyll Index (CI) Monitoring Report — `r toupper(params$data_dir)` Farm (Week `r { rd <- params$report_date; rd <- if (inherits(rd, "Date")) rd else suppressWarnings(as.Date(rd)); if (is.na(rd)) rd <- Sys.Date(); if (!is.null(params$week)) params$week else format(rd, '%V') }`, `r { rd <- params$report_date; rd <- if (inherits(rd, "Date")) rd else suppressWarnings(as.Date(rd)); if (is.na(rd)) rd <- Sys.Date(); format(rd, '%Y') }`)</span>
:::
\newpage
## Report Summary
**Farm Location:** `r toupper(project_dir)` Estate
@ -907,8 +911,8 @@ tryCatch({
# Aggregate mosaics for three weeks: current, week-1, week-3
farm_mosaic_current <- aggregate_mosaics_safe(current_week, current_iso_year, "current week")
farm_mosaic_minus_1 <- aggregate_mosaics_safe(week_minus_1, week_minus_1_year, "week-1")
farm_mosaic_minus_3 <- aggregate_mosaics_safe(week_minus_3, week_minus_3_year, "week-3")
farm_mosaic_minus_1 <- aggregate_mosaics_safe(as.numeric(week_minus_1), week_minus_1_year, "week-1")
farm_mosaic_minus_3 <- aggregate_mosaics_safe(as.numeric(week_minus_3), week_minus_3_year, "week-3")
# Extract CI band (5th band, or named "CI") from each aggregated mosaic
farm_ci_current <- NULL
@ -1547,7 +1551,6 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
select(
Field = Field_id,
`Field Size (acres)` = field_size_acres,
`Growth Uniformity` = Uniformity_Interpretation,
`Mean CI` = Mean_CI,
`Weekly CI Change` = Weekly_CI_Change,
`Yield Forecast (t/ha)` = TCH_Forecasted,
@ -1561,7 +1564,6 @@ if (!exists("field_details_table") || is.null(field_details_table) || nrow(field
select(
Field = Field_id,
`Field Size (acres)` = field_size_acres,
`Growth Uniformity` = Uniformity_Interpretation,
`Mean CI` = Mean_CI,
`Yield Forecast (t/ha)` = TCH_Forecasted,
`Gap Score` = Gap_Score,
@ -1616,8 +1618,9 @@ The Chlorophyll Index (CI) is a vegetation index that measures the relative amou
CI values typically range from 0 (bare soil or severely stressed vegetation) to 7+ (very healthy, dense vegetation). For sugarcane, values between 3-7 generally indicate good crop health, depending on the growth stage.
<div align="center">
![Chlorophyll Index Example](CI_graph_example.png)
</div>
### What You'll Find in This Report:

View file

@ -487,7 +487,7 @@ if (exists("summary_data") && !is.null(summary_data) && "field_analysis" %in% na
## 1.1 Overview of cane supply area, showing zones with number of acres being harvest ready
```{r overview_map, fig.width=9, fig.height=7, fig.align="center", echo=FALSE, message=FALSE, warning=FALSE}
```{r overview_map, fig.width=8, fig.height=7, fig.align="center", echo=FALSE, message=FALSE, warning=FALSE}
# Create a hexbin overview map with ggplot
tryCatch({
# Use per-field field_analysis data from RDS (already loaded in load_kpi_data chunk)
@ -643,7 +643,7 @@ tryCatch({
})
```
\newpage
## 1.2 Key Performance Indicators
```{r combined_kpi_table, echo=FALSE, results='asis'}
@ -931,9 +931,9 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
```{r ci_fig, echo=FALSE, fig.align='right', out.width='40%', fig.cap="Chlorophyll Index Example"}
knitr::include_graphics("CI_graph_example.png")
```
<div align="center">
![Chlorophyll Index Example](CI_graph_example.png)
</div>
### Data File Structure and Columns

View file

@ -623,13 +623,19 @@ detect_tile_structure_from_merged_final <- function(merged_final_tif_dir, daily_
#' Detect mosaic mode from project structure
#'
#' Determine mosaic architecture (legacy detection function)
#'
#' NOTE: This is a legacy function kept for backward compatibility.
#' The project has moved to per-field (single-file) architecture.
#' `weekly_tile_max` is no longer created in all_dirs, so this will always return "single-file"
#'
#' Determines if project uses "tiled" (legacy) or "single-file" (per-field) mosaics
#'
#' @param project_dir Character. Project name
#' @return Character. "tiled" or "single-file"
#' @return Character. "tiled" or "single-file" (now always "single-file")
detect_mosaic_mode <- function(project_dir) {
# Per-field architecture is standard - always return "single-file"
# unless weekly_tile_max directory exists with content
# Legacy support: check if weekly_tile_max exists (it won't in standard setup)
mosaic_tiled_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max")
if (dir.exists(mosaic_tiled_dir) && length(list.files(mosaic_tiled_dir)) > 0) {
@ -662,11 +668,15 @@ get_project_storage_path <- function(project_dir, subdir = NULL) {
return(path)
}
#' Get mosaic directory
#' Get mosaic directory (legacy function)
#'
#' NOTE: This is a legacy helper kept for backward compatibility.
#' In the standard per-field workflow, this returns weekly_mosaic directory.
#' The "tiled" mode is no longer created (weekly_tile_max_dir was removed from all_dirs).
#'
#' @param project_dir Character. Project name
#' @param mosaic_mode Character. "tiled" or "single-file"
#' @return Character. Full path to mosaic directory
#' @param mosaic_mode Character. "tiled" or "single-file" (auto-detects if "auto")
#' @return Character. Full path to mosaic directory (typically weekly_mosaic)
get_mosaic_dir <- function(project_dir, mosaic_mode = "auto") {
if (mosaic_mode == "auto") {
mosaic_mode <- detect_mosaic_mode(project_dir)
@ -718,11 +728,14 @@ check_harvest_output_exists <- function(project_dir, week_num, year_num) {
file.exists(path)
}
#' Get mosaic verification directory
#' Get mosaic verification directory (legacy function)
#'
#' NOTE: This is a legacy helper kept for backward compatibility.
#' Standard workflow uses weekly_mosaic; tiled mode is no longer created.
#'
#' @param project_dir Character. Project name
#' @param mosaic_mode Character. "tiled" or "single-file"
#' @return Character. Full path to mosaic directory
#' @return Character. Full path to mosaic directory for verification
get_mosaic_verification_dir <- function(project_dir, mosaic_mode) {
base <- file.path("laravel_app", "storage", "app", project_dir)

View file

@ -802,30 +802,9 @@ get_week_path <- function(mosaic_path, input_date, week_offset) {
target_week <- sprintf("%02d", lubridate::isoweek(target_date)) # Left-pad week number with a zero if needed
target_year <- lubridate::isoyear(target_date)
# Primary approach: Try single-file mosaic path first
# Load single-file mosaic for the given week
path_to_week <- here::here(mosaic_path, paste0("week_", target_week, "_", target_year, ".tif"))
# Smart fallback: If single-file doesn't exist AND path contains "weekly_mosaic", check for tiles
if (!file.exists(path_to_week) && grepl("weekly_mosaic", mosaic_path)) {
# Try to locate tile-based mosaics in weekly_tile_max instead
tile_mosaic_path <- sub("weekly_mosaic", "weekly_tile_max", mosaic_path)
# Look for any tile files matching the week pattern (e.g., week_XX_YYYY_00.tif, week_XX_YYYY_01.tif, etc.)
if (dir.exists(tile_mosaic_path)) {
tile_files <- list.files(tile_mosaic_path,
pattern = paste0("^week_", target_week, "_", target_year, "_(\\d{2})\\.tif$"),
full.names = TRUE)
if (length(tile_files) > 0) {
# Found tiles - return the first tile as primary, note that multiple tiles exist
safe_log(paste("Single-file mosaic not found for week", target_week, target_year,
"but found", length(tile_files), "tile files in weekly_tile_max. Using tile approach."), "INFO")
# Return first tile - caller should aggregate if needed
path_to_week <- tile_files[1] # Return first tile; downstream can handle multiple tiles
}
}
}
# Log the path calculation
safe_log(paste("Calculated path for week", target_week, "of year", target_year, ":", path_to_week), "INFO")
@ -1169,10 +1148,10 @@ generate_field_kpi_summary <- function(field_name, field_details_table, CI_quadr
}
kpi_text <- paste0(
"Size: ", round(field_summary$field_size, 1), " ha | Growth Uniformity: ", field_summary$uniformity_levels,
"Size: ", round(field_summary$field_size * 0.404686, 1), " ha | Mean CI: ", round(field_summary$avg_mean_ci, 2),
" | Growth Uniformity: ", field_summary$uniformity_levels,
" | ", yield_text, " | Gap Score: ", round(field_summary$max_gap_score, 1),
" | Decline Risk: ", field_summary$highest_decline_risk, " | Weed Risk: ", field_summary$highest_weed_risk,
" | Mean CI: ", round(field_summary$avg_mean_ci, 2)
" | Decline Risk: ", field_summary$highest_decline_risk, " | Weed Risk: ", field_summary$highest_weed_risk
)
# Wrap in smaller text HTML tags for Word output