SmartCane/r_app/experiments/crop_messaging/crop_analysis_messaging.R
2025-09-05 15:23:41 +02:00

1278 lines
50 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#
# CROP_ANALYSIS_MESSAGING.R
# =========================
# This script analyzes weekly CI mosaics to detect changes and generate automated messages
# about crop conditions. It compares two weeks of data to assess:
# - Field uniformity (high vs low variation)
# - CI change trends (increase, stable, decrease)
# - Generates contextual messages based on analysis
# - Outputs results in multiple formats: WhatsApp/Word text, CSV, and Markdown
#
# Usage: Rscript crop_analysis_messaging.R [current_week] [previous_week] [estate_name]
# - current_week: Current week number (e.g., 30)
# - previous_week: Previous week number (e.g., 29)
# - estate_name: Estate name (e.g., "simba", "chemba")
#
# Examples:
# Rscript crop_analysis_messaging.R 32 31 simba
# Rscript crop_analysis_messaging.R 30 29 chemba
#
# The script automatically:
# 1. Loads the correct estate configuration
# 2. Analyzes weekly mosaics
# 3. Generates field-by-field analysis
# 4. Creates output files in multiple formats
# 5. Displays WhatsApp-ready text in console
#
# 1. Load required packages
# -----------------------
suppressPackageStartupMessages({
library(sf)
library(terra)
library(tidyverse)
library(lubridate)
library(here)
library(spdep) # For spatial statistics
})
# 2. Analysis configuration
# -----------------------
# Thresholds for change detection (absolute CI units, week-over-week mean delta)
CI_CHANGE_INCREASE_THRESHOLD <- 0.5
CI_CHANGE_DECREASE_THRESHOLD <- -0.5
# Thresholds for field uniformity (coefficient of variation as decimal, sd/mean)
UNIFORMITY_THRESHOLD <- 0.15 # Below this = good uniformity, above = requires attention
EXCELLENT_UNIFORMITY_THRESHOLD <- 0.08 # Below this = excellent uniformity
POOR_UNIFORMITY_THRESHOLD <- 0.25 # Above this = poor uniformity, urgent attention needed
# Thresholds for spatial clustering (Moran's I; adjusted for agricultural fields)
# Agricultural fields naturally have spatial autocorrelation, so higher thresholds are needed
MORAN_THRESHOLD_HIGH <- 0.95 # Above this = very strong clustering (problematic patterns)
MORAN_THRESHOLD_MODERATE <- 0.85 # Above this = moderate clustering
MORAN_THRESHOLD_LOW <- 0.7 # Above this = normal field continuity
# Threshold for acceptable area percentage (see calculate_acceptable_percentage_terra)
ACCEPTABLE_AREA_THRESHOLD <- 40 # Below this percentage = management issue
# 3. Helper functions
# -----------------
#' Calculate uniformity metrics using terra statistics (optimized)
#'
#' All metrics are normalized dispersion measures (spread / centre), so they
#' can all be interpreted on the same CV-style scale.
#'
#' @param mean_val Mean CI value from terra
#' @param sd_val Standard deviation from terra
#' @param median_val Median CI value from terra
#' @param min_val Minimum CI value from terra
#' @param max_val Maximum CI value from terra
#' @param values Raw values for quantile calculations only
#' @return List with uniformity metrics and per-metric interpretations. On
#'   insufficient data every metric is NA and every interpretation is
#'   "insufficient_data" (same keys as the success path, plus the legacy
#'   `interpretation` key, so callers can access either form uniformly).
calculate_uniformity_metrics_terra <- function(mean_val, sd_val, median_val, min_val, max_val, values) {
  # Fix: the fallback previously returned only an `interpretation` key while
  # the success path returned *_interpretation keys; harmonize both (the old
  # `interpretation` key is kept for backward compatibility).
  if (is.na(mean_val) || length(values) < 2) {
    return(list(
      cv = NA, iqr_cv = NA, range_cv = NA,
      mad_cv = NA, percentile_cv = NA,
      interpretation = "insufficient_data",
      cv_interpretation = "insufficient_data",
      iqr_interpretation = "insufficient_data",
      mad_interpretation = "insufficient_data",
      percentile_interpretation = "insufficient_data"
    ))
  }
  # 1. Coefficient of variation (from terra) - already normalized
  cv <- sd_val / mean_val
  # 2. IQR-based CV (IQR/median) - using R's built-in IQR function
  iqr_cv <- IQR(values, na.rm = TRUE) / median_val
  # 3. Range-based CV (range/mean) - using terra min/max
  range_cv <- (max_val - min_val) / mean_val
  # 4. MAD-based CV (MAD/median); constant 1.4826 scales MAD to match SD
  #    under a normal distribution
  mad_cv <- mad(values, constant = 1.4826, na.rm = TRUE) / median_val
  # 5. Percentile-based CV: (P90 - P10)/mean; unname() drops the "90%" label
  #    quantile() attaches, so the result is a plain numeric
  percentiles <- quantile(values, c(0.1, 0.9), na.rm = TRUE)
  percentile_cv <- unname(percentiles[2] - percentiles[1]) / mean_val
  # Shared interpretation scale (all metrics comparable):
  # < 0.15 very uniform, 0.15-0.30 moderate, 0.30-0.50 high, > 0.50 very high
  interpret_uniformity <- function(metric_value) {
    if (is.na(metric_value)) return("unknown")
    if (metric_value < 0.15) return("very uniform")
    if (metric_value < 0.30) return("moderate variation")
    if (metric_value < 0.50) return("high variation")
    return("very high variation")
  }
  list(
    cv = cv,
    iqr_cv = iqr_cv,
    range_cv = range_cv,
    mad_cv = mad_cv,
    percentile_cv = percentile_cv,
    cv_interpretation = interpret_uniformity(cv),
    iqr_interpretation = interpret_uniformity(iqr_cv),
    mad_interpretation = interpret_uniformity(mad_cv),
    percentile_interpretation = interpret_uniformity(percentile_cv)
  )
}
#' Calculate percentage within acceptable range using terra mean
#'
#' The "acceptable" band is the interval mean +/- (threshold_factor * mean);
#' the returned percentage indicates how much of the field performs normally.
#'
#' @param mean_val Mean CI value from terra
#' @param values Raw CI values
#' @param threshold_factor Factor to multiply mean by for acceptable range (default 0.25 = 25%)
#' @return Percentage of values within acceptable range, or NA when the mean
#'   is missing or fewer than two valid values remain
calculate_acceptable_percentage_terra <- function(mean_val, values, threshold_factor = 0.25) {
  clean <- values[!is.na(values) & is.finite(values)]
  if (is.na(mean_val) || length(clean) < 2) {
    return(NA)
  }
  tolerance <- mean_val * threshold_factor
  in_band <- abs(clean - mean_val) <= tolerance
  # mean() of a logical vector is the fraction of TRUEs
  100 * mean(in_band)
}
#' Calculate coefficient of variation for uniformity assessment
#'
#' @param values Numeric vector of CI values
#' @return Coefficient of variation (sd/mean) as a decimal, or NA when fewer
#'   than two finite values are available
calculate_cv <- function(values) {
  # is.finite() is FALSE for NA, NaN and +/-Inf, so one filter suffices
  finite_vals <- values[is.finite(values)]
  if (length(finite_vals) < 2) {
    return(NA)
  }
  sd(finite_vals) / mean(finite_vals)
}
#' Calculate Shannon entropy for spatial heterogeneity assessment
#' Higher entropy = more heterogeneous/variable field
#' Lower entropy = more homogeneous/uniform field
#'
#' @param values Numeric vector of CI values
#' @param n_bins Number of bins for histogram (default 10)
#' @return Shannon entropy value (natural log); 0 for a constant field, NA for
#'   fewer than two valid values
calculate_entropy <- function(values, n_bins = 10) {
  vals <- values[!is.na(values) & is.finite(values)]
  if (length(vals) < 2) return(NA)
  value_range <- range(vals)
  # Fix: a constant field previously crashed because seq() produced
  # non-increasing breaks and hist() rejects them. A constant field is
  # perfectly homogeneous, so its entropy is 0 by definition.
  if (value_range[1] == value_range[2]) return(0)
  # Create histogram bins spanning the observed range
  breaks <- seq(value_range[1], value_range[2], length.out = n_bins + 1)
  bin_counts <- hist(vals, breaks = breaks, plot = FALSE)$counts
  # Convert counts to probabilities, dropping empty bins (0 * log(0) -> 0)
  probabilities <- bin_counts[bin_counts > 0] / sum(bin_counts)
  # Shannon entropy: H = -sum(p * log(p))
  -sum(probabilities * log(probabilities))
}
#' Calculate percentage of field with positive vs negative change
#'
#' Works pixel-wise on two aligned value vectors extracted from the same
#' field boundary; vectors of differing length yield all-NA results.
#'
#' @param current_values Current week CI values
#' @param previous_values Previous week CI values
#' @return List with percentage of positive, negative and stable change areas
calculate_change_percentages <- function(current_values, previous_values) {
  na_result <- list(positive_pct = NA, negative_pct = NA, stable_pct = NA)
  # Vectors must be aligned pixel-for-pixel to compute a meaningful delta
  if (length(current_values) != length(previous_values)) {
    return(na_result)
  }
  deltas <- current_values - previous_values
  # is.finite() is FALSE for NA/NaN/Inf, so this matches the usual cleaning
  deltas <- deltas[is.finite(deltas)]
  if (length(deltas) < 2) {
    return(na_result)
  }
  # mean() of a logical vector gives the fraction of TRUEs
  list(
    positive_pct = mean(deltas > 0) * 100,
    negative_pct = mean(deltas < 0) * 100,
    stable_pct = mean(deltas == 0) * 100
  )
}
#' Calculate spatial autocorrelation (Moran's I) for a field
#'
#' Clips the CI raster to the field, converts cells to points and runs a
#' Moran's I test over a k-nearest-neighbour weights matrix. Any failure is
#' caught and reported as an "error" result rather than propagated.
#'
#' @param ci_raster Terra raster of CI values
#' @param field_boundary Terra vector of field boundary
#' @return List with `morans_i`, `p_value` and an `interpretation` label
calculate_spatial_autocorrelation <- function(ci_raster, field_boundary) {
  tryCatch({
    # Restrict the raster to the field footprint
    clipped <- terra::mask(terra::crop(ci_raster, field_boundary), field_boundary)
    # One point per non-NA cell
    cell_points <- terra::as.points(clipped, na.rm = TRUE)
    if (length(cell_points) < 10) {
      return(list(morans_i = NA, p_value = NA, interpretation = "insufficient_data"))
    }
    # Hand off to sf/spdep for the neighbourhood machinery
    pts <- sf::st_as_sf(cell_points)
    xy <- sf::st_coordinates(pts)
    # Adaptive neighbour count: between 4 and 8, scaled by point density
    k <- min(8, max(4, floor(nrow(xy) / 10)))
    neighbours <- spdep::knn2nb(spdep::knearneigh(xy, k = k))
    weights <- spdep::nb2listw(neighbours, style = "W", zero.policy = TRUE)
    # First column of the points sf holds the CI values
    test_out <- spdep::moran.test(pts[[1]], weights, zero.policy = TRUE)
    stat <- test_out$estimate[1]
    pval <- test_out$p.value
    # Classify the statistic; significance is checked before magnitude
    label <- if (is.na(stat)) {
      "insufficient_data"
    } else if (pval > 0.05) {
      "random" # Not significant spatial pattern
    } else if (stat > MORAN_THRESHOLD_HIGH) {
      "very_strong_clustering" # May indicate management issues
    } else if (stat > MORAN_THRESHOLD_MODERATE) {
      "strong_clustering" # Worth monitoring
    } else if (stat > MORAN_THRESHOLD_LOW) {
      "normal_continuity" # Expected for uniform fields
    } else if (stat > 0.3) {
      "weak_clustering" # Some clustering present
    } else if (stat < -0.3) {
      "dispersed" # Checkerboard pattern
    } else {
      "low_autocorrelation"
    }
    list(morans_i = stat, p_value = pval, interpretation = label)
  }, error = function(e) {
    warning(paste("Error calculating spatial autocorrelation:", e$message))
    list(morans_i = NA, p_value = NA, interpretation = "error")
  })
}
#' Calculate percentage of field in extreme values using simple threshold
#' Hotspots = areas with CI > mean + 1.5*SD (high-performing areas)
#' Coldspots = areas with CI < mean - 1.5*SD (underperforming areas)
#'
#' @param values Numeric vector of CI values
#' @param threshold_multiplier Standard deviation multiplier (default 1.5)
#' @return List with percentage of hotspots and coldspots (relative to valid
#'   pixels), the method label, and the threshold used
calculate_extreme_percentages_simple <- function(values, threshold_multiplier = 1.5) {
  # Fix: drop NA/non-finite pixels up front so percentages are relative to
  # valid data (previously NAs inflated the denominator, inconsistent with
  # the other helpers in this file which all clean their input first).
  values <- values[!is.na(values) & is.finite(values)]
  if (length(values) < 10) {
    return(list(hotspot_pct = NA, coldspot_pct = NA, method = "insufficient_data"))
  }
  mean_val <- mean(values)
  sd_val <- sd(values)
  # Hotspots: significantly ABOVE average (good performance)
  upper_threshold <- mean_val + (threshold_multiplier * sd_val)
  # Coldspots: significantly BELOW average (poor performance)
  lower_threshold <- mean_val - (threshold_multiplier * sd_val)
  list(
    hotspot_pct = mean(values > upper_threshold) * 100,
    coldspot_pct = mean(values < lower_threshold) * 100,
    method = "simple_threshold",
    threshold_used = threshold_multiplier
  )
}
#' Categorize CI change based on thresholds
#'
#' @param change_value Mean change in CI between weeks
#' @return Character string: "increase", "stable", "decrease", or "unknown"
#'   when the change is NA
categorize_change <- function(change_value) {
  # NA guard first: comparisons against NA would not yield a usable condition
  if (is.na(change_value)) {
    return("unknown")
  }
  if (change_value >= CI_CHANGE_INCREASE_THRESHOLD) {
    "increase"
  } else if (change_value <= CI_CHANGE_DECREASE_THRESHOLD) {
    "decrease"
  } else {
    "stable"
  }
}
#' Categorize field uniformity based on coefficient of variation and spatial pattern
#'
#' The CV is the primary signal; spatial clustering and hotspot/coldspot
#' extents only refine the label when variation is moderate.
#'
#' @param cv_value Coefficient of variation (primary uniformity metric)
#' @param spatial_info List with spatial autocorrelation results (uses `interpretation`)
#' @param extreme_pct List with `hotspot_pct` / `coldspot_pct` percentages
#' @param acceptable_pct Percentage of field within acceptable range
#' @return Character string describing field uniformity pattern
categorize_uniformity_enhanced <- function(cv_value, spatial_info, extreme_pct, acceptable_pct = NA) {
  if (is.na(cv_value)) return("unknown variation")
  # Check for poor uniformity first (urgent issues)
  if (cv_value > POOR_UNIFORMITY_THRESHOLD || (!is.na(acceptable_pct) && acceptable_pct < ACCEPTABLE_AREA_THRESHOLD)) {
    return("poor uniformity - urgent attention needed")
  }
  # Check for excellent uniformity
  if (cv_value <= EXCELLENT_UNIFORMITY_THRESHOLD && (!is.na(acceptable_pct) && acceptable_pct >= 45)) {
    return("excellent uniformity")
  }
  # Check for good uniformity
  if (cv_value <= UNIFORMITY_THRESHOLD) {
    return("good uniformity")
  }
  # Field has moderate variation - determine if localized or distributed
  spatial_pattern <- spatial_info$interpretation
  hotspot_pct <- extreme_pct$hotspot_pct
  coldspot_pct <- extreme_pct$coldspot_pct
  # Fix: both extremes must be known before the clustered branches can use
  # them. Previously only hotspot_pct was NA-guarded, so an NA coldspot_pct
  # could make the `if` condition below error out.
  extremes_known <- !is.na(hotspot_pct) && !is.na(coldspot_pct)
  # Determine pattern type based on CV (primary) and spatial pattern (secondary)
  if (spatial_pattern %in% c("very_strong_clustering") && extremes_known && (hotspot_pct > 15 || coldspot_pct > 5)) {
    # Very strong clustering with substantial extreme areas - likely problematic
    if (hotspot_pct > coldspot_pct) {
      return("localized high-performing areas")
    } else if (coldspot_pct > hotspot_pct) {
      return("localized problem areas")
    } else {
      return("localized hotspots and coldspots")
    }
  } else if (spatial_pattern %in% c("strong_clustering") && extremes_known && (hotspot_pct > 10 || coldspot_pct > 3)) {
    # Strong clustering with moderate extreme areas
    if (hotspot_pct > coldspot_pct) {
      return("localized high-performing areas")
    } else if (coldspot_pct > hotspot_pct) {
      return("localized problem areas")
    } else {
      return("clustered variation")
    }
  } else {
    # Normal field continuity or weak patterns - rely primarily on CV
    return("moderate variation")
  }
}
#' Generate enhanced message based on analysis results including spatial patterns
#'
#' Crosses the field's uniformity category with its CI change category and
#' returns a canned advisory message plus a flag telling the caller whether
#' the message warrants an alert.
#'
#' @param uniformity_category Character: enhanced uniformity category with spatial info
#' @param change_category Character: "increase", "stable", or "decrease"
#' @param extreme_pct List with hotspot/coldspot percentages
#' @param acceptable_pct Percentage of field within acceptable range
#' @param morans_i Moran's I value for additional context
#' @param growth_stage Character: growth stage placeholder (currently unused)
#' @return List with `message` (character) and `worth_sending` (logical)
generate_enhanced_message <- function(uniformity_category, change_category, extreme_pct, acceptable_pct = NA, morans_i = NA, growth_stage = "vegetation stage") {
  # Message matrix for the matched uniformity scenario, keyed by change
  # category; each entry carries the text and its alert flag.
  messages <- list()
  # Poor uniformity scenarios (urgent) - always worth sending
  if (uniformity_category == "poor uniformity - urgent attention needed") {
    messages <- list(
      "stable" = list(
        message = "🚨 URGENT: Poor field uniformity detected - immediate management review required",
        worth_sending = TRUE
      ),
      "decrease" = list(
        message = "🚨 CRITICAL: Poor uniformity with declining trend - emergency intervention needed",
        worth_sending = TRUE
      ),
      "increase" = list(
        message = "⚠️ CAUTION: Improving but still poor uniformity - continue intensive monitoring",
        worth_sending = TRUE
      )
    )
  }
  # Excellent uniformity scenarios - only a decline is alert-worthy
  else if (uniformity_category == "excellent uniformity") {
    messages <- list(
      "stable" = list(
        message = "✅ Excellent: Optimal field uniformity and stability",
        worth_sending = FALSE
      ),
      "decrease" = list(
        message = "⚠️ Alert: Excellent uniformity but declining - investigate cause early",
        worth_sending = TRUE
      ),
      "increase" = list(
        message = "🌟 Outstanding: Excellent uniformity with continued improvement",
        worth_sending = FALSE
      )
    )
  }
  # Good uniformity scenarios
  else if (uniformity_category == "good uniformity") {
    # Very strong clustering can hide management issues even when CV is good
    if (!is.na(morans_i) && morans_i > MORAN_THRESHOLD_HIGH) {
      messages <- list(
        "stable" = list(
          message = "⚠️ Alert: Good uniformity but very strong clustering detected - check management practices",
          worth_sending = TRUE
        ),
        "decrease" = list(
          message = "🚨 Alert: Good uniformity declining with clustering patterns - targeted intervention needed",
          worth_sending = TRUE
        ),
        "increase" = list(
          message = "✅ Good: Improving uniformity but monitor clustering patterns",
          worth_sending = FALSE
        )
      )
    } else {
      messages <- list(
        "stable" = list(
          message = "✅ Good: Stable field with good uniformity",
          worth_sending = FALSE
        ),
        "decrease" = list(
          message = "⚠️ Alert: Good uniformity but declining trend - early intervention recommended",
          worth_sending = TRUE
        ),
        "increase" = list(
          message = "✅ Great: Good uniformity with improvement trend",
          worth_sending = FALSE
        )
      )
    }
  }
  # Moderate variation scenarios - note appended when acceptable area is low
  else if (uniformity_category == "moderate variation") {
    acceptable_msg <- if (!is.na(acceptable_pct) && acceptable_pct < 45) " - low acceptable area" else ""
    messages <- list(
      "stable" = list(
        message = paste0("⚠️ Alert: Moderate field variation detected", acceptable_msg, " - review management uniformity"),
        worth_sending = TRUE
      ),
      "decrease" = list(
        message = paste0("🚨 Alert: Moderate variation with declining trend", acceptable_msg, " - intervention needed"),
        worth_sending = TRUE
      ),
      "increase" = list(
        message = paste0("📈 Monitor: Improving but still moderate variation", acceptable_msg, " - continue optimization"),
        worth_sending = FALSE
      )
    )
  }
  # Localized problem areas - messages quote the coldspot percentage
  # (fix: the hotspot percentage was computed here but never used)
  else if (uniformity_category == "localized problem areas") {
    coldspot_pct <- round(extreme_pct$coldspot_pct, 1)
    messages <- list(
      "stable" = list(
        message = paste0("🚨 Alert: Problem zones detected (", coldspot_pct, "% underperforming) - targeted intervention needed"),
        worth_sending = TRUE
      ),
      "decrease" = list(
        message = paste0("🚨 URGENT: Problem areas expanding with overall decline (", coldspot_pct, "% affected) - immediate action required"),
        worth_sending = TRUE
      ),
      "increase" = list(
        message = paste0("⚠️ Caution: Overall improvement but ", coldspot_pct, "% problem areas remain - monitor closely"),
        worth_sending = TRUE
      )
    )
  }
  # Localized high-performing areas - messages quote the hotspot percentage
  else if (uniformity_category == "localized high-performing areas") {
    hotspot_pct <- round(extreme_pct$hotspot_pct, 1)
    messages <- list(
      "stable" = list(
        message = paste0("💡 Opportunity: ", hotspot_pct, "% of field performing well - replicate conditions in remaining areas"),
        worth_sending = FALSE
      ),
      "decrease" = list(
        message = paste0("⚠️ Alert: High-performing areas (", hotspot_pct, "%) declining - investigate cause to prevent spread"),
        worth_sending = TRUE
      ),
      "increase" = list(
        message = paste0("🌟 Excellent: High-performing areas (", hotspot_pct, "%) expanding - excellent management practices"),
        worth_sending = FALSE
      )
    )
  }
  # Clustered variation (general)
  else if (uniformity_category == "clustered variation") {
    messages <- list(
      "stable" = list(
        message = "⚠️ Alert: Clustered variation detected - investigate spatial management patterns",
        worth_sending = TRUE
      ),
      "decrease" = list(
        message = "🚨 Alert: Clustered decline pattern - targeted investigation needed",
        worth_sending = TRUE
      ),
      "increase" = list(
        message = "📈 Monitor: Clustered improvement - identify and replicate successful practices",
        worth_sending = FALSE
      )
    )
  }
  # Default fallback for unrecognized uniformity categories
  else {
    messages <- list(
      "stable" = list(message = "❓ Field analysis inconclusive - manual review recommended", worth_sending = FALSE),
      "decrease" = list(message = "⚠️ Field showing decline - investigation recommended", worth_sending = TRUE),
      "increase" = list(message = "📈 Field showing improvement", worth_sending = FALSE)
    )
  }
  # Return the entry for this change category, or a generic fallback when the
  # category is not one of stable/decrease/increase
  if (change_category %in% names(messages)) {
    return(messages[[change_category]])
  } else {
    return(list(
      message = paste("❓ Analysis inconclusive -", uniformity_category, "with", change_category, "trend"),
      worth_sending = FALSE
    ))
  }
}
#' Create Word document with analysis results
#' @param field_results List of field analysis results
#' @param current_week Current week number
#' @param previous_week Previous week number
#' @param project_dir Project directory name
#' @param output_dir Directory to save the Word document
#' @return Path to the created Word document
# REMOVED - Word report creation functionality
#' Load and analyze a weekly mosaic for individual fields with spatial analysis
#'
#' For each field polygon: extracts CI summary statistics via terra, collects
#' the raw pixel values, computes spatial autocorrelation, extreme-value
#' percentages, entropy, and an enhanced uniformity category.
#'
#' @param week_file_path Path to the weekly mosaic file
#' @param field_boundaries_sf SF object with field boundaries; reads `field`
#'   and `sub_field` columns and, when present, an area column
#' @return Named list (one entry per "field_subfield") of per-field statistics
#'   including spatial metrics, or NULL when the file is missing or any step
#'   of the analysis errors
analyze_weekly_mosaic <- function(week_file_path, field_boundaries_sf) {
  if (!file.exists(week_file_path)) {
    warning(paste("Mosaic file not found:", week_file_path))
    return(NULL)
  }
  tryCatch({
    # Load the raster and select only the CI band
    # NOTE(review): band index 5 is hard-coded — confirm mosaic band order.
    mosaic_raster <- terra::rast(week_file_path)
    ci_raster <- mosaic_raster[[5]] # Select the CI band
    names(ci_raster) <- "CI"
    # Convert field boundaries to terra vect for extraction
    field_boundaries_vect <- terra::vect(field_boundaries_sf)
    # Extract CI values for each field
    field_results <- list()
    for (i in seq_len(nrow(field_boundaries_sf))) {
      field_name <- field_boundaries_sf$field[i]
      sub_field_name <- field_boundaries_sf$sub_field[i]
      # Check and get field area from geojson if available; the attribute
      # name varies between sources, so the known spellings are tried in order
      field_area_ha <- NA
      if ("area_ha" %in% colnames(field_boundaries_sf)) {
        field_area_ha <- field_boundaries_sf$area_ha[i]
      } else if ("AREA_HA" %in% colnames(field_boundaries_sf)) {
        field_area_ha <- field_boundaries_sf$AREA_HA[i]
      } else if ("area" %in% colnames(field_boundaries_sf)) {
        field_area_ha <- field_boundaries_sf$area[i]
      } else {
        # Calculate area from geometry as fallback
        field_geom <- field_boundaries_sf[i,]
        if (sf::st_is_longlat(field_geom)) {
          # For geographic coordinates, transform to projected for area calculation
          # NOTE(review): Web Mercator inflates areas away from the equator;
          # an equal-area CRS would be more accurate — confirm the error is
          # acceptable for these estates.
          field_geom <- sf::st_transform(field_geom, 3857) # Web Mercator
        }
        field_area_ha <- as.numeric(sf::st_area(field_geom)) / 10000 # Convert to hectares
      }
      cat("Processing field:", field_name, "-", sub_field_name, "(", round(field_area_ha, 1), "ha)\n")
      # Extract values for this specific field
      field_vect <- field_boundaries_vect[i]
      # Extract with built-in statistics from terra (PRIMARY METHOD)
      # NOTE(review): terra::extract() is documented with a single
      # summarising `fun`; confirm this character-vector form returns columns
      # named mean/sd/min/max/median as accessed below.
      terra_stats <- terra::extract(ci_raster, field_vect, fun = c("mean", "sd", "min", "max", "median"), na.rm = TRUE)
      # Extract raw values for additional calculations and validation
      # NOTE(review): extract(fun = NULL) returns a data.frame whose first
      # column is a polygon ID; unlist() below folds those ID values into
      # field_values — verify the raw values are not contaminated.
      ci_values <- terra::extract(ci_raster, field_vect, fun = NULL)
      # Flatten and clean the values (drop NA and non-finite pixels)
      field_values <- unlist(ci_values)
      valid_values <- field_values[!is.na(field_values) & is.finite(field_values)]
      if (length(valid_values) > 0) {
        # Use TERRA as primary calculations
        primary_mean <- terra_stats$mean[1]
        primary_sd <- terra_stats$sd[1]
        primary_cv <- primary_sd / primary_mean
        primary_median <- terra_stats$median[1]
        primary_min <- terra_stats$min[1]
        primary_max <- terra_stats$max[1]
        # Manual calculations for validation only (not stored)
        manual_mean <- mean(valid_values)
        manual_cv <- sd(valid_values) / manual_mean
        basic_stats <- list(
          field = field_name,
          sub_field = sub_field_name,
          # PRIMARY statistics (terra-based)
          mean_ci = primary_mean,
          median_ci = primary_median,
          sd_ci = primary_sd,
          cv = primary_cv,
          min_ci = primary_min,
          max_ci = primary_max,
          # Store raw values for change analysis
          raw_values = valid_values,
          # Other metrics using terra values
          acceptable_pct = calculate_acceptable_percentage_terra(primary_mean, valid_values),
          n_pixels = length(valid_values),
          # Field area from geojson
          field_area_ha = field_area_ha
        )
        # Calculate spatial statistics
        spatial_info <- calculate_spatial_autocorrelation(ci_raster, field_vect)
        extreme_pct <- calculate_extreme_percentages_simple(valid_values)
        # Calculate entropy for additional uniformity measure
        entropy_value <- calculate_entropy(valid_values)
        # Enhanced uniformity categorization
        uniformity_category <- categorize_uniformity_enhanced(
          basic_stats$cv,
          spatial_info,
          extreme_pct,
          basic_stats$acceptable_pct
        )
        # Combine all results into the per-field entry
        field_stats <- c(
          basic_stats,
          list(
            spatial_autocorr = spatial_info,
            extreme_percentages = extreme_pct,
            entropy = entropy_value,
            uniformity_category = uniformity_category
          )
        )
        field_results[[paste0(field_name, "_", sub_field_name)]] <- field_stats
      } else {
        warning(paste("No valid CI values found for field:", field_name, sub_field_name))
      }
    }
    return(field_results)
  }, error = function(e) {
    warning(paste("Error analyzing mosaic:", e$message))
    return(NULL)
  })
}
# 4. Core analysis functions for modular use
# ----------------------------------------
#' Run crop analysis for any estate
#'
#' Orchestrates a full two-week comparison: loads the estate's project
#' configuration (side effect: sources parameters_project.R and writes
#' `project_dir` into the global environment), analyzes both weekly mosaics,
#' and derives per-field change results.
#'
#' @param estate_name Character: name of the estate (e.g., "simba", "chemba")
#' @param current_week Numeric: current week number
#' @param previous_week Numeric: previous week number
#' @param year Numeric: year (default 2025)
#' @return List with analysis metadata, `field_results`, and the raw per-week
#'   field statistics
run_estate_analysis <- function(estate_name, current_week, previous_week, year = 2025) {
  cat("=== CROP ANALYSIS MESSAGING SYSTEM ===\n")
  cat("Analyzing:", toupper(estate_name), "estate\n")
  cat("Comparing week", previous_week, "vs week", current_week, "of", year, "\n\n")
  # Set project_dir globally for parameters_project.R
  # (global assignment is a deliberate side effect: the sourced configuration
  # script reads `project_dir` to pick the estate)
  assign("project_dir", estate_name, envir = .GlobalEnv)
  # Load project configuration: try the working directory first, then r_app/
  tryCatch({
    source("parameters_project.R")
    cat("✓ Project configuration loaded\n")
  }, error = function(e) {
    tryCatch({
      source(here::here("r_app", "parameters_project.R"))
      cat("✓ Project configuration loaded from r_app directory\n")
    }, error = function(e) {
      # NOTE(review): the original error message from source() is discarded
      # here — consider including conditionMessage(e) for easier debugging.
      stop("Failed to load project configuration")
    })
  })
  # Verify required variables are available
  # (weekly_CI_mosaic and field_boundaries_sf are expected to be created by
  # the sourced configuration script)
  if (!exists("weekly_CI_mosaic") || !exists("field_boundaries_sf")) {
    stop("Required project variables not initialized. Check project configuration.")
  }
  # Construct file paths for weekly mosaics (e.g. "week_05_2025.tif")
  current_week_file <- sprintf("week_%02d_%d.tif", current_week, year)
  previous_week_file <- sprintf("week_%02d_%d.tif", previous_week, year)
  current_week_path <- file.path(weekly_CI_mosaic, current_week_file)
  previous_week_path <- file.path(weekly_CI_mosaic, previous_week_file)
  cat("Looking for files:\n")
  cat("- Current week:", current_week_path, "\n")
  cat("- Previous week:", previous_week_path, "\n\n")
  # Analyze both weeks for all fields
  cat("Analyzing weekly mosaics per field...\n")
  current_field_stats <- analyze_weekly_mosaic(current_week_path, field_boundaries_sf)
  previous_field_stats <- analyze_weekly_mosaic(previous_week_path, field_boundaries_sf)
  if (is.null(current_field_stats) || is.null(previous_field_stats)) {
    stop("Could not analyze one or both weekly mosaics")
  }
  # Generate field-by-field change results
  field_results <- generate_field_results(current_field_stats, previous_field_stats, current_week, previous_week)
  return(list(
    estate_name = estate_name,
    current_week = current_week,
    previous_week = previous_week,
    year = year,
    field_results = field_results,
    current_field_stats = current_field_stats,
    previous_field_stats = previous_field_stats
  ))
}
#' Generate analysis results for all fields
#'
#' Only fields present in both weeks are compared; each result bundles the
#' per-week stats, the CI change, change percentages and the advisory message.
#'
#' @param current_field_stats Analysis results for current week
#' @param previous_field_stats Analysis results for previous week
#' @param current_week Current week number
#' @param previous_week Previous week number
#' @return Named list with one comparison result per common field
generate_field_results <- function(current_field_stats, previous_field_stats, current_week, previous_week) {
  # Only fields analyzed successfully in BOTH weeks can be compared
  shared_ids <- intersect(names(current_field_stats), names(previous_field_stats))
  results <- lapply(shared_ids, function(field_id) {
    cur <- current_field_stats[[field_id]]
    prev <- previous_field_stats[[field_id]]
    # Week-over-week change in mean CI and its categorical label
    delta <- cur$mean_ci - prev$mean_ci
    trend <- categorize_change(delta)
    # Pixel-wise positive/negative/stable change percentages
    pct_change <- calculate_change_percentages(cur$raw_values, prev$raw_values)
    # Uniformity category comes from the current week's analysis
    uniformity <- cur$uniformity_category
    # Advisory message for this field
    msg <- generate_enhanced_message(
      uniformity,
      trend,
      cur$extreme_percentages,
      cur$acceptable_pct,
      cur$spatial_autocorr$morans_i
    )
    list(
      current_stats = cur,
      previous_stats = prev,
      ci_change = delta,
      change_category = trend,
      change_percentages = pct_change,
      uniformity_category = uniformity,
      message_result = msg
    )
  })
  names(results) <- shared_ids
  results
}
#' Format analysis results for WhatsApp/Word copy-paste
#'
#' Builds a plain-text report: header, summary, priority fields (alerts only),
#' and quick improving/declining area stats with an overall trend line.
#'
#' @param analysis_results Results from run_estate_analysis
#' @return Character string with formatted text (lines joined by newlines)
format_for_whatsapp <- function(analysis_results) {
  fields <- analysis_results$field_results
  estate <- toupper(analysis_results$estate_name)
  wk_now <- analysis_results$current_week
  wk_prev <- analysis_results$previous_week
  # Per-field alert flags and areas, used for the summary counts
  flags <- vapply(fields, function(fr) fr$message_result$worth_sending, logical(1))
  areas <- vapply(fields, function(fr) fr$current_stats$field_area_ha, numeric(1))
  n_alerts <- sum(flags)
  n_fields <- length(fields)
  area_total <- sum(areas, na.rm = TRUE)
  lines <- c(
    paste("🌾", estate, "CROP ANALYSIS"),
    paste("📅 Week", wk_now, "vs Week", wk_prev),
    "",
    "📊 SUMMARY:",
    paste("• Estate:", estate),
    paste("• Fields analyzed:", n_fields),
    paste("• Total area:", round(area_total, 1), "ha"),
    paste("• Alerts needed:", n_alerts),
    ""
  )
  # Field-by-field alerts only
  if (n_alerts > 0) {
    lines <- c(lines, "🚨 PRIORITY FIELDS:")
    for (fr in fields) {
      if (fr$message_result$worth_sending) {
        label <- paste(fr$current_stats$field, fr$current_stats$sub_field, sep = "-")
        ha <- round(fr$current_stats$field_area_ha, 1)
        lines <- c(lines, paste("•", label, paste0("(", ha, "ha):"), fr$message_result$message))
      }
    }
  } else {
    lines <- c(lines, "✅ No urgent alerts - all fields stable")
  }
  # Quick farm summary: hectares improving vs declining
  lines <- c(lines, "", "📈 QUICK STATS:")
  gains <- vapply(fields, function(fr) {
    pct <- fr$change_percentages$positive_pct
    if (!is.na(pct)) (pct / 100) * fr$current_stats$field_area_ha else 0
  }, numeric(1))
  losses <- vapply(fields, function(fr) {
    pct <- fr$change_percentages$negative_pct
    if (!is.na(pct)) (pct / 100) * fr$current_stats$field_area_ha else 0
  }, numeric(1))
  ha_up <- sum(gains, na.rm = TRUE)
  ha_down <- sum(losses, na.rm = TRUE)
  pct_up <- (ha_up / area_total) * 100
  pct_down <- (ha_down / area_total) * 100
  lines <- c(lines, paste("• Improving areas:", round(ha_up, 1), "ha (", round(pct_up, 1), "%)"))
  lines <- c(lines, paste("• Declining areas:", round(ha_down, 1), "ha (", round(pct_down, 1), "%)"))
  # Overall trend: compare improving vs declining percentages
  if (pct_up > pct_down) {
    lines <- c(lines, paste("• Trend: ✅ POSITIVE (+", round(pct_up - pct_down, 1), "%)"))
  } else if (pct_down > pct_up) {
    lines <- c(lines, paste("• Trend: ⚠️ NEGATIVE (-", round(pct_down - pct_up, 1), "%)"))
  } else {
    lines <- c(lines, "• Trend: BALANCED")
  }
  paste(lines, collapse = "\n")
}
#' Format analysis results as CSV data
#'
#' @param analysis_results Results from run_estate_analysis
#' @return Data frame ready for write.csv (one row per field; empty data
#'   frame when there are no field results)
format_as_csv <- function(analysis_results) {
  field_results <- analysis_results$field_results
  estate_name <- analysis_results$estate_name
  current_week <- analysis_results$current_week
  previous_week <- analysis_results$previous_week
  # Build one single-row data frame per field, then bind once at the end
  # (fix: previously grew the data frame with rbind() inside the loop, which
  # copies the accumulated frame on every iteration - O(n^2))
  rows <- lapply(field_results, function(field_info) {
    data.frame(
      estate = estate_name,
      field = field_info$current_stats$field,
      sub_field = field_info$current_stats$sub_field,
      area_ha = round(field_info$current_stats$field_area_ha, 2),
      current_week = current_week,
      previous_week = previous_week,
      current_week_ci = round(field_info$current_stats$mean_ci, 3),
      previous_week_ci = round(field_info$previous_stats$mean_ci, 3),
      ci_change = round(field_info$ci_change, 3),
      change_category = field_info$change_category,
      cv = round(field_info$current_stats$cv, 3),
      uniformity_category = field_info$uniformity_category,
      acceptable_pct = round(field_info$current_stats$acceptable_pct, 1),
      hotspot_pct = round(field_info$current_stats$extreme_percentages$hotspot_pct, 1),
      coldspot_pct = round(field_info$current_stats$extreme_percentages$coldspot_pct, 1),
      morans_i = round(field_info$current_stats$spatial_autocorr$morans_i, 3),
      alert_needed = field_info$message_result$worth_sending,
      message = field_info$message_result$message,
      stringsAsFactors = FALSE
    )
  })
  # Match the original's empty-input result (zero-row, zero-col data frame)
  if (length(rows) == 0) {
    return(data.frame())
  }
  # unname() so list names do not become row names; rows get default 1..n
  do.call(rbind, unname(rows))
}
#' Format analysis results as markdown table
#'
#' Renders the per-field analysis as a markdown document: a title, the
#' analysis period, and one table row per field.
#'
#' @param analysis_results Results from run_estate_analysis
#' @return Character string with markdown table
format_as_markdown_table <- function(analysis_results) {
  fields <- analysis_results$field_results
  header_lines <- c(
    paste("# Crop Analysis Summary -", toupper(analysis_results$estate_name), "Estate"),
    paste("**Analysis Period:** Week", analysis_results$previous_week,
          "vs Week", analysis_results$current_week),
    "",
    "| Field | Area (ha) | Current CI | Change | Uniformity | Alert | Message |",
    "|-------|-----------|------------|--------|------------|-------|---------|"
  )
  # One markdown table row per field, in the order the results were stored.
  table_rows <- vapply(fields, function(field_info) {
    stats <- field_info$current_stats
    alert_label <- if (field_info$message_result$worth_sending) "🚨 YES" else "✅ NO"
    paste("|", paste(stats$field, stats$sub_field, sep = "-"),
          "|", round(stats$field_area_ha, 1),
          "|", round(stats$mean_ci, 3),
          "|", field_info$change_category,
          "|", field_info$uniformity_category,
          "|", alert_label,
          "|", field_info$message_result$message, "|")
  }, character(1))
  paste(c(header_lines, unname(table_rows)), collapse = "\n")
}
#' Save analysis outputs in multiple formats
#'
#' Renders one analysis run into three artifacts -- WhatsApp-ready text, a
#' per-field CSV, and a markdown table -- writes them to timestamped files,
#' and echoes the WhatsApp text to the console for immediate copy/paste.
#'
#' @param analysis_results Results from run_estate_analysis
#' @param output_dir Directory to save files (optional); defaults to
#'   file.path("output", <estate_name>) and is created if missing.
#' @return List with file paths created (whatsapp_file, csv_file,
#'   markdown_file)
save_analysis_outputs <- function(analysis_results, output_dir = NULL) {
  estate_name <- analysis_results$estate_name
  current_week <- analysis_results$current_week
  previous_week <- analysis_results$previous_week
  # Default output location is per-estate under ./output
  if (is.null(output_dir)) {
    output_dir <- file.path("output", estate_name)
  }
  if (!dir.exists(output_dir)) dir.create(output_dir, recursive = TRUE)
  # Timestamped base name so repeated runs never overwrite earlier files
  timestamp <- format(Sys.time(), "%Y%m%d_%H%M")
  base_filename <- paste0("crop_analysis_w", current_week, "vs", previous_week, "_", timestamp)
  # Generate different output formats
  whatsapp_text <- format_for_whatsapp(analysis_results)
  csv_data <- format_as_csv(analysis_results)
  markdown_table <- format_as_markdown_table(analysis_results)
  # Save files
  whatsapp_file <- file.path(output_dir, paste0(base_filename, "_whatsapp.txt"))
  csv_file <- file.path(output_dir, paste0(base_filename, "_data.csv"))
  markdown_file <- file.path(output_dir, paste0(base_filename, "_table.md"))
  writeLines(whatsapp_text, whatsapp_file)
  write.csv(csv_data, csv_file, row.names = FALSE)
  writeLines(markdown_table, markdown_file)
  # Display summary
  cat("\n=== OUTPUT FILES CREATED ===\n")
  cat("📱 WhatsApp format:", whatsapp_file, "\n")
  cat("📊 CSV file:", csv_file, "\n")
  cat("📝 Markdown table:", markdown_file, "\n")
  # Display WhatsApp format in console for immediate copy
  cat("\n=== WHATSAPP/WORD READY FORMAT ===\n")
  cat("(Copy text below directly to WhatsApp or Word)\n")
  # BUG FIX: cat(rep("=", 50), "\n") printed fifty "=" characters separated
  # by spaces ("= = = ..."); strrep() produces the intended solid separator.
  cat(strrep("=", 50), "\n")
  cat(whatsapp_text)
  cat("\n", strrep("=", 50), "\n")
  return(list(
    whatsapp_file = whatsapp_file,
    csv_file = csv_file,
    markdown_file = markdown_file
  ))
}
# 5. Main analysis function (now uses modular approach)
# ---------------------------------------------------
# Entry point: parses CLI arguments, runs run_estate_analysis(), prints a
# detailed per-field report plus farm-wide summary tables to the console,
# writes output files via save_analysis_outputs(), and returns the analysis
# results invisibly for further processing.
#
# Expected CLI arguments (all optional, positional):
#   1. current_week  - week number being analyzed (default 32)
#   2. previous_week - comparison week number (default 31)
#   3. estate_name   - estate to analyze (default "simba")
#
# NOTE(review): relies on POOR_UNIFORMITY_THRESHOLD, which is not defined in
# the configuration section at the top of this file -- presumably defined
# elsewhere in the file; confirm before running.
main <- function() {
# Capture command line arguments
args <- commandArgs(trailingOnly = TRUE)
# Process arguments with defaults
# NOTE(review): !is.na(args[1]) only checks that the argument exists;
# as.numeric() still yields NA (with a warning) for non-numeric input.
current_week <- if (length(args) >= 1 && !is.na(args[1])) {
as.numeric(args[1])
} else {
32 # Default for proof of concept
}
previous_week <- if (length(args) >= 2 && !is.na(args[2])) {
as.numeric(args[2])
} else {
31 # Default for proof of concept
}
estate_name <- if (length(args) >= 3 && !is.na(args[3])) {
as.character(args[3])
} else {
"simba" # Default estate
}
year <- 2025 # Current year - could be made dynamic
# Run the modular analysis (loads mosaics, computes per-field stats/messages)
analysis_results <- run_estate_analysis(estate_name, current_week, previous_week, year)
field_results <- analysis_results$field_results
# 6. Display detailed field-by-field analysis
# ------------------------------------------
cat("=== FIELD-BY-FIELD ANALYSIS ===\n\n")
for (field_id in names(field_results)) {
# Unpack the per-field result components for readability below
field_info <- field_results[[field_id]]
current_field <- field_info$current_stats
previous_field <- field_info$previous_stats
ci_change <- field_info$ci_change
change_category <- field_info$change_category
change_percentages <- field_info$change_percentages
uniformity_category <- field_info$uniformity_category
message_result <- field_info$message_result
# Print enhanced field analysis
cat("FIELD:", current_field$field, "-", current_field$sub_field, "\n")
cat("- Field size:", round(current_field$field_area_ha, 1), "hectares\n")
cat("- Week", previous_week, "CI:", round(previous_field$mean_ci, 3), "\n")
cat("- Week", current_week, "CI:", round(current_field$mean_ci, 3), "\n")
cat("- Terra stats: Mean =", round(current_field$mean_ci, 3),
", CV =", round(current_field$cv, 3),
", Range = [", round(current_field$min_ci, 2), "-", round(current_field$max_ci, 2), "]\n")
cat("- Within acceptable range (±25% of mean):", round(current_field$acceptable_pct, 1), "%\n")
# Display primary uniformity metrics (CV and Entropy)
# CV bands: < EXCELLENT_UNIFORMITY_THRESHOLD (0.08) excellent,
# < UNIFORMITY_THRESHOLD (0.15) good, < 0.30 moderate,
# < 0.50 high variation, otherwise very high variation.
cat("- Field uniformity: CV =", round(current_field$cv, 3))
if (current_field$cv < EXCELLENT_UNIFORMITY_THRESHOLD) {
cat(" (excellent)")
} else if (current_field$cv < UNIFORMITY_THRESHOLD) {
cat(" (good)")
} else if (current_field$cv < 0.30) {
cat(" (moderate)")
} else if (current_field$cv < 0.50) {
cat(" (high variation)")
} else {
cat(" (very high variation)")
}
# Add entropy information
if (!is.na(current_field$entropy)) {
cat(", Entropy =", round(current_field$entropy, 3))
# Entropy interpretation (higher = more heterogeneous)
# Adjusted thresholds to better match CV patterns
if (current_field$entropy < 1.3) {
cat(" (very uniform)")
} else if (current_field$entropy < 1.5) {
cat(" (uniform)")
} else if (current_field$entropy < 1.7) {
cat(" (moderate heterogeneity)")
} else {
cat(" (high heterogeneity)")
}
}
cat("\n")
cat("- Change: Mean =", round(ci_change, 3), "(", change_category, ")")
if (!is.na(change_percentages$positive_pct)) {
# Calculate hectares for this field using field area from geojson
field_hectares <- current_field$field_area_ha
improving_hectares <- (change_percentages$positive_pct / 100) * field_hectares
declining_hectares <- (change_percentages$negative_pct / 100) * field_hectares
cat(", Areas: ", round(change_percentages$positive_pct, 1), "% (", round(improving_hectares, 1), " ha) improving, ",
round(change_percentages$negative_pct, 1), "% (", round(declining_hectares, 1), " ha) declining\n")
} else {
cat("\n")
}
cat("- Spatial Pattern:", uniformity_category, "\n")
# Add spatial details if available
if (!is.na(current_field$spatial_autocorr$morans_i)) {
cat("- Moran's I:", round(current_field$spatial_autocorr$morans_i, 3),
"(", current_field$spatial_autocorr$interpretation, ")")
# Add agricultural context explanation for Moran's I
# (higher Moran's I = stronger spatial clustering of similar CI values)
moran_val <- current_field$spatial_autocorr$morans_i
if (moran_val >= 0.7 && moran_val < 0.85) {
cat(" - normal field continuity")
} else if (moran_val >= 0.85 && moran_val < 0.95) {
cat(" - strong spatial pattern")
} else if (moran_val >= 0.95) {
cat(" - very strong clustering, monitor for management issues")
} else if (moran_val < 0.7 && moran_val > 0.3) {
cat(" - moderate spatial pattern")
} else {
cat(" - unusual spatial pattern for crop field")
}
cat("\n")
}
if (!is.na(current_field$extreme_percentages$hotspot_pct)) {
cat("- Extreme areas: ", round(current_field$extreme_percentages$hotspot_pct, 1),
"% hotspots (high-performing), ", round(current_field$extreme_percentages$coldspot_pct, 1),
"% coldspots (underperforming)")
# Show method used for extreme detection
if (!is.null(current_field$extreme_percentages$method)) {
if (current_field$extreme_percentages$method == "getis_ord_gi_star") {
cat(" [Getis-Ord Gi*]")
} else if (current_field$extreme_percentages$method == "simple_sd") {
cat(" [Simple SD]")
}
}
cat("\n")
}
cat("- Message:", message_result$message, "\n")
cat("- Alert needed:", if(message_result$worth_sending) "YES 🚨" else "NO", "\n\n")
}
# 7. Summary of alerts
# ------------------
# Logical vector: which fields flagged their message as worth sending
alert_fields <- sapply(field_results, function(x) x$message_result$worth_sending)
total_alerts <- sum(alert_fields)
cat("=== SUMMARY ===\n")
cat("Total fields analyzed:", length(field_results), "\n")
cat("Fields requiring alerts:", total_alerts, "\n")
if (total_alerts > 0) {
cat("\nFields needing attention:\n")
for (field_id in names(field_results)[alert_fields]) {
field_info <- field_results[[field_id]]
cat("-", field_info$current_stats$field, "-", field_info$current_stats$sub_field,
":", field_info$message_result$message, "\n")
}
}
# 8. Farm-wide analysis summary table (using modular data)
# -------------------------------------------------------
cat("\n=== FARM-WIDE ANALYSIS SUMMARY ===\n")
# Field uniformity statistics with detailed categories
# (mutually exclusive CV bands bounded by the three threshold constants)
excellent_fields <- sapply(field_results, function(x) x$current_stats$cv <= EXCELLENT_UNIFORMITY_THRESHOLD)
good_fields <- sapply(field_results, function(x) x$current_stats$cv > EXCELLENT_UNIFORMITY_THRESHOLD & x$current_stats$cv <= UNIFORMITY_THRESHOLD)
moderate_fields <- sapply(field_results, function(x) x$current_stats$cv > UNIFORMITY_THRESHOLD & x$current_stats$cv <= POOR_UNIFORMITY_THRESHOLD)
poor_fields <- sapply(field_results, function(x) x$current_stats$cv > POOR_UNIFORMITY_THRESHOLD)
n_excellent <- sum(excellent_fields)
n_good <- sum(good_fields)
n_moderate <- sum(moderate_fields)
n_poor <- sum(poor_fields)
n_uniform_total <- n_excellent + n_good # Total uniform fields (CV ≤ UNIFORMITY_THRESHOLD)
# Calculate farm-wide area statistics (use field area from geojson)
# Fields with NA change percentages or NA area are excluded from all totals.
total_hectares <- 0
total_improving_hectares <- 0
total_declining_hectares <- 0
total_stable_hectares <- 0
for (field_id in names(field_results)) {
field_info <- field_results[[field_id]]
change_pct <- field_info$change_percentages
field_hectares <- field_info$current_stats$field_area_ha
if (!is.na(change_pct$positive_pct) && !is.na(field_hectares)) {
total_hectares <- total_hectares + field_hectares
total_improving_hectares <- total_improving_hectares + (change_pct$positive_pct / 100 * field_hectares)
total_declining_hectares <- total_declining_hectares + (change_pct$negative_pct / 100 * field_hectares)
total_stable_hectares <- total_stable_hectares + (change_pct$stable_pct / 100 * field_hectares)
}
}
# Calculate farm-wide percentages
# NOTE(review): if no field has valid change percentages, total_hectares is
# 0 and these divisions produce NaN, which propagates into the summary.
farm_improving_pct <- (total_improving_hectares / total_hectares) * 100
farm_declining_pct <- (total_declining_hectares / total_hectares) * 100
farm_stable_pct <- (total_stable_hectares / total_hectares) * 100
# Display summary table
cat("\nFIELD UNIFORMITY SUMMARY:\n")
cat("│ Uniformity Level │ Count │ Percent │\n")
cat(sprintf("│ Excellent (CV≤%.2f) │ %5d │ %6.1f%% │\n", EXCELLENT_UNIFORMITY_THRESHOLD, n_excellent, (n_excellent/length(field_results))*100))
cat(sprintf("│ Good (CV %.2f-%.2f) │ %5d │ %6.1f%% │\n", EXCELLENT_UNIFORMITY_THRESHOLD, UNIFORMITY_THRESHOLD, n_good, (n_good/length(field_results))*100))
cat(sprintf("│ Moderate (CV %.2f-%.2f) │ %5d │ %6.1f%% │\n", UNIFORMITY_THRESHOLD, POOR_UNIFORMITY_THRESHOLD, n_moderate, (n_moderate/length(field_results))*100))
cat(sprintf("│ Poor (CV>%.2f) │ %5d │ %6.1f%% │\n", POOR_UNIFORMITY_THRESHOLD, n_poor, (n_poor/length(field_results))*100))
cat(sprintf("│ Total fields │ %5d │ %6.1f%% │\n", length(field_results), 100.0))
cat("\nFARM-WIDE AREA CHANGE SUMMARY:\n")
cat("│ Change Type │ Hectares│ Percent │\n")
cat(sprintf("│ Improving areas │ %7.1f │ %6.1f%% │\n", total_improving_hectares, farm_improving_pct))
cat(sprintf("│ Stable areas │ %7.1f │ %6.1f%% │\n", total_stable_hectares, farm_stable_pct))
cat(sprintf("│ Declining areas │ %7.1f │ %6.1f%% │\n", total_declining_hectares, farm_declining_pct))
cat(sprintf("│ Total area │ %7.1f │ %6.1f%% │\n", total_hectares, 100.0))
# Additional insights
cat("\nKEY INSIGHTS:\n")
cat(sprintf("• %d%% of fields have good uniformity (CV ≤ %.2f)\n", round((n_uniform_total/length(field_results))*100), UNIFORMITY_THRESHOLD))
cat(sprintf("• %d%% of fields have excellent uniformity (CV ≤ %.2f)\n", round((n_excellent/length(field_results))*100), EXCELLENT_UNIFORMITY_THRESHOLD))
cat(sprintf("• %.1f hectares (%.1f%%) of farm area is improving week-over-week\n", total_improving_hectares, farm_improving_pct))
cat(sprintf("• %.1f hectares (%.1f%%) of farm area is declining week-over-week\n", total_declining_hectares, farm_declining_pct))
cat(sprintf("• Total farm area analyzed: %.1f hectares\n", total_hectares))
# Overall trend: compare share of improving vs declining area
if (farm_improving_pct > farm_declining_pct) {
cat(sprintf("• Overall trend: POSITIVE (%.1f%% more area improving than declining)\n", farm_improving_pct - farm_declining_pct))
} else if (farm_declining_pct > farm_improving_pct) {
cat(sprintf("• Overall trend: NEGATIVE (%.1f%% more area declining than improving)\n", farm_declining_pct - farm_improving_pct))
} else {
cat("• Overall trend: BALANCED (equal improvement and decline)\n")
}
# 9. Generate and save multiple output formats
# ------------------------------------------
saved_files <- save_analysis_outputs(analysis_results)
# 10. Analysis complete
# ------------------
cat("\n=== ANALYSIS COMPLETE ===\n")
cat("All field analysis results, farm-wide summary, and output files created.\n")
# Return results for potential further processing
invisible(analysis_results)
}
# Run main function if script is called directly
# (sys.nframe() == 0 means no function call is on the stack, i.e. the file
# is being executed top-level via Rscript; main() is skipped when the file
# is source()'d from inside other code -- R's "if __name__ == '__main__'")
if (sys.nframe() == 0) {
main()
}