# # CROP_ANALYSIS_MESSAGING.R # ========================= # This script analyzes weekly CI mosaics to detect changes and generate automated messages # about crop conditions. It compares two weeks of data to assess: # - Field uniformity (high vs low variation) # - CI change trends (increase, stable, decrease) # - Generates contextual messages based on analysis # - Outputs results in multiple formats: WhatsApp/Word text, CSV, and Markdown # # Usage: Rscript crop_analysis_messaging.R [current_week] [previous_week] [estate_name] # - current_week: Current week number (e.g., 30) # - previous_week: Previous week number (e.g., 29) # - estate_name: Estate name (e.g., "simba", "chemba") # # Examples: # Rscript crop_analysis_messaging.R 32 31 simba # Rscript crop_analysis_messaging.R 30 29 chemba # # The script automatically: # 1. Loads the correct estate configuration # 2. Analyzes weekly mosaics # 3. Generates field-by-field analysis # 4. Creates output files in multiple formats # 5. Displays WhatsApp-ready text in console # # 1. Load required packages # ----------------------- suppressPackageStartupMessages({ library(sf) library(terra) library(tidyverse) library(lubridate) library(here) library(spdep) # For spatial statistics }) # 2. 
# Analysis configuration
# ----------------------

# Thresholds for change detection (mean CI difference between weeks)
CI_CHANGE_INCREASE_THRESHOLD <- 0.5
CI_CHANGE_DECREASE_THRESHOLD <- -0.5

# Thresholds for field uniformity (coefficient of variation as a decimal)
UNIFORMITY_THRESHOLD <- 0.15            # below = good uniformity, above = requires attention
EXCELLENT_UNIFORMITY_THRESHOLD <- 0.08  # below = excellent uniformity
POOR_UNIFORMITY_THRESHOLD <- 0.25       # above = poor uniformity, urgent attention needed

# Thresholds for spatial clustering (adjusted for agricultural fields).
# Fields naturally show strong spatial autocorrelation, hence the high cut-offs.
MORAN_THRESHOLD_HIGH <- 0.95      # above = very strong clustering (problematic patterns)
MORAN_THRESHOLD_MODERATE <- 0.85  # above = moderate clustering
MORAN_THRESHOLD_LOW <- 0.7        # above = normal field continuity

# Threshold for acceptable area percentage
ACCEPTABLE_AREA_THRESHOLD <- 40   # below this percentage = management issue

# 3. Helper functions
# -------------------

#' Calculate uniformity metrics using terra statistics (optimized)
#'
#' All metrics are normalized (scaled by mean or median) so they are
#' directly comparable to a classic coefficient of variation.
#'
#' @param mean_val Mean CI value from terra
#' @param sd_val Standard deviation from terra
#' @param median_val Median CI value from terra
#' @param min_val Minimum CI value from terra
#' @param max_val Maximum CI value from terra
#' @param values Raw values, used for quantile-based metrics only
#' @return List with uniformity metrics and their interpretations
calculate_uniformity_metrics_terra <- function(mean_val, sd_val, median_val,
                                               min_val, max_val, values) {
  # Bug fix: the early return now carries the same *_interpretation keys as
  # the success path (plus the legacy `interpretation` key), so callers can
  # index e.g. $cv_interpretation without silently getting NULL on fields
  # with insufficient data.
  if (is.na(mean_val) || length(values) < 2) {
    return(list(
      cv = NA, iqr_cv = NA, range_cv = NA, mad_cv = NA, percentile_cv = NA,
      interpretation = "insufficient_data",
      cv_interpretation = "insufficient_data",
      iqr_interpretation = "insufficient_data",
      mad_interpretation = "insufficient_data",
      percentile_interpretation = "insufficient_data"
    ))
  }

  # 1. Coefficient of variation (terra mean/sd) - already normalized.
  #    NOTE(review): no zero guard - a zero mean/median yields Inf, as before.
  cv <- sd_val / mean_val

  # 2. IQR-based CV (IQR / median)
  iqr_val <- IQR(values, na.rm = TRUE)
  iqr_cv <- iqr_val / median_val

  # 3. Range-based CV (range / mean), from terra min/max
  range_val <- max_val - min_val
  range_cv <- range_val / mean_val

  # 4. MAD-based CV (MAD / median); constant 1.4826 matches SD under normality
  mad_val <- mad(values, constant = 1.4826, na.rm = TRUE)
  mad_cv <- mad_val / median_val

  # 5. Percentile-based CV ((P90 - P10) / mean)
  percentiles <- quantile(values, c(0.1, 0.9), na.rm = TRUE)
  percentile_cv <- (percentiles[2] - percentiles[1]) / mean_val

  # Shared interpretation scale: <0.15 very uniform, 0.15-0.30 moderate,
  # 0.30-0.50 high, >=0.50 very high variation.
  interpret_uniformity <- function(metric_value) {
    if (is.na(metric_value)) return("unknown")
    if (metric_value < 0.15) return("very uniform")
    if (metric_value < 0.30) return("moderate variation")
    if (metric_value < 0.50) return("high variation")
    return("very high variation")
  }

  return(list(
    cv = cv,
    iqr_cv = iqr_cv,
    range_cv = range_cv,
    mad_cv = mad_cv,
    percentile_cv = percentile_cv,
    cv_interpretation = interpret_uniformity(cv),
    iqr_interpretation = interpret_uniformity(iqr_cv),
    mad_interpretation = interpret_uniformity(mad_cv),
    percentile_interpretation = interpret_uniformity(percentile_cv)
  ))
}

#' Calculate percentage of pixels within an acceptable range of the field mean
#'
#' Acceptable range = within `threshold_factor` (default 25%) of the field
#' mean CI, i.e. the share of the field showing "normal" performance.
#'
#' @param mean_val Mean CI value from terra
#' @param values Raw CI values
#' @param threshold_factor Fraction of the mean defining the band (default 0.25)
#' @return Percentage of values within the acceptable range, or NA
calculate_acceptable_percentage_terra <- function(mean_val, values,
                                                  threshold_factor = 0.25) {
  values <- values[!is.na(values) & is.finite(values)]
  if (length(values) < 2 || is.na(mean_val)) return(NA)
  threshold <- mean_val * threshold_factor  # 25% of mean by default
  within_range <-
abs(values - mean_val) <= threshold percentage <- (sum(within_range) / length(values)) * 100 return(percentage) } #' Calculate coefficient of variation for uniformity assessment #' @param values Numeric vector of CI values #' @return Coefficient of variation (CV) as decimal calculate_cv <- function(values) { values <- values[!is.na(values) & is.finite(values)] if (length(values) < 2) return(NA) cv <- sd(values) / mean(values) # Keep as decimal return(cv) } #' Calculate Shannon entropy for spatial heterogeneity assessment #' Higher entropy = more heterogeneous/variable field #' Lower entropy = more homogeneous/uniform field #' @param values Numeric vector of CI values #' @param n_bins Number of bins for histogram (default 10) #' @return Shannon entropy value calculate_entropy <- function(values, n_bins = 10) { values <- values[!is.na(values) & is.finite(values)] if (length(values) < 2) return(NA) # Create histogram bins value_range <- range(values) breaks <- seq(value_range[1], value_range[2], length.out = n_bins + 1) # Count values in each bin bin_counts <- hist(values, breaks = breaks, plot = FALSE)$counts # Calculate probabilities (remove zero counts) probabilities <- bin_counts[bin_counts > 0] / sum(bin_counts) # Calculate Shannon entropy: H = -sum(p * log(p)) entropy <- -sum(probabilities * log(probabilities)) return(entropy) } #' Calculate percentage of field with positive vs negative change #' @param current_values Current week CI values #' @param previous_values Previous week CI values #' @return List with percentage of positive and negative change areas calculate_change_percentages <- function(current_values, previous_values) { # Ensure same length (should be from same field boundaries) if (length(current_values) != length(previous_values)) { return(list(positive_pct = NA, negative_pct = NA, stable_pct = NA)) } # Calculate pixel-wise change change_values <- current_values - previous_values valid_changes <- change_values[!is.na(change_values) & 
is.finite(change_values)] if (length(valid_changes) < 2) { return(list(positive_pct = NA, negative_pct = NA, stable_pct = NA)) } # Count positive, negative, and stable areas positive_pct <- sum(valid_changes > 0) / length(valid_changes) * 100 negative_pct <- sum(valid_changes < 0) / length(valid_changes) * 100 stable_pct <- sum(valid_changes == 0) / length(valid_changes) * 100 return(list( positive_pct = positive_pct, negative_pct = negative_pct, stable_pct = stable_pct )) } #' Calculate spatial autocorrelation (Moran's I) for a field #' @param ci_raster Terra raster of CI values #' @param field_boundary Terra vector of field boundary #' @return List with Moran's I statistic and p-value calculate_spatial_autocorrelation <- function(ci_raster, field_boundary) { tryCatch({ # Crop and mask raster to field boundary field_raster <- terra::crop(ci_raster, field_boundary) field_raster <- terra::mask(field_raster, field_boundary) # Convert to points for spatial analysis raster_points <- terra::as.points(field_raster, na.rm = TRUE) # Check if we have enough points if (length(raster_points) < 10) { return(list(morans_i = NA, p_value = NA, interpretation = "insufficient_data")) } # Convert to sf for spdep points_sf <- sf::st_as_sf(raster_points) # Create spatial weights matrix (k-nearest neighbors) coords <- sf::st_coordinates(points_sf) # Use adaptive number of neighbors based on sample size k_neighbors <- min(8, max(4, floor(nrow(coords) / 10))) knn_nb <- spdep::knearneigh(coords, k = k_neighbors) knn_listw <- spdep::nb2listw(spdep::knn2nb(knn_nb), style = "W", zero.policy = TRUE) # Calculate Moran's I ci_values <- points_sf[[1]] # First column contains CI values moran_result <- spdep::moran.test(ci_values, knn_listw, zero.policy = TRUE) # Interpret results morans_i <- moran_result$estimate[1] p_value <- moran_result$p.value interpretation <- if (is.na(morans_i)) { "insufficient_data" } else if (p_value > 0.05) { "random" # Not significant spatial pattern } else if 
(morans_i > MORAN_THRESHOLD_HIGH) { "very_strong_clustering" # Very strong clustering - may indicate management issues } else if (morans_i > MORAN_THRESHOLD_MODERATE) { "strong_clustering" # Strong clustering - worth monitoring } else if (morans_i > MORAN_THRESHOLD_LOW) { "normal_continuity" # Normal field continuity - expected for uniform fields } else if (morans_i > 0.3) { "weak_clustering" # Some clustering present } else if (morans_i < -0.3) { "dispersed" # Checkerboard pattern } else { "low_autocorrelation" # Low spatial autocorrelation } return(list( morans_i = morans_i, p_value = p_value, interpretation = interpretation )) }, error = function(e) { warning(paste("Error calculating spatial autocorrelation:", e$message)) return(list(morans_i = NA, p_value = NA, interpretation = "error")) }) } #' Calculate percentage of field in extreme values using simple threshold #' Hotspots = areas with CI > mean + 1.5*SD (high-performing areas) #' Coldspots = areas with CI < mean - 1.5*SD (underperforming areas) #' @param values Numeric vector of CI values #' @param threshold_multiplier Standard deviation multiplier (default 1.5) #' @return List with percentage of hotspots and coldspots calculate_extreme_percentages_simple <- function(values, threshold_multiplier = 1.5) { if (length(values) < 10) return(list(hotspot_pct = NA, coldspot_pct = NA, method = "insufficient_data")) mean_val <- mean(values, na.rm = TRUE) sd_val <- sd(values, na.rm = TRUE) # Hotspots: significantly ABOVE average (good performance) upper_threshold <- mean_val + (threshold_multiplier * sd_val) # Coldspots: significantly BELOW average (poor performance) lower_threshold <- mean_val - (threshold_multiplier * sd_val) hotspot_pct <- sum(values > upper_threshold, na.rm = TRUE) / length(values) * 100 coldspot_pct <- sum(values < lower_threshold, na.rm = TRUE) / length(values) * 100 return(list( hotspot_pct = hotspot_pct, coldspot_pct = coldspot_pct, method = "simple_threshold", threshold_used = 
threshold_multiplier )) } #' Categorize CI change based on thresholds #' @param change_value Mean change in CI between weeks #' @return Character string: "increase", "stable", or "decrease" categorize_change <- function(change_value) { if (is.na(change_value)) return("unknown") if (change_value >= CI_CHANGE_INCREASE_THRESHOLD) return("increase") if (change_value <= CI_CHANGE_DECREASE_THRESHOLD) return("decrease") return("stable") } #' Categorize field uniformity based on coefficient of variation and spatial pattern #' @param cv_value Coefficient of variation (primary uniformity metric) #' @param spatial_info List with spatial autocorrelation results #' @param extreme_pct List with hotspot/coldspot percentages #' @param acceptable_pct Percentage of field within acceptable range #' @return Character string describing field uniformity pattern categorize_uniformity_enhanced <- function(cv_value, spatial_info, extreme_pct, acceptable_pct = NA) { if (is.na(cv_value)) return("unknown variation") # Check for poor uniformity first (urgent issues) if (cv_value > POOR_UNIFORMITY_THRESHOLD || (!is.na(acceptable_pct) && acceptable_pct < ACCEPTABLE_AREA_THRESHOLD)) { return("poor uniformity - urgent attention needed") } # Check for excellent uniformity if (cv_value <= EXCELLENT_UNIFORMITY_THRESHOLD && (!is.na(acceptable_pct) && acceptable_pct >= 45)) { return("excellent uniformity") } # Check for good uniformity if (cv_value <= UNIFORMITY_THRESHOLD) { return("good uniformity") } # Field has moderate variation - determine if localized or distributed spatial_pattern <- spatial_info$interpretation hotspot_pct <- extreme_pct$hotspot_pct coldspot_pct <- extreme_pct$coldspot_pct # Determine pattern type based on CV (primary) and spatial pattern (secondary) if (spatial_pattern %in% c("very_strong_clustering") && !is.na(hotspot_pct) && (hotspot_pct > 15 || coldspot_pct > 5)) { # Very strong clustering with substantial extreme areas - likely problematic if (hotspot_pct > coldspot_pct) { 
return("localized high-performing areas") } else if (coldspot_pct > hotspot_pct) { return("localized problem areas") } else { return("localized hotspots and coldspots") } } else if (spatial_pattern %in% c("strong_clustering") && !is.na(hotspot_pct) && (hotspot_pct > 10 || coldspot_pct > 3)) { # Strong clustering with moderate extreme areas if (hotspot_pct > coldspot_pct) { return("localized high-performing areas") } else if (coldspot_pct > hotspot_pct) { return("localized problem areas") } else { return("clustered variation") } } else { # Normal field continuity or weak patterns - rely primarily on CV return("moderate variation") } } #' Generate enhanced message based on analysis results including spatial patterns #' @param uniformity_category Character: enhanced uniformity category with spatial info #' @param change_category Character: "increase", "stable", or "decrease" #' @param extreme_pct List with hotspot/coldspot percentages #' @param acceptable_pct Percentage of field within acceptable range #' @param morans_i Moran's I value for additional context #' @param growth_stage Character: growth stage (simplified for now) #' @return List with message and worth_sending flag generate_enhanced_message <- function(uniformity_category, change_category, extreme_pct, acceptable_pct = NA, morans_i = NA, growth_stage = "vegetation stage") { # Enhanced message matrix based on spatial patterns messages <- list() # Poor uniformity scenarios (urgent) if (uniformity_category == "poor uniformity - urgent attention needed") { messages <- list( "stable" = list( message = "🚨 URGENT: Poor field uniformity detected - immediate management review required", worth_sending = TRUE ), "decrease" = list( message = "🚨 CRITICAL: Poor uniformity with declining trend - emergency intervention needed", worth_sending = TRUE ), "increase" = list( message = "⚠️ CAUTION: Improving but still poor uniformity - continue intensive monitoring", worth_sending = TRUE ) ) } # Excellent uniformity scenarios 
else if (uniformity_category == "excellent uniformity") { messages <- list( "stable" = list( message = "βœ… Excellent: Optimal field uniformity and stability", worth_sending = FALSE ), "decrease" = list( message = "⚠️ Alert: Excellent uniformity but declining - investigate cause early", worth_sending = TRUE ), "increase" = list( message = "🌟 Outstanding: Excellent uniformity with continued improvement", worth_sending = FALSE ) ) } # Good uniformity scenarios else if (uniformity_category == "good uniformity") { # Check for very strong clustering which may indicate management issues if (!is.na(morans_i) && morans_i > MORAN_THRESHOLD_HIGH) { messages <- list( "stable" = list( message = "⚠️ Alert: Good uniformity but very strong clustering detected - check management practices", worth_sending = TRUE ), "decrease" = list( message = "🚨 Alert: Good uniformity declining with clustering patterns - targeted intervention needed", worth_sending = TRUE ), "increase" = list( message = "βœ… Good: Improving uniformity but monitor clustering patterns", worth_sending = FALSE ) ) } else { messages <- list( "stable" = list( message = "βœ… Good: Stable field with good uniformity", worth_sending = FALSE ), "decrease" = list( message = "⚠️ Alert: Good uniformity but declining trend - early intervention recommended", worth_sending = TRUE ), "increase" = list( message = "βœ… Great: Good uniformity with improvement trend", worth_sending = FALSE ) ) } } # Moderate variation scenarios else if (uniformity_category == "moderate variation") { acceptable_msg <- if (!is.na(acceptable_pct) && acceptable_pct < 45) " - low acceptable area" else "" messages <- list( "stable" = list( message = paste0("⚠️ Alert: Moderate field variation detected", acceptable_msg, " - review management uniformity"), worth_sending = TRUE ), "decrease" = list( message = paste0("🚨 Alert: Moderate variation with declining trend", acceptable_msg, " - intervention needed"), worth_sending = TRUE ), "increase" = list( message = 
paste0("πŸ“ˆ Monitor: Improving but still moderate variation", acceptable_msg, " - continue optimization"), worth_sending = FALSE ) ) } # Localized problem areas else if (uniformity_category == "localized problem areas") { hotspot_pct <- round(extreme_pct$hotspot_pct, 1) coldspot_pct <- round(extreme_pct$coldspot_pct, 1) messages <- list( "stable" = list( message = paste0("🚨 Alert: Problem zones detected (", coldspot_pct, "% underperforming) - targeted intervention needed"), worth_sending = TRUE ), "decrease" = list( message = paste0("🚨 URGENT: Problem areas expanding with overall decline (", coldspot_pct, "% affected) - immediate action required"), worth_sending = TRUE ), "increase" = list( message = paste0("⚠️ Caution: Overall improvement but ", coldspot_pct, "% problem areas remain - monitor closely"), worth_sending = TRUE ) ) } # Localized high-performing areas else if (uniformity_category == "localized high-performing areas") { hotspot_pct <- round(extreme_pct$hotspot_pct, 1) messages <- list( "stable" = list( message = paste0("πŸ’‘ Opportunity: ", hotspot_pct, "% of field performing well - replicate conditions in remaining areas"), worth_sending = FALSE ), "decrease" = list( message = paste0("⚠️ Alert: High-performing areas (", hotspot_pct, "%) declining - investigate cause to prevent spread"), worth_sending = TRUE ), "increase" = list( message = paste0("🌟 Excellent: High-performing areas (", hotspot_pct, "%) expanding - excellent management practices"), worth_sending = FALSE ) ) } # Clustered variation (general) else if (uniformity_category == "clustered variation") { messages <- list( "stable" = list( message = "⚠️ Alert: Clustered variation detected - investigate spatial management patterns", worth_sending = TRUE ), "decrease" = list( message = "🚨 Alert: Clustered decline pattern - targeted investigation needed", worth_sending = TRUE ), "increase" = list( message = "πŸ“ˆ Monitor: Clustered improvement - identify and replicate successful practices", 
worth_sending = FALSE ) ) } # Default fallback else { messages <- list( "stable" = list(message = "❓ Field analysis inconclusive - manual review recommended", worth_sending = FALSE), "decrease" = list(message = "⚠️ Field showing decline - investigation recommended", worth_sending = TRUE), "increase" = list(message = "πŸ“ˆ Field showing improvement", worth_sending = FALSE) ) } # Return appropriate message if (change_category %in% names(messages)) { return(messages[[change_category]]) } else { return(list( message = paste("❓ Analysis inconclusive -", uniformity_category, "with", change_category, "trend"), worth_sending = FALSE )) } } #' Create Word document with analysis results #' @param field_results List of field analysis results #' @param current_week Current week number #' @param previous_week Previous week number #' @param project_dir Project directory name #' @param output_dir Directory to save the Word document #' @return Path to the created Word document # REMOVED - Word report creation functionality #' Load and analyze a weekly mosaic for individual fields with spatial analysis #' @param week_file_path Path to the weekly mosaic file #' @param field_boundaries_sf SF object with field boundaries #' @return List with CI statistics per field including spatial metrics analyze_weekly_mosaic <- function(week_file_path, field_boundaries_sf) { if (!file.exists(week_file_path)) { warning(paste("Mosaic file not found:", week_file_path)) return(NULL) } tryCatch({ # Load the raster and select only the CI band (5th band) mosaic_raster <- terra::rast(week_file_path) ci_raster <- mosaic_raster[[5]] # Select the CI band names(ci_raster) <- "CI" # Convert field boundaries to terra vect for extraction field_boundaries_vect <- terra::vect(field_boundaries_sf) # Extract CI values for each field field_results <- list() for (i in seq_len(nrow(field_boundaries_sf))) { field_name <- field_boundaries_sf$field[i] sub_field_name <- field_boundaries_sf$sub_field[i] # Check and get 
field area from geojson if available field_area_ha <- NA if ("area_ha" %in% colnames(field_boundaries_sf)) { field_area_ha <- field_boundaries_sf$area_ha[i] } else if ("AREA_HA" %in% colnames(field_boundaries_sf)) { field_area_ha <- field_boundaries_sf$AREA_HA[i] } else if ("area" %in% colnames(field_boundaries_sf)) { field_area_ha <- field_boundaries_sf$area[i] } else { # Calculate area from geometry as fallback field_geom <- field_boundaries_sf[i,] if (sf::st_is_longlat(field_geom)) { # For geographic coordinates, transform to projected for area calculation field_geom <- sf::st_transform(field_geom, 3857) # Web Mercator } field_area_ha <- as.numeric(sf::st_area(field_geom)) / 10000 # Convert to hectares } cat("Processing field:", field_name, "-", sub_field_name, "(", round(field_area_ha, 1), "ha)\n") # Extract values for this specific field field_vect <- field_boundaries_vect[i] # Extract with built-in statistics from terra (PRIMARY METHOD) terra_stats <- terra::extract(ci_raster, field_vect, fun = c("mean", "sd", "min", "max", "median"), na.rm = TRUE) # Extract raw values for additional calculations and validation ci_values <- terra::extract(ci_raster, field_vect, fun = NULL) # Flatten and clean the values field_values <- unlist(ci_values) valid_values <- field_values[!is.na(field_values) & is.finite(field_values)] if (length(valid_values) > 0) { # Use TERRA as primary calculations primary_mean <- terra_stats$mean[1] primary_sd <- terra_stats$sd[1] primary_cv <- primary_sd / primary_mean primary_median <- terra_stats$median[1] primary_min <- terra_stats$min[1] primary_max <- terra_stats$max[1] # Manual calculations for validation only manual_mean <- mean(valid_values) manual_cv <- sd(valid_values) / manual_mean basic_stats <- list( field = field_name, sub_field = sub_field_name, # PRIMARY statistics (terra-based) mean_ci = primary_mean, median_ci = primary_median, sd_ci = primary_sd, cv = primary_cv, min_ci = primary_min, max_ci = primary_max, # Store raw values 
for change analysis raw_values = valid_values, # Other metrics using terra values acceptable_pct = calculate_acceptable_percentage_terra(primary_mean, valid_values), n_pixels = length(valid_values), # Field area from geojson field_area_ha = field_area_ha ) # Calculate spatial statistics spatial_info <- calculate_spatial_autocorrelation(ci_raster, field_vect) extreme_pct <- calculate_extreme_percentages_simple(valid_values) # Calculate entropy for additional uniformity measure entropy_value <- calculate_entropy(valid_values) # Enhanced uniformity categorization uniformity_category <- categorize_uniformity_enhanced( basic_stats$cv, spatial_info, extreme_pct, basic_stats$acceptable_pct ) # Combine all results field_stats <- c( basic_stats, list( spatial_autocorr = spatial_info, extreme_percentages = extreme_pct, entropy = entropy_value, uniformity_category = uniformity_category ) ) field_results[[paste0(field_name, "_", sub_field_name)]] <- field_stats } else { warning(paste("No valid CI values found for field:", field_name, sub_field_name)) } } return(field_results) }, error = function(e) { warning(paste("Error analyzing mosaic:", e$message)) return(NULL) }) } # 4. 
# Core analysis functions for modular use
# ---------------------------------------

#' Run crop analysis for any estate
#' @param estate_name Name of the estate (e.g., "simba", "chemba")
#' @param current_week Current week number
#' @param previous_week Previous week number
#' @param year Year of the mosaics (default 2025)
#' @return List with per-field analysis results and the raw weekly stats
run_estate_analysis <- function(estate_name, current_week, previous_week, year = 2025) {
  cat("=== CROP ANALYSIS MESSAGING SYSTEM ===\n")
  cat("Analyzing:", toupper(estate_name), "estate\n")
  cat("Comparing week", previous_week, "vs week", current_week, "of", year, "\n\n")

  # parameters_project.R reads `project_dir` from the global environment,
  # so it must be assigned globally before sourcing (deliberate side effect).
  assign("project_dir", estate_name, envir = .GlobalEnv)

  # Load project configuration: working directory first, then r_app/
  tryCatch({
    source("parameters_project.R")
    cat("✓ Project configuration loaded\n")
  }, error = function(e) {
    tryCatch({
      source(here::here("r_app", "parameters_project.R"))
      cat("✓ Project configuration loaded from r_app directory\n")
    }, error = function(e2) {
      # Bug fix: surface the underlying cause instead of swallowing it
      # (also renames the handler arg so it no longer shadows `e`).
      stop(paste("Failed to load project configuration:", conditionMessage(e2)))
    })
  })

  # Variables expected to be created by parameters_project.R
  if (!exists("weekly_CI_mosaic") || !exists("field_boundaries_sf")) {
    stop("Required project variables not initialized. Check project configuration.")
  }

  # Weekly mosaic naming convention: week_<WW>_<YYYY>.tif
  current_week_file <- sprintf("week_%02d_%d.tif", current_week, year)
  previous_week_file <- sprintf("week_%02d_%d.tif", previous_week, year)
  current_week_path <- file.path(weekly_CI_mosaic, current_week_file)
  previous_week_path <- file.path(weekly_CI_mosaic, previous_week_file)

  cat("Looking for files:\n")
  cat("- Current week:", current_week_path, "\n")
  cat("- Previous week:", previous_week_path, "\n\n")

  # Analyze both weeks for all fields
  cat("Analyzing weekly mosaics per field...\n")
  current_field_stats <- analyze_weekly_mosaic(current_week_path, field_boundaries_sf)
  previous_field_stats <- analyze_weekly_mosaic(previous_week_path, field_boundaries_sf)

  if (is.null(current_field_stats) || is.null(previous_field_stats)) {
    stop("Could not analyze one or both weekly mosaics")
  }

  field_results <- generate_field_results(current_field_stats, previous_field_stats,
                                          current_week, previous_week)

  return(list(
    estate_name = estate_name,
    current_week = current_week,
    previous_week = previous_week,
    year = year,
    field_results = field_results,
    current_field_stats = current_field_stats,
    previous_field_stats = previous_field_stats
  ))
}

#' Generate analysis results for all fields present in both weeks
#' @param current_field_stats Per-field stats for the current week
#' @param previous_field_stats Per-field stats for the previous week
#' @param current_week Current week number
#' @param previous_week Previous week number
#' @return Named list of per-field result lists
generate_field_results <- function(current_field_stats, previous_field_stats,
                                   current_week, previous_week) {
  field_results <- list()

  # Only fields present in both weeks can be compared
  common_fields <- intersect(names(current_field_stats), names(previous_field_stats))

  for (field_id in common_fields) {
    current_field <- current_field_stats[[field_id]]
    previous_field <- previous_field_stats[[field_id]]

    # Week-over-week change in mean CI for this field
    ci_change <-
current_field$mean_ci - previous_field$mean_ci change_category <- categorize_change(ci_change) # Calculate spatial change percentages change_percentages <- calculate_change_percentages( current_field$raw_values, previous_field$raw_values ) # Use enhanced uniformity category from current week analysis uniformity_category <- current_field$uniformity_category # Generate enhanced message for this field message_result <- generate_enhanced_message( uniformity_category, change_category, current_field$extreme_percentages, current_field$acceptable_pct, current_field$spatial_autocorr$morans_i ) # Store results field_results[[field_id]] <- list( current_stats = current_field, previous_stats = previous_field, ci_change = ci_change, change_category = change_category, change_percentages = change_percentages, uniformity_category = uniformity_category, message_result = message_result ) } return(field_results) } #' Format analysis results for WhatsApp/Word copy-paste #' @param analysis_results Results from run_estate_analysis #' @return Character string with formatted text format_for_whatsapp <- function(analysis_results) { field_results <- analysis_results$field_results estate_name <- toupper(analysis_results$estate_name) current_week <- analysis_results$current_week previous_week <- analysis_results$previous_week output <- c() output <- c(output, paste("🌾", estate_name, "CROP ANALYSIS")) output <- c(output, paste("πŸ“… Week", current_week, "vs Week", previous_week)) output <- c(output, "") # Summary statistics alert_count <- sum(sapply(field_results, function(x) x$message_result$worth_sending)) total_fields <- length(field_results) # Calculate total area and area statistics total_hectares <- sum(sapply(field_results, function(x) x$current_stats$field_area_ha), na.rm = TRUE) output <- c(output, "πŸ“Š SUMMARY:") output <- c(output, paste("β€’ Estate:", estate_name)) output <- c(output, paste("β€’ Fields analyzed:", total_fields)) output <- c(output, paste("β€’ Total area:", 
round(total_hectares, 1), "ha")) output <- c(output, paste("β€’ Alerts needed:", alert_count)) output <- c(output, "") # Field-by-field alerts only if (alert_count > 0) { output <- c(output, "🚨 PRIORITY FIELDS:") for (field_id in names(field_results)) { field_info <- field_results[[field_id]] if (field_info$message_result$worth_sending) { field_name <- paste(field_info$current_stats$field, field_info$current_stats$sub_field, sep="-") area <- round(field_info$current_stats$field_area_ha, 1) message <- field_info$message_result$message output <- c(output, paste("β€’", field_name, paste0("(", area, "ha):"), message)) } } } else { output <- c(output, "βœ… No urgent alerts - all fields stable") } # Quick farm summary output <- c(output, "") output <- c(output, "πŸ“ˆ QUICK STATS:") # Calculate improving vs declining areas total_improving <- sum(sapply(field_results, function(x) { if (!is.na(x$change_percentages$positive_pct)) { (x$change_percentages$positive_pct / 100) * x$current_stats$field_area_ha } else 0 }), na.rm = TRUE) total_declining <- sum(sapply(field_results, function(x) { if (!is.na(x$change_percentages$negative_pct)) { (x$change_percentages$negative_pct / 100) * x$current_stats$field_area_ha } else 0 }), na.rm = TRUE) improving_pct <- (total_improving / total_hectares) * 100 declining_pct <- (total_declining / total_hectares) * 100 output <- c(output, paste("β€’ Improving areas:", round(total_improving, 1), "ha (", round(improving_pct, 1), "%)")) output <- c(output, paste("β€’ Declining areas:", round(total_declining, 1), "ha (", round(declining_pct, 1), "%)")) # Overall trend if (improving_pct > declining_pct) { trend_diff <- round(improving_pct - declining_pct, 1) output <- c(output, paste("β€’ Trend: βœ… POSITIVE (+", trend_diff, "%)")) } else if (declining_pct > improving_pct) { trend_diff <- round(declining_pct - improving_pct, 1) output <- c(output, paste("β€’ Trend: ⚠️ NEGATIVE (-", trend_diff, "%)")) } else { output <- c(output, "β€’ Trend: βž– 
BALANCED") } return(paste(output, collapse = "\n")) } #' Format analysis results as CSV data #' @param analysis_results Results from run_estate_analysis #' @return Data frame ready for write.csv format_as_csv <- function(analysis_results) { field_results <- analysis_results$field_results estate_name <- analysis_results$estate_name current_week <- analysis_results$current_week previous_week <- analysis_results$previous_week csv_data <- data.frame() for (field_id in names(field_results)) { field_info <- field_results[[field_id]] row_data <- data.frame( estate = estate_name, field = field_info$current_stats$field, sub_field = field_info$current_stats$sub_field, area_ha = round(field_info$current_stats$field_area_ha, 2), current_week = current_week, previous_week = previous_week, current_week_ci = round(field_info$current_stats$mean_ci, 3), previous_week_ci = round(field_info$previous_stats$mean_ci, 3), ci_change = round(field_info$ci_change, 3), change_category = field_info$change_category, cv = round(field_info$current_stats$cv, 3), uniformity_category = field_info$uniformity_category, acceptable_pct = round(field_info$current_stats$acceptable_pct, 1), hotspot_pct = round(field_info$current_stats$extreme_percentages$hotspot_pct, 1), coldspot_pct = round(field_info$current_stats$extreme_percentages$coldspot_pct, 1), morans_i = round(field_info$current_stats$spatial_autocorr$morans_i, 3), alert_needed = field_info$message_result$worth_sending, message = field_info$message_result$message, stringsAsFactors = FALSE ) csv_data <- rbind(csv_data, row_data) } return(csv_data) } #' Format analysis results as markdown table #' @param analysis_results Results from run_estate_analysis #' @return Character string with markdown table format_as_markdown_table <- function(analysis_results) { field_results <- analysis_results$field_results estate_name <- toupper(analysis_results$estate_name) current_week <- analysis_results$current_week previous_week <- analysis_results$previous_week 
output <- c()
# Assemble the markdown document line by line: title, analysis period,
# blank spacer, table header, separator row, then one table row per field.
output <- c(output, paste("# Crop Analysis Summary -", estate_name, "Estate"))
output <- c(output, paste("**Analysis Period:** Week", previous_week, "vs Week", current_week))
output <- c(output, "")
output <- c(output, "| Field | Area (ha) | Current CI | Change | Uniformity | Alert | Message |")
output <- c(output, "|-------|-----------|------------|--------|------------|-------|---------|")
for (field_id in names(field_results)) {
  field_info <- field_results[[field_id]]
  # Human-readable "field-subfield" label for the first table column
  field_name <- paste(field_info$current_stats$field, field_info$current_stats$sub_field, sep="-")
  area <- round(field_info$current_stats$field_area_ha, 1)
  current_ci <- round(field_info$current_stats$mean_ci, 3)
  change <- field_info$change_category
  uniformity <- field_info$uniformity_category
  # Emoji markers so alert rows stand out when the table is rendered
  alert <- if(field_info$message_result$worth_sending) "🚨 YES" else "βœ… NO"
  message <- field_info$message_result$message
  row <- paste("|", field_name, "|", area, "|", current_ci, "|", change, "|", uniformity, "|", alert, "|", message, "|")
  output <- c(output, row)
}
# Single string with embedded newlines (callers write it with writeLines)
return(paste(output, collapse = "\n"))
}

#' Save analysis outputs in multiple formats
#'
#' Writes the WhatsApp text, CSV data, and markdown table for one analysis
#' run into a timestamped set of files, then echoes the WhatsApp text to the
#' console for immediate copy-paste.
#'
#' @param analysis_results Results from run_estate_analysis
#' @param output_dir Directory to save files (optional; defaults to output/<estate>)
#' @return List with file paths created
save_analysis_outputs <- function(analysis_results, output_dir = NULL) {
  estate_name <- analysis_results$estate_name
  current_week <- analysis_results$current_week
  previous_week <- analysis_results$previous_week
  # Create output directory if not specified
  if (is.null(output_dir)) {
    output_dir <- file.path("output", estate_name)
  }
  if (!dir.exists(output_dir)) dir.create(output_dir, recursive = TRUE)
  # Minute-resolution timestamp keeps repeated runs from overwriting each other
  timestamp <- format(Sys.time(), "%Y%m%d_%H%M")
  base_filename <- paste0("crop_analysis_w", current_week, "vs", previous_week, "_", timestamp)
  # Generate different output formats
  whatsapp_text <- format_for_whatsapp(analysis_results)
  csv_data <- format_as_csv(analysis_results)
  markdown_table <-
format_as_markdown_table(analysis_results)
# Save files
whatsapp_file <- file.path(output_dir, paste0(base_filename, "_whatsapp.txt"))
csv_file <- file.path(output_dir, paste0(base_filename, "_data.csv"))
markdown_file <- file.path(output_dir, paste0(base_filename, "_table.md"))
writeLines(whatsapp_text, whatsapp_file)
write.csv(csv_data, csv_file, row.names = FALSE)
writeLines(markdown_table, markdown_file)
# Display summary
cat("\n=== OUTPUT FILES CREATED ===\n")
cat("πŸ“± WhatsApp format:", whatsapp_file, "\n")
cat("πŸ“Š CSV file:", csv_file, "\n")
cat("πŸ“ Markdown table:", markdown_file, "\n")
# Display WhatsApp format in console for immediate copy
cat("\n=== WHATSAPP/WORD READY FORMAT ===\n")
cat("(Copy text below directly to WhatsApp or Word)\n")
# NOTE(review): cat(rep("=", 50)) prints the 50 "=" separated by spaces;
# if a solid rule was intended, strrep("=", 50) would do it -- confirm.
cat(rep("=", 50), "\n")
cat(whatsapp_text)
cat("\n", rep("=", 50), "\n")
return(list(
  whatsapp_file = whatsapp_file,
  csv_file = csv_file,
  markdown_file = markdown_file
))
}

# 5. Main analysis function (now uses modular approach)
# ---------------------------------------------------

#' Script entry point: parse CLI arguments, run the estate analysis, print the
#' field-by-field report and farm-wide summaries, and save output files.
main <- function() {
  # Capture command line arguments
  args <- commandArgs(trailingOnly = TRUE)
  # Process arguments with defaults (see usage notes in the file header).
  # Note: args are character; as.numeric() yields NA for non-numeric input,
  # which is only caught downstream -- the is.na() here checks presence only.
  current_week <- if (length(args) >= 1 && !is.na(args[1])) {
    as.numeric(args[1])
  } else {
    32 # Default for proof of concept
  }
  previous_week <- if (length(args) >= 2 && !is.na(args[2])) {
    as.numeric(args[2])
  } else {
    31 # Default for proof of concept
  }
  estate_name <- if (length(args) >= 3 && !is.na(args[3])) {
    as.character(args[3])
  } else {
    "simba" # Default estate
  }
  year <- 2025 # Current year - could be made dynamic
  # Run the modular analysis (run_estate_analysis is defined elsewhere in this file)
  analysis_results <- run_estate_analysis(estate_name, current_week, previous_week, year)
  field_results <- analysis_results$field_results
  # 6.
Display detailed field-by-field analysis
# ------------------------------------------
cat("=== FIELD-BY-FIELD ANALYSIS ===\n\n")
for (field_id in names(field_results)) {
  field_info <- field_results[[field_id]]
  # Unpack the per-field analysis bundle produced by run_estate_analysis
  current_field <- field_info$current_stats
  previous_field <- field_info$previous_stats
  ci_change <- field_info$ci_change
  change_category <- field_info$change_category
  change_percentages <- field_info$change_percentages
  uniformity_category <- field_info$uniformity_category
  message_result <- field_info$message_result
  # Print enhanced field analysis
  cat("FIELD:", current_field$field, "-", current_field$sub_field, "\n")
  cat("- Field size:", round(current_field$field_area_ha, 1), "hectares\n")
  cat("- Week", previous_week, "CI:", round(previous_field$mean_ci, 3), "\n")
  cat("- Week", current_week, "CI:", round(current_field$mean_ci, 3), "\n")
  cat("- Terra stats: Mean =", round(current_field$mean_ci, 3), ", CV =", round(current_field$cv, 3), ", Range = [", round(current_field$min_ci, 2), "-", round(current_field$max_ci, 2), "]\n")
  cat("- Within acceptable range (Β±25% of mean):", round(current_field$acceptable_pct, 1), "%\n")
  # Display primary uniformity metrics (CV and Entropy)
  cat("- Field uniformity: CV =", round(current_field$cv, 3))
  # CV banding for display; the 0.30 / 0.50 cutoffs here are display-only and
  # looser than the alert thresholds defined at the top of the file.
  if (current_field$cv < EXCELLENT_UNIFORMITY_THRESHOLD) {
    cat(" (excellent)")
  } else if (current_field$cv < UNIFORMITY_THRESHOLD) {
    cat(" (good)")
  } else if (current_field$cv < 0.30) {
    cat(" (moderate)")
  } else if (current_field$cv < 0.50) {
    cat(" (high variation)")
  } else {
    cat(" (very high variation)")
  }
  # Add entropy information
  if (!is.na(current_field$entropy)) {
    cat(", Entropy =", round(current_field$entropy, 3))
    # Entropy interpretation (higher = more heterogeneous)
    # Adjusted thresholds to better match CV patterns
    if (current_field$entropy < 1.3) {
      cat(" (very uniform)")
    } else if (current_field$entropy < 1.5) {
      cat(" (uniform)")
    } else if (current_field$entropy < 1.7) {
      cat(" (moderate heterogeneity)")
    } else {
      cat(" (high 
heterogeneity)") }
  }
  cat("\n")
  cat("- Change: Mean =", round(ci_change, 3), "(", change_category, ")")
  if (!is.na(change_percentages$positive_pct)) {
    # Calculate hectares for this field using field area from geojson
    field_hectares <- current_field$field_area_ha
    improving_hectares <- (change_percentages$positive_pct / 100) * field_hectares
    declining_hectares <- (change_percentages$negative_pct / 100) * field_hectares
    cat(", Areas: ", round(change_percentages$positive_pct, 1), "% (", round(improving_hectares, 1), " ha) improving, ", round(change_percentages$negative_pct, 1), "% (", round(declining_hectares, 1), " ha) declining\n")
  } else {
    cat("\n")
  }
  cat("- Spatial Pattern:", uniformity_category, "\n")
  # Add spatial details if available
  if (!is.na(current_field$spatial_autocorr$morans_i)) {
    cat("- Moran's I:", round(current_field$spatial_autocorr$morans_i, 3), "(", current_field$spatial_autocorr$interpretation, ")")
    # Add agricultural context explanation for Moran's I.
    # NOTE(review): the 0.7 / 0.85 / 0.95 cutoffs below duplicate the
    # MORAN_THRESHOLD_* constants at the top of the file -- consider
    # referencing the constants so the two stay in sync.
    moran_val <- current_field$spatial_autocorr$morans_i
    if (moran_val >= 0.7 && moran_val < 0.85) {
      cat(" - normal field continuity")
    } else if (moran_val >= 0.85 && moran_val < 0.95) {
      cat(" - strong spatial pattern")
    } else if (moran_val >= 0.95) {
      cat(" - very strong clustering, monitor for management issues")
    } else if (moran_val < 0.7 && moran_val > 0.3) {
      cat(" - moderate spatial pattern")
    } else {
      cat(" - unusual spatial pattern for crop field")
    }
    cat("\n")
  }
  if (!is.na(current_field$extreme_percentages$hotspot_pct)) {
    cat("- Extreme areas: ", round(current_field$extreme_percentages$hotspot_pct, 1), "% hotspots (high-performing), ", round(current_field$extreme_percentages$coldspot_pct, 1), "% coldspots (underperforming)")
    # Show method used for extreme detection
    if (!is.null(current_field$extreme_percentages$method)) {
      if (current_field$extreme_percentages$method == "getis_ord_gi_star") {
        cat(" [Getis-Ord Gi*]")
      } else if (current_field$extreme_percentages$method == "simple_sd") {
        cat(" [Simple 
SD]") }
    }
    cat("\n")
  }
  cat("- Message:", message_result$message, "\n")
  cat("- Alert needed:", if(message_result$worth_sending) "YES 🚨" else "NO", "\n\n")
}
# 7. Summary of alerts
# ------------------
# Logical vector: one TRUE/FALSE per field, used both to count alerts and to
# subset the field names below.
alert_fields <- sapply(field_results, function(x) x$message_result$worth_sending)
total_alerts <- sum(alert_fields)
cat("=== SUMMARY ===\n")
cat("Total fields analyzed:", length(field_results), "\n")
cat("Fields requiring alerts:", total_alerts, "\n")
if (total_alerts > 0) {
  cat("\nFields needing attention:\n")
  for (field_id in names(field_results)[alert_fields]) {
    field_info <- field_results[[field_id]]
    cat("-", field_info$current_stats$field, "-", field_info$current_stats$sub_field, ":", field_info$message_result$message, "\n")
  }
}
# 8. Farm-wide analysis summary table (using modular data)
# -------------------------------------------------------
cat("\n=== FARM-WIDE ANALYSIS SUMMARY ===\n")
# Field uniformity statistics with detailed categories.
# The four CV bands are mutually exclusive and exhaustive:
# excellent / good / moderate / poor, split at the thresholds defined
# at the top of the file.
excellent_fields <- sapply(field_results, function(x) x$current_stats$cv <= EXCELLENT_UNIFORMITY_THRESHOLD)
good_fields <- sapply(field_results, function(x) x$current_stats$cv > EXCELLENT_UNIFORMITY_THRESHOLD & x$current_stats$cv <= UNIFORMITY_THRESHOLD)
moderate_fields <- sapply(field_results, function(x) x$current_stats$cv > UNIFORMITY_THRESHOLD & x$current_stats$cv <= POOR_UNIFORMITY_THRESHOLD)
poor_fields <- sapply(field_results, function(x) x$current_stats$cv > POOR_UNIFORMITY_THRESHOLD)
n_excellent <- sum(excellent_fields)
n_good <- sum(good_fields)
n_moderate <- sum(moderate_fields)
n_poor <- sum(poor_fields)
n_uniform_total <- n_excellent + n_good # Total uniform fields (CV <= UNIFORMITY_THRESHOLD, i.e. 0.15)
# Calculate farm-wide area statistics (use field area from geojson)
total_hectares <- 0
total_improving_hectares <- 0
total_declining_hectares <- 0
total_stable_hectares <- 0
# Accumulate hectares across fields; fields with missing change data or
# missing area are skipped entirely (see the is.na() guard on the next chunk).
for (field_id in names(field_results)) {
  field_info <- field_results[[field_id]]
  change_pct <- field_info$change_percentages
  field_hectares <- 
field_info$current_stats$field_area_ha
if (!is.na(change_pct$positive_pct) && !is.na(field_hectares)) {
  # Convert each field's improving/declining/stable percentages to hectares
  # and add them to the farm-wide running totals.
  total_hectares <- total_hectares + field_hectares
  total_improving_hectares <- total_improving_hectares + (change_pct$positive_pct / 100 * field_hectares)
  total_declining_hectares <- total_declining_hectares + (change_pct$negative_pct / 100 * field_hectares)
  total_stable_hectares <- total_stable_hectares + (change_pct$stable_pct / 100 * field_hectares)
}
}
# Calculate farm-wide percentages.
# NOTE(review): if every field was skipped above, total_hectares is 0 and
# these become NaN -- confirm whether that case can occur in practice.
farm_improving_pct <- (total_improving_hectares / total_hectares) * 100
farm_declining_pct <- (total_declining_hectares / total_hectares) * 100
farm_stable_pct <- (total_stable_hectares / total_hectares) * 100
# Display summary table (box-drawing characters, fixed-width columns)
cat("\nFIELD UNIFORMITY SUMMARY:\n")
cat("β”‚ Uniformity Level β”‚ Count β”‚ Percent β”‚\n")
cat(sprintf("β”‚ Excellent (CV≀%.2f) β”‚ %5d β”‚ %6.1f%% β”‚\n", EXCELLENT_UNIFORMITY_THRESHOLD, n_excellent, (n_excellent/length(field_results))*100))
cat(sprintf("β”‚ Good (CV %.2f-%.2f) β”‚ %5d β”‚ %6.1f%% β”‚\n", EXCELLENT_UNIFORMITY_THRESHOLD, UNIFORMITY_THRESHOLD, n_good, (n_good/length(field_results))*100))
cat(sprintf("β”‚ Moderate (CV %.2f-%.2f) β”‚ %5d β”‚ %6.1f%% β”‚\n", UNIFORMITY_THRESHOLD, POOR_UNIFORMITY_THRESHOLD, n_moderate, (n_moderate/length(field_results))*100))
cat(sprintf("β”‚ Poor (CV>%.2f) β”‚ %5d β”‚ %6.1f%% β”‚\n", POOR_UNIFORMITY_THRESHOLD, n_poor, (n_poor/length(field_results))*100))
cat(sprintf("β”‚ Total fields β”‚ %5d β”‚ %6.1f%% β”‚\n", length(field_results), 100.0))
cat("\nFARM-WIDE AREA CHANGE SUMMARY:\n")
cat("β”‚ Change Type β”‚ Hectaresβ”‚ Percent β”‚\n")
cat(sprintf("β”‚ Improving areas β”‚ %7.1f β”‚ %6.1f%% β”‚\n", total_improving_hectares, farm_improving_pct))
cat(sprintf("β”‚ Stable areas β”‚ %7.1f β”‚ %6.1f%% β”‚\n", total_stable_hectares, farm_stable_pct))
cat(sprintf("β”‚ Declining areas β”‚ %7.1f β”‚ %6.1f%% β”‚\n", total_declining_hectares, farm_declining_pct))
cat(sprintf("β”‚ Total area β”‚ %7.1f β”‚ %6.1f%% 
β”‚\n", total_hectares, 100.0))
# Additional insights
cat("\nKEY INSIGHTS:\n")
cat(sprintf("β€’ %d%% of fields have good uniformity (CV ≀ %.2f)\n", round((n_uniform_total/length(field_results))*100), UNIFORMITY_THRESHOLD))
cat(sprintf("β€’ %d%% of fields have excellent uniformity (CV ≀ %.2f)\n", round((n_excellent/length(field_results))*100), EXCELLENT_UNIFORMITY_THRESHOLD))
cat(sprintf("β€’ %.1f hectares (%.1f%%) of farm area is improving week-over-week\n", total_improving_hectares, farm_improving_pct))
cat(sprintf("β€’ %.1f hectares (%.1f%%) of farm area is declining week-over-week\n", total_declining_hectares, farm_declining_pct))
cat(sprintf("β€’ Total farm area analyzed: %.1f hectares\n", total_hectares))
# Classify the overall trend by comparing improving vs declining area share
if (farm_improving_pct > farm_declining_pct) {
  cat(sprintf("β€’ Overall trend: POSITIVE (%.1f%% more area improving than declining)\n", farm_improving_pct - farm_declining_pct))
} else if (farm_declining_pct > farm_improving_pct) {
  cat(sprintf("β€’ Overall trend: NEGATIVE (%.1f%% more area declining than improving)\n", farm_declining_pct - farm_improving_pct))
} else {
  cat("β€’ Overall trend: BALANCED (equal improvement and decline)\n")
}
# 9. Generate and save multiple output formats
# ------------------------------------------
saved_files <- save_analysis_outputs(analysis_results)
# 10. Analysis complete
# ------------------
cat("\n=== ANALYSIS COMPLETE ===\n")
cat("All field analysis results, farm-wide summary, and output files created.\n")
# Return results for potential further processing
invisible(analysis_results)
}

# Run main function if script is called directly (sys.nframe() == 0 means we
# are at top level, i.e. executed via Rscript rather than source()d from a
# function).
if (sys.nframe() == 0) {
  main()
}