# 20_GENERATE_KPI_EXCEL.R
# =======================
# This script generates an Excel file with KPIs for client delivery:
# - Field-level data with CI, changes, crop status, alerts, and cloud interference
# - Alerts summary sheet
# - Farm-wide overview statistics
#
# Usage: Rscript 20_generate_kpi_excel.R [end_date] [project_dir]
# - end_date: End date for KPI calculation (YYYY-MM-DD format), default: most recent week
# - project_dir: Project directory name (e.g., "aura", "esa"), default: "esa"

# 1. Load required libraries
# -------------------------
suppressPackageStartupMessages({
  library(here)
  library(sf)
  library(terra)
  library(dplyr)
  library(tidyr)
  library(lubridate)
  library(readr)
  library(writexl)
  library(stringr)
})

#' Parse week/year metadata from weekly mosaic filenames
#'
#' Filenames are expected to match week_<W>_<YYYY>.tif. Shared by main()
#' and build_time_series_from_weekly_mosaics() so the filename convention
#' lives in one place.
#'
#' @param mosaic_dir Directory containing the weekly CI mosaics
#' @return Data frame with columns file, week, year (unsorted)
parse_weekly_mosaic_info <- function(mosaic_dir) {
  mosaic_files <- list.files(mosaic_dir,
                             pattern = "^week_\\d+_\\d{4}\\.tif$",
                             full.names = TRUE)
  if (length(mosaic_files) == 0) {
    stop("No weekly mosaic files found in: ", mosaic_dir)
  }
  base_names <- basename(mosaic_files)
  data.frame(
    file = mosaic_files,
    week = as.numeric(sub("^week_(\\d+)_\\d{4}\\.tif$", "\\1", base_names)),
    year = as.numeric(sub("^week_\\d+_(\\d{4})\\.tif$", "\\1", base_names)),
    stringsAsFactors = FALSE
  )
}

# 2. Main function
# --------------
main <- function() {
  # Process command line arguments
  args <- commandArgs(trailingOnly = TRUE)

  # Process end_date argument (YYYY-MM-DD); fall back to the most recent week.
  # NOTE: as.Date() *errors* (rather than returning NA) on malformed input,
  # so it must be wrapped for the intended "warn and use default" behavior.
  end_date <- NULL
  if (length(args) >= 1 && !is.na(args[1])) {
    end_date <- tryCatch(as.Date(args[1]), error = function(e) as.Date(NA))
    if (is.na(end_date)) {
      warning("Invalid end_date provided. Using default (most recent available).")
      end_date <- NULL
    }
  }

  # Process project_dir argument
  if (length(args) >= 2 && !is.na(args[2])) {
    project_dir <- as.character(args[2])
  } else {
    project_dir <- "esa"  # Default project
  }

  # Make project_dir available globally so parameters_project.R can use it
  assign("project_dir", project_dir, envir = .GlobalEnv)

  # 3. Load project configuration
  # ---------------------------
  # Load project parameters (this sets up all directory paths and field
  # boundaries, e.g. weekly_CI_mosaic, reports_dir, field_boundaries_sf)
  tryCatch({
    source(here("r_app", "parameters_project.R"))
  }, error = function(e) {
    stop("Error loading parameters_project.R: ", e$message)
  })

  # Check if required variables exist
  if (!exists("project_dir")) {
    stop("project_dir must be set before running this script")
  }
  if (!exists("field_boundaries_sf") || is.null(field_boundaries_sf)) {
    stop("Field boundaries not loaded. Check parameters_project.R initialization.")
  }

  cat("\n=== STARTING KPI EXCEL GENERATION ===\n")
  cat("Project:", project_dir, "\n")

  # 4. Determine which week to analyze
  # --------------------------------
  if (is.null(end_date)) {
    # Find the most recent weekly mosaic on disk
    mosaic_info <- parse_weekly_mosaic_info(weekly_CI_mosaic) %>%
      arrange(desc(year), desc(week))
    current_week <- mosaic_info$week[1]
    current_year <- mosaic_info$year[1]
    cat("Using most recent week:", current_week, "of", current_year, "\n")
  } else {
    current_week <- isoweek(end_date)
    current_year <- year(end_date)
    cat("Using specified date:", as.character(end_date),
        "(Week", current_week, ")\n")
  }

  previous_week <- current_week - 1
  previous_year <- current_year
  # Handle year boundary: Dec 28 always falls in the last ISO week of its
  # year, so this yields 52 or 53 as appropriate (not a hard-coded 52).
  if (previous_week < 1) {
    previous_year <- current_year - 1
    previous_week <- isoweek(as.Date(paste0(previous_year, "-12-28")))
  }

  # 5. Load weekly mosaics
  # --------------------
  current_mosaic_path <- file.path(
    weekly_CI_mosaic,
    paste0("week_", current_week, "_", current_year, ".tif")
  )
  previous_mosaic_path <- file.path(
    weekly_CI_mosaic,
    paste0("week_", previous_week, "_", previous_year, ".tif")
  )

  if (!file.exists(current_mosaic_path)) {
    stop("Current week mosaic not found: ", current_mosaic_path)
  }
  current_mosaic <- rast(current_mosaic_path)
  cat("Loaded current week mosaic:", basename(current_mosaic_path), "\n")

  # The previous week is optional: weekly change columns become NA without it
  previous_mosaic <- NULL
  if (file.exists(previous_mosaic_path)) {
    previous_mosaic <- rast(previous_mosaic_path)
    cat("Loaded previous week mosaic:", basename(previous_mosaic_path), "\n")
  } else {
    warning("Previous week mosaic not found: ", basename(previous_mosaic_path))
  }

  # 6. Load 8-band data for cloud information
  # ----------------------------------------
  cloud_data_available <- check_cloud_data_availability(project_dir,
                                                        current_week,
                                                        current_year)

  # 7. Build time series for harvest detection
  # -----------------------------------------
  cat("\nBuilding time series for harvest detection...\n")
  time_series_data <- build_time_series_from_weekly_mosaics(
    weekly_mosaic_dir = weekly_CI_mosaic,
    field_boundaries_sf = field_boundaries_sf,
    current_week = current_week,
    current_year = current_year
  )

  # 8. Detect harvest events
  # -----------------------
  cat("Detecting harvest events...\n")
  harvest_events <- detect_harvest_events(
    time_series_data = time_series_data,
    ci_threshold = 2,
    consecutive_weeks = 2
  )

  # 9. Calculate crop status
  # -----------------------
  cat("Calculating crop status...\n")
  crop_status_data <- calculate_crop_status(
    time_series_data = time_series_data,
    harvest_events = harvest_events,
    current_week = current_week,
    current_year = current_year
  )

  # 10. Extract field-level data
  # --------------------------
  cat("\nExtracting field-level data...\n")
  field_data <- extract_field_kpis(
    field_boundaries_sf = field_boundaries_sf,
    current_mosaic = current_mosaic,
    previous_mosaic = previous_mosaic,
    crop_status_data = crop_status_data,
    cloud_data_available = cloud_data_available,
    project_dir = project_dir,
    current_week = current_week,
    current_year = current_year
  )

  # 11. Generate alerts
  # -----------------
  cat("Generating alerts...\n")
  alerts_data <- generate_alerts(field_data, crop_status_data)

  # 12. Create farm overview
  # ----------------------
  cat("Creating farm overview...\n")
  farm_overview <- create_farm_overview(field_data, alerts_data)

  # 13. Write Excel file
  # ------------------
  output_file <- file.path(
    reports_dir,
    paste0("kpi_excel_report_week", current_week, "_", project_dir, ".xlsx")
  )

  cat("\nWriting Excel file...\n")
  write_xlsx(
    list(
      Field_Data = field_data,
      Alerts_Summary = alerts_data,
      Farm_Overview = farm_overview
    ),
    path = output_file
  )

  cat("\n=== KPI EXCEL GENERATION COMPLETED ===\n")
  cat("Output file:", output_file, "\n")
  cat("Total fields:", nrow(field_data), "\n")
  cat("Total alerts:", nrow(alerts_data), "\n\n")

  return(output_file)
}

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

#' Check if cloud data is available for the current week
#'
#' Looks for any 8-band GeoTIFFs under the project's merged_tif_8b directory.
#' NOTE: this only checks that *some* 8-band files exist, not that they cover
#' the requested week (current_week / current_year are currently unused).
#'
#' @return TRUE when at least one 8-band file is present, FALSE otherwise
check_cloud_data_availability <- function(project_dir, current_week,
                                          current_year) {
  merged_8b_dir <- here("laravel_app/storage/app", project_dir, "merged_tif_8b")

  if (!dir.exists(merged_8b_dir)) {
    cat("Cloud data directory not found, cloud interference will not be calculated\n")
    return(FALSE)
  }

  files_8b <- list.files(merged_8b_dir, pattern = "\\.tif$", full.names = TRUE)
  if (length(files_8b) == 0) {
    cat("No 8-band files found, cloud interference will not be calculated\n")
    return(FALSE)
  }

  cat("Cloud data available from", length(files_8b), "images\n")
  return(TRUE)
}

#' Build a per-field CI time series from all weekly mosaics
#'
#' @param weekly_mosaic_dir Directory containing week_<W>_<YYYY>.tif mosaics
#' @param field_boundaries_sf sf object with field and sub_field columns
#' @param current_week,current_year Week under analysis (kept for interface
#'   compatibility; all available weeks are read)
#' @return Data frame with one row per field x week:
#'   field_id, sub_field, week, year, date, mean_ci
build_time_series_from_weekly_mosaics <- function(weekly_mosaic_dir,
                                                  field_boundaries_sf,
                                                  current_week,
                                                  current_year) {
  mosaic_info <- parse_weekly_mosaic_info(weekly_mosaic_dir) %>%
    arrange(year, week)

  # Extract CI values for all fields across all weeks (list preallocated)
  time_series_list <- vector("list", nrow(mosaic_info))
  for (i in seq_len(nrow(mosaic_info))) {
    week_num <- mosaic_info$week[i]
    year_num <- mosaic_info$year[i]
    tryCatch({
      mosaic <- rast(mosaic_info$file[i])

      # Mean CI per field polygon; assumes the mosaic has a layer named "CI"
      field_stats <- terra::extract(mosaic$CI, vect(field_boundaries_sf),
                                    fun = mean, na.rm = TRUE)

      # Approximate start-of-week date from the week number
      jan1 <- as.Date(paste0(year_num, "-01-01"))
      week_date <- jan1 + (week_num - 1) * 7

      time_series_list[[i]] <- data.frame(
        field_id = field_boundaries_sf$field,
        sub_field = field_boundaries_sf$sub_field,
        week = week_num,
        year = year_num,
        date = week_date,
        mean_ci = field_stats$CI,
        stringsAsFactors = FALSE
      )
    }, error = function(e) {
      warning(paste("Error processing week", week_num, ":", e$message))
    })
  }

  # NULL entries from failed weeks are silently dropped by bind_rows()
  bind_rows(time_series_list)
}

#' Detect harvest events based on CI time series
#'
#' A harvest is a SHORT run of very low CI weeks followed by recovery —
#' not a 60+ week depression. Each field's weeks are classified into
#' harvest / growing / recovered states; consecutive runs of "harvest"
#' within duration bounds are reported as events.
#'
#' @param time_series_data Output of build_time_series_from_weekly_mosaics()
#' @param ci_threshold CI below this = harvested/bare ground
#' @param consecutive_weeks Minimum run length to count as a harvest
#' @param recovery_threshold CI at or above this = fully regrown
#' @param max_harvest_duration Maximum plausible harvest length in weeks
#' @return Data frame of harvest events with IDs, week/year, duration, dates
detect_harvest_events <- function(time_series_data,
                                  ci_threshold = 2.0,
                                  consecutive_weeks = 2,
                                  recovery_threshold = 4.0,
                                  max_harvest_duration = 12) {
  time_series_data %>%
    arrange(field_id, sub_field, date) %>%
    group_by(field_id, sub_field) %>%
    mutate(
      # Classify each week's state; NA mean_ci falls through to "growing"
      state = case_when(
        mean_ci < ci_threshold ~ "harvest",          # very low CI = harvested/bare
        mean_ci >= recovery_threshold ~ "recovered", # high CI = fully grown
        TRUE ~ "growing"                             # medium CI = growing back
      ),
      # Detect state changes to identify new harvest cycles
      state_change = state != lag(state, default = "recovered"),
      # Run IDs label each maximal run of a single state
      run_id = cumsum(state_change)
    ) %>%
    ungroup() %>%
    # Summarize each continuous run per field
    group_by(field_id, sub_field, run_id, state) %>%
    summarize(
      harvest_week = first(week),
      harvest_year = first(year),
      duration_weeks = n(),
      mean_ci_during = mean(mean_ci, na.rm = TRUE),
      start_date = first(date),
      end_date = last(date),
      .groups = "drop"
    ) %>%
    # Keep only "harvest" runs of plausible duration (not multi-year periods)
    filter(state == "harvest") %>%
    filter(
      duration_weeks >= consecutive_weeks,
      duration_weeks <= max_harvest_duration  # harvest shouldn't last > ~3 months
    ) %>%
    arrange(field_id, sub_field, start_date) %>%
    mutate(harvest_event_id = row_number()) %>%
    select(harvest_event_id, field_id, sub_field, harvest_week, harvest_year,
           duration_weeks, start_date, end_date, mean_ci_during)
}

#' Calculate crop status based on crop age and current CI
#'
#' @param time_series_data Per-field weekly CI series
#' @param harvest_events Output of detect_harvest_events()
#' @param current_week,current_year Week being reported
#' @return Data frame: field_id, sub_field, weeks_since_harvest, crop_status,
#'   harvest_week, harvest_year
calculate_crop_status <- function(time_series_data, harvest_events,
                                  current_week, current_year) {
  # Current-week observations only
  current_data <- time_series_data %>%
    filter(week == current_week, year == current_year)

  # Most recent harvest per field
  most_recent_harvest <- harvest_events %>%
    group_by(field_id, sub_field) %>%
    arrange(desc(harvest_year), desc(harvest_week)) %>%
    slice(1) %>%
    ungroup()

  # Weeks since harvest -> coarse growth-stage classification
  current_data %>%
    left_join(most_recent_harvest, by = c("field_id", "sub_field")) %>%
    mutate(
      weeks_since_harvest = ifelse(
        !is.na(harvest_week),
        (current_year - harvest_year) * 52 + (current_week - harvest_week),
        NA
      ),
      crop_status = case_when(
        is.na(weeks_since_harvest) ~ "Unknown",
        weeks_since_harvest <= 8 & mean_ci < 3.0 ~ "Germination",
        weeks_since_harvest <= 20 & mean_ci >= 3.0 & mean_ci <= 6.0 ~ "Tillering",
        weeks_since_harvest > 20 & weeks_since_harvest <= 52 & mean_ci > 6.0 ~ "Maturity",
        weeks_since_harvest > 52 ~ "Over Maturity",
        TRUE ~ "Transitional"
      )
    ) %>%
    select(field_id, sub_field, weeks_since_harvest, crop_status,
           harvest_week, harvest_year)
}

#' Load per-field cloud coverage data from script 09 output
#'
#' @param project_dir Project directory name
#' @param current_week Current week number
#' @param reports_dir Reports directory where RDS is saved
#' @param field_boundaries_sf Field boundaries for fallback
#' @return Data frame with per-field cloud coverage (pct_clear, cloud_category)
#'
load_per_field_cloud_data <- function(project_dir, current_week, reports_dir,
                                      field_boundaries_sf) {
  # Cloud coverage RDS produced by script 09 for this project/week
  cloud_rds_file <- file.path(
    reports_dir,
    paste0(project_dir, "_cloud_coverage_week", current_week, ".rds")
  )

  if (file.exists(cloud_rds_file)) {
    tryCatch({
      cloud_data <- readRDS(cloud_rds_file)
      if (!is.null(cloud_data) && nrow(cloud_data) > 0) {
        # Normalize the id column name ("field_id" -> "field")
        if ("field_id" %in% names(cloud_data)) {
          cloud_data <- cloud_data %>% rename(field = field_id)
        }
        # Return only the columns the KPI join needs
        if ("field" %in% names(cloud_data) && "sub_field" %in% names(cloud_data)) {
          return(cloud_data %>% select(field, sub_field, pct_clear, cloud_category))
        }
      }
    }, error = function(e) {
      message(paste("Warning: Could not load cloud RDS file:", e$message))
    })
  }

  # Fallback: default values when the file is missing or unusable
  message("Warning: Cloud coverage RDS file not found, using default values")
  return(data.frame(
    field = field_boundaries_sf$field,
    sub_field = field_boundaries_sf$sub_field,
    pct_clear = NA_real_,
    cloud_category = "Not calculated",
    stringsAsFactors = FALSE
  ))
}

#' Extract field-level KPIs
#'
#' Combines field areas, current/previous week CI statistics, crop status and
#' per-field cloud coverage into one Excel-ready data frame (one row per
#' field/sub-field).
#'
#' @param field_boundaries_sf sf object with field / sub_field columns
#' @param current_mosaic SpatRaster for the current week
#' @param previous_mosaic SpatRaster for the previous week, or NULL
#' @param crop_status_data Output of calculate_crop_status()
#' @param cloud_data_available Logical flag from check_cloud_data_availability()
#'   (currently informational only; not read in this function)
#' @param project_dir Project directory name
#' @param current_week,current_year Week being reported
#' @return Data frame with the Excel-facing KPI columns
extract_field_kpis <- function(field_boundaries_sf, current_mosaic,
                               previous_mosaic, crop_status_data,
                               cloud_data_available, project_dir,
                               current_week, current_year) {
  # Helper: per-field mean/sd of CI for one mosaic, with an NA fallback when
  # extraction fails. Output value columns are named <prefix>, <prefix>_sd.
  extract_ci_stats <- function(mosaic, prefix) {
    stats_df <- tryCatch({
      stats <- terra::extract(
        mosaic, vect(field_boundaries_sf),
        fun = function(x, ...) c(mean = mean(x, na.rm = TRUE),
                                 sd = sd(x, na.rm = TRUE)),
        na.rm = TRUE
      )
      data.frame(
        field_id = field_boundaries_sf$field,
        sub_field = field_boundaries_sf$sub_field,
        mean_val = stats[, 2],  # column 1 is the polygon ID
        sd_val = stats[, 3]
      )
    }, error = function(e) {
      message(paste("Warning: Could not extract CI data:", e$message))
      data.frame(
        field_id = field_boundaries_sf$field,
        sub_field = field_boundaries_sf$sub_field,
        mean_val = NA_real_,
        sd_val = NA_real_
      )
    })
    names(stats_df)[3:4] <- c(prefix, paste0(prefix, "_sd"))
    stats_df
  }

  # Field areas in hectares/acres; fall back to NA if geometry is broken.
  # (tryCatch returns the handler's value, so no <<- into the parent frame.)
  field_areas <- tryCatch({
    field_boundaries_sf %>%
      mutate(
        area_ha = as.numeric(st_area(geometry)) / 10000,  # m2 -> hectares
        area_acres = area_ha * 2.47105
      ) %>%
      st_drop_geometry() %>%
      select(field, sub_field, area_ha, area_acres)
  }, error = function(e) {
    message(paste("Warning: Could not calculate field areas:", e$message))
    field_boundaries_sf %>%
      st_drop_geometry() %>%
      select(field, sub_field) %>%
      mutate(area_ha = NA_real_, area_acres = NA_real_)
  })

  # Current week CI statistics
  current_ci_data <- extract_ci_stats(current_mosaic, "ci_current")

  # Previous week CI statistics (all NA when no previous mosaic exists)
  if (!is.null(previous_mosaic)) {
    previous_ci_data <- extract_ci_stats(previous_mosaic, "ci_previous")
  } else {
    previous_ci_data <- data.frame(
      field_id = field_boundaries_sf$field,
      sub_field = field_boundaries_sf$sub_field,
      ci_previous = NA_real_,
      ci_previous_sd = NA_real_
    )
  }

  # Weekly change statistics
  change_data <- current_ci_data %>%
    left_join(previous_ci_data, by = c("field_id", "sub_field")) %>%
    mutate(
      weekly_ci_change = ci_current - ci_previous,
      # Combined SD shows if change was uniform (low) or patchy (high)
      weekly_change_heterogeneity = sqrt(ci_current_sd^2 + ci_previous_sd^2) / 2,
      change_interpretation = case_when(
        is.na(weekly_ci_change) ~ "No previous data",
        abs(weekly_ci_change) < 0.3 & weekly_change_heterogeneity < 0.5 ~ "Stable (uniform)",
        abs(weekly_ci_change) < 0.3 & weekly_change_heterogeneity >= 0.5 ~ "Stable (patchy)",
        weekly_ci_change >= 0.3 & weekly_change_heterogeneity < 0.5 ~ "Increasing (uniform)",
        weekly_ci_change >= 0.3 & weekly_change_heterogeneity >= 0.5 ~ "Increasing (patchy)",
        weekly_ci_change <= -0.3 & weekly_change_heterogeneity < 0.5 ~ "Decreasing (uniform)",
        weekly_ci_change <= -0.3 & weekly_change_heterogeneity >= 0.5 ~ "Decreasing (patchy)",
        TRUE ~ "Mixed patterns"
      )
    )

  # Load cloud coverage data from script 09 output
  cloud_stats <- load_per_field_cloud_data(
    project_dir = project_dir,
    current_week = current_week,
    reports_dir = here("laravel_app", "storage", "app", project_dir,
                       "reports", "kpis"),
    field_boundaries_sf = field_boundaries_sf
  )

  # Combine all data. NOTE: cloud_stats keys on "field" (load_per_field_
  # cloud_data renames field_id), so the join is by c("field", "sub_field");
  # joining by c("field" = "field_id") would error on a missing column.
  field_areas %>%
    left_join(change_data, by = c("field" = "field_id", "sub_field")) %>%
    left_join(crop_status_data, by = c("field" = "field_id", "sub_field")) %>%
    left_join(cloud_stats, by = c("field", "sub_field")) %>%
    mutate(
      # Clear acres derived from pct_clear
      clear_acres = round(area_acres * (pct_clear / 100), 2),
      clear_acres_pct = paste0(round(pct_clear, 1), "%"),
      # Format for Excel
      Field_ID = paste(field, sub_field, sep = "_"),
      Acreage_ha = round(area_ha, 2),
      Acreage_acres = round(area_acres, 1),
      # Avoid the literal string "NA (NA%)" when cloud data is unavailable
      Clear_Acres = ifelse(
        is.na(clear_acres),
        "Not calculated",
        paste0(clear_acres, " (", clear_acres_pct, ")")
      ),
      Chlorophyll_Index = round(ci_current, 2),
      Weekly_Change = round(weekly_ci_change, 2),
      Change_Uniformity = round(weekly_change_heterogeneity, 2),
      Change_Pattern = change_interpretation,
      Crop_Status = crop_status,
      Weeks_Since_Harvest = weeks_since_harvest,
      Cloud_Category = cloud_category,
      Alerts = "999 - test weed"  # Placeholder until real alert logic lands
    ) %>%
    select(Field_ID, Acreage_ha, Acreage_acres, Clear_Acres,
           Chlorophyll_Index, Weekly_Change, Change_Uniformity,
           Change_Pattern, Crop_Status, Weeks_Since_Harvest,
           Cloud_Category, Alerts)
}

#' Calculate per-field cloud interference from 8-band imagery
#'
#' Averages the clear-pixel percentage (band 9 = udm1 cloud mask, 0 = clear)
#' across all images acquired during the given week.
#' NOTE(review): not called from main() in this file — kept for future use.
#'
#' @return Data frame: field_id, sub_field, cloud_free_pct, cloud_quality
calculate_cloud_interference <- function(field_boundaries_sf, project_dir,
                                         current_week, current_year) {
  merged_8b_dir <- here("laravel_app/storage/app", project_dir, "merged_tif_8b")

  # Map the week number to an approximate date range (week X of year Y)
  week_start <- as.Date(paste0(current_year, "-01-01")) + (current_week - 1) * 7
  week_end <- week_start + 6

  # Shared shape for every "no result" early return
  no_data <- function(reason) {
    data.frame(
      field_id = field_boundaries_sf$field,
      sub_field = field_boundaries_sf$sub_field,
      cloud_free_pct = NA,
      cloud_quality = reason
    )
  }

  files_8b <- list.files(merged_8b_dir, pattern = "\\.tif$", full.names = TRUE)
  if (length(files_8b) == 0) {
    return(no_data("No data"))
  }

  # Extract dates from filenames (format: YYYY-MM-DD.tif). Filenames that do
  # not parse yield NA dates; drop them explicitly, since NA comparisons
  # would otherwise inject NA entries into the subset.
  file_dates <- as.Date(gsub("\\.tif$", "", basename(files_8b)))
  in_week <- !is.na(file_dates) & file_dates >= week_start & file_dates <= week_end
  week_files <- files_8b[in_week]
  if (length(week_files) == 0) {
    return(no_data("No data for week"))
  }

  # Per-image clear percentage for every field
  cloud_results_list <- list()
  for (file in week_files) {
    tryCatch({
      r <- rast(file)
      # Band 9 is udm1 (cloud mask): 0 = clear, 1 = cloudy
      if (nlyr(r) >= 9) {
        cloud_band <- r[[9]]
        cloud_stats <- terra::extract(
          cloud_band, vect(field_boundaries_sf),
          fun = function(x, ...) {
            clear_pixels <- sum(x == 0, na.rm = TRUE)
            total_pixels <- sum(!is.na(x))
            if (total_pixels > 0) clear_pixels / total_pixels * 100 else NA
          },
          na.rm = TRUE
        )
        cloud_results_list[[basename(file)]] <- data.frame(
          field_id = field_boundaries_sf$field,
          sub_field = field_boundaries_sf$sub_field,
          cloud_free_pct = cloud_stats[, 2]
        )
      }
    }, error = function(e) {
      warning(paste("Error processing cloud data from", basename(file), ":",
                    e$message))
    })
  }

  if (length(cloud_results_list) == 0) {
    return(no_data("Processing error"))
  }

  # Average cloud-free percentage across all images in the week
  bind_rows(cloud_results_list) %>%
    group_by(field_id, sub_field) %>%
    summarize(
      cloud_free_pct = mean(cloud_free_pct, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    mutate(
      cloud_quality = case_when(
        is.na(cloud_free_pct) ~ "No data",
        cloud_free_pct >= 90 ~ "Excellent",
        cloud_free_pct >= 75 ~ "Good",
        cloud_free_pct >= 50 ~ "Moderate",
        TRUE ~ "Poor"
      )
    )
}

#' Generate alerts based on field data
#'
#' Placeholder implementation: surfaces any non-empty Alerts entries from the
#' field KPI table. Real alert logic will replace this.
generate_alerts <- function(field_data, crop_status_data) {
  field_data %>%
    filter(Alerts != "" & !is.na(Alerts)) %>%
    select(Field_ID, Crop_Status, Chlorophyll_Index, Weekly_Change, Alerts) %>%
    mutate(Alert_Type = "Placeholder")
}

#' Create farm-wide overview statistics
#'
#' @param field_data Output of extract_field_kpis()
#' @param alerts_data Output of generate_alerts()
#' @return Two-column Metric/Value data frame for the overview sheet
create_farm_overview <- function(field_data, alerts_data) {
  # field_data as produced by extract_field_kpis() carries no numeric
  # Cloud_Free_Percent column (cloud info is formatted into Clear_Acres /
  # Cloud_Category), so guard against the missing column rather than
  # calling mean(NULL) and emitting a warning.
  avg_cloud_free <- if ("Cloud_Free_Percent" %in% names(field_data)) {
    round(mean(field_data$Cloud_Free_Percent, na.rm = TRUE), 1)
  } else {
    NA_real_
  }

  data.frame(
    Metric = c(
      "Total Fields",
      "Total Area (ha)",
      "Total Area (acres)",
      "Average CI",
      "Fields with Increasing CI",
      "Fields with Decreasing CI",
      "Fields with Stable CI",
      "Average Cloud Free %",
      "Total Alerts"
    ),
    Value = c(
      nrow(field_data),
      round(sum(field_data$Acreage_ha, na.rm = TRUE), 1),
      round(sum(field_data$Acreage_acres, na.rm = TRUE), 0),
      round(mean(field_data$Chlorophyll_Index, na.rm = TRUE), 2),
      sum(grepl("Increasing", field_data$Change_Pattern), na.rm = TRUE),
      sum(grepl("Decreasing", field_data$Change_Pattern), na.rm = TRUE),
      sum(grepl("Stable", field_data$Change_Pattern), na.rm = TRUE),
      avg_cloud_free,
      nrow(alerts_data)
    )
  )
}

# 14. Script execution
# ------------------
if (sys.nframe() == 0) {
  main()
}