# SmartCane/r_app/old_scripts/20_generate_kpi_excel.R
# (729 lines, 24 KiB, R)

# 20_GENERATE_KPI_EXCEL.R
# =======================
# This script generates an Excel file with KPIs for client delivery:
# - Field-level data with CI, changes, crop status, alerts, and cloud interference
# - Alerts summary sheet
# - Farm-wide overview statistics
#
# Usage: Rscript 20_generate_kpi_excel.R [end_date] [project_dir]
# - end_date: End date for KPI calculation (YYYY-MM-DD format), default: most recent week
# - project_dir: Project directory name (e.g., "aura", "esa"), default: "esa"
# 1. Load required libraries
# -------------------------
suppressPackageStartupMessages({
library(here)
library(sf)
library(terra)
library(dplyr)
library(tidyr)
library(lubridate)
library(readr)
library(writexl)
library(stringr)
})
# 2. Main function
# --------------
#' Entry point: build the weekly KPI Excel workbook for one project.
#'
#' Reads optional CLI arguments (end_date, project_dir), loads the project
#' configuration, picks the week to report on, extracts per-field KPIs, and
#' writes a three-sheet .xlsx file (Field_Data, Alerts_Summary, Farm_Overview).
#'
#' Side effects: assigns `project_dir` into the global environment, sources
#' parameters_project.R (which is assumed to define `weekly_CI_mosaic`,
#' `reports_dir` and `field_boundaries_sf` -- TODO confirm), prints progress
#' to stdout, and writes the Excel file under `reports_dir`.
#'
#' @return Path of the written Excel file.
main <- function() {
# Process command line arguments
args <- commandArgs(trailingOnly = TRUE)
# Process end_date argument: invalid dates fall back to "most recent week"
if (length(args) >= 1 && !is.na(args[1])) {
end_date <- as.Date(args[1])
if (is.na(end_date)) {
warning("Invalid end_date provided. Using default (most recent available).")
end_date <- NULL
}
} else {
end_date <- NULL
}
# Process project_dir argument
if (length(args) >= 2 && !is.na(args[2])) {
project_dir <- as.character(args[2])
} else {
project_dir <- "esa" # Default project
}
# Make project_dir available globally so parameters_project.R can use it
# NOTE(review): deliberate global side effect required by the sourced config.
assign("project_dir", project_dir, envir = .GlobalEnv)
# 3. Load project configuration
# ---------------------------
# Load project parameters (this sets up all directory paths and field boundaries)
tryCatch({
source(here("r_app", "parameters_project.R"))
}, error = function(e) {
stop("Error loading parameters_project.R: ", e$message)
})
# Check if required variables exist
if (!exists("project_dir")) {
stop("project_dir must be set before running this script")
}
if (!exists("field_boundaries_sf") || is.null(field_boundaries_sf)) {
stop("Field boundaries not loaded. Check parameters_project.R initialization.")
}
cat("\n=== STARTING KPI EXCEL GENERATION ===\n")
cat("Project:", project_dir, "\n")
# 4. Determine which week to analyze
# --------------------------------
if (is.null(end_date)) {
# Find most recent weekly mosaic (filenames: week_<W>_<YYYY>.tif)
mosaic_files <- list.files(weekly_CI_mosaic, pattern = "^week_\\d+_\\d{4}\\.tif$", full.names = TRUE)
if (length(mosaic_files) == 0) {
stop("No weekly mosaic files found in: ", weekly_CI_mosaic)
}
# Extract week numbers and years, newest first
mosaic_info <- data.frame(
file = mosaic_files,
week = as.numeric(gsub(".*week_(\\d+)_\\d{4}\\.tif", "\\1", basename(mosaic_files))),
year = as.numeric(gsub(".*week_\\d+_(\\d{4})\\.tif", "\\1", basename(mosaic_files)))
) %>%
arrange(desc(year), desc(week))
current_week <- mosaic_info$week[1]
current_year <- mosaic_info$year[1]
cat("Using most recent week:", current_week, "of", current_year, "\n")
} else {
current_week <- isoweek(end_date)
current_year <- year(end_date)
cat("Using specified date:", as.character(end_date), "(Week", current_week, ")\n")
}
previous_week <- current_week - 1
previous_year <- current_year
# Handle year boundary
# NOTE(review): assumes 52-week years; ISO years with 53 weeks would make
# week 1 compare against week 52 of the prior year -- confirm acceptable.
if (previous_week < 1) {
previous_week <- 52
previous_year <- current_year - 1
}
# 5. Load weekly mosaics
# --------------------
current_mosaic_path <- file.path(weekly_CI_mosaic, paste0("week_", current_week, "_", current_year, ".tif"))
previous_mosaic_path <- file.path(weekly_CI_mosaic, paste0("week_", previous_week, "_", previous_year, ".tif"))
if (!file.exists(current_mosaic_path)) {
stop("Current week mosaic not found: ", current_mosaic_path)
}
current_mosaic <- rast(current_mosaic_path)
cat("Loaded current week mosaic:", basename(current_mosaic_path), "\n")
# Previous week is optional: weekly change columns become NA without it
previous_mosaic <- NULL
if (file.exists(previous_mosaic_path)) {
previous_mosaic <- rast(previous_mosaic_path)
cat("Loaded previous week mosaic:", basename(previous_mosaic_path), "\n")
} else {
warning("Previous week mosaic not found: ", basename(previous_mosaic_path))
}
# 6. Load 8-band data for cloud information
# ----------------------------------------
# NOTE(review): this flag is passed through to extract_field_kpis but the
# actual cloud stats are loaded from the script-09 RDS -- confirm intent.
cloud_data_available <- check_cloud_data_availability(project_dir, current_week, current_year)
# 7. Build time series for harvest detection
# -----------------------------------------
cat("\nBuilding time series for harvest detection...\n")
time_series_data <- build_time_series_from_weekly_mosaics(
weekly_mosaic_dir = weekly_CI_mosaic,
field_boundaries_sf = field_boundaries_sf,
current_week = current_week,
current_year = current_year
)
# 8. Detect harvest events
# -----------------------
cat("Detecting harvest events...\n")
harvest_events <- detect_harvest_events(
time_series_data = time_series_data,
ci_threshold = 2,
consecutive_weeks = 2
)
# 9. Calculate crop status
# -----------------------
cat("Calculating crop status...\n")
crop_status_data <- calculate_crop_status(
time_series_data = time_series_data,
harvest_events = harvest_events,
current_week = current_week,
current_year = current_year
)
# 10. Extract field-level data
# --------------------------
cat("\nExtracting field-level data...\n")
field_data <- extract_field_kpis(
field_boundaries_sf = field_boundaries_sf,
current_mosaic = current_mosaic,
previous_mosaic = previous_mosaic,
crop_status_data = crop_status_data,
cloud_data_available = cloud_data_available,
project_dir = project_dir,
current_week = current_week,
current_year = current_year
)
# 11. Generate alerts
# -----------------
cat("Generating alerts...\n")
alerts_data <- generate_alerts(field_data, crop_status_data)
# 12. Create farm overview
# ----------------------
cat("Creating farm overview...\n")
farm_overview <- create_farm_overview(field_data, alerts_data)
# 13. Write Excel file
# ------------------
# reports_dir is expected from parameters_project.R -- TODO confirm
output_file <- file.path(
reports_dir,
paste0("kpi_excel_report_week", current_week, "_", project_dir, ".xlsx")
)
cat("\nWriting Excel file...\n")
write_xlsx(
list(
Field_Data = field_data,
Alerts_Summary = alerts_data,
Farm_Overview = farm_overview
),
path = output_file
)
cat("\n=== KPI EXCEL GENERATION COMPLETED ===\n")
cat("Output file:", output_file, "\n")
cat("Total fields:", nrow(field_data), "\n")
cat("Total alerts:", nrow(alerts_data), "\n\n")
return(output_file)
}
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
#' Check whether any 8-band imagery exists for cloud-interference stats
#'
#' Looks for .tif files in the project's merged_tif_8b directory and reports
#' (via cat) whether cloud statistics can be computed at all.
#' NOTE(review): `current_week` / `current_year` are accepted but not used --
#' the check is directory-wide, not week-specific.
#'
#' @param project_dir Project directory name under laravel_app/storage/app.
#' @param current_week,current_year Currently unused; kept for interface
#'   stability with callers.
#' @return TRUE when at least one 8-band .tif is present, otherwise FALSE.
check_cloud_data_availability <- function(project_dir, current_week, current_year) {
  dir_8b <- here("laravel_app/storage/app", project_dir, "merged_tif_8b")
  if (!dir.exists(dir_8b)) {
    cat("Cloud data directory not found, cloud interference will not be calculated\n")
    return(FALSE)
  }
  tif_files <- list.files(dir_8b, pattern = "\\.tif$", full.names = TRUE)
  if (length(tif_files) == 0) {
    cat("No 8-band files found, cloud interference will not be calculated\n")
    return(FALSE)
  }
  cat("Cloud data available from", length(tif_files), "images\n")
  TRUE
}
#' Build a per-field CI time series from all available weekly mosaics
#'
#' Iterates over every "week_<W>_<YYYY>.tif" mosaic in chronological order and
#' extracts the mean CI per field polygon.
#'
#' @param weekly_mosaic_dir Directory containing the weekly mosaic rasters.
#' @param field_boundaries_sf sf object with one polygon per field/sub-field;
#'   must carry `field` and `sub_field` columns.
#' @param current_week,current_year Currently unused; kept for interface
#'   stability with callers.
#' @return data.frame with columns field_id, sub_field, week, year, date,
#'   mean_ci (one row per field per available week).
build_time_series_from_weekly_mosaics <- function(weekly_mosaic_dir, field_boundaries_sf, current_week, current_year) {
  mosaic_files <- list.files(weekly_mosaic_dir, pattern = "^week_\\d+_\\d{4}\\.tif$", full.names = TRUE)
  if (length(mosaic_files) == 0) {
    stop("No weekly mosaic files found")
  }
  # Parse week/year out of the file names and process oldest-first
  mosaic_info <- data.frame(
    file = mosaic_files,
    week = as.numeric(gsub(".*week_(\\d+)_\\d{4}\\.tif", "\\1", basename(mosaic_files))),
    year = as.numeric(gsub(".*week_\\d+_(\\d{4})\\.tif", "\\1", basename(mosaic_files)))
  ) %>%
    arrange(year, week)
  # Pre-allocate; a failed week leaves a NULL slot, which bind_rows() drops
  time_series_list <- vector("list", nrow(mosaic_info))
  for (i in seq_len(nrow(mosaic_info))) {
    week_num <- mosaic_info$week[i]
    year_num <- mosaic_info$year[i]
    tryCatch({
      mosaic <- rast(mosaic_info$file[i])
      # Mean CI per polygon; assumes the mosaic has a layer named "CI"
      # -- TODO confirm the layer name is stable across projects.
      field_stats <- terra::extract(mosaic$CI, vect(field_boundaries_sf), fun = mean, na.rm = TRUE)
      # Approximate start-of-week date: Jan 1 + 7*(week-1) (not strict ISO)
      jan1 <- as.Date(paste0(year_num, "-01-01"))
      week_date <- jan1 + (week_num - 1) * 7
      time_series_list[[i]] <- data.frame(
        field_id = field_boundaries_sf$field,
        sub_field = field_boundaries_sf$sub_field,
        week = week_num,
        year = year_num,
        date = week_date,
        mean_ci = field_stats$CI,
        stringsAsFactors = FALSE
      )
    }, error = function(e) {
      warning(paste("Error processing week", week_num, ":", e$message))
    })
  }
  # Stack all successfully processed weeks into one long data frame
  bind_rows(time_series_list)
}
#' Detect harvest events based on CI time series
#'
#' Classifies each week of each field's CI series into harvest / growing /
#' recovered states, then extracts continuous low-CI runs of plausible
#' harvest length.
#'
#' @param time_series_data Long data frame with field_id, sub_field, week,
#'   year, date, mean_ci (output of build_time_series_from_weekly_mosaics).
#' @param ci_threshold CI below which a week counts as harvested/bare.
#' @param consecutive_weeks Minimum run length to accept as a harvest.
#' @param recovery_threshold CI at or above which the crop counts as recovered.
#' @param max_harvest_duration Maximum plausible harvest run length in weeks.
#' @return data.frame of harvest runs: harvest_event_id, field_id, sub_field,
#'   harvest_week/year (first week of the run), duration_weeks, start/end
#'   dates, and mean CI during the run.
detect_harvest_events <- function(time_series_data, ci_threshold = 2.0, consecutive_weeks = 2,
recovery_threshold = 4.0, max_harvest_duration = 12) {
# For each field, find ALL periods where CI drops below threshold
# Key insight: A harvest event should be SHORT (few weeks), not 60+ weeks!
# After harvest, CI should recover (go above recovery_threshold)
harvest_events <- time_series_data %>%
arrange(field_id, sub_field, date) %>%
group_by(field_id, sub_field) %>%
mutate(
# Classify each week's state
state = case_when(
mean_ci < ci_threshold ~ "harvest", # Very low CI = harvested/bare
mean_ci >= recovery_threshold ~ "recovered", # High CI = fully grown
TRUE ~ "growing" # Medium CI = growing back
),
# Detect state changes to identify new harvest cycles.
# default = "recovered" makes the very first week start a new run.
state_change = state != lag(state, default = "recovered"),
# Create run IDs based on state changes (classic lag + cumsum run-length trick)
run_id = cumsum(state_change)
) %>%
ungroup() %>%
# Group by run to identify continuous periods in each state
group_by(field_id, sub_field, run_id, state) %>%
summarize(
harvest_week = first(week), # first week of the run
harvest_year = first(year),
duration_weeks = n(),
mean_ci_during = mean(mean_ci, na.rm = TRUE),
start_date = first(date),
end_date = last(date),
.groups = "drop"
) %>%
# Only keep "harvest" state periods
filter(state == "harvest") %>%
# Filter for reasonable harvest duration (not multi-year periods!)
filter(
duration_weeks >= consecutive_weeks,
duration_weeks <= max_harvest_duration # Harvest shouldn't last more than ~3 months
) %>%
# Sort and add unique IDs
arrange(field_id, sub_field, start_date) %>%
mutate(harvest_event_id = row_number()) %>%
select(harvest_event_id, field_id, sub_field, harvest_week, harvest_year,
duration_weeks, start_date, end_date, mean_ci_during)
return(harvest_events)
}
#' Classify crop development stage for the current week
#'
#' Combines the current week's mean CI with the time elapsed since the most
#' recent detected harvest to label each field/sub-field with a growth stage.
#'
#' @param time_series_data Per-field weekly CI table (field_id, sub_field,
#'   week, year, mean_ci, ...).
#' @param harvest_events Output of detect_harvest_events().
#' @param current_week,current_year Week under analysis.
#' @return data.frame: field_id, sub_field, weeks_since_harvest, crop_status,
#'   harvest_week, harvest_year.
calculate_crop_status <- function(time_series_data, harvest_events, current_week, current_year) {
  # Restrict the time series to the week being reported
  week_obs <- time_series_data %>%
    filter(week == current_week, year == current_year)

  # Most recent harvest per field/sub-field: sort newest-first, keep one row
  latest_harvest <- harvest_events %>%
    arrange(desc(harvest_year), desc(harvest_week)) %>%
    group_by(field_id, sub_field) %>%
    slice(1) %>%
    ungroup()

  week_obs %>%
    left_join(latest_harvest, by = c("field_id", "sub_field")) %>%
    mutate(
      # Approximate age in weeks (52-week years); NA when no harvest was seen
      weeks_since_harvest = ifelse(
        !is.na(harvest_week),
        (current_year - harvest_year) * 52 + (current_week - harvest_week),
        NA
      ),
      # Stage rules combine crop age with the current CI level
      crop_status = case_when(
        is.na(weeks_since_harvest) ~ "Unknown",
        weeks_since_harvest <= 8 & mean_ci < 3.0 ~ "Germination",
        weeks_since_harvest <= 20 & mean_ci >= 3.0 & mean_ci <= 6.0 ~ "Tillering",
        weeks_since_harvest > 20 & weeks_since_harvest <= 52 & mean_ci > 6.0 ~ "Maturity",
        weeks_since_harvest > 52 ~ "Over Maturity",
        TRUE ~ "Transitional"
      )
    ) %>%
    select(field_id, sub_field, weeks_since_harvest, crop_status, harvest_week, harvest_year)
}
#' Load per-field cloud coverage data from script 09 output
#'
#' Reads the cloud-coverage RDS written by script 09 for the given week; when
#' the file is missing, unreadable, or lacks the expected columns, returns a
#' default table with NA coverage so downstream joins still work.
#'
#' @param project_dir Project directory name (used in the RDS file name)
#' @param current_week Current week number (used in the RDS file name)
#' @param reports_dir Reports directory where the RDS is saved
#' @param field_boundaries_sf Field boundaries, used only for the fallback rows
#' @return Data frame keyed on `field`/`sub_field` with pct_clear and
#'   cloud_category columns
#'
load_per_field_cloud_data <- function(project_dir, current_week, reports_dir, field_boundaries_sf) {
# Try to load cloud coverage RDS file from script 09
cloud_rds_file <- file.path(reports_dir,
paste0(project_dir, "_cloud_coverage_week", current_week, ".rds"))
if (file.exists(cloud_rds_file)) {
tryCatch({
cloud_data <- readRDS(cloud_rds_file)
if (!is.null(cloud_data) && nrow(cloud_data) > 0) {
# Normalize the key column name: RDS may use field_id, callers join on field
if ("field_id" %in% names(cloud_data)) {
cloud_data <- cloud_data %>%
rename(field = field_id)
}
# Return with just the columns we need. A return() here exits the
# whole function; any missing column falls through to the fallback.
if ("field" %in% names(cloud_data) && "sub_field" %in% names(cloud_data)) {
return(cloud_data %>%
select(field, sub_field, pct_clear, cloud_category))
}
}
}, error = function(e) {
message(paste("Warning: Could not load cloud RDS file:", e$message))
})
}
# Fallback: return default values if file not found or error
message("Warning: Cloud coverage RDS file not found, using default values")
return(data.frame(
field = field_boundaries_sf$field,
sub_field = field_boundaries_sf$sub_field,
pct_clear = NA_real_,
cloud_category = "Not calculated",
stringsAsFactors = FALSE
))
}
#' Extract field-level KPIs for the weekly Excel report
#'
#' Combines field geometry (area), current/previous-week CI statistics, crop
#' status, and per-field cloud coverage into one table with Excel-ready
#' column names.
#'
#' Fixes vs the previous version: removes a stray "mc" token that made the
#' previous-week data.frame() call a syntax error, joins cloud_stats on its
#' actual key columns (`field`/`sub_field`), and replaces the `<<-` error
#' handler with tryCatch return values.
#'
#' @param field_boundaries_sf sf polygons with `field` and `sub_field` columns.
#' @param current_mosaic SpatRaster for the current week (required).
#' @param previous_mosaic SpatRaster for the previous week, or NULL.
#' @param crop_status_data Output of calculate_crop_status().
#' @param cloud_data_available Logical flag; currently informational only.
#' @param project_dir Project directory name (locates the cloud RDS data).
#' @param current_week,current_year Week being reported.
#' @return data.frame, one row per field/sub-field, with formatted KPI columns.
extract_field_kpis <- function(field_boundaries_sf, current_mosaic, previous_mosaic,
                               crop_status_data, cloud_data_available,
                               project_dir, current_week, current_year) {
  # --- Field areas (ha / acres). tryCatch yields the fallback directly on
  # geometry errors, instead of the previous fragile `<<-` assignment.
  field_areas <- tryCatch({
    field_boundaries_sf %>%
      mutate(
        area_ha = as.numeric(st_area(geometry)) / 10000, # m2 to hectares
        area_acres = area_ha * 2.47105
      ) %>%
      st_drop_geometry() %>%
      select(field, sub_field, area_ha, area_acres)
  }, error = function(e) {
    message(paste("Warning: Could not calculate field areas:", e$message))
    field_boundaries_sf %>%
      st_drop_geometry() %>%
      select(field, sub_field) %>%
      mutate(area_ha = NA_real_, area_acres = NA_real_)
  })

  # Helper: per-field mean/sd CI for one mosaic, NA fallback on error/NULL.
  extract_ci_stats <- function(mosaic, mean_col, sd_col) {
    na_result <- data.frame(
      field_id = field_boundaries_sf$field,
      sub_field = field_boundaries_sf$sub_field,
      m = NA_real_,
      s = NA_real_
    )
    names(na_result)[3:4] <- c(mean_col, sd_col)
    if (is.null(mosaic)) {
      return(na_result)
    }
    tryCatch({
      stats <- terra::extract(
        mosaic,
        vect(field_boundaries_sf),
        fun = function(x, ...) {
          c(mean = mean(x, na.rm = TRUE),
            sd = sd(x, na.rm = TRUE))
        },
        na.rm = TRUE
      )
      out <- data.frame(
        field_id = field_boundaries_sf$field,
        sub_field = field_boundaries_sf$sub_field,
        m = stats[, 2], # column 1 is the polygon ID; 2 = mean, 3 = sd
        s = stats[, 3]
      )
      names(out)[3:4] <- c(mean_col, sd_col)
      out
    }, error = function(e) {
      message(paste("Warning: Could not extract CI data:", e$message))
      na_result
    })
  }

  current_ci_data <- extract_ci_stats(current_mosaic, "ci_current", "ci_current_sd")
  previous_ci_data <- extract_ci_stats(previous_mosaic, "ci_previous", "ci_previous_sd")

  # Weekly change plus an interpretation of magnitude vs spatial heterogeneity
  change_data <- current_ci_data %>%
    left_join(previous_ci_data, by = c("field_id", "sub_field")) %>%
    mutate(
      weekly_ci_change = ci_current - ci_previous,
      # Combined SD shows if change was uniform (low) or patchy (high)
      weekly_change_heterogeneity = sqrt(ci_current_sd^2 + ci_previous_sd^2) / 2,
      change_interpretation = case_when(
        is.na(weekly_ci_change) ~ "No previous data",
        abs(weekly_ci_change) < 0.3 & weekly_change_heterogeneity < 0.5 ~ "Stable (uniform)",
        abs(weekly_ci_change) < 0.3 & weekly_change_heterogeneity >= 0.5 ~ "Stable (patchy)",
        weekly_ci_change >= 0.3 & weekly_change_heterogeneity < 0.5 ~ "Increasing (uniform)",
        weekly_ci_change >= 0.3 & weekly_change_heterogeneity >= 0.5 ~ "Increasing (patchy)",
        weekly_ci_change <= -0.3 & weekly_change_heterogeneity < 0.5 ~ "Decreasing (uniform)",
        weekly_ci_change <= -0.3 & weekly_change_heterogeneity >= 0.5 ~ "Decreasing (patchy)",
        TRUE ~ "Mixed patterns"
      )
    )

  # Per-field cloud coverage produced by script 09 (NA defaults on failure)
  cloud_stats <- load_per_field_cloud_data(
    project_dir = project_dir,
    current_week = current_week,
    reports_dir = file.path(here("laravel_app", "storage", "app", project_dir, "reports", "kpis")),
    field_boundaries_sf = field_boundaries_sf
  )

  # Combine all sources. cloud_stats is keyed on `field`/`sub_field`
  # (load_per_field_cloud_data renames field_id -> field), unlike
  # change_data / crop_status_data which use `field_id`.
  field_kpi_data <- field_areas %>%
    left_join(change_data, by = c("field" = "field_id", "sub_field")) %>%
    left_join(crop_status_data, by = c("field" = "field_id", "sub_field")) %>%
    left_join(cloud_stats, by = c("field", "sub_field")) %>%
    mutate(
      # Calculate clear acres based on pct_clear
      clear_acres = round(area_acres * (pct_clear / 100), 2),
      clear_acres_pct = paste0(round(pct_clear, 1), "%"),
      # Format for Excel
      Field_ID = paste(field, sub_field, sep = "_"),
      Acreage_ha = round(area_ha, 2),
      Acreage_acres = round(area_acres, 1),
      Clear_Acres = paste0(clear_acres, " (", clear_acres_pct, ")"),
      Chlorophyll_Index = round(ci_current, 2),
      Weekly_Change = round(weekly_ci_change, 2),
      Change_Uniformity = round(weekly_change_heterogeneity, 2),
      Change_Pattern = change_interpretation,
      Crop_Status = crop_status,
      Weeks_Since_Harvest = weeks_since_harvest,
      Cloud_Category = cloud_category,
      Alerts = "999 - test weed" # Placeholder
    ) %>%
    select(Field_ID, Acreage_ha, Acreage_acres, Clear_Acres, Chlorophyll_Index,
           Weekly_Change, Change_Uniformity, Change_Pattern,
           Crop_Status, Weeks_Since_Harvest, Cloud_Category, Alerts)

  return(field_kpi_data)
}
#' Calculate per-field cloud interference from 8-band imagery
#'
#' Averages the cloud-free pixel percentage over every 8-band image whose
#' filename date (YYYY-MM-DD.tif) falls inside the requested week, then
#' buckets the result into a quality label.
#' NOTE(review): this helper is not called by the current pipeline (cloud
#' stats come from load_per_field_cloud_data) -- kept for future use.
#'
#' @param field_boundaries_sf sf polygons with `field` / `sub_field` columns.
#' @param project_dir Project directory under laravel_app/storage/app.
#' @param current_week,current_year Week to analyse.
#' @return data.frame: field_id, sub_field, cloud_free_pct, cloud_quality.
calculate_cloud_interference <- function(field_boundaries_sf, project_dir, current_week, current_year) {
  merged_8b_dir <- here("laravel_app/storage/app", project_dir, "merged_tif_8b")

  # Approximate the week's date range: Jan 1 + 7*(week-1) (not strict ISO)
  week_start <- as.Date(paste0(current_year, "-01-01")) + (current_week - 1) * 7
  week_end <- week_start + 6

  # Uniform NA-valued result used by every early-exit path
  no_data <- function(reason) {
    data.frame(
      field_id = field_boundaries_sf$field,
      sub_field = field_boundaries_sf$sub_field,
      cloud_free_pct = NA,
      cloud_quality = reason
    )
  }

  files_8b <- list.files(merged_8b_dir, pattern = "\\.tif$", full.names = TRUE)
  if (length(files_8b) == 0) {
    return(no_data("No data"))
  }

  # Dates come from the filenames; a file that does not parse as a date used
  # to produce NA comparisons and leak NA entries into the subset -- drop it.
  file_dates <- as.Date(gsub("\\.tif$", "", basename(files_8b)))
  in_week <- !is.na(file_dates) & file_dates >= week_start & file_dates <= week_end
  week_files <- files_8b[in_week]
  if (length(week_files) == 0) {
    return(no_data("No data for week"))
  }

  # Per-image cloud-free percentage for each field polygon
  cloud_results_list <- list()
  for (file in week_files) {
    tryCatch({
      r <- rast(file)
      # Band 9 is udm1 (cloud mask): 0 = clear, 1 = cloudy
      if (nlyr(r) >= 9) {
        cloud_band <- r[[9]]
        cloud_stats <- terra::extract(
          cloud_band,
          vect(field_boundaries_sf),
          fun = function(x, ...) {
            clear_pixels <- sum(x == 0, na.rm = TRUE)
            total_pixels <- sum(!is.na(x))
            if (total_pixels > 0) {
              clear_pixels / total_pixels * 100
            } else {
              NA
            }
          },
          na.rm = TRUE
        )
        cloud_results_list[[basename(file)]] <- data.frame(
          field_id = field_boundaries_sf$field,
          sub_field = field_boundaries_sf$sub_field,
          cloud_free_pct = cloud_stats[, 2] # column 1 is the polygon ID
        )
      }
    }, error = function(e) {
      warning(paste("Error processing cloud data from", basename(file), ":", e$message))
    })
  }

  if (length(cloud_results_list) == 0) {
    return(no_data("Processing error"))
  }

  # Average across all usable images in the week, then bucket into a label
  bind_rows(cloud_results_list) %>%
    group_by(field_id, sub_field) %>%
    summarize(
      cloud_free_pct = mean(cloud_free_pct, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    mutate(
      cloud_quality = case_when(
        is.na(cloud_free_pct) ~ "No data",
        cloud_free_pct >= 90 ~ "Excellent",
        cloud_free_pct >= 75 ~ "Good",
        cloud_free_pct >= 50 ~ "Moderate",
        TRUE ~ "Poor"
      )
    )
}
#' Build the alerts sheet from field-level KPI rows
#'
#' Placeholder implementation: surfaces every row whose Alerts column is
#' non-empty. Real alert rules will replace this logic later.
#'
#' @param field_data Field-level KPI table (needs Field_ID, Crop_Status,
#'   Chlorophyll_Index, Weekly_Change, Alerts columns).
#' @param crop_status_data Currently unused; kept for interface stability.
#' @return data.frame of alert rows with an Alert_Type column appended.
generate_alerts <- function(field_data, crop_status_data) {
  field_data %>%
    filter(!is.na(Alerts), Alerts != "") %>%
    select(Field_ID, Crop_Status, Chlorophyll_Index, Weekly_Change, Alerts) %>%
    mutate(Alert_Type = "Placeholder")
}
#' Create farm-wide overview statistics
#'
#' Builds a two-column (Metric / Value) summary table from the field-level
#' KPI table and the alerts table.
#'
#' Fix: the previous version read `field_data$Cloud_Free_Percent`, a column
#' extract_field_kpis never produces, so mean(NULL) raised a warning; the
#' column is now optional and missing data yields a clean NA.
#'
#' @param field_data Field-level KPI table (one row per field/sub-field).
#' @param alerts_data Alerts table; only its row count is used.
#' @return data.frame with Metric and Value columns.
create_farm_overview <- function(field_data, alerts_data) {
  # Cloud-free percentage is only present when cloud stats were computed
  avg_cloud_free <- if ("Cloud_Free_Percent" %in% names(field_data)) {
    round(mean(field_data$Cloud_Free_Percent, na.rm = TRUE), 1)
  } else {
    NA_real_
  }
  overview <- data.frame(
    Metric = c(
      "Total Fields",
      "Total Area (ha)",
      "Total Area (acres)",
      "Average CI",
      "Fields with Increasing CI",
      "Fields with Decreasing CI",
      "Fields with Stable CI",
      "Average Cloud Free %",
      "Total Alerts"
    ),
    Value = c(
      nrow(field_data),
      round(sum(field_data$Acreage_ha, na.rm = TRUE), 1),
      round(sum(field_data$Acreage_acres, na.rm = TRUE), 0),
      round(mean(field_data$Chlorophyll_Index, na.rm = TRUE), 2),
      # Pattern labels look like "Increasing (uniform)" etc., so grepl works
      sum(grepl("Increasing", field_data$Change_Pattern), na.rm = TRUE),
      sum(grepl("Decreasing", field_data$Change_Pattern), na.rm = TRUE),
      sum(grepl("Stable", field_data$Change_Pattern), na.rm = TRUE),
      avg_cloud_free,
      nrow(alerts_data)
    )
  )
  return(overview)
}
# 14. Script execution
# ------------------
# Run main() only at the top level (sys.nframe() == 0), i.e. when executed
# via Rscript. NOTE(review): a top-level source() also has nframe 0, so
# sourcing this file interactively will still run main() -- confirm intended.
if (sys.nframe() == 0) {
main()
}