SmartCane/r_app/09c_field_analysis_weekly.R

# 09c_FIELD_ANALYSIS_WEEKLY.R (ENHANCED - SC-64 NEW COLUMNS)
# ============================================================================
# Per-field weekly analysis with NEW columns for trend analysis and advanced metrics
#
# ENHANCEMENTS OVER 09b:
#   - Four_week_trend: Smoothed CI trend categorization (strong growth, growth, no growth, decline, strong decline)
#   - Last_harvest_or_planting_date: Date of most recent harvest/planting from harvesting_data
#   - CI_Percentiles: 10th and 90th percentiles (robust to outliers from roads/trees)
#   - CV_Trend_Short_Term: Week-over-week CV change (2-week comparison)
#   - CV_Trend_Long_Term: Long-term CV trend (8-week comparison)
#   - Cloud_pct_clear: Rounded to 5% intervals for client reporting
#
# Key improvement: All threshold values are MANUALLY DEFINED at the top of this script
# and can be easily updated. In future, these may be replaced with model-derived parameters.
#
# Usage: Rscript 09c_field_analysis_weekly.R [end_date] [project_dir]
#   - end_date: End date for analysis (YYYY-MM-DD format), default: today
#   - project_dir: Project directory name (e.g., "aura", "esa", "angata")
#
# Example:
#   Rscript 09c_field_analysis_weekly.R 2026-01-08 angata
#

# ============================================================================
# *** CONFIGURATION SECTION - MANUALLY DEFINED THRESHOLDS ***
# *** These values define decision logic and can be easily updated or replaced
#     by model-derived parameters in the future ***
# ============================================================================

# TEST MODE (for development with limited historical data)
# Set to TRUE to test with fewer historical weeks; set to FALSE for production
TEST_MODE <- TRUE
TEST_MODE_NUM_WEEKS <- 2  # Number of historical weeks to load in test mode

# FOUR-WEEK TREND THRESHOLDS (for categorizing mean_CI trends)
# These define the boundaries for growth categorization based on weekly change rate
FOUR_WEEK_TREND_STRONG_GROWTH_MIN <- 0.5       # Average weekly increase >= 0.5 = "strong growth"
FOUR_WEEK_TREND_GROWTH_MIN <- 0.1              # Average weekly increase >= 0.1 = "growth"
FOUR_WEEK_TREND_GROWTH_MAX <- 0.5              # Average weekly increase < 0.5
FOUR_WEEK_TREND_NO_GROWTH_RANGE <- 0.1         # ±0.1 around 0 = "no growth"
FOUR_WEEK_TREND_DECLINE_MAX <- -0.1            # Average weekly change > -0.1 = "no growth"
FOUR_WEEK_TREND_DECLINE_MIN <- -0.5            # Average weekly decrease >= -0.1 = "decline"
FOUR_WEEK_TREND_STRONG_DECLINE_MAX <- -0.5    # Average weekly decrease < -0.5 = "strong decline"

# CV TREND THRESHOLDS (for categorizing field uniformity trends)
# These determine if CV change is significant enough to report
CV_TREND_THRESHOLD_SIGNIFICANT <- 0.05         # CV change >= 0.05 is considered significant

# CLOUD COVER ROUNDING INTERVALS (for client-friendly reporting)
# Rounds cloud_pct_clear to 5% intervals to show impact while avoiding false precision
CLOUD_INTERVALS <- c(0, 50, 60, 70, 80, 90, 100)  # Used for bucketing: <50%, 50-60%, 60-70%, etc.

# PERCENTILE CALCULATIONS (for robust CI range estimation)
CI_PERCENTILE_LOW <- 0.10                       # 10th percentile
CI_PERCENTILE_HIGH <- 0.90                      # 90th percentile

# HISTORICAL DATA LOOKBACK (for trend calculations)
WEEKS_FOR_FOUR_WEEK_TREND <- 4                  # Use 4 weeks of data for trend
WEEKS_FOR_CV_TREND_SHORT <- 2                   # Compare CV over 2 weeks
WEEKS_FOR_CV_TREND_LONG <- 8                    # Compare CV over 8 weeks

# ============================================================================
# 1. Load required libraries
# ============================================================================

suppressPackageStartupMessages({
  library(here)
  library(sf)
  library(terra)
  library(dplyr)
  library(tidyr)
  library(lubridate)
  library(readr)
  library(readxl)
  library(writexl)
  library(purrr)
  library(furrr)
  library(future)
  tryCatch({
    library(torch)
  }, error = function(e) {
    message("Note: torch package not available - harvest model inference will be skipped")
  })
})

# ============================================================================
# PHASE AND STATUS TRIGGER DEFINITIONS
# ============================================================================

PHASE_DEFINITIONS <- data.frame(
  phase = c("Germination", "Tillering", "Grand Growth", "Maturation"),
  age_start = c(0, 4, 17, 39),
  age_end = c(6, 16, 39, 200),
  stringsAsFactors = FALSE
)

STATUS_TRIGGERS <- data.frame(
  trigger = c(
    "germination_started",
    "germination_complete",
    "stress_detected_whole_field",
    "strong_recovery",
    "growth_on_track",
    "maturation_progressing",
    "harvest_ready"
  ),
  age_min = c(0, 0, NA, NA, 4, 39, 45),
  age_max = c(6, 6, NA, NA, 39, 200, 200),
  description = c(
    "10% of field CI > 2",
    "70% of field CI >= 2",
    "CI decline > -1.5 + low CV",
    "CI increase > +1.5",
    "CI increasing consistently",
    "High CI, stable/declining",
    "Age 45+ weeks (ready to harvest)"
  ),
  stringsAsFactors = FALSE
)

# ============================================================================
# TILE-AWARE HELPER FUNCTIONS
# ============================================================================

#' Get tile IDs that a field geometry intersects
#'
#' @param field_geom Single field geometry (sf or terra::vect)
#' @param tile_grid Data frame with tile extents (id, xmin, xmax, ymin, ymax)
#' @return Numeric vector of tile IDs that field intersects
#'
get_tile_ids_for_field <- function(field_geom, tile_grid) {
  if (inherits(field_geom, "sf")) {
    field_bbox <- sf::st_bbox(field_geom)
    field_xmin <- field_bbox["xmin"]
    field_xmax <- field_bbox["xmax"]
    field_ymin <- field_bbox["ymin"]
    field_ymax <- field_bbox["ymax"]
  } else if (inherits(field_geom, "SpatVector")) {
    field_bbox <- terra::ext(field_geom)
    field_xmin <- field_bbox$xmin
    field_xmax <- field_bbox$xmax
    field_ymin <- field_bbox$ymin
    field_ymax <- field_bbox$ymax
  } else {
    stop("field_geom must be sf or terra::vect object")
  }

  intersecting_tiles <- tile_grid$id[
    !(tile_grid$xmax < field_xmin |
      tile_grid$xmin > field_xmax |
      tile_grid$ymax < field_ymin |
      tile_grid$ymin > field_ymax)
  ]

  return(as.numeric(intersecting_tiles))
}

#' Load CI tiles that a field intersects
#'
#' @param field_geom Single field geometry
#' @param tile_ids Numeric vector of tile IDs to load
#' @param week_num Week number
#' @param year Year
#' @param mosaic_dir Directory with weekly tiles
#' @return Single CI raster (merged if multiple tiles, or single tile)
#'
load_tiles_for_field <- function(field_geom, tile_ids, week_num, year, mosaic_dir) {
  if (length(tile_ids) == 0) {
    return(NULL)
  }

  tiles_list <- list()
  for (tile_id in sort(tile_ids)) {
    tile_filename <- sprintf("week_%02d_%d_%02d.tif", week_num, year, tile_id)
    tile_path <- file.path(mosaic_dir, tile_filename)

    if (file.exists(tile_path)) {
      tryCatch({
        tile_rast <- terra::rast(tile_path)
        ci_band <- terra::subset(tile_rast, 5)
        tiles_list[[length(tiles_list) + 1]] <- ci_band
      }, error = function(e) {
        message(paste("  Warning: Could not load tile", tile_id, ":", e$message))
      })
    }
  }

  if (length(tiles_list) == 0) {
    return(NULL)
  }

  if (length(tiles_list) == 1) {
    return(tiles_list[[1]])
  } else {
    tryCatch({
      rsrc <- terra::sprc(tiles_list)
      merged <- terra::mosaic(rsrc, fun = "max")
      return(merged)
    }, error = function(e) {
      message(paste("  Warning: Could not merge tiles:", e$message))
      return(tiles_list[[1]])
    })
  }
}

#' Build tile grid from available weekly tile files
#'
#' @param mosaic_dir Directory with weekly tiles
#' @param week_num Week number to discover tiles
#' @param year Year to discover tiles
#' @return Data frame with columns: id, xmin, xmax, ymin, ymax
#'
build_tile_grid <- function(mosaic_dir, week_num, year) {
  tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
  tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)

  if (length(tile_files) == 0) {
    stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir))
  }

  tile_grid <- data.frame(
    id = integer(),
    xmin = numeric(),
    xmax = numeric(),
    ymin = numeric(),
    ymax = numeric(),
    stringsAsFactors = FALSE
  )

  for (tile_file in tile_files) {
    tryCatch({
      matches <- regmatches(basename(tile_file), regexpr("_([0-9]{2})\\.tif$", basename(tile_file)))
      if (length(matches) > 0) {
        tile_id <- as.integer(sub("_|\\.tif", "", matches[1]))
        tile_rast <- terra::rast(tile_file)
        tile_ext <- terra::ext(tile_rast)
        tile_grid <- rbind(tile_grid, data.frame(
          id = tile_id,
          xmin = tile_ext$xmin,
          xmax = tile_ext$xmax,
          ymin = tile_ext$ymin,
          ymax = tile_ext$ymax,
          stringsAsFactors = FALSE
        ))
      }
    }, error = function(e) {
      message(paste("  Warning: Could not process tile", basename(tile_file), ":", e$message))
    })
  }

  if (nrow(tile_grid) == 0) {
    stop("Could not extract extents from any tile files")
  }

  return(tile_grid)
}

# ============================================================================
# HELPER FUNCTIONS FOR NEW COLUMNS (SC-64)
# ============================================================================

#' Categorize four-week trend based on average weekly CI change
#'
#' @param ci_values_list List of CI mean values (chronological order, oldest to newest)
#' @return Character: "strong growth", "growth", "no growth", "decline", "strong decline"
#'
categorize_four_week_trend <- function(ci_values_list) {
  if (is.null(ci_values_list) || length(ci_values_list) < 2) {
    return(NA_character_)
  }

  ci_values_list <- ci_values_list[!is.na(ci_values_list)]
  if (length(ci_values_list) < 2) {
    return(NA_character_)
  }

  # Calculate average weekly change
  weekly_changes <- diff(ci_values_list)
  avg_weekly_change <- mean(weekly_changes, na.rm = TRUE)

  # Categorize based on thresholds
  if (avg_weekly_change >= FOUR_WEEK_TREND_STRONG_GROWTH_MIN) {
    return("strong growth")
  } else if (avg_weekly_change >= FOUR_WEEK_TREND_GROWTH_MIN &&
             avg_weekly_change < FOUR_WEEK_TREND_GROWTH_MAX) {
    return("growth")
  } else if (abs(avg_weekly_change) <= FOUR_WEEK_TREND_NO_GROWTH_RANGE) {
    return("no growth")
  } else if (avg_weekly_change <= FOUR_WEEK_TREND_DECLINE_MIN &&
             avg_weekly_change > FOUR_WEEK_TREND_STRONG_DECLINE_MAX) {
    return("decline")
  } else if (avg_weekly_change < FOUR_WEEK_TREND_STRONG_DECLINE_MAX) {
    return("strong decline")
  } else {
    return("no growth")  # Default fallback
  }
}

#' Round cloud percentage to 5% intervals for client reporting
#'
#' @param cloud_pct_clear Numeric cloud clear percentage (0-100)
#' @return Character representing the interval bucket
#'
round_cloud_to_intervals <- function(cloud_pct_clear) {
  if (is.na(cloud_pct_clear)) {
    return(NA_character_)
  }

  if (cloud_pct_clear < 50) return("<50%")
  if (cloud_pct_clear < 60) return("50-60%")
  if (cloud_pct_clear < 70) return("60-70%")
  if (cloud_pct_clear < 80) return("70-80%")
  if (cloud_pct_clear < 90) return("80-90%")
  return(">90%")
}

#' Extract CI percentiles (10th and 90th) to avoid outlier distortion
#'
#' @param ci_values Numeric vector of CI values
#' @return Character string: "p10-p90" format
#'
get_ci_percentiles <- function(ci_values) {
  if (is.null(ci_values) || length(ci_values) == 0) {
    return(NA_character_)
  }

  ci_values <- ci_values[!is.na(ci_values)]
  if (length(ci_values) == 0) {
    return(NA_character_)
  }

  p10 <- quantile(ci_values, CI_PERCENTILE_LOW, na.rm = TRUE)
  p90 <- quantile(ci_values, CI_PERCENTILE_HIGH, na.rm = TRUE)

  return(sprintf("%.1f-%.1f", p10, p90))
}

#' Calculate CV trend between two weeks
#'
#' @param cv_current Current week's CV
#' @param cv_previous Previous week's CV
#' @return Numeric: change in CV (positive = increased heterogeneity)
#'
calculate_cv_trend <- function(cv_current, cv_previous) {
  if (is.na(cv_current) || is.na(cv_previous)) {
    return(NA_real_)
  }
  return(round(cv_current - cv_previous, 4))
}

# ============================================================================
# HELPER FUNCTIONS (FROM 09b)
# ============================================================================

get_phase_by_age <- function(age_weeks) {
  if (is.na(age_weeks)) return(NA_character_)
  for (i in seq_len(nrow(PHASE_DEFINITIONS))) {
    if (age_weeks >= PHASE_DEFINITIONS$age_start[i] &&
        age_weeks <= PHASE_DEFINITIONS$age_end[i]) {
      return(PHASE_DEFINITIONS$phase[i])
    }
  }
  return("Unknown")
}

get_status_trigger <- function(ci_values, ci_change, age_weeks) {
  if (is.na(age_weeks) || length(ci_values) == 0) return(NA_character_)

  ci_values <- ci_values[!is.na(ci_values)]
  if (length(ci_values) == 0) return(NA_character_)

  pct_above_2 <- sum(ci_values > 2) / length(ci_values) * 100
  pct_at_or_above_2 <- sum(ci_values >= 2) / length(ci_values) * 100
  ci_cv <- if (mean(ci_values, na.rm = TRUE) > 0) sd(ci_values) / mean(ci_values, na.rm = TRUE) else 0
  mean_ci <- mean(ci_values, na.rm = TRUE)

  if (age_weeks >= 0 && age_weeks <= 6) {
    if (pct_at_or_above_2 >= 70) {
      return("germination_complete")
    } else if (pct_above_2 > 10) {
      return("germination_started")
    }
  }

  if (age_weeks >= 45) {
    return("harvest_ready")
  }

  if (age_weeks > 6 && !is.na(ci_change) && ci_change < -1.5 && ci_cv < 0.25) {
    return("stress_detected_whole_field")
  }

  if (age_weeks > 6 && !is.na(ci_change) && ci_change > 1.5) {
    return("strong_recovery")
  }

  if (age_weeks >= 4 && age_weeks < 39 && !is.na(ci_change) && ci_change > 0.2) {
    return("growth_on_track")
  }

  if (age_weeks >= 39 && age_weeks < 45 && mean_ci > 3.5) {
    return("maturation_progressing")
  }

  return(NA_character_)
}

#' Load multiple weeks of CSV data for trend calculations
#'
#' @param project_dir Project name
#' @param current_week Current week number
#' @param reports_dir Reports directory
#' @param num_weeks Number of weeks to load (default 4)
#' @return List with data frames for each week, or NULL if not enough data
#'
load_historical_field_data <- function(project_dir, current_week, reports_dir, num_weeks = 4) {
  historical_data <- list()
  loaded_weeks <- c()

  for (lookback in 0:(num_weeks - 1)) {
    target_week <- current_week - lookback
    if (target_week < 1) target_week <- target_week + 52

    csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", target_week), ".csv")
    csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename)

    if (file.exists(csv_path)) {
      tryCatch({
        data <- read_csv(csv_path, show_col_types = FALSE)
        historical_data[[lookback + 1]] <- list(
          week = target_week,
          data = data
        )
        loaded_weeks <- c(loaded_weeks, target_week)
      }, error = function(e) {
        message(paste("  Warning: Could not load week", target_week, ":", e$message))
      })
    }
  }

  if (length(historical_data) == 0) {
    message(paste("Warning: No historical field data found for trend calculations"))
    return(NULL)
  }

  message(paste("Loaded", length(historical_data), "weeks of historical data:",
                paste(loaded_weeks, collapse = ", ")))

  return(historical_data)
}

USE_UNIFORM_AGE <- TRUE
UNIFORM_PLANTING_DATE <- as.Date("2025-01-01")

extract_planting_dates <- function(harvesting_data) {
  if (USE_UNIFORM_AGE) {
    message(paste("Using uniform planting date for all fields:", UNIFORM_PLANTING_DATE))
    return(data.frame(
      field_id = character(),
      planting_date = as.Date(character()),
      stringsAsFactors = FALSE
    ))
  }

  if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
    message("Warning: No harvesting data available.")
    return(NULL)
  }

  tryCatch({
    planting_dates <- harvesting_data %>%
      arrange(field, desc(season_start)) %>%
      distinct(field, .keep_all = TRUE) %>%
      select(field, season_start) %>%
      rename(field_id = field, planting_date = season_start) %>%
      filter(!is.na(planting_date)) %>%
      as.data.frame()

    message(paste("Extracted planting dates for", nrow(planting_dates), "fields"))
    return(planting_dates)
  }, error = function(e) {
    message(paste("Error extracting planting dates:", e$message))
    return(NULL)
  })
}

# ============================================================================
# PARALLEL FIELD ANALYSIS FUNCTION
# ============================================================================

#' Analyze single field with SC-64 enhancements
#'
#' @param field_idx Index in field_boundaries_sf
#' @param field_boundaries_sf All field boundaries (sf object)
#' @param tile_grid Data frame with tile extents
#' @param week_num Current week number
#' @param year Current year
#' @param mosaic_dir Directory with weekly tiles
#' @param historical_data Historical weekly data for trend calculations
#' @param planting_dates Planting dates lookup
#' @param report_date Report date
#' @param harvest_imminence_data Harvest imminence predictions (optional)
#'
#' @return Single-row data frame with field analysis including new SC-64 columns
#'
analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week_num, year,
                                  mosaic_dir, historical_data = NULL, planting_dates = NULL,
                                  report_date = Sys.Date(), harvest_imminence_data = NULL) {

  tryCatch({
    # Get field info
    field_id <- field_boundaries_sf$field[field_idx]
    farm_section <- if ("sub_area" %in% names(field_boundaries_sf)) {
      field_boundaries_sf$sub_area[field_idx]
    } else {
      NA_character_
    }
    field_name <- field_id

    # Get field geometry and validate
    field_sf <- field_boundaries_sf[field_idx, ]
    if (sf::st_is_empty(field_sf) || any(is.na(sf::st_geometry(field_sf)))) {
      return(data.frame(
        Field_id = field_id,
        error = "Empty or invalid geometry"
      ))
    }

    # Calculate field area
    field_area_ha <- as.numeric(sf::st_area(field_sf)) / 10000
    field_area_acres <- field_area_ha / 0.404686

    # Determine which tiles this field intersects
    tile_ids <- get_tile_ids_for_field(field_sf, tile_grid)

    # Load current CI tiles for this field
    current_ci <- load_tiles_for_field(field_sf, tile_ids, week_num, year, mosaic_dir)

    if (is.null(current_ci)) {
      return(data.frame(
        Field_id = field_id,
        error = "No tile data available"
      ))
    }

    # Extract CI values for current field
    field_vect <- terra::vect(sf::as_Spatial(field_sf))
    terra::crs(field_vect) <- terra::crs(current_ci)

    all_extracted <- terra::extract(current_ci, field_vect)[, 2]
    current_ci_vals <- all_extracted[!is.na(all_extracted)]

    # Calculate cloud coverage
    num_total <- length(all_extracted)
    num_data <- sum(!is.na(all_extracted))
    pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0

    cloud_cat <- if (num_data == 0) "No image available"
                 else if (pct_clear >= 99.5) "Clear view"
                 else "Partial coverage"
    cloud_pct <- 100 - pct_clear
    cloud_interval <- round_cloud_to_intervals(pct_clear)  # NEW: Rounded intervals

    if (length(current_ci_vals) == 0) {
      return(data.frame(
        Field_id = field_id,
        error = "No CI values extracted"
      ))
    }

    # Calculate current CI statistics
    mean_ci_current <- mean(current_ci_vals, na.rm = TRUE)
    ci_std <- sd(current_ci_vals, na.rm = TRUE)
    cv_current <- ci_std / mean_ci_current
    range_min <- min(current_ci_vals, na.rm = TRUE)
    range_max <- max(current_ci_vals, na.rm = TRUE)
    range_str <- sprintf("%.1f-%.1f", range_min, range_max)

    # NEW: Get CI percentiles (10th-90th)
    ci_percentiles_str <- get_ci_percentiles(current_ci_vals)

    # Calculate weekly CI change
    weekly_ci_change <- NA
    previous_ci_vals <- NULL

    tryCatch({
      previous_ci <- load_tiles_for_field(field_sf, tile_ids, week_num - 1, year, mosaic_dir)
      if (!is.null(previous_ci)) {
        prev_extracted <- terra::extract(previous_ci, field_vect)[, 2]
        previous_ci_vals <- prev_extracted[!is.na(prev_extracted)]
        if (length(previous_ci_vals) > 0) {
          mean_ci_previous <- mean(previous_ci_vals, na.rm = TRUE)
          weekly_ci_change <- mean_ci_current - mean_ci_previous
        }
      }
    }, error = function(e) {
      # Silent fail
    })

    if (is.na(weekly_ci_change)) {
      weekly_ci_change_str <- sprintf("%.1f ± %.2f", mean_ci_current, ci_std)
    } else {
      weekly_ci_change_str <- sprintf("%.1f ± %.2f (Δ%.1f)", mean_ci_current, ci_std, weekly_ci_change)
    }

    # Calculate age
    age_weeks <- NA
    if (!is.null(planting_dates) && nrow(planting_dates) > 0) {
      field_planting <- planting_dates %>%
        filter(field_id == !!field_id) %>%
        pull(planting_date)

      if (length(field_planting) > 0) {
        age_weeks <- as.numeric(difftime(report_date, field_planting[1], units = "weeks"))
      }
    }

    if (USE_UNIFORM_AGE) {
      age_weeks <- as.numeric(difftime(report_date, UNIFORM_PLANTING_DATE, units = "weeks"))
    }

    # Calculate germination progress
    pct_ci_above_2 <- sum(current_ci_vals > 2) / length(current_ci_vals) * 100
    pct_ci_ge_2 <- sum(current_ci_vals >= 2) / length(current_ci_vals) * 100
    germination_progress_str <- NA_character_
    if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks <= 6) {
      germination_progress_str <- sprintf("%.0f%%", pct_ci_ge_2)
    }

    # Assign phase and trigger
    phase <- "Unknown"
    imminent_prob_val <- NA
    if (!is.null(harvest_imminence_data) && nrow(harvest_imminence_data) > 0) {
      imminence_row <- harvest_imminence_data %>%
        filter(field_id == !!field_id)
      if (nrow(imminence_row) > 0) {
        imminent_prob_val <- imminence_row$probability[1]
        if (imminent_prob_val > 0.5) {
          phase <- "Harvest Imminent (Model)"
        }
      }
    }

    if (phase == "Unknown") {
      phase <- get_phase_by_age(age_weeks)
    }

    status_trigger <- get_status_trigger(current_ci_vals, weekly_ci_change, age_weeks)

    nmr_weeks_in_phase <- 1

    # NEW: Load historical data to calculate four_week_trend
    four_week_trend <- NA_character_
    ci_values_for_trend <- c(mean_ci_current)

    if (!is.null(historical_data) && length(historical_data) > 0) {
      # Extract this field's CI mean values from historical weeks
      for (hist in historical_data) {
        hist_week <- hist$week
        hist_data <- hist$data

        field_row <- hist_data %>%
          filter(Field_id == !!field_id)

        if (nrow(field_row) > 0 && !is.na(field_row$Mean_CI[1])) {
          ci_values_for_trend <- c(field_row$Mean_CI[1], ci_values_for_trend)
        }
      }

      if (length(ci_values_for_trend) >= 2) {
        four_week_trend <- categorize_four_week_trend(ci_values_for_trend)
      }
    }

    # NEW: Load previous weeks for CV trends
    cv_trend_short <- NA_real_
    cv_trend_long <- NA_real_

    if (!is.null(historical_data) && length(historical_data) > 0) {
      # CV from 2 weeks ago (short-term trend)
      if (length(historical_data) >= 2) {
        cv_2w <- historical_data[[2]]$data %>%
          filter(Field_id == !!field_id) %>%
          pull(CV)
        if (length(cv_2w) > 0 && !is.na(cv_2w[1])) {
          cv_trend_short <- calculate_cv_trend(cv_current, cv_2w[1])
        }
      }

      # CV from 8 weeks ago (long-term trend)
      if (length(historical_data) >= 8) {
        cv_8w <- historical_data[[8]]$data %>%
          filter(Field_id == !!field_id) %>%
          pull(CV)
        if (length(cv_8w) > 0 && !is.na(cv_8w[1])) {
          cv_trend_long <- calculate_cv_trend(cv_current, cv_8w[1])
        }
      }
    }

    # NEW: Last harvest/planting date (from harvesting_data if available)
    last_harvest_date <- NA_character_
    if (!is.null(harvesting_data) && nrow(harvesting_data) > 0) {
      last_harvest_row <- harvesting_data %>%
        filter(field == !!field_id) %>%
        arrange(desc(season_start)) %>%
        slice(1)

      if (nrow(last_harvest_row) > 0 && !is.na(last_harvest_row$season_start[1])) {
        last_harvest_date <- as.character(last_harvest_row$season_start[1])
      }
    }

    # Compile result with all SC-64 columns
    result <- data.frame(
      Field_id = field_id,
      Farm_Section = farm_section,
      Field_name = field_name,
      Hectare = round(field_area_ha, 2),
      Acreage = round(field_area_acres, 2),
      Mean_CI = round(mean_ci_current, 2),
      Weekly_ci_change = if (is.na(weekly_ci_change)) NA_real_ else round(weekly_ci_change, 2),
      Weekly_ci_change_str = weekly_ci_change_str,
      Four_week_trend = four_week_trend,  # NEW
      Last_harvest_or_planting_date = last_harvest_date,  # NEW
      Age_week = if (is.na(age_weeks)) NA_integer_ else as.integer(round(age_weeks)),
      `Phase (age based)` = phase,
      nmr_weeks_in_this_phase = nmr_weeks_in_phase,
      Germination_progress = germination_progress_str,
      Imminent_prob = imminent_prob_val,
      Status_trigger = status_trigger,
      CI_range = range_str,
      CI_Percentiles = ci_percentiles_str,  # NEW
      CV = round(cv_current, 4),
      CV_Trend_Short_Term = cv_trend_short,  # NEW (2-week)
      CV_Trend_Long_Term = cv_trend_long,  # NEW (8-week)
      Cloud_pct_clear = pct_clear,
      Cloud_pct_clear_interval = cloud_interval,  # NEW: Rounded intervals
      Cloud_pct = cloud_pct,
      Cloud_category = cloud_cat,
      stringsAsFactors = FALSE
    )

    return(result)

  }, error = function(e) {
    message(paste("Error analyzing field", field_idx, ":", e$message))
    return(data.frame(
      Field_id = NA_character_,
      error = e$message
    ))
  })
}

# ============================================================================
# SUMMARY GENERATION
# ============================================================================

generate_field_analysis_summary <- function(field_df) {
  message("Generating summary statistics...")

  total_acreage <- sum(field_df$Acreage, na.rm = TRUE)

  germination_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Germination"], na.rm = TRUE)
  tillering_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Tillering"], na.rm = TRUE)
  grand_growth_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Grand Growth"], na.rm = TRUE)
  maturation_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Maturation"], na.rm = TRUE)
  unknown_phase_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Unknown"], na.rm = TRUE)

  harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE)
  stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE)
  recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE)
  growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE)
  germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE)
  germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE)
  no_trigger_acreage <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE)

  clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE)
  partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE)
  no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE)
  total_fields <- nrow(field_df)

  clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE)
  partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE)
  no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE)

  summary_df <- data.frame(
    Category = c(
      "--- PHASE DISTRIBUTION ---",
      "Germination",
      "Tillering",
      "Grand Growth",
      "Maturation",
      "Unknown phase",
      "--- STATUS TRIGGERS ---",
      "Harvest ready",
      "Stress detected",
      "Strong recovery",
      "Growth on track",
      "Germination complete",
      "Germination started",
      "No trigger",
      "--- CLOUD COVERAGE (FIELDS) ---",
      "Clear view",
      "Partial coverage",
      "No image available",
      "--- CLOUD COVERAGE (ACREAGE) ---",
      "Clear view",
      "Partial coverage",
      "No image available",
      "--- TOTAL ---",
      "Total Acreage"
    ),
    Acreage = c(
      NA,
      round(germination_acreage, 2),
      round(tillering_acreage, 2),
      round(grand_growth_acreage, 2),
      round(maturation_acreage, 2),
      round(unknown_phase_acreage, 2),
      NA,
      round(harvest_ready_acreage, 2),
      round(stress_acreage, 2),
      round(recovery_acreage, 2),
      round(growth_on_track_acreage, 2),
      round(germination_complete_acreage, 2),
      round(germination_started_acreage, 2),
      round(no_trigger_acreage, 2),
      NA,
      paste0(clear_fields, " fields"),
      paste0(partial_fields, " fields"),
      paste0(no_image_fields, " fields"),
      NA,
      round(clear_acreage, 2),
      round(partial_acreage, 2),
      round(no_image_acreage, 2),
      NA,
      round(total_acreage, 2)
    ),
    stringsAsFactors = FALSE
  )

  attr(summary_df, "cloud_fields_clear") <- clear_fields
  attr(summary_df, "cloud_fields_partial") <- partial_fields
  attr(summary_df, "cloud_fields_no_image") <- no_image_fields
  attr(summary_df, "cloud_fields_total") <- total_fields

  return(summary_df)
}

# ============================================================================
# EXPORT FUNCTIONS
# ============================================================================

export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, reports_dir) {
  message("Exporting per-field analysis to Excel, CSV, and RDS...")

  output_subdir <- file.path(reports_dir, "kpis", "field_analysis")
  if (!dir.exists(output_subdir)) {
    dir.create(output_subdir, recursive = TRUE)
  }

  excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), "_test.xlsx")
  excel_path <- file.path(output_subdir, excel_filename)
  excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)

  sheets <- list(
    "Field Data" = field_df,
    "Summary" = summary_df
  )

  write_xlsx(sheets, excel_path)
  message(paste("✓ Field analysis Excel exported to:", excel_path))

  kpi_data <- list(
    field_analysis = field_df,
    field_analysis_summary = summary_df,
    metadata = list(
      current_week = current_week,
      project = project_dir,
      created_at = Sys.time()
    )
  )

  rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d", current_week), ".rds")
  rds_path <- file.path(reports_dir, "kpis", rds_filename)

  saveRDS(kpi_data, rds_path)
  message(paste("✓ Field analysis RDS exported to:", rds_path))

  csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".csv")
  csv_path <- file.path(output_subdir, csv_filename)
  write_csv(field_df, csv_path)
  message(paste("✓ Field analysis CSV exported to:", csv_path))

  return(list(excel = excel_path, rds = rds_path, csv = csv_path))
}

# ============================================================================
# MAIN
# ============================================================================

main <- function() {
  args <- commandArgs(trailingOnly = TRUE)

  end_date <- if (length(args) >= 1 && !is.na(args[1])) {
    as.Date(args[1])
  } else if (exists("end_date_str", envir = .GlobalEnv)) {
    as.Date(get("end_date_str", envir = .GlobalEnv))
  } else {
    Sys.Date()
  }

  project_dir <- if (length(args) >= 2 && !is.na(args[2])) {
    as.character(args[2])
  } else if (exists("project_dir", envir = .GlobalEnv)) {
    get("project_dir", envir = .GlobalEnv)
  } else {
    "angata"
  }

  assign("project_dir", project_dir, envir = .GlobalEnv)

  source(here("r_app", "crop_messaging_utils.R"))
  source(here("r_app", "parameters_project.R"))

  message("=== FIELD ANALYSIS WEEKLY (SC-64 ENHANCEMENTS) ===")
  message(paste("Date:", end_date))
  message(paste("Project:", project_dir))
  message("")
  message("CONFIGURATION:")
  message(paste("  Four-week trend thresholds: growth >= ", FOUR_WEEK_TREND_GROWTH_MIN,
                ", strong growth >= ", FOUR_WEEK_TREND_STRONG_GROWTH_MIN, sep = ""))
  message(paste("  CV trend significant threshold:", CV_TREND_THRESHOLD_SIGNIFICANT))
  message(paste("  Cloud intervals:", paste(CLOUD_INTERVALS, collapse = ", ")))
  message("")

  current_week <- as.numeric(format(end_date, "%V"))
  year <- as.numeric(format(end_date, "%Y"))
  previous_week <- current_week - 1
  if (previous_week < 1) previous_week <- 52

  message(paste("Week:", current_week, "/ Year:", year))

  message("Building tile grid from available weekly tiles...")
  tile_grid <- build_tile_grid(weekly_tile_max, current_week, year)
  message(paste("  Found", nrow(tile_grid), "tiles for analysis"))

  tryCatch({
    boundaries_result <- load_field_boundaries(data_dir)

    if (is.list(boundaries_result) && "field_boundaries_sf" %in% names(boundaries_result)) {
      field_boundaries_sf <- boundaries_result$field_boundaries_sf
    } else {
      field_boundaries_sf <- boundaries_result
    }

    if (!is.data.frame(field_boundaries_sf) && !inherits(field_boundaries_sf, "sf")) {
      stop("field_boundaries_sf is not a valid SF object")
    }

    if (nrow(field_boundaries_sf) == 0) {
      stop("No fields loaded from boundaries")
    }

    message(paste("  Loaded", nrow(field_boundaries_sf), "fields from boundaries"))
  }, error = function(e) {
    stop("ERROR loading field boundaries: ", e$message,
         "\nCheck that pivot.geojson exists in ", data_dir)
  })

  # Load historical data for trend calculations
  message("Loading historical field data for trend calculations...")
  num_weeks_to_load <- if (TEST_MODE) TEST_MODE_NUM_WEEKS else max(WEEKS_FOR_FOUR_WEEK_TREND, WEEKS_FOR_CV_TREND_LONG)
  if (TEST_MODE) {
    message(paste("  TEST MODE: Loading only", num_weeks_to_load, "weeks of historical data"))
  }
  historical_data <- load_historical_field_data(
    project_dir, current_week, reports_dir,
    num_weeks = num_weeks_to_load
  )

  planting_dates <- extract_planting_dates(harvesting_data)

  message("Setting up parallel processing...")
  current_plan <- class(future::plan())[1]
  if (current_plan == "sequential") {
    num_workers <- parallel::detectCores() - 1
    message(paste("  Using", num_workers, "workers for parallel processing"))
    future::plan(future::multisession, workers = num_workers)
  } else {
    message(paste("  Using existing plan:", current_plan))
  }

  message("Analyzing fields in parallel...")

  field_analysis_list <- furrr::future_map(
    seq_len(nrow(field_boundaries_sf)),
    ~ analyze_single_field(
      field_idx = .,
      field_boundaries_sf = field_boundaries_sf,
      tile_grid = tile_grid,
      week_num = current_week,
      year = year,
      mosaic_dir = weekly_tile_max,
      historical_data = historical_data,
      planting_dates = planting_dates,
      report_date = end_date,
      harvest_imminence_data = NULL
    ),
    .progress = TRUE,
    .options = furrr::furrr_options(seed = TRUE)
  )

  field_analysis_df <- dplyr::bind_rows(field_analysis_list)

  if (nrow(field_analysis_df) == 0) {
    stop("No fields analyzed successfully!")
  }

  message(paste("✓ Analyzed", nrow(field_analysis_df), "fields"))

  summary_statistics_df <- generate_field_analysis_summary(field_analysis_df)

  export_paths <- export_field_analysis_excel(
    field_analysis_df,
    summary_statistics_df,
    project_dir,
    current_week,
    reports_dir
  )

  cat("\n=== FIELD ANALYSIS SUMMARY ===\n")
  cat("Fields analyzed:", nrow(field_analysis_df), "\n")
  cat("Excel export:", export_paths$excel, "\n")
  cat("RDS export:", export_paths$rds, "\n")
  cat("CSV export:", export_paths$csv, "\n\n")

  cat("--- Per-field results (first 10) ---\n")
  available_cols <- c("Field_id", "Acreage", "Age_week", "Mean_CI",
                     "Four_week_trend", "Status_trigger", "Cloud_category")
  available_cols <- available_cols[available_cols %in% names(field_analysis_df)]
  if (length(available_cols) > 0) {
    print(head(field_analysis_df[, available_cols], 10))
  } else {
    print(head(field_analysis_df, 10))
  }

  cat("\n--- Summary statistics ---\n")
  print(summary_statistics_df)

  message("\n✓ Field analysis complete!")
}

if (sys.nframe() == 0) {
  main()
}