SmartCane/r_app/old_scripts/09_field_analysis_weekly.R

# 09_FIELD_ANALYSIS_WEEKLY.R
# ==========================
# Per-field weekly analysis with phase detection and status triggers
# Generates detailed field-level CSV export with:
#   - Field identifiers and areas
#   - Weekly CI change (mean ± std)
#   - Age-based phase assignment (Germination, Tillering, Grand Growth, Maturation)
#   - Harvest imminence detection (Phase 1 from LSTM model)
#   - Status triggers (non-exclusive, can coexist with harvest imminent phase)
#   - Phase transition tracking (weeks in current phase)
#   - Cloud coverage analysis from 8-band satellite data (band 9 = cloud mask)
#
# Harvest Imminence:
#   - Runs LSTM Phase 1 detection to get imminent_prob (probability harvest in next ~4 weeks)
#   - If imminent_prob > 0.5, phase = "Harvest Imminent" (overrides age-based phase)
#   - Weekly predictions exported to separate Excel for tracking
#
# Cloud Coverage Categories (from band 9: 1=clear, 0=cloudy):
#   - Clear view: >=99.5% clear pixels (100% practical coverage)
#   - Partial coverage: 0-99.5% clear pixels (some cloud interference)
#   - No image available: 0% clear pixels (completely clouded)
#
# Output:
#   - Excel (.xlsx) with Field Data sheet and Summary sheet
#   - Excel (.xlsx) weekly harvest predictions for tracking
#   - RDS file with field_analysis and field_analysis_summary for Rmd reports
#   - Summary includes: Monitored area, Cloud coverage, Phase distribution, Status triggers
#
# Usage: Rscript 09_field_analysis_weekly.R [end_date] [offset] [project_dir]
#   - end_date: End date for analysis (YYYY-MM-DD format), default: today
#   - offset: Number of days to look back (for consistency, not currently used)
#   - project_dir: Project directory name (e.g., "aura", "esa", "angata")

# 1. Load required libraries
suppressPackageStartupMessages({
  library(here)
  library(sf)
  library(terra)
  library(dplyr)
  library(tidyr)
  library(lubridate)
  library(readr)
  library(readxl)
  library(writexl)
  # Optional: torch for harvest model inference (will skip if not available)
  tryCatch({
    library(torch)
  }, error = function(e) {
    message("Note: torch package not available - harvest model inference will be skipped")
  })
})
# ============================================================================
# PHASE AND STATUS TRIGGER DEFINITIONS
# ============================================================================

PHASE_DEFINITIONS <- data.frame(
  phase = c("Germination", "Tillering", "Grand Growth", "Maturation"),
  age_start = c(0, 4, 17, 39),
  age_end = c(6, 16, 39, 200),
  stringsAsFactors = FALSE
)

STATUS_TRIGGERS <- data.frame(
  trigger = c(
    "germination_started",
    "germination_complete",
    "stress_detected_whole_field",
    "strong_recovery",
    "growth_on_track",
    "maturation_progressing",
    "harvest_ready"
  ),
  age_min = c(0, 0, NA, NA, 4, 39, 45),
  age_max = c(6, 6, NA, NA, 39, 200, 200),
  description = c(
    "10% of field CI > 2",
    "70% of field CI >= 2",
    "CI decline > -1.5 + low CV",
    "CI increase > +1.5",
    "CI increasing consistently",
    "High CI, stable/declining",
    "Age 45+ weeks (ready to harvest)"
  ),
  stringsAsFactors = FALSE
)

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

get_phase_by_age <- function(age_weeks) {
  if (is.na(age_weeks)) return(NA_character_)
  for (i in seq_len(nrow(PHASE_DEFINITIONS))) {
    if (age_weeks >= PHASE_DEFINITIONS$age_start[i] &&
        age_weeks <= PHASE_DEFINITIONS$age_end[i]) {
      return(PHASE_DEFINITIONS$phase[i])
    }
  }
  return("Unknown")
}

get_status_trigger <- function(ci_values, ci_change, age_weeks) {
  if (is.na(age_weeks) || length(ci_values) == 0) return(NA_character_)

  ci_values <- ci_values[!is.na(ci_values)]
  if (length(ci_values) == 0) return(NA_character_)

  pct_above_2 <- sum(ci_values > 2) / length(ci_values) * 100
  pct_at_or_above_2 <- sum(ci_values >= 2) / length(ci_values) * 100
  ci_cv <- if (mean(ci_values, na.rm = TRUE) > 0) sd(ci_values) / mean(ci_values, na.rm = TRUE) else 0
  mean_ci <- mean(ci_values, na.rm = TRUE)

  # Germination phase triggers (age 0-6)
  if (age_weeks >= 0 && age_weeks <= 6) {
    if (pct_at_or_above_2 >= 70) {
      return("germination_complete")
    } else if (pct_above_2 > 10) {
      return("germination_started")
    }
  }

  # Harvest ready (45+ weeks) - check first to prioritize
  if (age_weeks >= 45) {
    return("harvest_ready")
  }

  # Stress detection (any phase except Germination)
  if (age_weeks > 6 && !is.na(ci_change) && ci_change < -1.5 && ci_cv < 0.25) {
    return("stress_detected_whole_field")
  }

  # Strong recovery (any phase except Germination)
  if (age_weeks > 6 && !is.na(ci_change) && ci_change > 1.5) {
    return("strong_recovery")
  }

  # Growth on track (Tillering/Grand Growth, 4-39 weeks)
  if (age_weeks >= 4 && age_weeks < 39 && !is.na(ci_change) && ci_change > 0.2) {
    return("growth_on_track")
  }

  # Maturation progressing (39-45 weeks, high CI stable/declining)
  if (age_weeks >= 39 && age_weeks < 45 && mean_ci > 3.5) {
    return("maturation_progressing")
  }

  return(NA_character_)
}

load_previous_week_csv <- function(project_dir, current_week, reports_dir) {
  lookback_weeks <- c(1, 2, 3)

  for (lookback in lookback_weeks) {
    previous_week <- current_week - lookback
    if (previous_week < 1) previous_week <- previous_week + 52

    csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", previous_week), ".csv")
    csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename)

    if (file.exists(csv_path)) {
      tryCatch({
        prev_data <- read.csv(csv_path, stringsAsFactors = FALSE)
        message(if (lookback == 1)
                  paste("Loaded previous week CSV (week", previous_week, ")")
                else
                  paste("Loaded fallback CSV from", lookback, "weeks ago (week", previous_week, ")"))
        return(list(data = prev_data, weeks_lookback = lookback, found = TRUE))
      }, error = function(e) {
        message(paste("Warning: Could not read CSV from week", previous_week))
      })
    }
  }

  message("No previous field analysis CSV found. Phase tracking will be age-based only.")
  return(list(data = NULL, weeks_lookback = NA, found = FALSE))
}

# ============================================================================
# UNIFORM AGE MODE - Using fixed planting date for all fields
# TO SWITCH BACK: Set USE_UNIFORM_AGE <- FALSE and uncomment original code
# ============================================================================
USE_UNIFORM_AGE <- TRUE
UNIFORM_PLANTING_DATE <- as.Date("2025-01-01")  # All fields planted this date

extract_planting_dates <- function(harvesting_data) {
  if (USE_UNIFORM_AGE) {
    message(paste("Using uniform planting date for all fields:", UNIFORM_PLANTING_DATE))
    return(data.frame(
      field_id = NA,  # NA = apply to all fields
      planting_date = UNIFORM_PLANTING_DATE,
      stringsAsFactors = FALSE
    ))
  }

  # ORIGINAL CODE (commented out - uncomment when switching back to real harvest data):
  # if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
  #   message("Warning: No harvesting data available.")
  #   return(NULL)
  # }
  #
  # tryCatch({
  #   # Get the MOST RECENT season_start for each field (most recent row per field)
  #   planting_dates <- harvesting_data %>%
  #     arrange(field, desc(season_start)) %>%
  #     distinct(field, .keep_all = TRUE) %>%
  #     select(field, season_start) %>%
  #     rename(field_id = field, planting_date = season_start) %>%
  #     filter(!is.na(planting_date)) %>%
  #     as.data.frame()
  #
  #   message(paste("Extracted planting dates for", nrow(planting_dates), "fields (most recent season)"))
  #   return(planting_dates)
  # }, error = function(e) {
  #   message(paste("Error extracting planting dates:", e$message))
  #   return(NULL)
  # })
}

# ============================================================================
# CLOUD ANALYSIS FUNCTION
# ============================================================================

calculate_field_cloud_coverage <- function(field_boundaries_sf, merged_tif_8b_dir, current_week, current_year) {
  message("Calculating per-field cloud coverage from weekly cloud mosaic...")

  # Check if cloud data directory exists
  if (!dir.exists(merged_tif_8b_dir)) {
    message(paste("Warning: Cloud data directory not found:", merged_tif_8b_dir))
    return(NULL)
  }

  # Get all TIF files (format: YYYY-MM-DD.tif)
  tif_files <- list.files(merged_tif_8b_dir, pattern = "\\.tif$", full.names = TRUE)

  if (length(tif_files) == 0) {
    message("Warning: No 8-band TIF files found in cloud data directory")
    return(NULL)
  }

  # Extract dates from filenames and filter for current week
  file_dates <- as.Date(gsub("\\.tif$", "", basename(tif_files)))
  current_week_date <- as.Date(paste(current_year, "01-01", sep = "-")) + (current_week - 1) * 7
  week_start <- current_week_date - as.numeric(format(current_week_date, "%w"))
  week_end <- week_start + 6

  week_files <- tif_files[file_dates >= week_start & file_dates <= week_end]

  if (length(week_files) == 0) {
    message(paste("Warning: No 8-band files found for week", current_week))
    return(NULL)
  }

  message(paste("Found", length(week_files), "cloud mask files for week", current_week))

  # Step 1: Create weekly cloud mosaic using max aggregation (same as CI mosaic)
  message("Creating weekly cloud mosaic using max aggregation...")

  cloud_rasters <- list()
  for (i in seq_along(week_files)) {
    tryCatch({
      r <- terra::rast(week_files[i])

      # Band 9 is the cloud mask (1 = clear, 0 = cloudy)
      if (nlyr(r) < 9) {
        message(paste("  Warning: File has only", nlyr(r), "bands, need 9 (band 9 = cloud mask)"))
        next
      }

      cloud_band <- r[[9]]
      cloud_rasters[[i]] <- cloud_band
    }, error = function(e) {
      message(paste("  Error processing", basename(week_files[i]), ":", e$message))
    })
  }

  if (length(cloud_rasters) == 0) {
    message("Warning: No valid cloud mask data found")
    return(NULL)
  }

  # Create mosaic: use max value to prefer clear pixels (1 = clear, 0 = cloudy)
  if (length(cloud_rasters) == 1) {
    cloud_mosaic <- cloud_rasters[[1]]
  } else {
    # Filter out NULL entries
    cloud_rasters <- cloud_rasters[!sapply(cloud_rasters, is.null)]
    rsrc <- terra::sprc(cloud_rasters)
    cloud_mosaic <- terra::mosaic(rsrc, fun = "max")
  }

  message("Weekly cloud mosaic created")

  # Step 2: Extract cloud coverage per field from the mosaic
  message("Extracting cloud coverage per field...")

  cloud_summary <- data.frame(
    field_id = character(),
    pct_clear = numeric(),
    cloud_category = character(),
    stringsAsFactors = FALSE
  )

  for (i in seq_len(nrow(field_boundaries_sf))) {
    tryCatch({
      field_id <- field_boundaries_sf$field[i]
      field_geom <- terra::vect(sf::as_Spatial(field_boundaries_sf[i, ]))

      # Extract cloud values from mosaic (1 = clear, 0 = cloudy)
      cloud_vals <- terra::extract(cloud_mosaic, field_geom)[, 2]
      cloud_vals <- cloud_vals[!is.na(cloud_vals)]

      if (length(cloud_vals) > 0) {
        # Calculate % clear (value 1 = clear, 0 = cloudy)
        pct_clear <- (sum(cloud_vals == 1) / length(cloud_vals)) * 100

        # Categorize: 100% = Clear, 0-99.9% = Partial, 0% = No Image
        if (pct_clear >= 99.5) {
          category <- "Clear view"
        } else if (pct_clear > 0 && pct_clear < 99.5) {
          category <- "Partial coverage"
        } else {
          category <- "No image available"
        }

        cloud_summary <- rbind(cloud_summary, data.frame(
          field_id = field_id,
          pct_clear = round(pct_clear, 1),
          cloud_category = category,
          stringsAsFactors = FALSE
        ))
      }
    }, error = function(e) {
      message(paste("  Error extracting cloud data for field", field_id, ":", e$message))
    })
  }

  message(paste("Cloud analysis complete for", nrow(cloud_summary), "fields"))
  return(cloud_summary)
}

#' Calculate per-field cloud coverage from weekly mosaic
#'
#' @param mosaic_raster The weekly CI mosaic raster (already cloud-masked)
#' @param field_boundaries_sf Field boundaries as sf object
#' @return Data frame with per-field cloud coverage (clear acres, % clear, category)
#'
calculate_cloud_coverage_from_mosaic <- function(mosaic_raster, field_boundaries_sf) {
  message("Calculating per-field cloud coverage from weekly mosaic...")

  if (is.null(mosaic_raster) || class(mosaic_raster)[1] != "SpatRaster") {
    message("Warning: Invalid mosaic raster provided")
    return(NULL)
  }

  # Extract CI band (last band)
  ci_band <- mosaic_raster[[nlyr(mosaic_raster)]]

  # Ensure CRS matches
  if (!terra::same.crs(ci_band, field_boundaries_sf)) {
    field_boundaries_sf <- sf::st_transform(field_boundaries_sf, sf::st_crs(ci_band))
  }

  cloud_data <- data.frame(
    field_id = character(),
    sub_field = character(),
    clear_pixels = numeric(),
    total_pixels = numeric(),
    missing_pixels = numeric(),
    pct_clear = numeric(),
    cloud_category = character(),
    stringsAsFactors = FALSE
  )

  # Process each field
  for (i in seq_len(nrow(field_boundaries_sf))) {
    field_id <- field_boundaries_sf$field[i]
    sub_field <- field_boundaries_sf$sub_field[i]

    # Extract pixel values from field using terra::extract (memory-efficient, native terra)
    extracted_vals <- tryCatch({
      result <- terra::extract(ci_band, field_boundaries_sf[i, ], cells = TRUE, na.rm = FALSE)
      # terra::extract returns a data.frame with ID and value columns; extract values only
      if (is.data.frame(result) && nrow(result) > 0) {
        data.frame(value = result$value, cells = result$cell)
      } else {
        NULL
      }
    }, error = function(e) {
      NULL
    })

    # Skip if extraction failed or returned empty
    if (is.null(extracted_vals) || !is.data.frame(extracted_vals) || nrow(extracted_vals) == 0) {
      next
    }

    # Count pixels: data vs missing
    ci_vals <- extracted_vals$value
    num_data <- sum(!is.na(ci_vals))
    num_total <- length(ci_vals)
    num_missing <- num_total - num_data
    pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0

    # Categorize
    category <- if (pct_clear >= 85) "Clear view"
                else if (pct_clear > 0) "Partial coverage"
                else "No image available"

    # Store result
    cloud_data <- rbind(cloud_data, data.frame(
      field_id = field_id,
      sub_field = sub_field,
      clear_pixels = num_data,
      total_pixels = num_total,
      missing_pixels = num_missing,
      pct_clear = pct_clear,
      cloud_category = category,
      stringsAsFactors = FALSE
    ))
  }

  message(paste("Per-field cloud analysis complete for", nrow(cloud_data), "fields"))
  return(cloud_data)
}

# ============================================================================
# HARVEST IMMINENCE DETECTION FUNCTION
# ============================================================================

calculate_harvest_imminence <- function(ci_data, field_boundaries_sf, harvest_model_dir,
                                        imminent_threshold = 0.5) {
  message("Calculating harvest imminence probabilities...")

  # Check if model files exist
  model_path <- file.path(harvest_model_dir, "model.pt")
  config_path <- file.path(harvest_model_dir, "config.json")
  scalers_path <- file.path(harvest_model_dir, "scalers.pkl")

  if (!file.exists(model_path) || !file.exists(config_path) || !file.exists(scalers_path)) {
    message("Warning: Harvest model files not found in", harvest_model_dir)
    return(NULL)
  }

  tryCatch({
    # Load model using harvest_date_pred_utils
    # Note: This requires torch package and Python utilities, skip if not available
    harvest_utils_path <- file.path(harvest_model_dir, "harvest_date_pred_utils.R")

    if (!file.exists(harvest_utils_path)) {
      message("Note: Harvest utils R file not found, using age-based trigger only")
      return(NULL)
    }

    source(harvest_utils_path)

    # Load model, config, and scalers
    model <- torch::torch_load(model_path)
    model$eval()

    config <- jsonlite::read_json(config_path)
    scalers <- reticulate::py_load_object(scalers_path)

    # Run Phase 1 detection for each field
    harvest_results <- data.frame(
      field_id = character(),
      imminent_prob = numeric(),
      harvest_imminent = logical(),
      stringsAsFactors = FALSE
    )

    for (field in unique(ci_data$field)) {
      tryCatch({
        field_data <- ci_data %>%
          filter(field == !!field) %>%
          arrange(Date)

        if (nrow(field_data) < 10) {
          message(paste("  Skipping field", field, "- insufficient CI data"))
          next
        }

        # Extract CI values
        ci_vals <- field_data$value

        # Run Phase 1 (growing window detection with imminent probability)
        phase1_result <- run_phase1_growing_window(
          ci_values = ci_vals,
          scalers = scalers,
          config = config,
          model = model
        )

        if (!is.null(phase1_result)) {
          imminent_prob <- phase1_result$imminent_prob
          harvest_imminent <- imminent_prob > imminent_threshold

          harvest_results <- rbind(harvest_results, data.frame(
            field_id = field,
            imminent_prob = round(imminent_prob, 3),
            harvest_imminent = harvest_imminent,
            stringsAsFactors = FALSE
          ))
        }
      }, error = function(e) {
        message(paste("  Error processing field", field, ":", e$message))
      })
    }

    message(paste("Harvest imminence detection complete for", nrow(harvest_results), "fields"))
    return(harvest_results)

  }, error = function(e) {
    message(paste("Error in harvest imminence calculation:", e$message))
    return(NULL)
  })
}

# ============================================================================
# MAIN CALCULATION FUNCTION
# ============================================================================

calculate_per_field_analysis <- function(current_ci, previous_ci, field_boundaries_sf,
                                         previous_week_result = NULL, planting_dates = NULL, report_date = Sys.Date(),
                                         cloud_data = NULL, harvest_imminence_data = NULL) {
  message("Calculating per-field analysis...")

  # Debug CRS info
  message(paste("  Current CI CRS:", terra::crs(current_ci)))
  message(paste("  Field boundaries CRS:", sf::st_crs(field_boundaries_sf)$input))

  # Debug mosaic info
  ci_ext <- terra::ext(current_ci)
  ci_summary <- terra::global(current_ci, "range", na.rm = TRUE)
  message(paste("  Mosaic extent:", paste(round(c(ci_ext$xmin, ci_ext$xmax, ci_ext$ymin, ci_ext$ymax), 4), collapse=", ")))
  message(paste("  Mosaic value range:", paste(round(as.numeric(ci_summary), 4), collapse=" to ")))

  previous_week_csv <- NULL
  weeks_lookback <- NA
  if (!is.null(previous_week_result) && previous_week_result$found) {
    previous_week_csv <- previous_week_result$data
    weeks_lookback <- previous_week_result$weeks_lookback
  }

  field_analysis <- list()
  field_count <- 0

  for (i in seq_len(nrow(field_boundaries_sf))) {
    field_id <- field_boundaries_sf$field[i]
    farm_section <- if ("sub_area" %in% names(field_boundaries_sf)) {
      field_boundaries_sf$sub_area[i]
    } else {
      NA_character_
    }
    field_name <- field_boundaries_sf$field[i]

    # Check if geometry is valid and non-empty
    field_sf <- field_boundaries_sf[i, ]
    if (sf::st_is_empty(field_sf) || any(is.na(sf::st_geometry(field_sf)))) {
      message(paste("  Skipping", field_id, "- invalid or empty geometry"))
      next
    }

    # Calculate field area from geometry (in hectares)
    tryCatch({
      field_geom_calc <- terra::vect(sf::as_Spatial(field_sf))
      terra::crs(field_geom_calc) <- terra::crs(current_ci)  # Set CRS to match
      field_area_ha <- terra::expanse(field_geom_calc) / 10000  # Convert m² to hectares
      field_area_acres <- field_area_ha / 0.404686  # Convert to acres
    }, error = function(e) {
      message(paste("  Skipping", field_id, "- error calculating geometry:", e$message))
      next
    })

    # Extract current CI values with CRS validation
    tryCatch({
      field_geom <- terra::vect(sf::as_Spatial(field_sf))
      # Set CRS explicitly to match raster (as_Spatial loses CRS info)
      terra::crs(field_geom) <- terra::crs(current_ci)

      extract_result <- terra::extract(current_ci, field_geom)
      current_ci_vals <- extract_result[, 2]
      current_ci_vals <- current_ci_vals[!is.na(current_ci_vals)]
    }, error = function(e) {
      message(paste("  Error extracting CI for", field_id, ":", e$message))
      current_ci_vals <<- c()
    })

    if (length(current_ci_vals) == 0) {
      if (i <= 5) {  # Debug first 5 fields only
        message(paste("  DEBUG: Field", field_id, "- extract returned empty, geometry may not overlap or all NA"))
      }
      next
    }

    # Calculate CI statistics
    mean_ci_current <- mean(current_ci_vals, na.rm = TRUE)
    cv_current <- sd(current_ci_vals, na.rm = TRUE) / mean_ci_current
    range_min <- min(current_ci_vals, na.rm = TRUE)
    range_max <- max(current_ci_vals, na.rm = TRUE)

    # Calculate weekly CI change
    weekly_ci_change <- NA
    range_str <- sprintf("%.1f-%.1f", range_min, range_max)
    if (!is.null(previous_ci)) {
      previous_ci_vals <- terra::extract(previous_ci, field_geom)[, 2]
      previous_ci_vals <- previous_ci_vals[!is.na(previous_ci_vals)]
      if (length(previous_ci_vals) > 0) {
        mean_ci_previous <- mean(previous_ci_vals, na.rm = TRUE)
        weekly_ci_change <- mean_ci_current - mean_ci_previous
      }
    }

    # Format CI change
    ci_std <- sd(current_ci_vals, na.rm = TRUE)
    if (is.na(weekly_ci_change)) {
      weekly_ci_change_str <- sprintf("%.1f ± %.2f", mean_ci_current, ci_std)
    } else {
      direction <- if (weekly_ci_change > 0) "+" else ""
      weekly_ci_change_str <- sprintf("%s%.1f (%.1f ± %.2f)", direction, weekly_ci_change, mean_ci_current, ci_std)
    }

    # Get field age from most recent season_start in harvest.xlsx (relative to report_date)
    age_weeks <- NA
    if (!is.null(planting_dates)) {
      # Check for exact field match OR if field_id is NA (uniform age mode)
      planting_row <- which(tolower(planting_dates$field_id) == tolower(field_id) | is.na(planting_dates$field_id))
      if (length(planting_row) > 0) {
        planting_date <- as.Date(planting_dates$planting_date[planting_row[1]])
        age_weeks <- as.integer(difftime(report_date, planting_date, units = "weeks"))
      }
    }

    # Calculate germination progression (for germination phase only)
    pct_ci_above_2 <- sum(current_ci_vals > 2) / length(current_ci_vals) * 100
    pct_ci_ge_2 <- sum(current_ci_vals >= 2) / length(current_ci_vals) * 100
    germination_progress_str <- NA_character_
    if (!is.na(age_weeks) && age_weeks >= 0 && age_weeks <= 6) {
      germination_progress_str <- sprintf("%.0f%% > 2, %.0f%% ≥ 2", pct_ci_above_2, pct_ci_ge_2)
    }

    # Assign phase and trigger
    # Priority: Check harvest imminence first (overrides age-based phase)
    phase <- "Unknown"
    imminent_prob_val <- NA
    if (!is.null(harvest_imminence_data) && nrow(harvest_imminence_data) > 0) {
      harvest_row <- which(harvest_imminence_data$field_id == field_id)
      if (length(harvest_row) > 0) {
        imminent_prob_val <- harvest_imminence_data$imminent_prob[harvest_row[1]]
        if (harvest_imminence_data$harvest_imminent[harvest_row[1]]) {
          phase <- "Harvest Imminent"
        }
      }
    }

    # If not harvest imminent, use age-based phase
    if (phase == "Unknown") {
      phase <- get_phase_by_age(age_weeks)
    }

    status_trigger <- get_status_trigger(current_ci_vals, weekly_ci_change, age_weeks)

    # Track phase transitions
    nmr_weeks_in_phase <- 1
    if (!is.null(previous_week_csv)) {
      prev_row <- which(previous_week_csv$Field_id == field_id)
      if (length(prev_row) > 0) {
        prev_phase <- previous_week_csv$`Phase (age based)`[prev_row[1]]
        prev_weeks <- as.numeric(previous_week_csv$nmr_weeks_in_this_phase[prev_row[1]])

        if (!is.na(prev_phase) && prev_phase == phase) {
          nmr_weeks_in_phase <- prev_weeks + if (!is.na(weeks_lookback) && weeks_lookback > 1) weeks_lookback else 1
        } else {
          nmr_weeks_in_phase <- if (!is.na(weeks_lookback) && weeks_lookback > 1) weeks_lookback else 1
        }
      }
    }

    field_count <- field_count + 1

    # Get cloud data for this field if available
    cloud_pct <- NA
    cloud_cat <- NA
    if (!is.null(cloud_data) && nrow(cloud_data) > 0) {
      cloud_row <- which(cloud_data$field_id == field_id)
      if (length(cloud_row) > 0) {
        cloud_pct <- cloud_data$pct_clear[cloud_row[1]]
        cloud_cat <- cloud_data$cloud_category[cloud_row[1]]
      }
    }

    field_analysis[[field_count]] <- data.frame(
      Field_id = field_id,
      Farm_Section = farm_section,
      Field_name = field_name,
      Acreage = round(field_area_acres, 2),
      Mean_CI = round(mean_ci_current, 2),
      Weekly_ci_change = weekly_ci_change_str,
      Germination_progress = germination_progress_str,
      Age_week = age_weeks,
      `Phase (age based)` = phase,
      nmr_weeks_in_this_phase = nmr_weeks_in_phase,
      Imminent_prob = imminent_prob_val,
      Status_trigger = status_trigger,
      CI_range = range_str,
      CV = round(cv_current, 3),
      Cloud_pct_clear = cloud_pct,
      Cloud_category = cloud_cat,
      stringsAsFactors = FALSE,
      check.names = FALSE
    )
  }

  if (length(field_analysis) == 0) {
    message("ERROR: No fields processed!")
    return(data.frame())
  }

  field_df <- do.call(rbind, field_analysis)
  rownames(field_df) <- NULL
  message(paste("Per-field analysis completed for", nrow(field_df), "fields"))
  return(field_df)
}

generate_field_analysis_summary <- function(field_df) {
  message("Generating summary statistics...")

  # Total acreage (FIRST - needed for all percentages)
  total_acreage <- sum(field_df$Acreage, na.rm = TRUE)

  # Phase breakdown
  germination_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Germination"], na.rm = TRUE)
  tillering_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Tillering"], na.rm = TRUE)
  grand_growth_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Grand Growth"], na.rm = TRUE)
  maturation_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Maturation"], na.rm = TRUE)
  unknown_phase_acreage <- sum(field_df$Acreage[field_df$`Phase (age based)` == "Unknown"], na.rm = TRUE)

  # Status trigger breakdown
  harvest_ready_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "harvest_ready"], na.rm = TRUE)
  stress_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "stress_detected_whole_field"], na.rm = TRUE)
  recovery_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "strong_recovery"], na.rm = TRUE)
  growth_on_track_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "growth_on_track"], na.rm = TRUE)
  germination_complete_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_complete"], na.rm = TRUE)
  germination_started_acreage <- sum(field_df$Acreage[field_df$Status_trigger == "germination_started"], na.rm = TRUE)
  no_trigger_acreage <- sum(field_df$Acreage[is.na(field_df$Status_trigger)], na.rm = TRUE)

  # Cloud coverage breakdown
  clear_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Clear view"], na.rm = TRUE)
  partial_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "Partial coverage"], na.rm = TRUE)
  no_image_acreage <- sum(field_df$Acreage[field_df$Cloud_category == "No image available"], na.rm = TRUE)
  monitored_acreage <- clear_acreage + partial_acreage + no_image_acreage  # Total monitored

  # Count fields by cloud category
  clear_fields <- sum(field_df$Cloud_category == "Clear view", na.rm = TRUE)
  partial_fields <- sum(field_df$Cloud_category == "Partial coverage", na.rm = TRUE)
  no_image_fields <- sum(field_df$Cloud_category == "No image available", na.rm = TRUE)
  total_fields <- nrow(field_df)

  # Create summary as a proper table with clear structure
  summary_df <- data.frame(
    Category = c(
      "MONITORED AREA",
      "Total Monitored Acreage",
      "",
      "CLOUD COVERAGE",
      "Clear view (# fields)",
      "Clear view (acres)",
      "Partial coverage (# fields)",
      "Partial coverage (acres)",
      "No image available (# fields)",
      "No image available (acres)",
      "",
      "PHASE DISTRIBUTION",
      "Germination",
      "Tillering",
      "Grand Growth",
      "Maturation",
      "Unknown Phase",
      "",
      "STATUS TRIGGERS",
      "Harvest Ready",
      "Strong Recovery",
      "Growth On Track",
      "Stress Detected",
      "Germination Complete",
      "Germination Started",
      "No Active Trigger",
      "",
      "TOTAL FARM",
      "Total Acreage"
    ),
    Acreage = c(
      NA,
      round(monitored_acreage, 2),
      NA,
      NA,
      paste0(clear_fields, " fields"),
      round(clear_acreage, 2),
      paste0(partial_fields, " fields"),
      round(partial_acreage, 2),
      paste0(no_image_fields, " fields"),
      round(no_image_acreage, 2),
      NA,
      NA,
      round(germination_acreage, 2),
      round(tillering_acreage, 2),
      round(grand_growth_acreage, 2),
      round(maturation_acreage, 2),
      round(unknown_phase_acreage, 2),
      NA,
      NA,
      round(harvest_ready_acreage, 2),
      round(recovery_acreage, 2),
      round(growth_on_track_acreage, 2),
      round(stress_acreage, 2),
      round(germination_complete_acreage, 2),
      round(germination_started_acreage, 2),
      round(no_trigger_acreage, 2),
      NA,
      NA,
      round(total_acreage, 2)
    ),
    stringsAsFactors = FALSE
  )

  # Add metadata as attributes for use in report
  attr(summary_df, "cloud_fields_clear") <- clear_fields
  attr(summary_df, "cloud_fields_partial") <- partial_fields
  attr(summary_df, "cloud_fields_no_image") <- no_image_fields
  attr(summary_df, "cloud_fields_total") <- total_fields

  return(summary_df)
}

export_field_analysis_excel <- function(field_df, summary_df, project_dir, current_week, reports_dir) {
  message("Exporting per-field analysis to Excel and RDS...")

  # Save to kpis/field_analysis subfolder
  output_subdir <- file.path(reports_dir, "kpis", "field_analysis")
  if (!dir.exists(output_subdir)) {
    dir.create(output_subdir, recursive = TRUE)
  }

  # Create Excel with two sheets: Field Data and Summary
  excel_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d", current_week), ".xlsx")
  excel_path <- file.path(output_subdir, excel_filename)

  # Normalize path for Windows
  excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)

  # Prepare sheets as a list
  sheets <- list(
    "Field Data" = field_df,
    "Summary" = summary_df
  )

  # Export to Excel
  write_xlsx(sheets, excel_path)
  message(paste("✓ Field analysis Excel exported to:", excel_path))

  # Also save as RDS for 91_CI_report_with_kpis_Angata.Rmd to consume
  # RDS format: list with field_analysis and field_analysis_summary for compatibility
  kpi_data <- list(
    field_analysis = field_df,
    field_analysis_summary = summary_df,
    metadata = list(
      project = project_dir,
      current_week = current_week,
      export_time = Sys.time()
    )
  )

  rds_filename <- paste0(project_dir, "_kpi_summary_tables_week", sprintf("%02d", current_week), ".rds")
  rds_path <- file.path(reports_dir, "kpis", rds_filename)

  saveRDS(kpi_data, rds_path)
  message(paste("✓ Field analysis RDS exported to:", rds_path))

  return(list(excel = excel_path, rds = rds_path))
}

export_harvest_predictions <- function(harvest_data, project_dir, current_week, reports_dir) {
  message("Exporting weekly harvest predictions...")

  if (is.null(harvest_data) || nrow(harvest_data) == 0) {
    message("No harvest predictions to export")
    return(NULL)
  }

  # Save to kpis/harvest_predictions subfolder
  output_subdir <- file.path(reports_dir, "kpis", "harvest_predictions")
  if (!dir.exists(output_subdir)) {
    dir.create(output_subdir, recursive = TRUE)
  }

  # Create Excel with harvest predictions
  excel_filename <- paste0(project_dir, "_harvest_predictions_week", sprintf("%02d", current_week), ".xlsx")
  excel_path <- file.path(output_subdir, excel_filename)

  # Normalize path for Windows
  excel_path <- normalizePath(excel_path, winslash = "\\", mustWork = FALSE)

  # Prepare data for export
  export_df <- harvest_data %>%
    select(field_id, imminent_prob, harvest_imminent) %>%
    mutate(harvest_imminent = ifelse(harvest_imminent, "Yes", "No"))

  # Export to Excel
  write_xlsx(list("Predictions" = export_df), excel_path)
  message(paste("✓ Weekly harvest predictions exported to:", excel_path))

  return(excel_path)
}

# ============================================================================
# MAIN
# ============================================================================

main <- function() {
  args <- commandArgs(trailingOnly = TRUE)

  end_date <- if (length(args) >= 1 && !is.na(args[1])) {
    as.Date(args[1])
  } else if (exists("end_date_str", envir = .GlobalEnv)) {
    as.Date(get("end_date_str", envir = .GlobalEnv))
  } else {
    Sys.Date()
  }

  offset <- if (length(args) >= 2 && !is.na(args[2])) {
    as.numeric(args[2])
  } else if (exists("offset", envir = .GlobalEnv)) {
    get("offset", envir = .GlobalEnv)
  } else {
    7
  }

  project_dir <- if (length(args) >= 3 && !is.na(args[3])) {
    as.character(args[3])
  } else if (exists("project_dir", envir = .GlobalEnv)) {
    get("project_dir", envir = .GlobalEnv)
  } else {
    "angata"
  }

  assign("project_dir", project_dir, envir = .GlobalEnv)

  # Load utilities and configuration
  source(here("r_app", "crop_messaging_utils.R"))
  source(here("r_app", "parameters_project.R"))

  message("=== FIELD ANALYSIS WEEKLY ===")
  message(paste("Date:", end_date))
  message(paste("Project:", project_dir))

  # Calculate week numbers
  weeks <- list(
    current_week = as.numeric(format(end_date, "%V")),
    previous_week = as.numeric(format(end_date, "%V")) - 1,
    year = as.numeric(format(end_date, "%Y"))
  )
  if (weeks$previous_week < 1) weeks$previous_week <- 52

  message(paste("Week:", weeks$current_week, "/ Year:", weeks$year))

  # Load CI mosaics
  load_weekly_ci <- function(week_num, year, mosaic_dir) {
    week_file <- sprintf("week_%02d_%d.tif", week_num, year)
    week_path <- file.path(mosaic_dir, week_file)
    if (!file.exists(week_path)) {
      message(paste("  Mosaic not found:", week_file))
      return(NULL)
    }
    tryCatch({
      mosaic_raster <- terra::rast(week_path)
      message(paste("    Mosaic bands:", terra::nlyr(mosaic_raster), "layers"))

      # Check for named CI band, otherwise extract band 5 (CI is typically the 5th band)
      if ("CI" %in% names(mosaic_raster)) {
        ci_raster <- mosaic_raster[["CI"]]
        message(paste("    Extracted named CI band"))
      } else if (terra::nlyr(mosaic_raster) >= 5) {
        # Mosaic has 5+ bands: R, G, B, NIR, CI - extract band 5
        ci_raster <- mosaic_raster[[5]]
        message(paste("    Extracted band 5 (CI) from multi-band mosaic"))
      } else {
        # Fallback to band 1 if fewer than 5 bands
        ci_raster <- mosaic_raster[[1]]
        message(paste("    Using band 1 (only", terra::nlyr(mosaic_raster), "bands available)"))
      }
      names(ci_raster) <- "CI"
      message(paste("  ✓ Loaded:", week_file))
      return(ci_raster)
    }, error = function(e) {
      message(paste("  Error loading:", e$message))
      return(NULL)
    })
  }

  message("Loading CI mosaics...")
  current_ci <- load_weekly_ci(weeks$current_week, weeks$year, weekly_CI_mosaic)
  previous_ci <- load_weekly_ci(weeks$previous_week, weeks$year, weekly_CI_mosaic)

  if (is.null(current_ci)) {
    stop("Current week CI mosaic is required but not found")
  }

  # Load historical data
  previous_week_result <- load_previous_week_csv(project_dir, weeks$current_week, reports_dir)
  planting_dates <- extract_planting_dates(harvesting_data)

  # ============================================================================
  # HARVEST IMMINENCE MODEL - DISABLED FOR UNIFORM AGE MODE
  # TO SWITCH BACK: Set SKIP_HARVEST_MODEL <- FALSE below and uncomment loading code
  # ============================================================================
  SKIP_HARVEST_MODEL <- TRUE

  harvest_imminence_data <- NULL
  if (!SKIP_HARVEST_MODEL) {
    message("Loading harvest imminence model...")
    # Try multiple possible locations for the harvest model
    harvest_model_candidates <- c(
      here("python_app", "harvest_detection_experiments", "experiment_framework", "04_production_export"),
      here("04_production_export"),
      here("..", "python_app", "harvest_detection_experiments", "experiment_framework", "04_production_export")
    )

    harvest_model_dir <- NULL
    for (candidate in harvest_model_candidates) {
      if (dir.exists(candidate)) {
        harvest_model_dir <- candidate
        message(paste("Found harvest model at:", harvest_model_dir))
        break
      }
    }

    if (!is.null(harvest_model_dir)) {
      # Load CI data for model (from cumulative RDS)
      ci_rds_path <- file.path(cumulative_CI_vals_dir, "combined_CI_data.rds")
      if (file.exists(ci_rds_path)) {
        ci_data <- readRDS(ci_rds_path)
        harvest_imminence_data <- calculate_harvest_imminence(ci_data, field_boundaries_sf,
                                                               harvest_model_dir, imminent_threshold = 0.5)
      } else {
        message("Note: CI data not found - harvest imminence detection will be skipped")
      }
    } else {
      message("Note: Harvest model directory not found - harvest imminence detection will be skipped")
    }
  } else {
    message("Harvest imminence model skipped (SKIP_HARVEST_MODEL = TRUE)")
  }

  # Calculate cloud coverage from weekly mosaic (more accurate than 8-band data)
  cloud_data <- NULL
  if (!is.null(current_ci)) {
    cloud_data <- calculate_cloud_coverage_from_mosaic(current_ci, field_boundaries_sf)

    # Export cloud coverage data to RDS for use in downstream reporting scripts
    if (!is.null(cloud_data) && nrow(cloud_data) > 0) {
      cloud_data_file <- file.path(reports_dir,
                                   paste0(project_dir, "_cloud_coverage_week", weeks$current_week, ".rds"))
      saveRDS(cloud_data, cloud_data_file)
      message(paste("Cloud coverage data exported to:", cloud_data_file))
    }
  } else {
    message("Note: Current mosaic not available - cloud coverage analysis will be skipped")
  }

  # Calculate analysis
  field_analysis_df <- calculate_per_field_analysis(
    current_ci, previous_ci, field_boundaries_sf,
    previous_week_result, planting_dates, end_date, cloud_data, harvest_imminence_data
  )

  if (nrow(field_analysis_df) == 0) {
    stop("No fields were analyzed successfully")
  }

  summary_statistics_df <- generate_field_analysis_summary(field_analysis_df)

  export_paths <- export_field_analysis_excel(
    field_analysis_df, summary_statistics_df, project_dir,
    weeks$current_week, reports_dir
  )

  # Export weekly harvest predictions if available
  if (!is.null(harvest_imminence_data) && nrow(harvest_imminence_data) > 0) {
    harvest_export_path <- export_harvest_predictions(harvest_imminence_data, project_dir,
                                                       weeks$current_week, reports_dir)
  }

  # Print summary
  cat("\n=== FIELD ANALYSIS SUMMARY ===\n")
  cat("Fields analyzed:", nrow(field_analysis_df), "\n")
  cat("Excel export:", export_paths$excel, "\n")
  cat("RDS export:", export_paths$rds, "\n")
  if (!is.null(harvest_imminence_data) && nrow(harvest_imminence_data) > 0) {
    cat("Harvest predictions export:", harvest_export_path, "\n")
  }
  cat("\n")

  cat("--- Per-field results (first 10) ---\n")
  print(head(field_analysis_df[, c("Field_id", "Acreage", "Age_week", "Phase (age based)",
                                    "Germination_progress", "Status_trigger", "Weekly_ci_change")], 10))

  cat("\n--- Summary statistics ---\n")
  print(summary_statistics_df)
}

if (sys.nframe() == 0) {
  main()
}