# SmartCane/r_app/80_weekly_stats_utils.R
# (file-listing metadata pasted above the script: 1140 lines, 40 KiB, language R
#  -- converted to comments so this file parses as valid R code)

# 80_WEEKLY_STATS_UTILS.R
# ============================================================================
# UTILITY FUNCTIONS FOR WEEKLY STATISTICS CALCULATION
#
# This file contains reusable functions for:
# - Tile grid management
# - Tile loading and merging
# - Field-level statistics calculation from CI rasters
# - Weekly stats caching (RDS/CSV export/import)
# - KPI trend calculations
# - Historical data loading and auto-generation from mosaics
#
# Used by: 80_calculate_kpis.R, run_full_pipeline.R, other reporting scripts
# ============================================================================
# ============================================================================
# WEEK/YEAR CALCULATION HELPERS (Consistent across all scripts)
# ============================================================================
#' Calculate week and year for a given lookback offset
#' This function handles ISO 8601 week numbering with proper year wrapping
#' when crossing year boundaries (e.g., week 01/2026 -> week 52/2025)
#'
#' @param current_week ISO week number (1-53)
#' @param current_year ISO week year (from format(..., "%G"))
#' @param offset_weeks Number of weeks to go back (0 = current week, 1 = previous week, etc.)
#'
#' @return List with: week (ISO week number), year (ISO week year)
#'
#' @details
#' This is the authoritative week/year calculation function.
#' Used by:
#' - load_historical_field_data() - to find RDS/CSV files for 4-week lookback
#' - Script 80 main - to calculate previous week with year wrapping
#' - Any other script needing to walk backwards through weeks
#'
#' ISO years contain 52 OR 53 weeks, so a fixed "+52" wrap would mis-number
#' lookbacks crossing a 53-week year (e.g. 2020). The wrap therefore uses the
#' actual ISO week count of each previous year, derived from the week number
#' of Dec 28 (which by definition always falls in the last ISO week).
#'
#' Example: Week 01/2026, offset=1 -> returns list(week=52, year=2025)
#' Example: Week 01/2021, offset=1 -> returns list(week=53, year=2020)
calculate_target_week_and_year <- function(current_week, current_year, offset_weeks = 0) {
  target_week <- current_week - offset_weeks
  target_year <- current_year
  # Handle wrapping: walk back one year at a time until the week is valid
  while (target_week < 1) {
    target_year <- target_year - 1
    # Dec 28 always lies in the final ISO week of its year -> 52 or 53
    weeks_in_year <- as.integer(format(as.Date(sprintf("%d-12-28", target_year)), "%V"))
    target_week <- target_week + weeks_in_year
  }
  return(list(week = target_week, year = target_year))
}
# ============================================================================
# TILE-AWARE HELPER FUNCTIONS
# ============================================================================
get_tile_ids_for_field <- function(field_geom, tile_grid, field_id = NULL) {
  # Return the tile ids whose bounding boxes overlap the field's bounding box.
  # field_geom: sf object or terra SpatVector.
  # tile_grid: data.frame with columns id, xmin, xmax, ymin, ymax
  #   (as produced by build_tile_grid()).
  # field_id: accepted for interface compatibility; not used here.
  if (inherits(field_geom, "sf")) {
    bb <- sf::st_bbox(field_geom)
    fx_min <- bb["xmin"]
    fx_max <- bb["xmax"]
    fy_min <- bb["ymin"]
    fy_max <- bb["ymax"]
  } else if (inherits(field_geom, "SpatVector")) {
    bb <- terra::ext(field_geom)
    fx_min <- bb$xmin
    fx_max <- bb$xmax
    fy_min <- bb$ymin
    fy_max <- bb$ymax
  } else {
    stop("field_geom must be sf or terra::vect object")
  }
  # Two axis-aligned boxes intersect unless one lies strictly to the
  # left/right/above/below the other (De Morgan of the disjoint test).
  overlaps <- tile_grid$xmax >= fx_min &
    tile_grid$xmin <= fx_max &
    tile_grid$ymax >= fy_min &
    tile_grid$ymin <= fy_max
  return(as.numeric(tile_grid$id[overlaps]))
}
load_tiles_for_field <- function(field_geom, tile_ids, week_num, year, mosaic_dir) {
  # Load the CI layer (band 5) of every available tile for the given
  # week/year and combine them into a single SpatRaster.
  # Returns NULL when nothing could be loaded, the lone band when a single
  # tile exists, otherwise a max-composited mosaic of all loaded bands.
  # field_geom is part of the call signature but not consulted here.
  if (length(tile_ids) == 0) {
    return(NULL)
  }
  ci_bands <- list()
  for (tid in sort(tile_ids)) {
    tile_path <- file.path(mosaic_dir, sprintf("week_%02d_%d_%02d.tif", week_num, year, tid))
    if (!file.exists(tile_path)) {
      next
    }
    tryCatch({
      # Band 5 of each weekly tile holds the CI layer
      ci_bands[[length(ci_bands) + 1]] <- terra::subset(terra::rast(tile_path), 5)
    }, error = function(e) {
      message(paste(" Warning: Could not load tile", tid, ":", e$message))
    })
  }
  if (length(ci_bands) == 0) {
    return(NULL)
  }
  if (length(ci_bands) == 1) {
    return(ci_bands[[1]])
  }
  # Merge overlapping tiles keeping the maximum CI per cell; if merging
  # fails, degrade gracefully to the first loaded tile.
  tryCatch({
    merged <- terra::mosaic(terra::sprc(ci_bands), fun = "max")
    return(merged)
  }, error = function(e) {
    message(paste(" Warning: Could not merge tiles:", e$message))
    return(ci_bands[[1]])
  })
}
build_tile_grid <- function(mosaic_dir, week_num, year) {
  # Build a lookup table of tile extents for one week's mosaic tiles.
  #
  # Scans mosaic_dir (descending into an "NxM" grid-size subdirectory such
  # as "5x5/" when one exists) for files named week_WW_YYYY_TT.tif, reads
  # each tile's spatial extent, and returns:
  #   list(tile_grid  = data.frame(id, xmin, xmax, ymin, ymax),
  #        mosaic_dir = possibly adjusted directory,
  #        grid_size  = e.g. "5x5", or NA when no subdirectory was found)
  # Stops when no tile files exist or no extents could be read.
  detected_grid_size <- NA
  if (dir.exists(mosaic_dir)) {
    subfolders <- list.dirs(mosaic_dir, full.names = FALSE, recursive = FALSE)
    grid_patterns <- grep("^\\d+x\\d+$", subfolders, value = TRUE)
    if (length(grid_patterns) > 0) {
      detected_grid_size <- grid_patterns[1]
      mosaic_dir <- file.path(mosaic_dir, detected_grid_size)
      message(paste(" Using grid-size subdirectory:", detected_grid_size))
    }
  }
  tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
  tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
  if (length(tile_files) == 0) {
    stop(paste("No tile files found for week", week_num, year, "in", mosaic_dir))
  }
  # Collect one row per readable tile, then bind once (no rbind-in-loop)
  rows <- list()
  for (tile_file in tile_files) {
    tryCatch({
      matches <- regmatches(basename(tile_file), regexpr("_([0-9]{2})\\.tif$", basename(tile_file)))
      if (length(matches) > 0) {
        # BUGFIX: sub() replaces only the FIRST match of "_|\\.tif", leaving
        # e.g. "07.tif", which as.integer() coerced to NA for every tile.
        # gsub() strips both the leading "_" and the ".tif" suffix.
        tile_id <- as.integer(gsub("_|\\.tif", "", matches[1]))
        tile_ext <- terra::ext(terra::rast(tile_file))
        rows[[length(rows) + 1]] <- data.frame(
          id = tile_id,
          xmin = tile_ext$xmin,
          xmax = tile_ext$xmax,
          ymin = tile_ext$ymin,
          ymax = tile_ext$ymax,
          stringsAsFactors = FALSE
        )
      }
    }, error = function(e) {
      message(paste(" Warning: Could not process tile", basename(tile_file), ":", e$message))
    })
  }
  if (length(rows) == 0) {
    stop("Could not extract extents from any tile files")
  }
  tile_grid <- do.call(rbind, rows)
  return(list(
    tile_grid = tile_grid,
    mosaic_dir = mosaic_dir,
    grid_size = detected_grid_size
  ))
}
# ============================================================================
# STATISTICAL CATEGORIZATION FUNCTIONS
# ============================================================================
categorize_four_week_trend <- function(ci_values_list) {
  # Classify a series of weekly mean-CI values into a trend label based on
  # the average week-over-week change. Thresholds come from the global
  # FOUR_WEEK_TREND_* constants (defined elsewhere in the app).
  # Returns NA_character_ when fewer than two non-NA values are supplied.
  if (is.null(ci_values_list) || length(ci_values_list) < 2) {
    return(NA_character_)
  }
  ci_values_list <- ci_values_list[!is.na(ci_values_list)]
  if (length(ci_values_list) < 2) {
    return(NA_character_)
  }
  # Mean of consecutive differences = average weekly change
  avg_weekly_change <- mean(diff(ci_values_list), na.rm = TRUE)
  # Branch order matters: earlier checks take precedence if bands overlap
  if (avg_weekly_change >= FOUR_WEEK_TREND_STRONG_GROWTH_MIN) {
    return("strong growth")
  }
  if (avg_weekly_change >= FOUR_WEEK_TREND_GROWTH_MIN &&
      avg_weekly_change < FOUR_WEEK_TREND_GROWTH_MAX) {
    return("growth")
  }
  if (abs(avg_weekly_change) <= FOUR_WEEK_TREND_NO_GROWTH_RANGE) {
    return("no growth")
  }
  if (avg_weekly_change <= FOUR_WEEK_TREND_DECLINE_MIN &&
      avg_weekly_change > FOUR_WEEK_TREND_STRONG_DECLINE_MAX) {
    return("decline")
  }
  if (avg_weekly_change < FOUR_WEEK_TREND_STRONG_DECLINE_MAX) {
    return("strong decline")
  }
  # Fallback for changes that fall between the configured bands
  return("no growth")
}
round_cloud_to_intervals <- function(cloud_pct_clear) {
  # Bucket a clear-sky percentage into a labelled interval string.
  # Buckets are left-closed 10%-wide bands, with a finer split at the top
  # (90-95%, 95-100%). NA input yields NA_character_.
  if (is.na(cloud_pct_clear)) {
    return(NA_character_)
  }
  upper_bounds <- c(10, 20, 30, 40, 50, 60, 70, 80, 90, 95)
  labels <- c(
    "0-10%", "10-20%", "20-30%", "30-40%", "40-50%",
    "50-60%", "60-70%", "70-80%", "80-90%", "90-95%", "95-100%"
  )
  # findInterval() counts how many bounds the value has reached, which maps
  # directly onto the label index (values below 10 -> 0 -> first label).
  labels[findInterval(cloud_pct_clear, upper_bounds) + 1]
}
get_ci_percentiles <- function(ci_values) {
  # Format the low/high CI percentiles as a "lo-hi" string with one decimal
  # each. The probabilities come from the global CI_PERCENTILE_LOW/HIGH
  # constants (defined elsewhere). Returns NA_character_ when no non-NA
  # values are available.
  if (is.null(ci_values)) {
    return(NA_character_)
  }
  clean_vals <- ci_values[!is.na(ci_values)]
  if (length(clean_vals) == 0) {
    return(NA_character_)
  }
  bounds <- quantile(clean_vals, probs = c(CI_PERCENTILE_LOW, CI_PERCENTILE_HIGH), na.rm = TRUE)
  sprintf("%.1f-%.1f", bounds[[1]], bounds[[2]])
}
calculate_cv_trend <- function(cv_current, cv_previous) {
  # Short-term CV trend: current minus previous CV, rounded to 4 decimals.
  # Returns NA_real_ when either value is missing.
  if (is.na(cv_current) || is.na(cv_previous)) {
    return(NA_real_)
  }
  delta <- cv_current - cv_previous
  round(delta, 4)
}
calculate_four_week_trend <- function(mean_ci_values) {
  #' Net CI change across the available weeks: last non-NA value minus the
  #' first, rounded to 2 decimals. Works with however many weeks are
  #' available (1-4); returns NA_real_ when fewer than two usable values
  #' remain after dropping NAs (NULL input included).
  clean <- mean_ci_values[!is.na(mean_ci_values)]
  if (length(clean) < 2) {
    return(NA_real_)
  }
  round(clean[length(clean)] - clean[1], 2)
}
categorize_cv_slope <- function(slope) {
  #' Categorize an 8-week CV regression slope into a field-uniformity label.
  #' Thresholds are the global CV_SLOPE_* constants (defined elsewhere).
  #' Returns NA_character_ for NA input.
  if (is.na(slope)) {
    return(NA_character_)
  }
  if (slope <= CV_SLOPE_IMPROVEMENT_MIN) {
    return("Excellent uniformity")
  }
  # FIX: the original had two consecutive branches that both returned
  # "Homogenous growth" (slope < HOMOGENOUS_MIN, then slope <= HOMOGENOUS_MAX).
  # They are merged here into one exactly-equivalent condition.
  if (slope < CV_SLOPE_HOMOGENOUS_MIN || slope <= CV_SLOPE_HOMOGENOUS_MAX) {
    return("Homogenous growth")
  }
  if (slope <= CV_SLOPE_PATCHINESS_MAX) {
    return("Minor patchiness")
  }
  return("Severe fragmentation")
}
calculate_cv_trend_long_term <- function(cv_values) {
  #' Long-term (8-week) CV trend: the slope of a simple linear regression of
  #' CV against week index, rounded to 4 decimals. NA values are dropped
  #' before fitting; returns NA_real_ with fewer than two usable points
  #' (NULL included) or when the fit itself fails.
  usable <- cv_values[!is.na(cv_values)]
  if (length(usable) < 2) {
    return(NA_real_)
  }
  week_index <- seq_along(usable)
  tryCatch({
    fit <- lm(usable ~ week_index)
    round(unname(coef(fit)["week_index"]), 4)
  }, error = function(e) NA_real_)
}
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
get_phase_by_age <- function(age_weeks) {
  # Map a crop age (in weeks) onto a growth phase using the global
  # PHASE_DEFINITIONS table (columns: phase, age_start, age_end; defined
  # elsewhere). The first matching row wins; "Unknown" when nothing matches;
  # NA_character_ for NA input.
  if (is.na(age_weeks)) {
    return(NA_character_)
  }
  hits <- which(age_weeks >= PHASE_DEFINITIONS$age_start &
                  age_weeks <= PHASE_DEFINITIONS$age_end)
  if (length(hits) == 0) {
    return("Unknown")
  }
  PHASE_DEFINITIONS$phase[hits[1]]
}
get_status_trigger <- function(ci_values, ci_change, age_weeks) {
  # Derive a status-trigger label for a field from its pixel CI values, the
  # week-over-week mean-CI change, and the crop age in weeks.
  # Returns NA_character_ when inputs are unusable or no rule fires.
  if (is.na(age_weeks) || length(ci_values) == 0) {
    return(NA_character_)
  }
  vals <- ci_values[!is.na(ci_values)]
  if (length(vals) == 0) {
    return(NA_character_)
  }
  n <- length(vals)
  pct_above_2 <- 100 * sum(vals > 2) / n
  pct_at_or_above_2 <- 100 * sum(vals >= 2) / n
  avg_ci <- mean(vals, na.rm = TRUE)
  # NOTE(review): sd() of a single value is NA, which would make the stress
  # check below error for 1-pixel fields with a large negative ci_change --
  # behavior kept as-is; confirm upstream guarantees multi-pixel input.
  coeff_var <- if (avg_ci > 0) sd(vals) / avg_ci else 0
  # Germination window: first 6 weeks after planting
  if (age_weeks >= 0 && age_weeks <= 6) {
    if (pct_at_or_above_2 >= 70) {
      return("germination_complete")
    }
    if (pct_above_2 > 10) {
      return("germination_started")
    }
  }
  # Remaining rules evaluated in priority order
  if (age_weeks >= 45) {
    return("harvest_ready")
  }
  if (age_weeks > 6 && !is.na(ci_change) && ci_change < -1.5 && coeff_var < 0.25) {
    return("stress_detected_whole_field")
  }
  if (age_weeks > 6 && !is.na(ci_change) && ci_change > 1.5) {
    return("strong_recovery")
  }
  if (age_weeks >= 4 && age_weeks < 39 && !is.na(ci_change) && ci_change > 0.2) {
    return("growth_on_track")
  }
  if (age_weeks >= 39 && age_weeks < 45 && avg_ci > 3.5) {
    return("maturation_progressing")
  }
  NA_character_
}
extract_planting_dates <- function(harvesting_data, field_boundaries_sf = NULL) {
  # Extract planting dates from harvest.xlsx (season_start column)
  # Returns: data.frame with columns (field_id, planting_date)
  #
  # harvesting_data: data.frame-like with at least columns `field` and
  #   `season_start` (assumed Date-like -- TODO confirm against the loader).
  # field_boundaries_sf: optional sf with a `field` column; only used to
  #   build an all-NA fallback frame when no harvesting data is available.
  # Returns NULL when harvesting_data is empty AND no boundaries were given,
  # or when the dplyr pipeline below errors.
  if (is.null(harvesting_data) || nrow(harvesting_data) == 0) {
    message("Warning: No harvesting data available - planting dates will be NA.")
    if (!is.null(field_boundaries_sf)) {
      # One NA planting date per known field so downstream joins still work
      return(data.frame(
        field_id = field_boundaries_sf$field,
        planting_date = rep(as.Date(NA), nrow(field_boundaries_sf)),
        stringsAsFactors = FALSE
      ))
    }
    return(NULL)
  }
  tryCatch({
    # Sort newest season first, then keep one row per field: the most recent
    # season_start becomes that field's planting date. Rows whose date is NA
    # are dropped at the end.
    planting_dates <- harvesting_data %>%
      arrange(field, desc(season_start)) %>%
      distinct(field, .keep_all = TRUE) %>%
      select(field, season_start) %>%
      rename(field_id = field, planting_date = season_start) %>%
      filter(!is.na(planting_date)) %>%
      as.data.frame()
    message(paste("Extracted planting dates for", nrow(planting_dates), "fields from harvest.xlsx"))
    return(planting_dates)
  }, error = function(e) {
    message(paste("Error extracting planting dates:", e$message))
    return(NULL)
  })
}
# ============================================================================
# MODULAR STATISTICS CALCULATION
# ============================================================================
calculate_field_statistics <- function(field_boundaries_sf, week_num, year,
                                       mosaic_dir, report_date = Sys.Date()) {
  # Calculate per-field CI statistics for one ISO week from weekly mosaics.
  #
  # field_boundaries_sf: sf polygons with a `field` id column
  # week_num, year: ISO week/year used to locate the mosaic files
  # mosaic_dir: directory containing week_WW_YYYY[_TT].tif files
  # report_date: accepted for interface compatibility (not used here)
  #
  # Returns a data.frame with one row per field (the first tile containing a
  # field wins): Field_id, Mean_CI, CV (%), CI_range, CI_Percentiles,
  # Pct_pixels_CI_gte_2, Cloud_pct_clear, Cloud_category.
  # Stops when no mosaics exist or no field yields any statistics.
  message(paste("Calculating statistics for all fields - Week", week_num, year))
  # Support both tile-based and single-file mosaics
  tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_num, year)
  single_file_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year)
  tile_files <- list.files(mosaic_dir, pattern = tile_pattern, full.names = TRUE)
  if (length(tile_files) == 0) {
    single_file <- list.files(mosaic_dir, pattern = single_file_pattern, full.names = TRUE)
    if (length(single_file) > 0) {
      message(paste(" Using single-file mosaic for week", week_num))
      tile_files <- single_file[1] # Use first match as single "tile"
    } else {
      stop(paste("No mosaic files found for week", week_num, year, "in", mosaic_dir))
    }
  }
  message(paste(" Found", length(tile_files), "mosaic file(s) for week", week_num))
  results_list <- list()
  seen_fields <- character(0) # field ids already emitted (first tile wins)
  fields_processed <- 0
  for (tile_idx in seq_along(tile_files)) {
    tile_file <- tile_files[tile_idx]
    tryCatch({
      current_rast <- terra::rast(tile_file)
      # BUGFIX: the original used `return(NULL)` to skip a tile without a CI
      # band, but a `return` inside a tryCatch expression exits the WHOLE
      # enclosing function -- one bad tile aborted all statistics. Guard with
      # a plain conditional so only this tile is skipped.
      if (!("CI" %in% names(current_rast))) {
        message(paste(" [SKIP] Tile", basename(tile_file), "- CI band not found"))
      } else {
        ci_band <- current_rast[["CI"]]
        extracted <- terra::extract(ci_band, field_boundaries_sf, na.rm = FALSE)
        unique_field_ids <- unique(extracted$ID[!is.na(extracted$ID)])
        for (field_poly_idx in unique_field_ids) {
          field_id <- field_boundaries_sf$field[field_poly_idx]
          # O(1) duplicate check (the original re-scanned results_list with
          # sapply() on every iteration)
          if (as.character(field_id) %in% seen_fields) {
            next
          }
          ci_vals <- extracted$CI[extracted$ID == field_poly_idx]
          ci_vals <- ci_vals[!is.na(ci_vals)]
          if (length(ci_vals) == 0) {
            next
          }
          mean_ci <- mean(ci_vals, na.rm = TRUE)
          ci_std <- sd(ci_vals, na.rm = TRUE)
          cv <- if (mean_ci > 0) ci_std / mean_ci else NA_real_
          range_str <- sprintf("%.1f-%.1f", min(ci_vals), max(ci_vals))
          ci_percentiles_str <- get_ci_percentiles(ci_vals)
          # Count pixels with CI >= 2 (germination threshold)
          GERMINATION_CI_THRESHOLD <- 2.0
          num_pixels_gte_2 <- sum(ci_vals >= GERMINATION_CI_THRESHOLD, na.rm = TRUE)
          num_pixels_total <- length(ci_vals)
          pct_pixels_gte_2 <- if (num_pixels_total > 0) round((num_pixels_gte_2 / num_pixels_total) * 100, 1) else 0
          # Cloud/coverage: share of this field's extracted pixels with data
          field_rows <- extracted[extracted$ID == field_poly_idx, ]
          num_total <- nrow(field_rows)
          num_data <- sum(!is.na(field_rows$CI))
          pct_clear <- if (num_total > 0) round((num_data / num_total) * 100, 1) else 0
          cloud_cat <- if (num_data == 0) "No image available"
          else if (pct_clear >= 95) "Clear view"
          else "Partial coverage"
          # Age_week, Phase and Germination_progress are derived in the main
          # script once actual planting dates are known
          results_list[[length(results_list) + 1]] <- data.frame(
            Field_id = field_id,
            Mean_CI = round(mean_ci, 2),
            CV = round(cv * 100, 2),
            CI_range = range_str,
            CI_Percentiles = ci_percentiles_str,
            Pct_pixels_CI_gte_2 = pct_pixels_gte_2,
            Cloud_pct_clear = pct_clear,
            Cloud_category = cloud_cat,
            stringsAsFactors = FALSE
          )
          seen_fields <- c(seen_fields, as.character(field_id))
          fields_processed <- fields_processed + 1
        }
        message(paste(" Tile", tile_idx, "of", length(tile_files), "processed"))
      }
    }, error = function(e) {
      message(paste(" [ERROR] Tile", basename(tile_file), ":", e$message))
    })
  }
  if (length(results_list) == 0) {
    stop(paste("No fields processed successfully for week", week_num))
  }
  stats_df <- dplyr::bind_rows(results_list)
  message(paste(" ✓ Successfully calculated statistics for", nrow(stats_df), "fields"))
  return(stats_df)
}
# ============================================================================
# CALCULATE KPI TRENDS
# ============================================================================
calculate_kpi_trends <- function(current_stats, prev_stats = NULL,
                                 project_dir = NULL, reports_dir = NULL,
                                 current_week = NULL, year = NULL) {
  # Enrich current-week field stats with trend KPI columns:
  #   Weekly_ci_change      - Mean_CI delta vs previous week
  #   CV_Trend_Short_Term   - CV delta vs previous week
  #   Four_week_trend       - net Mean_CI change over up to 4 prior weeks
  #   CV_Trend_Long_Term    - CV regression slope over up to 8 prior weeks
  #   nmr_of_weeks_analysed - running count carried over from the most recent
  #                           field_analysis CSV (defaults to 1)
  # Historical weeks are read from cached RDS files under
  # reports_dir/kpis/field_stats/. Returns current_stats with the columns
  # above filled where data allows.
  message("Calculating KPI trends from current and previous week data")
  # Initialize all trend columns so every row has them even without history
  current_stats$Weekly_ci_change <- NA_real_
  current_stats$CV_Trend_Short_Term <- NA_real_
  current_stats$Four_week_trend <- NA_real_
  current_stats$CV_Trend_Long_Term <- NA_real_
  current_stats$nmr_of_weeks_analysed <- 1L
  if (is.null(prev_stats) || nrow(prev_stats) == 0) {
    message(" No previous week data available - using defaults")
    return(current_stats)
  }
  message(paste(" prev_stats has", nrow(prev_stats), "rows and", ncol(prev_stats), "columns"))
  # Named row-index vector: prev_lookup["<Field_id>"] -> row number in prev_stats
  prev_lookup <- setNames(seq_len(nrow(prev_stats)), prev_stats$Field_id)
  # The most recent field_analysis CSV supplies the nmr_of_weeks_analysed counter
  prev_field_analysis <- NULL
  tryCatch({
    analysis_dir <- file.path(reports_dir, "kpis", "field_analysis")
    if (dir.exists(analysis_dir)) {
      analysis_files <- list.files(analysis_dir, pattern = "_field_analysis_week.*\\.csv$", full.names = TRUE)
      if (length(analysis_files) > 0) {
        # Pick the newest file by modification time
        recent_file <- analysis_files[which.max(file.info(analysis_files)$mtime)]
        prev_field_analysis <- readr::read_csv(recent_file, show_col_types = FALSE,
                                               col_select = c(Field_id, nmr_of_weeks_analysed, Phase))
      }
    }
  }, error = function(e) {
    message(paste(" Note: Could not load previous field_analysis for nmr_weeks tracking:", e$message))
  })
  if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) {
    message(paste(" Using previous field_analysis to track nmr_of_weeks_analysed"))
  }
  # Load cached weekly stats for the 4-week (CI) and 8-week (CV) windows
  historical_4weeks <- list()
  historical_8weeks <- list()
  if (!is.null(project_dir) && !is.null(reports_dir) && !is.null(current_week)) {
    message(" Loading historical field_stats for 4-week and 8-week trends...")
    for (lookback in 1:4) {
      # NOTE(review): fixed "+52" wrap assumes 52-week ISO years; 53-week
      # years (e.g. 2020) would be mis-numbered -- confirm acceptable here.
      target_week <- current_week - lookback
      target_year <- year
      if (target_week < 1) {
        target_week <- target_week + 52
        target_year <- target_year - 1
      }
      rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
      rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename)
      if (file.exists(rds_path)) {
        tryCatch({
          stats_data <- readRDS(rds_path)
          historical_4weeks[[length(historical_4weeks) + 1]] <- list(
            week = target_week,
            stats = stats_data
          )
        }, error = function(e) {
          message(paste(" Warning: Could not load week", target_week, ":", e$message))
        })
      }
    }
    # Same backwards walk over 8 weeks for the long-term CV window
    for (lookback in 1:8) {
      target_week <- current_week - lookback
      target_year <- year
      if (target_week < 1) {
        target_week <- target_week + 52
        target_year <- target_year - 1
      }
      rds_filename <- sprintf("%s_field_stats_week%02d_%d.rds", project_dir, target_week, target_year)
      rds_path <- file.path(reports_dir, "kpis", "field_stats", rds_filename)
      if (file.exists(rds_path)) {
        tryCatch({
          stats_data <- readRDS(rds_path)
          historical_8weeks[[length(historical_8weeks) + 1]] <- list(
            week = target_week,
            stats = stats_data
          )
        }, error = function(e) {
          # Silently skip
        })
      }
    }
    if (length(historical_4weeks) > 0) {
      message(paste(" Loaded", length(historical_4weeks), "weeks for 4-week trend"))
    }
    if (length(historical_8weeks) > 0) {
      message(paste(" Loaded", length(historical_8weeks), "weeks for 8-week CV trend"))
    }
  }
  cv_trends_calculated <- 0
  four_week_trends_calculated <- 0
  cv_long_term_calculated <- 0
  for (i in seq_len(nrow(current_stats))) {
    field_id <- current_stats$Field_id[i]
    prev_idx <- prev_lookup[field_id]
    # NOTE(review): every trend below (including the 4/8-week ones) is only
    # computed when the field also existed in prev_stats; fields newly
    # appearing this week keep all-NA trends even if older history exists --
    # confirm this is intended.
    if (!is.na(prev_idx) && prev_idx > 0 && prev_idx <= nrow(prev_stats)) {
      prev_row <- prev_stats[prev_idx, , drop = FALSE]
      prev_ci <- prev_row$Mean_CI[1]
      if (!is.na(prev_ci) && !is.na(current_stats$Mean_CI[i])) {
        current_stats$Weekly_ci_change[i] <-
          round(current_stats$Mean_CI[i] - prev_ci, 2)
      }
      prev_cv <- prev_row$CV[1]
      if (!is.na(prev_cv) && !is.na(current_stats$CV[i])) {
        current_stats$CV_Trend_Short_Term[i] <-
          calculate_cv_trend(current_stats$CV[i], prev_cv)
        cv_trends_calculated <- cv_trends_calculated + 1
      }
      if (length(historical_4weeks) > 0) {
        # Build an oldest-to-newest CI series (historical lists are loaded
        # newest-first, hence rev()), ending with the current week's value
        ci_values_4week <- numeric()
        for (hist_idx in rev(seq_along(historical_4weeks))) {
          hist_data <- historical_4weeks[[hist_idx]]$stats
          hist_field <- which(hist_data$Field_id == field_id)
          if (length(hist_field) > 0 && !is.na(hist_data$Mean_CI[hist_field[1]])) {
            ci_values_4week <- c(ci_values_4week, hist_data$Mean_CI[hist_field[1]])
          }
        }
        ci_values_4week <- c(ci_values_4week, current_stats$Mean_CI[i])
        if (length(ci_values_4week) >= 2) {
          current_stats$Four_week_trend[i] <- calculate_four_week_trend(ci_values_4week)
          four_week_trends_calculated <- four_week_trends_calculated + 1
        }
      }
      if (length(historical_8weeks) > 0) {
        # Same construction for the 8-week CV series
        cv_values_8week <- numeric()
        for (hist_idx in rev(seq_along(historical_8weeks))) {
          hist_data <- historical_8weeks[[hist_idx]]$stats
          hist_field <- which(hist_data$Field_id == field_id)
          if (length(hist_field) > 0 && !is.na(hist_data$CV[hist_field[1]])) {
            cv_values_8week <- c(cv_values_8week, hist_data$CV[hist_field[1]])
          }
        }
        cv_values_8week <- c(cv_values_8week, current_stats$CV[i])
        if (length(cv_values_8week) >= 2) {
          slope <- calculate_cv_trend_long_term(cv_values_8week)
          current_stats$CV_Trend_Long_Term[i] <- slope
          cv_long_term_calculated <- cv_long_term_calculated + 1
        }
      }
      if (!is.null(prev_field_analysis) && nrow(prev_field_analysis) > 0) {
        # Inside filter(), Field_id comes from the data and field_id resolves
        # from the calling environment (no lowercase column of that name)
        prev_analysis_row <- prev_field_analysis %>%
          dplyr::filter(Field_id == field_id)
        if (nrow(prev_analysis_row) > 0) {
          prev_nmr_weeks_analysis <- prev_analysis_row$nmr_of_weeks_analysed[1]
          # Only increment nmr_of_weeks_analysed if we have previous data
          if (!is.na(prev_nmr_weeks_analysis)) {
            current_stats$nmr_of_weeks_analysed[i] <- prev_nmr_weeks_analysis + 1L
          } else {
            current_stats$nmr_of_weeks_analysed[i] <- 1L
          }
        }
      }
    }
  }
  message(paste(" ✓ Calculated CV_Trend_Short_Term:", cv_trends_calculated, "fields"))
  message(paste(" ✓ Calculated Four_week_trend:", four_week_trends_calculated, "fields"))
  message(paste(" ✓ Calculated CV_Trend_Long_Term:", cv_long_term_calculated, "fields"))
  return(current_stats)
}
# ============================================================================
# LOAD PER-FIELD DAILY RDS DATA (NEW ARCHITECTURE)
# ============================================================================
#' Load per-field daily CI data from daily_vals/ directory
#'
#' Reads per-field daily RDS files (output from Script 20):
#' daily_vals/{FIELD}/{YYYY-MM-DD}.rds
#'
#' Filters to dates falling inside the requested ISO week (Monday-Sunday)
#' and returns the combined rows for all fields, with a `Date` column added
#' from each filename.
#'
#' @param week_num ISO week number (1-53)
#' @param year ISO week year
#' @param daily_vals_dir Directory containing daily_vals/{FIELD}/ structure
#' @param field_boundaries_sf Field boundaries (accepted for interface
#'   compatibility; not used here)
#' @return Data frame of per-field daily rows, or NULL (with a warning) when
#'   the directory, field subfolders, or in-week files are missing
#'
load_per_field_daily_rds <- function(week_num, year, daily_vals_dir, field_boundaries_sf = NULL) {
  if (!dir.exists(daily_vals_dir)) {
    warning(paste("daily_vals directory not found:", daily_vals_dir))
    return(NULL)
  }
  # Determine the Monday-Sunday range of the requested ISO week.
  # Jan 4 always lies in ISO week 1; %u gives the ISO weekday (1 = Monday),
  # so subtracting (%u - 1) lands on the Monday of week 1.
  # BUGFIX: the original multiplied by 86400 (seconds per day), but R's Date
  # class counts in DAYS -- the window was pushed centuries into the future
  # and never matched any file.
  jan_4 <- as.Date(paste0(year, "-01-04"))
  week1_monday <- jan_4 - (as.numeric(format(jan_4, "%u")) - 1)
  week_start <- week1_monday + (week_num - 1) * 7
  week_end <- week_start + 6
  # List all field directories
  field_dirs <- list.dirs(daily_vals_dir, full.names = FALSE, recursive = FALSE)
  if (length(field_dirs) == 0) {
    warning(paste("No field subdirectories found in:", daily_vals_dir))
    return(NULL)
  }
  # Collect per-file data frames and bind once (avoids rbind-in-loop copies)
  chunks <- list()
  for (field in field_dirs) {
    field_path <- file.path(daily_vals_dir, field)
    rds_files <- list.files(field_path, pattern = "\\.rds$", full.names = TRUE)
    for (rds_file in rds_files) {
      # The filename encodes the observation date: {FIELD}/{YYYY-MM-DD}.rds
      date_str <- tools::file_path_sans_ext(basename(rds_file))
      file_date <- tryCatch(as.Date(date_str), error = function(e) NA)
      if (is.na(file_date) || file_date < week_start || file_date > week_end) {
        next
      }
      tryCatch({
        rds_data <- readRDS(rds_file)
        rds_data$Date <- file_date
        chunks[[length(chunks) + 1]] <- rds_data
      }, error = function(e) {
        warning(paste("Could not load RDS file:", rds_file, "-", e$message))
      })
    }
  }
  if (length(chunks) == 0) {
    warning(paste("No RDS data found for week", week_num, "in", daily_vals_dir))
    return(NULL)
  }
  do.call(rbind, chunks)
}
# ============================================================================
# LOAD OR CALCULATE WEEKLY STATISTICS
# ============================================================================
load_or_calculate_weekly_stats <- function(week_num, year, project_dir, field_boundaries_sf,
                                           mosaic_dir, reports_dir, report_date = Sys.Date()) {
  # Return cached weekly field statistics when an RDS already exists;
  # otherwise compute them from the mosaics via calculate_field_statistics()
  # and persist both RDS and CSV copies under reports_dir/kpis/field_stats/.
  stats_dir <- file.path(reports_dir, "kpis", "field_stats")
  base_name <- sprintf("%s_field_stats_week%02d_%d", project_dir, week_num, year)
  rds_path <- file.path(stats_dir, paste0(base_name, ".rds"))
  if (file.exists(rds_path)) {
    message(paste("Loading cached statistics from:", basename(rds_path)))
    return(readRDS(rds_path))
  }
  message(paste("Cached RDS not found, calculating statistics from tiles for week", week_num))
  stats_df <- calculate_field_statistics(field_boundaries_sf, week_num, year,
                                         mosaic_dir, report_date)
  if (!dir.exists(stats_dir)) {
    dir.create(stats_dir, recursive = TRUE, showWarnings = FALSE)
  }
  saveRDS(stats_df, rds_path)
  message(paste("Saved weekly statistics RDS:", basename(rds_path)))
  csv_path <- file.path(stats_dir, paste0(base_name, ".csv"))
  readr::write_csv(stats_df, csv_path)
  message(paste("Saved weekly statistics CSV:", basename(csv_path)))
  return(stats_df)
}
load_historical_field_data <- function(project_dir, current_week, current_year, reports_dir, num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL, daily_vals_dir = NULL) {
  # NEW ARCHITECTURE: Try per-field daily RDS first.
  # If not available, fall back to the consolidated loader.
  #
  # Returns a list of list(week, year, data) entries (most recent first),
  # or NULL when nothing could be loaded.
  # Determine daily_vals_dir if not provided
  if (is.null(daily_vals_dir)) {
    daily_vals_dir <- file.path("laravel_app", "storage", "app", project_dir, "Data", "extracted_ci", "daily_vals")
  }
  message(paste("Loading historical data from:", ifelse(dir.exists(daily_vals_dir), "per-field daily RDS", "consolidated RDS")))
  # FIX: the message above always advertised a consolidated-RDS fallback,
  # but the original never invoked one -- with no daily_vals directory it
  # simply failed with "No historical data found". Delegate to the
  # consolidated loader (defined below in this file) in that case.
  if (!dir.exists(daily_vals_dir)) {
    return(load_historical_field_data_consolidated(
      project_dir, current_week, current_year, reports_dir,
      num_weeks = num_weeks, auto_generate = auto_generate,
      field_boundaries_sf = field_boundaries_sf
    ))
  }
  historical_data <- list()
  loaded_weeks <- c()
  missing_weeks <- c()
  message(paste(" Attempting to load from per-field RDS in:", daily_vals_dir))
  for (lookback in 0:(num_weeks - 1)) {
    # Walk backwards with proper ISO year wrapping
    target <- calculate_target_week_and_year(current_week, current_year, offset_weeks = lookback)
    target_week <- target$week
    target_year <- target$year
    per_field_data <- load_per_field_daily_rds(target_week, target_year, daily_vals_dir, field_boundaries_sf)
    week_label <- paste0("week", sprintf("%02d_%d", target_week, target_year))
    if (!is.null(per_field_data) && nrow(per_field_data) > 0) {
      # Aggregate daily per-field rows to one row per field for the week.
      # (Assumes per-field RDS carries ci_mean/ci_sd columns, as written by
      # Script 20.)
      week_stats <- per_field_data %>%
        dplyr::group_by(field) %>%
        dplyr::summarise(
          Field_id = dplyr::first(field),
          Mean_CI = mean(ci_mean, na.rm = TRUE),
          CI_SD = mean(ci_sd, na.rm = TRUE),
          CV = mean(ci_sd / ci_mean, na.rm = TRUE),
          .groups = "drop"
        )
      # FIX: append sequentially instead of indexing by lookback + 1, which
      # left NULL holes (and inflated length()) when a middle week was missing
      historical_data[[length(historical_data) + 1]] <- list(
        week = target_week,
        year = target_year,
        data = week_stats
      )
      loaded_weeks <- c(loaded_weeks, week_label)
    } else {
      missing_weeks <- c(missing_weeks, week_label)
    }
  }
  if (length(historical_data) == 0) {
    message(paste("Error: No historical data found"))
    return(NULL)
  }
  message(paste("✓ Loaded", length(historical_data), "weeks:", paste(loaded_weeks, collapse = ", ")))
  return(historical_data)
}
#' [OLD CONSOLIDATED RDS FALLBACK - KEPT FOR REFERENCE]
#' This function is now replaced by per-field RDS loading above.
#' Keeping it as a comment for potential fallback logic.
load_historical_field_data_consolidated <- function(project_dir, current_week, current_year, reports_dir, num_weeks = 4, auto_generate = TRUE, field_boundaries_sf = NULL) {
  # Consolidated-CSV loader for historical per-field analysis data.
  # Looks for {project}_field_analysis_weekWW_YYYY.csv files covering the
  # last num_weeks weeks. When any are missing and auto_generate = TRUE, it
  # scans the weekly_tile_max mosaic store (a GLOBAL path expected to be
  # defined by the calling script) for the last 8 weeks and recomputes +
  # caches weekly stats via load_or_calculate_weekly_stats() instead.
  # Returns a list of list(week, year, data) entries, or NULL when nothing
  # could be loaded or generated.
  historical_data <- list()
  loaded_weeks <- c()
  missing_weeks <- c()
  for (lookback in 0:(num_weeks - 1)) {
    # Calculate target week and year using authoritative helper (handles year boundaries)
    target <- calculate_target_week_and_year(current_week, current_year, offset_weeks = lookback)
    target_week <- target$week
    target_year <- target$year
    # Construct filename with BOTH week and year (proper ISO format)
    csv_filename <- paste0(project_dir, "_field_analysis_week", sprintf("%02d_%d", target_week, target_year), ".csv")
    csv_path <- file.path(reports_dir, "kpis", "field_analysis", csv_filename)
    if (file.exists(csv_path)) {
      tryCatch({
        data <- read_csv(csv_path, show_col_types = FALSE)
        # NOTE(review): indexing by lookback + 1 can leave NULL holes when an
        # earlier offset was missing; length() checks below then count those
        # holes too -- confirm downstream consumers tolerate NULL entries.
        historical_data[[lookback + 1]] <- list(
          week = target_week,
          year = target_year,
          data = data
        )
        loaded_weeks <- c(loaded_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year)))
      }, error = function(e) {
        message(paste(" Warning: Could not load week", target_week, "/", target_year, ":", e$message))
        # <<- reaches the function-frame missing_weeks from inside the handler
        missing_weeks <<- c(missing_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year)))
      })
    } else {
      missing_weeks <- c(missing_weeks, paste0("week", sprintf("%02d_%d", target_week, target_year)))
    }
  }
  if (length(missing_weeks) > 0 && auto_generate) {
    message(paste("⚠ Missing weeks:", paste(missing_weeks, collapse = ", ")))
    message("Scanning for ALL available weekly mosaics and calculating stats...\n")
    if (is.null(field_boundaries_sf)) {
      message(" Error: field_boundaries_sf not provided - cannot auto-generate")
      return(historical_data)
    }
    # weekly_tile_max is a global path set by the pipeline configuration
    if (!exists("weekly_tile_max")) {
      message(" ✗ weekly_tile_max path not defined")
      return(historical_data)
    }
    # Prefer the 5x5 grid subdirectory when it contains tiles
    check_paths <- c(file.path(weekly_tile_max, "5x5"), weekly_tile_max)
    mosaic_scan_dir <- NA
    for (check_path in check_paths) {
      if (dir.exists(check_path)) {
        tif_files <- list.files(check_path, pattern = "week_.*\\.tif$", full.names = TRUE)
        if (length(tif_files) > 0) {
          mosaic_scan_dir <- check_path
          break
        }
      }
    }
    if (is.na(mosaic_scan_dir)) {
      message(" ✗ No mosaic files found in weekly_tile_max")
      return(historical_data)
    }
    # Build the set of expected ISO (week, year) pairs for the last 8 weeks
    weeks_to_load <- 8
    today <- Sys.Date()
    target_dates <- today - (0:(weeks_to_load - 1)) * 7
    expected_weeks <- data.frame(
      date = target_dates,
      week = as.numeric(format(target_dates, "%V")),
      year = as.numeric(format(target_dates, "%G")),
      stringsAsFactors = FALSE
    )
    expected_weeks <- unique(expected_weeks)
    message(paste(" Expected weeks (last 8 from", format(today, "%Y-%m-%d"), "):"))
    for (i in seq_len(nrow(expected_weeks))) {
      message(paste(" Week", sprintf("%02d", expected_weeks$week[i]), expected_weeks$year[i]))
    }
    message("")
    # Parse week/year out of the tile filenames present on disk
    tif_files <- list.files(mosaic_scan_dir, pattern = "week_([0-9]{2})_([0-9]{4})_[0-9]{2}\\.tif$",
                            full.names = FALSE)
    available_weeks <- data.frame()
    for (filename in tif_files) {
      matches <- regmatches(filename, gregexpr("week_([0-9]{2})_([0-9]{4})", filename))[[1]]
      if (length(matches) > 0) {
        week_year <- strsplit(matches[1], "_")[[1]]
        if (length(week_year) == 3) {
          week_num <- as.numeric(week_year[2])
          year_num <- as.numeric(week_year[3])
          # NOTE(review): membership is checked per-column (week %in% ... AND
          # year %in% ...), not as a pair; the merge() below drops any
          # cross-year combinations again, so the end result is pairwise.
          if (week_num %in% expected_weeks$week && year_num %in% expected_weeks$year) {
            available_weeks <- rbind(available_weeks,
                                     data.frame(week = week_num, year = year_num))
          }
        }
      }
    }
    available_weeks <- unique(available_weeks)
    # Keep only true (week, year) pairs and attach their report date
    available_weeks <- merge(available_weeks, expected_weeks[, c("week", "year", "date")], by = c("week", "year"))
    available_weeks <- available_weeks[order(available_weeks$date, decreasing = TRUE), ]
    if (nrow(available_weeks) == 0) {
      message(" ✗ No matching mosaic files found")
      message(paste(" Scanned directory:", mosaic_scan_dir))
      return(historical_data)
    }
    message(paste(" Found", nrow(available_weeks), "week(s) with available mosaics:"))
    for (i in seq_len(nrow(available_weeks))) {
      week_to_calc <- available_weeks$week[i]
      year_to_calc <- available_weeks$year[i]
      date_to_calc <- available_weeks$date[i]
      tile_pattern <- sprintf("week_%02d_%d_([0-9]{2})\\.tif", week_to_calc, year_to_calc)
      tile_files <- list.files(mosaic_scan_dir, pattern = tile_pattern, full.names = TRUE)
      if (length(tile_files) == 0) {
        message(paste(" ✗ Week", sprintf("%02d", week_to_calc), year_to_calc, "- no tiles found"))
        next
      }
      message(paste(" ✓ Week", sprintf("%02d", week_to_calc), year_to_calc, "-", length(tile_files), "mosaics"))
      tryCatch({
        # Computes (or loads cached) stats; writes RDS/CSV as a side effect
        week_stats <- load_or_calculate_weekly_stats(
          week_num = week_to_calc,
          year = year_to_calc,
          project_dir = project_dir,
          field_boundaries_sf = field_boundaries_sf,
          mosaic_dir = mosaic_scan_dir,
          reports_dir = reports_dir,
          report_date = date_to_calc
        )
        if (!is.null(week_stats) && nrow(week_stats) > 0) {
          message(paste(" ✓ Calculated stats for", nrow(week_stats), "fields"))
          historical_data[[length(historical_data) + 1]] <- list(
            week = week_to_calc,
            year = year_to_calc,
            data = week_stats
          )
          loaded_weeks <- c(loaded_weeks, paste0(week_to_calc, "_", year_to_calc))
        }
      }, error = function(e) {
        message(paste(" ✗ Error:", e$message))
      })
    }
  }
  if (length(historical_data) == 0) {
    message(paste("Error: No historical field data found and could not auto-generate weeks"))
    return(NULL)
  }
  message(paste("✓ Loaded", length(historical_data), "weeks of historical data:",
                paste(loaded_weeks, collapse = ", ")))
  return(historical_data)
}
# ============================================================================
# HELPER: Extract field-level statistics from CI raster
# ============================================================================
extract_field_statistics_from_ci <- function(ci_band, field_boundaries_sf) {
  #' Extract per-field CI summary statistics from a single CI raster band.
  #' Returns one row per polygon in field_boundaries_sf (field_idx preserves
  #' polygon order): mean, CV, p10/p90 (probabilities from the global
  #' CI_PERCENTILE_LOW/HIGH constants), min/max, and pixel counts. Polygons
  #' with no valid pixels get an all-NA row with zero counts.
  extract_result <- terra::extract(ci_band, field_boundaries_sf)
  stats_list <- lapply(seq_len(nrow(field_boundaries_sf)), function(poly_idx) {
    raw_vals <- extract_result[extract_result$ID == poly_idx, 2]
    vals <- as.numeric(raw_vals[!is.na(raw_vals)])
    if (length(vals) == 0) {
      return(data.frame(
        field_idx = poly_idx,
        mean_ci = NA_real_,
        cv = NA_real_,
        p10 = NA_real_,
        p90 = NA_real_,
        min_ci = NA_real_,
        max_ci = NA_real_,
        pixel_count_valid = 0,
        pixel_count_total = 0,
        stringsAsFactors = FALSE
      ))
    }
    avg <- mean(vals, na.rm = TRUE)
    data.frame(
      field_idx = poly_idx,
      mean_ci = avg,
      cv = if (avg > 0) sd(vals, na.rm = TRUE) / avg else NA_real_,
      p10 = quantile(vals, probs = CI_PERCENTILE_LOW, na.rm = TRUE)[[1]],
      p90 = quantile(vals, probs = CI_PERCENTILE_HIGH, na.rm = TRUE)[[1]],
      min_ci = min(vals, na.rm = TRUE),
      max_ci = max(vals, na.rm = TRUE),
      pixel_count_valid = length(vals),
      pixel_count_total = nrow(extract_result[extract_result$ID == poly_idx, ]),
      stringsAsFactors = FALSE
    )
  })
  return(dplyr::bind_rows(stats_list))
}
# ============================================================================
# COMMENTED OUT / UNUSED FUNCTIONS (kept for future use)
# ============================================================================
# analyze_single_field <- function(field_idx, field_boundaries_sf, tile_grid, week_num, year,
# mosaic_dir, historical_data = NULL, planting_dates = NULL,
# report_date = Sys.Date(), harvest_imminence_data = NULL,
# harvesting_data = NULL) {
# # [Function kept as reference for parallel field analysis]
# # Currently replaced by calculate_field_statistics() for efficiency
# }