# ==============================================================================
# 10_CREATE_PER_FIELD_TIFFS_UTILS.R
# ==============================================================================
# UTILITY FUNCTIONS FOR SCRIPT 10: Per-Field TIFF Creation
#
# PURPOSE:
#   Extracted helper functions specific to Script 10 (per-field TIFF splitting).
#   These functions are domain-specific (not generic) and handle raster cropping
#   and batch processing of satellite imagery for per-field storage.
#
# FUNCTIONS:
#   1. crop_tiff_to_fields() — Crop a single TIFF to field boundaries
#   2. process_new_merged_tif() — Batch process all new merged TIFFs into per-field structure
#
# DEPENDENCIES:
#   - terra (raster operations)
#   - sf (geometry operations)
#   - 00_common_utils.R (logging via safe_log())
#
# USAGE:
#   source(here::here("r_app", "10_create_per_field_tiffs_utils.R"))
#
# ==============================================================================

library(terra)
library(sf)

# ==============================================================================
#' Crop Single TIFF to Field Boundaries
#'
#' Loads a single satellite TIFF, finds overlapping field boundaries, and writes
#' cropped per-field versions to output directory. Handles CRS reprojection and
#' idempotent file creation (skips existing files).
#'
#' @param tif_path Character. Absolute path to source TIFF file (4-band or 5-band).
#' @param tif_date Character. Date string (YYYY-MM-DD) for naming output files.
#' @param fields sf object. GeoDataFrame of field boundaries with 'field_name' column.
#' @param output_base_dir Character. Base directory where per-field TIFFs are stored
#'                                   (e.g., "field_tiles" or "field_tiles_CI").
#'
#' @return List with elements:
#'   - created: Integer. Number of field TIFFs created
#'   - skipped: Integer. Number of files that already existed
#'   - errors: Integer. Number of fields that failed to crop
#'
#' @details
#'   **Output Structure:**
#'   Creates files at: {output_base_dir}/{field_name}/{tif_date}.tif
#'
#'   **CRS Handling:**
#'   Automatically reprojects field boundaries to match raster CRS before cropping.
#'
#'   **Idempotency:**
#'   If output file already exists, skips writing (counted as "skipped").
#'   Safe to re-run without duplicate file creation.
#'
#' @examples
#' \dontrun{
#'   fields <- load_field_boundaries("path/to/pivot.geojson")
#'   result <- crop_tiff_to_fields(
#'     tif_path = "merged_tif/2024-01-15.tif",
#'     tif_date = "2024-01-15",
#'     fields = fields,
#'     output_base_dir = "field_tiles"
#'   )
#'   cat(sprintf("Created: %d, Skipped: %d, Errors: %d\n",
#'       result$created, result$skipped, result$errors))
#' }
#'
crop_tiff_to_fields <- function(tif_path, tif_date, fields, output_base_dir) {

  created <- 0
  skipped <- 0
  errors <- 0

  # Load raster
  if (!file.exists(tif_path)) {
    safe_log(paste("ERROR: TIFF not found:", tif_path), "ERROR")
    return(list(created = 0, skipped = 0, errors = 1))
  }

  rast <- tryCatch({
    rast(tif_path)
  }, error = function(e) {
    safe_log(paste("ERROR loading raster:", e$message), "ERROR")
    return(NULL)
  })

  if (is.null(rast)) {
    return(list(created = 0, skipped = 0, errors = 1))
  }

  # Create raster bounding box in raster CRS
  rast_bbox <- st_as_sfc(st_bbox(rast))
  st_crs(rast_bbox) <- st_crs(rast)

  # Reproject fields to match raster CRS
  fields_reprojected <- st_transform(fields, st_crs(rast_bbox))

  # Find which fields intersect this raster (CRITICAL: raster bbox first, then fields)
  overlapping_indices <- st_intersects(rast_bbox, fields_reprojected, sparse = TRUE)
  overlapping_indices <- unique(unlist(overlapping_indices))

  if (length(overlapping_indices) == 0) {
    safe_log(paste("No fields intersect TIFF:", basename(tif_path)), "INFO")
    return(list(created = 0, skipped = 0, errors = 0))
  }

  # Process each overlapping field
  for (field_idx in overlapping_indices) {
    field_name <- fields$field[field_idx]
    field_geom <- fields_reprojected[field_idx, ]

    # Create field directory
    field_dir <- file.path(output_base_dir, field_name)
    if (!dir.exists(field_dir)) {
      dir.create(field_dir, recursive = TRUE, showWarnings = FALSE)
    }

    # Output file path
    output_path <- file.path(field_dir, paste0(tif_date, ".tif"))

    # Check if file already exists (idempotent)
    if (file.exists(output_path)) {
      skipped <- skipped + 1
      next
    }

    # Crop raster to field boundary
    tryCatch({
      field_rast <- crop(rast, field_geom)

      # Skip empty tiles: cloud-masked pixels are stored as 0 in uint16
      # (NaN cannot be represented in that format). A band sum of 0 means
      # no satellite data was captured for this field on this date.
      band_sums <- terra::global(field_rast, fun = "sum", na.rm = TRUE)
      if (sum(band_sums$sum, na.rm = TRUE) == 0) {
        safe_log(paste("SKIP (no data):", field_name, tif_date), "WARNING")
        skipped <- skipped + 1
      } else {
        writeRaster(field_rast, output_path, overwrite = TRUE)
        created <- created + 1
      }
    }, error = function(e) {
      safe_log(paste("ERROR cropping field", field_name, ":", e$message), "ERROR")
      errors <<- errors + 1
    })
  }

  return(list(created = created, skipped = skipped, errors = errors))
}

# ==============================================================================
#' Process New Merged TIFFs into Per-Field Structure
#'
#' Batch processes all new 4-band raw satellite TIFFs from merged_tif/ directory.
#' Crops each TIFF to field boundaries and stores results in per-field subdirectories.
#' Supports migration mode: skips dates already processed and migrated to field_tiles_CI/.
#'
#' @param merged_tif_dir Character. Source directory containing raw merged TIFFs
#'                                   (e.g., "merged_tif/").
#' @param field_tiles_dir Character. Target directory for per-field TIFFs
#'                                   (e.g., "field_tiles/").
#' @param fields sf object. GeoDataFrame of field boundaries with 'field_name' column.
#' @param field_tiles_ci_dir Character. Optional. DEPRECATED - no longer used.
#'                                       Kept for backward compatibility but ignored.
#'                                       Script 10 now only checks its own output
#'                                       directory (field_tiles/).
#' @param end_date Date. Optional. End date for processing window (YYYY-MM-DD).
#'                       Default: NULL (process all available dates).
#' @param offset Integer. Optional. Number of days to look back from end_date.
#'               Only used if end_date is also provided.
#'               Default: NULL (process all available dates).
#'
#' @return List with elements:
#'   - total_created: Integer. Total field TIFFs created across all dates
#'   - total_skipped: Integer. Total files that already existed
#'   - total_errors: Integer. Total cropping errors across all dates
#'
#' @details
#'   **Migration Logic:**
#'   When migration phase is active (field_tiles_CI/ contains CI-calculated TIFFs),
#'   this function detects which dates have been migrated and skips reprocessing them.
#'   This prevents redundant work during the per-field architecture transition.
#'
#'   **Date Detection:**
#'   Finds all files in merged_tif_dir matching pattern: YYYY-MM-DD.tif
#'
#'   **Output Structure:**
#'   Creates per-field subdirectories and stores: field_tiles/{FIELD}/{DATE}.tif
#'
#' @examples
#' \dontrun{
#'   fields <- load_field_boundaries("path/to/pivot.geojson")
#'   result <- process_new_merged_tif(
#'     merged_tif_dir = "merged_tif",
#'     field_tiles_dir = "field_tiles",
#'     fields = fields,
#'     field_tiles_ci_dir = "field_tiles_CI"
#'   )
#'   cat(sprintf("Processing complete: created=%d, skipped=%d, errors=%d\n",
#'       result$total_created, result$total_skipped, result$total_errors))
#' }
#'
process_new_merged_tif <- function(merged_tif_dir, field_tiles_dir, fields, field_tiles_ci_dir = NULL,
                                  end_date = NULL, offset = NULL) {

  safe_log("\n========================================", "INFO")
  safe_log("PHASE 2: PROCESSING NEW DOWNLOADS", "INFO")
  safe_log("========================================", "INFO")

  # Check if download directory exists
  if (!dir.exists(merged_tif_dir)) {
    safe_log("No merged_tif/ directory found - no new data to process", "WARNING")
    return(list(total_created = 0, total_skipped = 0, total_errors = 0))
  }

  # Create output directory
  if (!dir.exists(field_tiles_dir)) {
    dir.create(field_tiles_dir, recursive = TRUE, showWarnings = FALSE)
  }

  # Find all date-pattern TIFFs in root of merged_tif
  tiff_files <- list.files(
    merged_tif_dir,
    pattern = "^[0-9]{4}-[0-9]{2}-[0-9]{2}\\.tif$",
    full.names = TRUE
  )

  # FILTER by date window if end_date and offset provided
  if (!is.null(end_date) && !is.null(offset)) {
    start_date <- end_date - offset
    date_range <- seq(start_date, end_date, by = "day")
    date_range_str <- format(date_range, "%Y-%m-%d")
    
    # Extract dates from filenames and filter
    tiff_dates <- gsub("\\.tif$", "", basename(tiff_files))
    tiff_files <- tiff_files[tiff_dates %in% date_range_str]
    
    safe_log(sprintf("Date window filter applied: %s to %s (%d dates)", start_date, end_date, length(date_range_str)), "INFO")
  }

  safe_log(paste("Found", length(tiff_files), "TIFF(s) to process"), "INFO")

  if (length(tiff_files) == 0) {
    safe_log("No new TIFFs found - nothing to process", "WARNING")
    return(list(total_created = 0, total_skipped = 0, total_errors = 0))
  }

  # Process each new TIFF
  total_created <- 0
  total_skipped <- 0
  total_errors <- 0

  for (tif_path in tiff_files) {
    tif_date <- gsub("\\.tif$", "", basename(tif_path))

    # SKIP CHECK: Only check Script 10's own output directory (field_tiles/)
    # Do NOT check field_tiles_CI/ — that's Script 20's responsibility.
    # Script 10 and Script 20 are independent; Script 10 should not depend on
    # Script 20's success/failure. If field_tiles/ is missing, reprocess it.
    if (dir.exists(field_tiles_dir)) {
      date_exists_in_field_tiles <- FALSE
      
      # Check if ANY field directory has this date
      field_dirs <- list.dirs(field_tiles_dir, full.names = TRUE, recursive = FALSE)
      for (field_dir in field_dirs) {
        potential_file <- file.path(field_dir, paste0(tif_date, ".tif"))
        if (file.exists(potential_file)) {
          date_exists_in_field_tiles <- TRUE
          break
        }
      }
      
      if (date_exists_in_field_tiles) {
        safe_log(paste("Skipping:", tif_date, "(already exists in field_tiles/)"), "INFO")
        total_skipped <- total_skipped + 1
        next
      }
    }

    safe_log(paste("Processing:", tif_date), "INFO")

    result <- crop_tiff_to_fields(tif_path, tif_date, fields, field_tiles_dir)
    total_created <- total_created + result$created
    total_skipped <- total_skipped + result$skipped
    total_errors <- total_errors + result$errors
  }

  safe_log(paste("Processing complete: created =", total_created,
                   ", skipped =", total_skipped, ", errors =", total_errors), "INFO")

  return(list(total_created = total_created, total_skipped = total_skipped,
              total_errors = total_errors))
}