SmartCane/r_app/parameters_project.R
Timon fabbf3214d Enhance harvest detection logic and testing framework
- Updated `detect_mosaic_mode` function to check for grid-size subdirectories in addition to tile-named files.
- Added comprehensive tests for DOY reset logic in `test_doy_logic.py`.
- Implemented feature extraction tests in `test_feature_extraction.py`.
- Created tests for growing window method in `test_growing_window_only.py`.
- Developed a complete model inference test in `test_model_inference.py`.
- Added a debug script for testing two-step refinement logic in `test_script22_debug.py`.
2026-01-15 14:30:54 +01:00

464 lines
17 KiB
R

# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\parameters_project.R
#
# PARAMETERS_PROJECT.R
# ====================
# This script defines project parameters, directory structures, and loads field boundaries.
# It establishes all the necessary paths and creates required directories for the SmartCane project.
# 1. Load required libraries
# -------------------------
# Attach all project dependencies quietly.
# jsonlite is needed by detect_mosaic_mode() to read tiling_config.json.
suppressPackageStartupMessages({
  for (pkg in c("here", "readxl", "sf", "dplyr", "tidyr", "jsonlite")) {
    library(pkg, character.only = TRUE)
  }
})
# 2. Smart detection for tile-based vs single-file mosaic approach
# ----------------------------------------------------------------
# Decide whether a project uses tile-based mosaics or a single mosaic file.
#
# Detection priority:
#   1. tiling_config.json written by script 10 under daily_tiles_split/
#   2. grid-size subdirectories (e.g. 5x5/) under merged_final_tif/
#   3. tile-named files (*_XX.tif) directly in merged_final_tif/
#
# Args:
#   merged_final_tif_dir:  path to the merged_final_tif directory.
#   daily_tiles_split_dir: optional path searched (recursively) for
#                          tiling_config.json.
#
# Returns: list(has_tiles, detected_tiles, total_files, source, and - when a
# grid layout or config file is found - grid_size / grid_path).
detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) {
  # FIX: the original used `%||%`, which only exists in base R >= 4.4 and is
  # not exported by any package attached here - use a local helper instead.
  null_default <- function(a, b) if (is.null(a)) b else a

  # PRIORITY 1: tiling_config.json metadata from script 10 - the most
  # reliable source since script 10 explicitly records its decision.
  if (!is.null(daily_tiles_split_dir) && dir.exists(daily_tiles_split_dir)) {
    config_files <- list.files(daily_tiles_split_dir,
                               pattern = "tiling_config\\.json$",
                               recursive = TRUE,
                               full.names = TRUE)
    if (length(config_files) > 0) {
      # Several grid subfolders may each hold a config; use the newest one
      config_file <- config_files[which.max(file.info(config_files)$mtime)]
      tryCatch({
        config_json <- jsonlite::read_json(config_file)
        return(list(
          has_tiles = null_default(config_json$has_tiles, TRUE),
          detected_tiles = character(),
          total_files = 0,
          source = "tiling_config.json",
          grid_size = null_default(config_json$grid_size, "unknown")
        ))
      }, error = function(e) {
        warning("Error reading tiling_config.json: ", e$message)
        # Fall through to file-based detection
      })
    }
  }

  # PRIORITY 2: file-based detection (fallback when no metadata was found)
  if (!dir.exists(merged_final_tif_dir)) {
    return(list(
      has_tiles = FALSE,
      detected_tiles = character(),
      total_files = 0,
      source = "directory_not_found"
    ))
  }

  # Grid-size subdirectories (5x5, 10x10, ...) indicate the layout
  # merged_final_tif/{grid_size}/{DATE}/{DATE}_XX.tif
  grid_subfolders <- list.dirs(merged_final_tif_dir, full.names = FALSE, recursive = FALSE)
  grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE)
  if (length(grid_patterns) > 0) {
    grid_size <- grid_patterns[1]
    grid_dir <- file.path(merged_final_tif_dir, grid_size)
    # FIX: the old code indexed [1:3], which produced NA entries when fewer
    # than three tiles existed, and reported the sample length (always 3) as
    # total_files. Use head() and the true file count instead.
    grid_tifs <- list.files(grid_dir, pattern = "\\.tif$", recursive = TRUE)
    return(list(
      has_tiles = TRUE,
      detected_tiles = head(grid_tifs, 3),
      total_files = length(grid_tifs),
      source = "grid_subdirectory_detection",
      grid_size = grid_size,
      grid_path = grid_dir
    ))
  }

  # PRIORITY 3: tile-named files directly inside merged_final_tif
  tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE)
  if (length(tif_files) == 0) {
    return(list(
      has_tiles = FALSE,
      detected_tiles = character(),
      total_files = 0,
      source = "no_files_found"
    ))
  }

  # Tile naming pattern: *_XX.tif where XX is exactly two digits,
  # e.g. 2025-11-27_00.tif, 2025-11-27_01.tif, week_50_2024_00.tif
  tile_pattern <- "_(\\d{2})\\.tif$"
  tile_files <- tif_files[grepl(tile_pattern, tif_files)]

  list(
    has_tiles = length(tile_files) > 0,
    detected_tiles = tile_files,
    total_files = length(tif_files),
    source = "file_pattern_detection"
  )
}
# 2. Define project directory structure
# -----------------------------------
# Build (and create on disk) the directory layout for one project.
#
# Args:
#   project_dir: project folder name under laravel_app/storage/app.
#   data_source: which TIF source folder to use. Default "merged_tif_8b"
#                (newer 8-band + UDM cloud-masked data); "merged_tif"
#                selects the 4-band legacy data.
#
# Returns a named list of resolved paths, the use_tile_mosaic flag, and
# tile_detection_info metadata describing how the mosaic mode was detected.
setup_project_directories <- function(project_dir, data_source = "merged_tif_8b") {
  # Base directory for everything this project stores
  laravel_storage_dir <- here("laravel_app/storage/app", project_dir)

  # TIF source folder is selected by the data_source parameter
  merged_tif_folder <- here(laravel_storage_dir, data_source)

  # Detect tile mode based on metadata from script 10 or file patterns
  merged_final_dir <- here(laravel_storage_dir, "merged_final_tif")
  daily_tiles_split_dir <- here(laravel_storage_dir, "daily_tiles_split")
  tile_detection <- detect_mosaic_mode(
    merged_final_tif_dir = merged_final_dir,
    daily_tiles_split_dir = daily_tiles_split_dir
  )
  use_tile_mosaic <- tile_detection$has_tiles

  # Main subdirectories
  dirs <- list(
    reports = here(laravel_storage_dir, "reports"),
    logs = here(laravel_storage_dir, "logs"),
    data = here(laravel_storage_dir, "Data"),
    tif = list(
      merged = merged_tif_folder, # selected via data_source
      final = merged_final_dir
    ),
    weekly_mosaic = here(laravel_storage_dir, "weekly_mosaic"),
    weekly_tile_max = here(laravel_storage_dir, "weekly_tile_max"),
    extracted_ci = list(
      base = here(laravel_storage_dir, "Data/extracted_ci"),
      daily = here(laravel_storage_dir, "Data/extracted_ci/daily_vals"),
      cumulative = here(laravel_storage_dir, "Data/extracted_ci/cumulative_vals")
    ),
    vrt = here(laravel_storage_dir, "Data/vrt"),
    harvest = here(laravel_storage_dir, "Data/HarvestData")
  )

  # Create every directory (unlist flattens the nested tif/extracted_ci lists)
  for (dir_path in unlist(dirs)) {
    dir.create(dir_path, showWarnings = FALSE, recursive = TRUE)
  }

  # Return directory structure for use in other functions
  return(list(
    laravel_storage_dir = laravel_storage_dir,
    reports_dir = dirs$reports,
    log_dir = dirs$logs,
    data_dir = dirs$data,
    planet_tif_folder = dirs$tif$merged,
    merged_final = dirs$tif$final,
    daily_CI_vals_dir = dirs$extracted_ci$daily,
    cumulative_CI_vals_dir = dirs$extracted_ci$cumulative,
    # SMART: route weekly mosaics based on tile detection
    weekly_CI_mosaic = if (use_tile_mosaic) dirs$weekly_tile_max else dirs$weekly_mosaic,
    daily_vrt = dirs$vrt, # Data/vrt folder where R creates VRT files from CI extraction
    weekly_tile_max = dirs$weekly_tile_max, # per-tile weekly MAX mosaics (script 04 output)
    use_tile_mosaic = use_tile_mosaic, # TRUE when tiles are used for this project
    tile_detection_info = list(
      has_tiles = tile_detection$has_tiles,
      detected_source = tile_detection$source,
      detected_count = tile_detection$total_files,
      # FIX: avoid `%||%` - it only exists in base R >= 4.4 and is not
      # exported by any package attached here; spell out the NULL fallback.
      grid_size = if (is.null(tile_detection$grid_size)) "unknown" else tile_detection$grid_size,
      sample_tiles = head(tile_detection$detected_tiles, 3)
    ),
    harvest_dir = dirs$harvest,
    extracted_CI_dir = dirs$extracted_ci$base
  ))
}
# NOTE: no working directory is set here; all paths are resolved via here().
# 3. Load field boundaries
# ----------------------
# Load field boundary polygons for the project.
#
# File selection: pivot_2.geojson is used ONLY when the globals
# `project_dir` == "esa" AND `ci_extraction_script` both exist (the flag is
# set by scripts 02-03); every other script/project reads pivot.geojson.
#
# Args:
#   data_dir: directory containing the GeoJSON boundary file.
# Returns: list(
#   field_boundaries_sf = sf object (OBJECTID dropped, columns reordered),
#   field_boundaries    = terra SpatVector, or the same sf object when the
#                         terra conversion fails)
# Stops (after printing debug info) if the file is missing or unreadable.
load_field_boundaries <- function(data_dir) {
# Choose field boundaries file based on project and script type
# ESA project uses pivot_2.geojson ONLY for scripts 02-03 (CI extraction & growth model)
# All other scripts (including 04-mosaic, 09-KPIs, 10-reports) use pivot.geojson
use_pivot_2 <- exists("project_dir") && project_dir == "esa" &&
exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03
if (use_pivot_2) {
field_boundaries_path <- here(data_dir, "pivot_2.geojson")
} else {
field_boundaries_path <- here(data_dir, "pivot.geojson")
}
if (!file.exists(field_boundaries_path)) {
stop(paste("Field boundaries file not found at path:", field_boundaries_path))
}
tryCatch({
# Read GeoJSON with explicit CRS handling
field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE)
# Remove OBJECTID column immediately if it exists
if ("OBJECTID" %in% names(field_boundaries_sf)) {
field_boundaries_sf <- field_boundaries_sf %>% select(-OBJECTID)
}
# Validate and fix CRS if needed - DO NOT call is.na on CRS objects as it can cause errors
# Just ensure CRS is set; terra will handle projection if needed
tryCatch({
# Simply assign WGS84 if not already set (safe approach)
# This avoids any problematic is.na() calls on complex CRS objects
# NOTE(review): a valid non-EPSG CRS (e.g. custom WKT) also has NA $epsg
# and would be silently replaced with 4326 here - confirm that is intended.
if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) {
st_crs(field_boundaries_sf) <- 4326
warning("CRS was missing, assigned WGS84 (EPSG:4326)")
}
}, error = function(e) {
# If any CRS operation fails, just try to set it
tryCatch({
# NOTE(review): `<<-` reaches field_boundaries_sf in this function's frame
# (a plain `<-` would only modify a handler-local copy) - verify intended.
st_crs(field_boundaries_sf) <<- 4326
}, error = function(e2) {
# Silently continue - terra might handle it
warning(paste("Could not set CRS:", e2$message))
})
})
# Handle column names - accommodate optional sub_area column
# IMPORTANT: Must preserve geometry column properly when renaming sf object
# NOTE(review): assumes the GeoJSON has `field` and `sub_field` attributes;
# dplyr::select() errors if either is missing.
if ("sub_area" %in% names(field_boundaries_sf)) {
# Reorder columns but keep geometry last
field_boundaries_sf <- field_boundaries_sf %>%
dplyr::select(field, sub_field, sub_area) %>%
sf::st_set_geometry("geometry")
} else {
# Reorder columns but keep geometry last
field_boundaries_sf <- field_boundaries_sf %>%
dplyr::select(field, sub_field) %>%
sf::st_set_geometry("geometry")
}
# Convert to terra vector if possible, otherwise use sf
# Some GeoJSON files (like aura with complex MultiPolygons) may have GDAL/terra compatibility issues
field_boundaries <- tryCatch({
field_boundaries_terra <- terra::vect(field_boundaries_sf)
# Ensure terra object has valid CRS with safer checks
crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL)
crs_str <- if (!is.null(crs_value)) as.character(crs_value) else ""
if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) {
terra::crs(field_boundaries_terra) <- "EPSG:4326"
warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)")
}
field_boundaries_terra
}, error = function(e) {
warning(paste("Terra conversion failed, using sf object instead:", e$message))
# Return sf object as fallback - functions will handle both types
field_boundaries_sf
})
return(list(
field_boundaries_sf = field_boundaries_sf,
field_boundaries = field_boundaries
))
}, error = function(e) {
# Surface load failures with context before stopping the script
cat("[DEBUG] Error in load_field_boundaries:\n")
cat(" Message:", e$message, "\n")
cat(" Call:", deparse(e$call), "\n")
stop(paste("Error loading field boundaries:", e$message))
})
}
# 4. Load harvesting data
# ---------------------
# Load and normalise harvest records from Data/harvest.xlsx.
#
# Args:
#   data_dir: directory expected to contain harvest.xlsx.
#
# Returns a data frame with typed columns (field, sub_field, year,
# season_start, season_end, age, sub_area, tonnage_ha), with season_end
# clamped to today and age recomputed in whole weeks; returns NULL (with a
# warning) when the file is missing or unreadable.
load_harvesting_data <- function(data_dir) {
  harvest_file <- here(data_dir, "harvest.xlsx")
  if (!file.exists(harvest_file)) {
    warning(paste("Harvest data file not found at path:", harvest_file))
    return(NULL)
  }

  # Parse a single date cell that may arrive as Date, POSIXct, Excel date
  # serial, or character in one of several common formats.
  parse_flexible_date <- function(x) {
    # FIX: test is.null()/length() BEFORE is.na(). is.na(NULL) returns
    # logical(0), so the original `is.na(x) || is.null(x)` errored on NULL
    # input under modern R and the NULL branch was unreachable.
    if (is.null(x) || length(x) == 0 || is.na(x)) return(NA_real_)
    if (inherits(x, "Date")) return(x)
    if (inherits(x, "POSIXct")) return(as.Date(x))
    # Numeric values are Excel date serials (1900 date system origin)
    if (is.numeric(x)) {
      return(as.Date(x, origin = "1899-12-30"))
    }
    # Otherwise try common character formats in order
    x_char <- as.character(x)
    formats <- c("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y-%m-%d %H:%M:%S")
    for (fmt in formats) {
      result <- suppressWarnings(as.Date(x_char, format = fmt))
      if (!is.na(result)) return(result)
    }
    # Unparseable value
    return(NA)
  }

  tryCatch({
    harvesting_data <- read_excel(harvest_file) %>%
      dplyr::select(
        c(
          "field",
          "sub_field",
          "year",
          "season_start",
          "season_end",
          "age",
          "sub_area",
          "tonnage_ha"
        )
      ) %>%
      mutate(
        field = as.character(field),
        sub_field = as.character(sub_field),
        year = as.numeric(year),
        # sapply() strips the Date class, so re-attach it via as.Date(origin)
        season_start = sapply(season_start, parse_flexible_date),
        season_end = sapply(season_end, parse_flexible_date),
        season_start = as.Date(season_start, origin = "1970-01-01"),
        season_end = as.Date(season_end, origin = "1970-01-01"),
        age = as.numeric(age),
        sub_area = as.character(sub_area),
        tonnage_ha = as.numeric(tonnage_ha)
      ) %>%
      mutate(
        # Open or future seasons are clamped to today before computing age
        season_end = case_when(
          season_end > Sys.Date() ~ Sys.Date(),
          is.na(season_end) ~ Sys.Date(),
          TRUE ~ season_end
        ),
        # Age in whole weeks between season start and (clamped) season end
        age = round(as.numeric(season_end - season_start) / 7, 0)
      )
    return(harvesting_data)
  }, error = function(e) {
    warning(paste("Error loading harvesting data:", e$message))
    return(NULL)
  })
}
# 5. Define logging functions globally first
# ---------------------------------------
# Create a simple default log function in case setup_logging hasn't been called yet
# Minimal fallback logger used before setup_logging() installs the
# file-backed version; prints "[LEVEL] timestamp - message" to the console.
log_message <- function(message, level = "INFO") {
  stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
  cat(paste0("[", level, "] ", stamp, " - ", message), "\n")
}
# Log a structural summary (str() of head()) of any object, routed through
# whichever log_message is currently in effect.
log_head <- function(list, level = "INFO") {
  summary_lines <- capture.output(str(head(list)))
  log_message(paste(summary_lines, collapse = "\n"), level)
}
# 6. Set up full logging system with file output
# -------------------------------------------
# Install the full logging system: every message is appended to a per-day
# log file under log_dir, and ERROR/WARNING lines are also echoed to the
# console. The enhanced log_message/log_head replace the fallback versions
# in the global environment so previously-defined code picks them up.
setup_logging <- function(log_dir) {
  log_file <- here(log_dir, paste0(format(Sys.Date(), "%Y%m%d"), ".log"))

  log_message <- function(message, level = "INFO") {
    line <- paste0("[", level, "] ",
                   format(Sys.time(), "%Y-%m-%d %H:%M:%S"),
                   " - ", message)
    cat(line, "\n", file = log_file, append = TRUE)
    # Echo serious messages to the console for interactive debugging
    if (level %in% c("ERROR", "WARNING")) {
      cat(line, "\n")
    }
  }

  log_head <- function(list, level = "INFO") {
    log_message(paste(capture.output(str(head(list))), collapse = "\n"), level)
  }

  # Overwrite the top-level fallback loggers with the file-backed versions
  assign("log_message", log_message, envir = .GlobalEnv)
  assign("log_head", log_head, envir = .GlobalEnv)

  list(
    log_file = log_file,
    log_message = log_message,
    log_head = log_head
  )
}
# 7. Initialize the project
# ----------------------
# Export project directories and settings
# Initialize the whole project: create the directory layout, set up logging,
# then load field boundaries and harvest data from the project's Data dir.
#
# Args:
#   project_dir: project folder name under laravel_app/storage/app.
#   data_source: TIF source folder, forwarded to setup_project_directories().
#
# Returns one flat list combining every directory path with the logging
# handles, boundary objects, and harvesting data.
initialize_project <- function(project_dir, data_source = "merged_tif_8b") {
  dirs <- setup_project_directories(project_dir, data_source = data_source)
  logging <- setup_logging(dirs$log_dir)
  boundaries <- load_field_boundaries(dirs$data_dir)
  harvesting_data <- load_harvesting_data(dirs$data_dir)

  c(
    dirs,
    list(
      logging = logging,
      field_boundaries = boundaries$field_boundaries,
      field_boundaries_sf = boundaries$field_boundaries_sf,
      harvesting_data = harvesting_data
    )
  )
}
# When script is sourced, initialize with the global project_dir variable if it exists
# Sourcing-time auto-initialization: when the caller set the global
# `project_dir` before source()-ing this file, initialize the project and
# export every config entry into the global environment.
# NOTE(review): exists() also matches stale globals left over from a
# previous run in the same session - confirm that is acceptable.
if (exists("project_dir")) {
# Now we can safely log before initialization
log_message(paste("Initializing project with directory:", project_dir))
# Use data_source if it exists (passed from 02_ci_extraction.R), otherwise use default
data_src <- if (exists("data_source")) data_source else "merged_tif_8b"
log_message(paste("Using data source directory:", data_src))
project_config <- initialize_project(project_dir, data_source = data_src)
# Expose all variables to the global environment
# (e.g. use_tile_mosaic, tile_detection_info, data_dir become globals)
list2env(project_config, envir = .GlobalEnv)
# Log project initialization completion with tile mode info
log_message(paste("Project initialized with directory:", project_dir))
if (exists("use_tile_mosaic")) {
mosaic_mode <- if (use_tile_mosaic) "TILE-BASED" else "SINGLE-FILE"
log_message(paste("Mosaic mode detected:", mosaic_mode))
if (exists("tile_detection_info") && !is.null(tile_detection_info)) {
log_message(paste(" - Detection source:", tile_detection_info$detected_source))
log_message(paste(" - Grid size:", tile_detection_info$grid_size))
log_message(paste(" - Detected files in storage:", tile_detection_info$detected_count))
if (length(tile_detection_info$sample_tiles) > 0) {
log_message(paste(" - Sample tile files:", paste(tile_detection_info$sample_tiles, collapse = ", ")))
}
}
}
} else {
warning("project_dir variable not found. Please set project_dir before sourcing parameters_project.R")
}