# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\parameters_project.R
#
# PARAMETERS_PROJECT.R
# ====================
# This script defines project parameters, directory structures, and loads
# field boundaries. It establishes all the necessary paths and creates
# required directories for the SmartCane project.

# 1. Load required libraries
# --------------------------
suppressPackageStartupMessages({
  library(here)
  library(readxl)
  library(sf)
  library(dplyr)
  library(tidyr)
  library(jsonlite) # For reading tiling_config.json
})

# BUGFIX: `%||%` is used below but is only part of base R from 4.4.0 onwards
# and was never defined or imported here (rlang is not attached). Provide a
# fallback so the script also runs on older R installations.
if (!exists("%||%")) {
  `%||%` <- function(x, y) if (is.null(x)) y else x
}

# 2. Smart detection for tile-based vs single-file mosaic approach
# ----------------------------------------------------------------

# Decide whether this project uses tile-based mosaics or a single-file mosaic.
#
# Detection order:
#   1. tiling_config.json metadata written by script 10 (authoritative; the
#      most recently modified config wins when several grid-size subfolders
#      contain one).
#   2. File-name pattern scan of merged_final_tif/ (fallback).
#
# @param merged_final_tif_dir Directory holding the merged final GeoTIFFs.
# @param daily_tiles_split_dir Optional directory that may contain a
#   tiling_config.json produced by script 10.
# @return list(has_tiles, detected_tiles, total_files, source[, grid_size]).
detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) {
  # PRIORITY 1: tiling_config.json metadata file from script 10.
  # This is the most reliable source since script 10 explicitly records
  # its decision.
  if (!is.null(daily_tiles_split_dir) && dir.exists(daily_tiles_split_dir)) {
    # Try to find tiling_config.json in any grid-size subfolder.
    config_files <- list.files(
      daily_tiles_split_dir,
      pattern = "tiling_config\\.json$",
      recursive = TRUE,
      full.names = TRUE
    )
    if (length(config_files) > 0) {
      # Found config file(s) - use the most recent one.
      config_file <- config_files[which.max(file.info(config_files)$mtime)]
      tryCatch({
        config_json <- jsonlite::read_json(config_file)
        return(list(
          has_tiles = config_json$has_tiles %||% TRUE,
          detected_tiles = character(),
          total_files = 0,
          source = "tiling_config.json",
          grid_size = config_json$grid_size %||% "unknown"
        ))
      }, error = function(e) {
        # Fall through to file-based detection below.
        warning("Error reading tiling_config.json: ", e$message)
      })
    }
  }

  # PRIORITY 2: file-based detection (fallback if metadata not found).
  # Check if merged_final_tif/ contains tile-named files.
  if (!dir.exists(merged_final_tif_dir)) {
    return(list(
      has_tiles = FALSE,
      detected_tiles = character(),
      total_files = 0,
      source = "directory_not_found"
    ))
  }

  tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE)
  if (length(tif_files) == 0) {
    return(list(
      has_tiles = FALSE,
      detected_tiles = character(),
      total_files = 0,
      source = "no_files_found"
    ))
  }

  # Tile naming pattern: *_XX.tif where XX is two digits,
  # e.g. 2025-11-27_00.tif, 2025-11-27_01.tif, week_50_2024_00.tif.
  tile_pattern <- "_(\\d{2})\\.tif$"
  tile_files <- tif_files[grepl(tile_pattern, tif_files)]

  list(
    has_tiles = length(tile_files) > 0,
    detected_tiles = tile_files,
    total_files = length(tif_files),
    source = "file_pattern_detection"
  )
}

# 2. Define project directory structure
# -------------------------------------

# Build (and create on disk) the project directory tree under
# laravel_app/storage/app/<project_dir>.
#
# @param project_dir Name of the project folder under laravel storage.
# @param data_source Which TIF source folder to use: "merged_tif_8b"
#   (default, newer 8-band + UDM data with cloud masking) or "merged_tif"
#   (4-band legacy data).
# @return Named list of directory paths plus tile-mode flags for use by the
#   other project scripts.
setup_project_directories <- function(project_dir, data_source = "merged_tif_8b") {
  # Base directories.
  laravel_storage_dir <- here("laravel_app/storage/app", project_dir)

  # data_source selects the merged TIF folder downstream scripts read from.
  merged_tif_folder <- here(laravel_storage_dir, data_source)

  # Detect tile mode based on metadata from script 10 or file patterns.
  merged_final_dir <- here(laravel_storage_dir, "merged_final_tif")
  daily_tiles_split_dir <- here(laravel_storage_dir, "daily_tiles_split")
  tile_detection <- detect_mosaic_mode(
    merged_final_tif_dir = merged_final_dir,
    daily_tiles_split_dir = daily_tiles_split_dir
  )
  use_tile_mosaic <- tile_detection$has_tiles

  # Main subdirectories.
  dirs <- list(
    reports = here(laravel_storage_dir, "reports"),
    logs = here(laravel_storage_dir, "logs"),
    data = here(laravel_storage_dir, "Data"),
    tif = list(
      merged = merged_tif_folder, # selected via data_source parameter
      final = merged_final_dir
    ),
    weekly_mosaic = here(laravel_storage_dir, "weekly_mosaic"),
    weekly_tile_max = here(laravel_storage_dir, "weekly_tile_max"),
    extracted_ci = list(
      base = here(laravel_storage_dir, "Data/extracted_ci"),
      daily = here(laravel_storage_dir, "Data/extracted_ci/daily_vals"),
      cumulative = here(laravel_storage_dir, "Data/extracted_ci/cumulative_vals")
    ),
    vrt = here(laravel_storage_dir, "Data/vrt"),
    harvest = here(laravel_storage_dir, "Data/HarvestData")
  )

  # Create all directories (recursive, silent when they already exist).
  for (dir_path in unlist(dirs)) {
    dir.create(dir_path, showWarnings = FALSE, recursive = TRUE)
  }

  # Return directory structure for use in other functions.
  return(list(
    laravel_storage_dir = laravel_storage_dir,
    reports_dir = dirs$reports,
    log_dir = dirs$logs,
    data_dir = dirs$data,
    planet_tif_folder = dirs$tif$merged,
    merged_final = dirs$tif$final,
    daily_CI_vals_dir = dirs$extracted_ci$daily,
    cumulative_CI_vals_dir = dirs$extracted_ci$cumulative,
    # SMART: route weekly mosaics based on tile detection.
    weekly_CI_mosaic = if (use_tile_mosaic) dirs$weekly_tile_max else dirs$weekly_mosaic,
    # Data/vrt folder where R creates VRT files from CI extraction.
    daily_vrt = dirs$vrt,
    # Per-tile weekly MAX mosaics (Script 04 output).
    weekly_tile_max = dirs$weekly_tile_max,
    # Flag indicating if tiles are used for this project.
    use_tile_mosaic = use_tile_mosaic,
    tile_detection_info = list(
      has_tiles = tile_detection$has_tiles,
      detected_source = tile_detection$source,
      detected_count = tile_detection$total_files,
      grid_size = tile_detection$grid_size %||% "unknown",
      sample_tiles = head(tile_detection$detected_tiles, 3)
    ),
    harvest_dir = dirs$harvest,
    extracted_CI_dir = dirs$extracted_ci$base
  ))
}

# set working dir.
# 3. Load field boundaries
# ------------------------

# Read the project's field-boundary GeoJSON and return it both as an sf
# object and, when terra can handle the geometry, as a terra SpatVector.
#
# File selection: the ESA project uses pivot_2.geojson ONLY when the global
# `ci_extraction_script` flag is set by scripts 02-03 (CI extraction & growth
# model); all other scripts (including 04-mosaic, 09-KPIs, 10-reports) use
# pivot.geojson.
#
# @param data_dir Directory containing pivot.geojson / pivot_2.geojson.
# @return list(field_boundaries_sf = <sf>,
#              field_boundaries   = <terra SpatVector, or sf fallback>).
load_field_boundaries <- function(data_dir) {
  # Pick the boundaries file based on project and script type (see above).
  use_pivot_2 <- exists("project_dir") &&
    project_dir == "esa" &&
    exists("ci_extraction_script")

  field_boundaries_path <- if (use_pivot_2) {
    here(data_dir, "pivot_2.geojson")
  } else {
    here(data_dir, "pivot.geojson")
  }

  if (!file.exists(field_boundaries_path)) {
    stop(paste("Field boundaries file not found at path:", field_boundaries_path))
  }

  tryCatch({
    # Read GeoJSON with explicit CRS handling.
    field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE)

    # Drop the OBJECTID column immediately if present.
    if ("OBJECTID" %in% names(field_boundaries_sf)) {
      field_boundaries_sf <- field_boundaries_sf %>% select(-OBJECTID)
    }

    # Validate/repair the CRS. Avoid calling is.na() on whole CRS objects
    # (can error); only the $epsg field is inspected here.
    tryCatch({
      # Assign WGS84 only when no EPSG code is set.
      if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) {
        st_crs(field_boundaries_sf) <- 4326
        warning("CRS was missing, assigned WGS84 (EPSG:4326)")
      }
    }, error = function(e) {
      # If the CRS check itself fails, try a plain assignment as last resort.
      tryCatch({
        st_crs(field_boundaries_sf) <<- 4326
      }, error = function(e2) {
        # Continue anyway - terra might still cope with the geometry.
        warning(paste("Could not set CRS:", e2$message))
      })
    })

    # Normalise the attribute columns; sub_area is optional. The sf select
    # keeps the (sticky) geometry column, which is re-declared explicitly.
    wanted_cols <- if ("sub_area" %in% names(field_boundaries_sf)) {
      c("field", "sub_field", "sub_area")
    } else {
      c("field", "sub_field")
    }
    field_boundaries_sf <- field_boundaries_sf %>%
      dplyr::select(dplyr::all_of(wanted_cols)) %>%
      sf::st_set_geometry("geometry")

    # Prefer a terra SpatVector; some GeoJSON files (e.g. aura, with complex
    # MultiPolygons) hit GDAL/terra compatibility issues, in which case the
    # sf object is returned instead.
    field_boundaries <- tryCatch({
      field_boundaries_terra <- terra::vect(field_boundaries_sf)
      # Make sure the terra object carries a usable CRS.
      crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL)
      crs_str <- if (!is.null(crs_value)) as.character(crs_value) else ""
      if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) {
        terra::crs(field_boundaries_terra) <- "EPSG:4326"
        warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)")
      }
      field_boundaries_terra
    }, error = function(e) {
      warning(paste("Terra conversion failed, using sf object instead:", e$message))
      field_boundaries_sf
    })

    return(list(
      field_boundaries_sf = field_boundaries_sf,
      field_boundaries = field_boundaries
    ))
  }, error = function(e) {
    cat("[DEBUG] Error in load_field_boundaries:\n")
    cat(" Message:", e$message, "\n")
    cat(" Call:", deparse(e$call), "\n")
    stop(paste("Error loading field boundaries:", e$message))
  })
}
# 4. Load harvesting data
# -----------------------

# Load harvest records from <data_dir>/harvest.xlsx.
#
# Dates are parsed flexibly (Date/POSIXct objects, Excel serial numbers, or
# character strings in several common formats). Missing or future
# season_end values are clamped to today, and `age` is recomputed as whole
# weeks between season_start and season_end (overriding the sheet column).
#
# @param data_dir Directory containing harvest.xlsx.
# @return Tibble of harvest data, or NULL (with a warning) when the file is
#   missing or cannot be read.
load_harvesting_data <- function(data_dir) {
  harvest_file <- here(data_dir, "harvest.xlsx")
  if (!file.exists(harvest_file)) {
    warning(paste("Harvest data file not found at path:", harvest_file))
    return(NULL)
  }

  # Helper: parse a single date value with multiple format detection.
  parse_flexible_date <- function(x) {
    # BUGFIX: the NULL/length check must come BEFORE is.na() - is.na(NULL)
    # returns logical(0), and `||` errors on a zero-length operand.
    if (is.null(x) || length(x) == 0) return(NA_real_)
    if (is.na(x)) return(NA_real_)
    if (inherits(x, "Date")) return(x)
    if (inherits(x, "POSIXct")) return(as.Date(x))
    # Numeric values are Excel date serials (days since 1899-12-30).
    if (is.numeric(x)) {
      return(as.Date(x, origin = "1899-12-30"))
    }
    # Try common formats: YYYY-MM-DD, DD/MM/YYYY, MM/DD/YYYY,
    # YYYY-MM-DD HH:MM:SS.
    x_char <- as.character(x)
    formats <- c("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y-%m-%d %H:%M:%S")
    for (fmt in formats) {
      result <- suppressWarnings(as.Date(x_char, format = fmt))
      if (!is.na(result)) return(result)
    }
    # All formats failed; use the same NA type as the other branches.
    NA_real_
  }

  tryCatch({
    harvesting_data <- read_excel(harvest_file) %>%
      dplyr::select(
        c(
          "field", "sub_field", "year", "season_start", "season_end",
          "age", "sub_area", "tonnage_ha"
        )
      ) %>%
      mutate(
        field = as.character(field),
        sub_field = as.character(sub_field),
        year = as.numeric(year),
        # sapply() collapses the parsed dates to numeric day counts; the
        # as.Date() calls below restore the Date class.
        season_start = sapply(season_start, parse_flexible_date),
        season_end = sapply(season_end, parse_flexible_date),
        season_start = as.Date(season_start, origin = "1970-01-01"),
        season_end = as.Date(season_end, origin = "1970-01-01"),
        age = as.numeric(age),
        sub_area = as.character(sub_area),
        tonnage_ha = as.numeric(tonnage_ha)
      ) %>%
      mutate(
        # Clamp missing/future season ends to today.
        season_end = case_when(
          season_end > Sys.Date() ~ Sys.Date(),
          is.na(season_end) ~ Sys.Date(),
          TRUE ~ season_end
        ),
        # Recompute age as whole weeks of the season.
        age = round(as.numeric(season_end - season_start) / 7, 0)
      )
    return(harvesting_data)
  }, error = function(e) {
    warning(paste("Error loading harvesting data:", e$message))
    return(NULL)
  })
}
# 5. Define logging functions globally first
# ------------------------------------------

# Default console-only logger so logging works even before setup_logging()
# has been called.
#
# @param message Text to log.
# @param level Severity label, e.g. "INFO", "WARNING", "ERROR".
log_message <- function(message, level = "INFO") {
  stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
  line <- paste0("[", level, "] ", stamp, " - ", message)
  cat(line, "\n")
}

# Log the structure of the head of a list-like object via log_message().
#
# @param list Object whose head()/str() summary is logged.
# @param level Severity label passed through to log_message().
log_head <- function(list, level = "INFO") {
  summary_text <- paste(capture.output(str(head(list))), collapse = "\n")
  log_message(summary_text, level)
}

# 6. Set up full logging system with file output
# ----------------------------------------------

# Create file-backed log functions and install them globally, replacing the
# console-only defaults defined above.
#
# @param log_dir Directory where the daily log file (YYYYMMDD.log) lives.
# @return list(log_file, log_message, log_head) for callers that want
#   direct handles to the enhanced functions.
setup_logging <- function(log_dir) {
  log_file <- here(log_dir, paste0(format(Sys.Date(), "%Y%m%d"), ".log"))

  # File-backed logger; ERROR/WARNING lines are also echoed to the console
  # for debugging.
  log_message <- function(message, level = "INFO") {
    stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
    line <- paste0("[", level, "] ", stamp, " - ", message)
    cat(line, "\n", file = log_file, append = TRUE)
    if (level %in% c("ERROR", "WARNING")) {
      cat(line, "\n")
    }
  }

  log_head <- function(list, level = "INFO") {
    log_message(paste(capture.output(str(head(list))), collapse = "\n"), level)
  }

  # Promote the enhanced versions to the global environment so every script
  # sourcing this file picks them up.
  assign("log_message", log_message, envir = .GlobalEnv)
  assign("log_head", log_head, envir = .GlobalEnv)

  return(list(
    log_file = log_file,
    log_message = log_message,
    log_head = log_head
  ))
}
# 7. Initialize the project
# -------------------------

# Wire all project components together: directories, logging, field
# boundaries, and harvesting data.
#
# @param project_dir Name of the project folder under laravel storage.
# @param data_source TIF source folder, forwarded to
#   setup_project_directories().
# @return Flat list combining the directory paths with logging handles,
#   field boundaries (sf and terra forms), and harvesting data.
initialize_project <- function(project_dir, data_source = "merged_tif_8b") {
  # Directory structure first - everything else writes into it.
  dirs <- setup_project_directories(project_dir, data_source = data_source)
  logging <- setup_logging(dirs$log_dir)
  boundaries <- load_field_boundaries(dirs$data_dir)
  harvesting_data <- load_harvesting_data(dirs$data_dir)

  return(c(
    dirs,
    list(
      logging = logging,
      field_boundaries = boundaries$field_boundaries,
      field_boundaries_sf = boundaries$field_boundaries_sf,
      harvesting_data = harvesting_data
    )
  ))
}

# When this script is sourced, initialize using the global `project_dir`
# variable if it exists.
if (exists("project_dir")) {
  log_message(paste("Initializing project with directory:", project_dir))

  # `data_source` may be pre-set by the caller (e.g. 02_ci_extraction.R).
  data_src <- if (exists("data_source")) data_source else "merged_tif_8b"
  log_message(paste("Using data source directory:", data_src))

  project_config <- initialize_project(project_dir, data_source = data_src)

  # Expose every configured variable to the global environment.
  list2env(project_config, envir = .GlobalEnv)

  # Log completion, including tile-mode information when available.
  log_message(paste("Project initialized with directory:", project_dir))
  if (exists("use_tile_mosaic")) {
    mosaic_mode <- if (use_tile_mosaic) "TILE-BASED" else "SINGLE-FILE"
    log_message(paste("Mosaic mode detected:", mosaic_mode))
    if (exists("tile_detection_info") && !is.null(tile_detection_info)) {
      log_message(paste(" - Detection source:", tile_detection_info$detected_source))
      log_message(paste(" - Grid size:", tile_detection_info$grid_size))
      log_message(paste(" - Detected files in storage:", tile_detection_info$detected_count))
      if (length(tile_detection_info$sample_tiles) > 0) {
        log_message(paste(" - Sample tile files:",
                          paste(tile_detection_info$sample_tiles, collapse = ", ")))
      }
    }
  }
} else {
  warning("project_dir variable not found. Please set project_dir before sourcing parameters_project.R")
}