SmartCane/r_app/parameters_project.R

337 lines
12 KiB
R

# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\parameters_project.R
#
# PARAMETERS_PROJECT.R
# ====================
# This script defines project parameters, directory structures, and loads field boundaries.
# It establishes all the necessary paths and creates required directories for the SmartCane project.
# 1. Load required libraries
# -------------------------
suppressPackageStartupMessages({
library(here)
library(readxl)
library(sf)
library(dplyr)
library(tidyr)
})
# 2. Define project directory structure
# -----------------------------------
setup_project_directories <- function(project_dir, data_source = "merged_tif_8b") {
# Base directories
laravel_storage_dir <- here("laravel_app/storage/app", project_dir)
# Determine which TIF source folder to use based on data_source parameter
# Default is merged_tif_8b for newer data with cloud masking (8-band + UDM)
# Alternative: merged_tif for 4-band legacy data
merged_tif_folder <- here(laravel_storage_dir, data_source)
# Main subdirectories
dirs <- list(
reports = here(laravel_storage_dir, "reports"),
logs = here(laravel_storage_dir, "logs"),
data = here(laravel_storage_dir, "Data"),
tif = list(
merged = merged_tif_folder, # Use data_source parameter to select folder
final = here(laravel_storage_dir, "merged_final_tif")
),
weekly_mosaic = here(laravel_storage_dir, "weekly_mosaic"),
weekly_tile_max = here(laravel_storage_dir, "weekly_tile_max"),
extracted_ci = list(
base = here(laravel_storage_dir, "Data/extracted_ci"),
daily = here(laravel_storage_dir, "Data/extracted_ci/daily_vals"),
cumulative = here(laravel_storage_dir, "Data/extracted_ci/cumulative_vals")
),
vrt = here(laravel_storage_dir, "Data/vrt"),
harvest = here(laravel_storage_dir, "Data/HarvestData")
)
# Create all directories
for (dir_path in unlist(dirs)) {
dir.create(dir_path, showWarnings = FALSE, recursive = TRUE)
}
# Return directory structure for use in other functions
return(list(
laravel_storage_dir = laravel_storage_dir,
reports_dir = dirs$reports,
log_dir = dirs$logs,
data_dir = dirs$data,
planet_tif_folder = dirs$tif$merged,
merged_final = dirs$tif$final,
daily_CI_vals_dir = dirs$extracted_ci$daily,
cumulative_CI_vals_dir = dirs$extracted_ci$cumulative,
weekly_CI_mosaic = dirs$weekly_mosaic,
daily_vrt = dirs$vrt, # Point to Data/vrt folder where R creates VRT files from CI extraction
weekly_tile_max = dirs$weekly_tile_max, # Per-tile weekly MAX mosaics (Script 04 output)
harvest_dir = dirs$harvest,
extracted_CI_dir = dirs$extracted_ci$base
))
}
#set working dir.
# 3. Load field boundaries
# ----------------------
load_field_boundaries <- function(data_dir) {
# Choose field boundaries file based on project and script type
# ESA project uses pivot_2.geojson ONLY for scripts 02-03 (CI extraction & growth model)
# All other scripts (including 04-mosaic, 09-KPIs, 10-reports) use pivot.geojson
use_pivot_2 <- exists("project_dir") && project_dir == "esa" &&
exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03
if (use_pivot_2) {
field_boundaries_path <- here(data_dir, "pivot_2.geojson")
} else {
field_boundaries_path <- here(data_dir, "pivot.geojson")
}
if (!file.exists(field_boundaries_path)) {
stop(paste("Field boundaries file not found at path:", field_boundaries_path))
}
tryCatch({
# Read GeoJSON with explicit CRS handling
field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE)
# Remove OBJECTID column immediately if it exists
if ("OBJECTID" %in% names(field_boundaries_sf)) {
field_boundaries_sf <- field_boundaries_sf %>% select(-OBJECTID)
}
# Validate and fix CRS if needed - DO NOT call is.na on CRS objects as it can cause errors
# Just ensure CRS is set; terra will handle projection if needed
tryCatch({
# Simply assign WGS84 if not already set (safe approach)
# This avoids any problematic is.na() calls on complex CRS objects
if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) {
st_crs(field_boundaries_sf) <- 4326
warning("CRS was missing, assigned WGS84 (EPSG:4326)")
}
}, error = function(e) {
# If any CRS operation fails, just try to set it
tryCatch({
st_crs(field_boundaries_sf) <<- 4326
}, error = function(e2) {
# Silently continue - terra might handle it
warning(paste("Could not set CRS:", e2$message))
})
})
# Handle column names - accommodate optional sub_area column
# IMPORTANT: Must preserve geometry column properly when renaming sf object
if ("sub_area" %in% names(field_boundaries_sf)) {
# Reorder columns but keep geometry last
field_boundaries_sf <- field_boundaries_sf %>%
dplyr::select(field, sub_field, sub_area) %>%
sf::st_set_geometry("geometry")
} else {
# Reorder columns but keep geometry last
field_boundaries_sf <- field_boundaries_sf %>%
dplyr::select(field, sub_field) %>%
sf::st_set_geometry("geometry")
}
# Convert to terra vector if possible, otherwise use sf
# Some GeoJSON files (like aura with complex MultiPolygons) may have GDAL/terra compatibility issues
field_boundaries <- tryCatch({
field_boundaries_terra <- terra::vect(field_boundaries_sf)
# Ensure terra object has valid CRS with safer checks
crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL)
crs_str <- if (!is.null(crs_value)) as.character(crs_value) else ""
if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) {
terra::crs(field_boundaries_terra) <- "EPSG:4326"
warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)")
}
field_boundaries_terra
}, error = function(e) {
warning(paste("Terra conversion failed, using sf object instead:", e$message))
# Return sf object as fallback - functions will handle both types
field_boundaries_sf
})
return(list(
field_boundaries_sf = field_boundaries_sf,
field_boundaries = field_boundaries
))
}, error = function(e) {
cat("[DEBUG] Error in load_field_boundaries:\n")
cat(" Message:", e$message, "\n")
cat(" Call:", deparse(e$call), "\n")
stop(paste("Error loading field boundaries:", e$message))
})
}
# 4. Load harvesting data
# ---------------------
load_harvesting_data <- function(data_dir) {
harvest_file <- here(data_dir, "harvest.xlsx")
if (!file.exists(harvest_file)) {
warning(paste("Harvest data file not found at path:", harvest_file))
return(NULL)
}
# Helper function to parse dates with multiple format detection
parse_flexible_date <- function(x) {
if (is.na(x) || is.null(x)) return(NA_real_)
if (inherits(x, "Date")) return(x)
if (inherits(x, "POSIXct")) return(as.Date(x))
# If it's numeric (Excel date serial), convert directly
if (is.numeric(x)) {
return(as.Date(x, origin = "1899-12-30"))
}
# Try character conversion with multiple formats
x_char <- as.character(x)
# Try common formats: YYYY-MM-DD, DD/MM/YYYY, MM/DD/YYYY, YYYY-MM-DD HH:MM:SS
formats <- c("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y-%m-%d %H:%M:%S")
for (fmt in formats) {
result <- suppressWarnings(as.Date(x_char, format = fmt))
if (!is.na(result)) return(result)
}
# If all else fails, return NA
return(NA)
}
tryCatch({
harvesting_data <- read_excel(harvest_file) %>%
dplyr::select(
c(
"field",
"sub_field",
"year",
"season_start",
"season_end",
"age",
"sub_area",
"tonnage_ha"
)
) %>%
mutate(
field = as.character(field),
sub_field = as.character(sub_field),
year = as.numeric(year),
season_start = sapply(season_start, parse_flexible_date),
season_end = sapply(season_end, parse_flexible_date),
season_start = as.Date(season_start, origin = "1970-01-01"),
season_end = as.Date(season_end, origin = "1970-01-01"),
age = as.numeric(age),
sub_area = as.character(sub_area),
tonnage_ha = as.numeric(tonnage_ha)
) %>%
mutate(
season_end = case_when(
season_end > Sys.Date() ~ Sys.Date(),
is.na(season_end) ~ Sys.Date(),
TRUE ~ season_end
),
age = round(as.numeric(season_end - season_start) / 7, 0)
)
return(harvesting_data)
}, error = function(e) {
warning(paste("Error loading harvesting data:", e$message))
return(NULL)
})
}
# 5. Define logging functions globally first
# ---------------------------------------
# Create a simple default log function in case setup_logging hasn't been called yet
log_message <- function(message, level = "INFO") {
timestamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
formatted_message <- paste0("[", level, "] ", timestamp, " - ", message)
cat(formatted_message, "\n")
}
log_head <- function(list, level = "INFO") {
log_message(paste(capture.output(str(head(list))), collapse = "\n"), level)
}
# 6. Set up full logging system with file output
# -------------------------------------------
setup_logging <- function(log_dir) {
log_file <- here(log_dir, paste0(format(Sys.Date(), "%Y%m%d"), ".log"))
# Create enhanced log functions
log_message <- function(message, level = "INFO") {
timestamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
formatted_message <- paste0("[", level, "] ", timestamp, " - ", message)
cat(formatted_message, "\n", file = log_file, append = TRUE)
# Also print to console for debugging
if (level %in% c("ERROR", "WARNING")) {
cat(formatted_message, "\n")
}
}
log_head <- function(list, level = "INFO") {
log_message(paste(capture.output(str(head(list))), collapse = "\n"), level)
}
# Update the global functions with the enhanced versions
assign("log_message", log_message, envir = .GlobalEnv)
assign("log_head", log_head, envir = .GlobalEnv)
return(list(
log_file = log_file,
log_message = log_message,
log_head = log_head
))
}
# 7. Initialize the project
# ----------------------
# Export project directories and settings
initialize_project <- function(project_dir, data_source = "merged_tif_8b") {
# Set up directory structure, passing data_source to select TIF folder
dirs <- setup_project_directories(project_dir, data_source = data_source)
# Set up logging
logging <- setup_logging(dirs$log_dir)
# Load field boundaries
boundaries <- load_field_boundaries(dirs$data_dir)
# Load harvesting data
harvesting_data <- load_harvesting_data(dirs$data_dir)
# Return all initialized components
return(c(
dirs,
list(
logging = logging,
field_boundaries = boundaries$field_boundaries,
field_boundaries_sf = boundaries$field_boundaries_sf,
harvesting_data = harvesting_data
)
))
}
# When script is sourced, initialize with the global project_dir variable if it exists
if (exists("project_dir")) {
# Now we can safely log before initialization
log_message(paste("Initializing project with directory:", project_dir))
# Use data_source if it exists (passed from 02_ci_extraction.R), otherwise use default
data_src <- if (exists("data_source")) data_source else "merged_tif_8b"
log_message(paste("Using data source directory:", data_src))
project_config <- initialize_project(project_dir, data_source = data_src)
# Expose all variables to the global environment
list2env(project_config, envir = .GlobalEnv)
# Log project initialization completion
log_message(paste("Project initialized with directory:", project_dir))
} else {
warning("project_dir variable not found. Please set project_dir before sourcing parameters_project.R")
}