- Fixed CI calculation: changed from NDVI (NIR-Red)/(NIR+Red) to correct NIR/Green-1 formula in: * process_single_tile() function * create_ci_band() utility function * Updated create_mask_and_crop() documentation - Renamed numbered shell scripts for clarity (matching R script numbering): * 01_run_planet_download -> 10_planet_download.sh * 02_run_ci_extraction -> 20_ci_extraction.sh * 03_run_growth_model -> 30_growth_model.sh * 04_run_mosaic_creation -> 40_mosaic_creation.sh * 09_run_calculate_kpis -> 80_calculate_kpis.sh * 10_run_kpi_report -> 90_kpi_report.sh - Archived obsolete shell scripts to old_sh/: * build_mosaic.sh, build_report.sh, interpolate_growth_model.sh * 05_run_dashboard_report.sh, 06_run_crop_messaging.sh * 11_run_yield_prediction.sh/ps1 * runcane.sh, runpython.sh, smartcane.sh, update_RDS.sh - Deleted test/debug files and temporary outputs: * analyze_*.R, benchmark_gpu_vs_cpu.py, convert_angata_harvest.py * debug_mosaic.R, examine_kpi_results.R, generate_sar_report.R * inspect_8band_structure.R, inspect_tif_bands.R * old_working_utils.R, predict_harvest_operational.R * run_kpi_calculation.R, run_report.R, simple_sar_test.R * data_validation_tool/, harvest_ci_pattern_analysis.png, kpi_debug.out - Enhanced harvest prediction: Added threshold tuning (0.40-0.45) and field type handling - Enhanced mosaic creation: Improved tile detection and routing logic
440 lines
16 KiB
R
440 lines
16 KiB
R
# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\parameters_project.R
|
|
#
|
|
# PARAMETERS_PROJECT.R
|
|
# ====================
|
|
# This script defines project parameters, directory structures, and loads field boundaries.
|
|
# It establishes all the necessary paths and creates required directories for the SmartCane project.
|
|
|
|
# 1. Load required libraries
|
|
# -------------------------
|
|
suppressPackageStartupMessages({
|
|
library(here)
|
|
library(readxl)
|
|
library(sf)
|
|
library(dplyr)
|
|
library(tidyr)
|
|
library(jsonlite) # For reading tiling_config.json
|
|
})
|
|
|
|
# 2. Smart detection for tile-based vs single-file mosaic approach
|
|
# ----------------------------------------------------------------
|
|
detect_mosaic_mode <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) {
  # Decide whether this project uses tile-based mosaics or a single mosaic file.
  #
  # Args:
  #   merged_final_tif_dir:  path to merged_final_tif/ (single-file mosaic output)
  #   daily_tiles_split_dir: optional path to daily_tiles_split/, where script 10
  #                          may have written a tiling_config.json metadata file
  #
  # Returns a list with:
  #   has_tiles      - TRUE when tile-based processing should be used
  #   detected_tiles - file names matching the tile pattern (empty for metadata hits)
  #   total_files    - number of .tif files inspected (0 for metadata hits)
  #   source         - which detection strategy produced the answer
  #   grid_size      - grid size from metadata, when available

  # Fallback definition of the null-coalescing operator: base R provides `%||%`
  # only from 4.4 on, and rlang is not attached by this script. Without this,
  # the function errors on older R installations.
  `%||%` <- function(a, b) if (is.null(a)) b else a

  # PRIORITY 1: tiling_config.json written by script 10 is the most reliable
  # source, since it explicitly records the tiling decision.
  if (!is.null(daily_tiles_split_dir) && dir.exists(daily_tiles_split_dir)) {
    # The config may sit in any grid-size subfolder; search recursively.
    config_files <- list.files(daily_tiles_split_dir,
                               pattern = "tiling_config\\.json$",
                               recursive = TRUE,
                               full.names = TRUE)

    if (length(config_files) > 0) {
      # Several subfolders may each hold a config; trust the most recent one.
      config_file <- config_files[which.max(file.info(config_files)$mtime)]

      tryCatch({
        config_json <- jsonlite::read_json(config_file)
        # NOTE: return() inside tryCatch's expression exits this function.
        return(list(
          has_tiles = config_json$has_tiles %||% TRUE,
          detected_tiles = character(),
          total_files = 0,
          source = "tiling_config.json",
          grid_size = config_json$grid_size %||% "unknown"
        ))
      }, error = function(e) {
        warning("Error reading tiling_config.json: ", e$message)
        # Fall through to file-based detection
      })
    }
  }

  # PRIORITY 2: file-based detection (fallback if metadata not found).
  # Inspect merged_final_tif/ for tile-named files.
  if (!dir.exists(merged_final_tif_dir)) {
    return(list(
      has_tiles = FALSE,
      detected_tiles = character(),
      total_files = 0,
      source = "directory_not_found"
    ))
  }

  tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE)

  if (length(tif_files) == 0) {
    return(list(
      has_tiles = FALSE,
      detected_tiles = character(),
      total_files = 0,
      source = "no_files_found"
    ))
  }

  # Tile-named files end in _XX.tif where XX is exactly two digits,
  # e.g. 2025-11-27_00.tif or week_50_2024_01.tif.
  tile_pattern <- "_(\\d{2})\\.tif$"
  tile_files <- tif_files[grepl(tile_pattern, tif_files)]

  list(
    has_tiles = length(tile_files) > 0,
    detected_tiles = tile_files,
    total_files = length(tif_files),
    source = "file_pattern_detection"
  )
}
|
|
|
|
# 2. Define project directory structure
|
|
# -----------------------------------
|
|
setup_project_directories <- function(project_dir, data_source = "merged_tif_8b") {
  # Assemble the directory layout for a SmartCane project, create every folder,
  # and determine (via detect_mosaic_mode) whether tile-based mosaics apply.
  #
  # Args:
  #   project_dir: project folder name under laravel_app/storage/app
  #   data_source: TIF source folder. "merged_tif_8b" (default) holds the newer
  #                8-band data with cloud masking (UDM); "merged_tif" holds the
  #                legacy 4-band data.
  #
  # Returns a named list of resolved paths plus tile-detection flags.

  storage_root <- here("laravel_app/storage/app", project_dir)

  # Select the TIF input folder according to the data_source argument.
  tif_source_dir <- here(storage_root, data_source)

  # Ask detect_mosaic_mode (script-10 metadata first, file patterns second)
  # whether this project was split into tiles.
  final_tif_dir <- here(storage_root, "merged_final_tif")
  tiles_split_dir <- here(storage_root, "daily_tiles_split")

  detection <- detect_mosaic_mode(
    merged_final_tif_dir = final_tif_dir,
    daily_tiles_split_dir = tiles_split_dir
  )
  tiles_in_use <- detection$has_tiles

  # Full folder layout for the project.
  layout <- list(
    reports = here(storage_root, "reports"),
    logs = here(storage_root, "logs"),
    data = here(storage_root, "Data"),
    tif = list(
      merged = tif_source_dir, # chosen via the data_source argument
      final = final_tif_dir
    ),
    weekly_mosaic = here(storage_root, "weekly_mosaic"),
    weekly_tile_max = here(storage_root, "weekly_tile_max"),
    extracted_ci = list(
      base = here(storage_root, "Data/extracted_ci"),
      daily = here(storage_root, "Data/extracted_ci/daily_vals"),
      cumulative = here(storage_root, "Data/extracted_ci/cumulative_vals")
    ),
    vrt = here(storage_root, "Data/vrt"),
    harvest = here(storage_root, "Data/HarvestData")
  )

  # Materialise every folder in the layout (idempotent; warnings suppressed).
  invisible(lapply(unlist(layout), dir.create, showWarnings = FALSE, recursive = TRUE))

  # Expose the layout under the names the rest of the pipeline expects.
  list(
    laravel_storage_dir = storage_root,
    reports_dir = layout$reports,
    log_dir = layout$logs,
    data_dir = layout$data,
    planet_tif_folder = layout$tif$merged,
    merged_final = layout$tif$final,
    daily_CI_vals_dir = layout$extracted_ci$daily,
    cumulative_CI_vals_dir = layout$extracted_ci$cumulative,
    # SMART routing: weekly CI output goes to the per-tile folder when tiles
    # were detected, otherwise to the single-file mosaic folder.
    weekly_CI_mosaic = if (tiles_in_use) layout$weekly_tile_max else layout$weekly_mosaic,
    daily_vrt = layout$vrt, # Data/vrt, where R writes VRT files during CI extraction
    weekly_tile_max = layout$weekly_tile_max, # per-tile weekly MAX mosaics (script 04)
    use_tile_mosaic = tiles_in_use, # flag: tiles in use for this project
    tile_detection_info = list(
      has_tiles = detection$has_tiles,
      detected_source = detection$source,
      detected_count = detection$total_files,
      grid_size = detection$grid_size %||% "unknown",
      sample_tiles = head(detection$detected_tiles, 3)
    ),
    harvest_dir = layout$harvest,
    extracted_CI_dir = layout$extracted_ci$base
  )
}
|
|
|
|
#set working dir.
|
|
# 3. Load field boundaries
|
|
# ----------------------
|
|
load_field_boundaries <- function(data_dir) {
  # Load the project's field boundary polygons from GeoJSON.
  #
  # Returns a list with:
  #   field_boundaries_sf - the sf object (OBJECTID dropped, columns reordered)
  #   field_boundaries    - a terra SpatVector when conversion succeeds,
  #                         otherwise the same sf object as a fallback
  #
  # Boundary-file selection depends on GLOBAL flags set by the calling script:
  # the ESA project uses pivot_2.geojson only for scripts 02-03 (CI extraction
  # & growth model), which set `ci_extraction_script`; all other scripts
  # (including 04-mosaic, 09-KPIs, 10-reports) read pivot.geojson.
  use_pivot_2 <- exists("project_dir") && project_dir == "esa" &&
    exists("ci_extraction_script") # ci_extraction_script flag set by scripts 02-03

  if (use_pivot_2) {
    field_boundaries_path <- here(data_dir, "pivot_2.geojson")
  } else {
    field_boundaries_path <- here(data_dir, "pivot.geojson")
  }

  if (!file.exists(field_boundaries_path)) {
    stop(paste("Field boundaries file not found at path:", field_boundaries_path))
  }

  tryCatch({
    # Read GeoJSON; CRS validity is handled explicitly below.
    field_boundaries_sf <- st_read(field_boundaries_path, quiet = TRUE)

    # Drop the ArcGIS-style OBJECTID column immediately if present.
    if ("OBJECTID" %in% names(field_boundaries_sf)) {
      field_boundaries_sf <- field_boundaries_sf %>% select(-OBJECTID)
    }

    # Validate and fix the CRS if needed. Only the $epsg field is inspected;
    # calling is.na() on a whole CRS object can itself error, hence the
    # defensive tryCatch around the check.
    tryCatch({
      # Assign WGS84 only when no EPSG code is recorded.
      if (is.na(sf::st_crs(field_boundaries_sf)$epsg)) {
        st_crs(field_boundaries_sf) <- 4326
        warning("CRS was missing, assigned WGS84 (EPSG:4326)")
      }
    }, error = function(e) {
      # The CRS check itself failed; try a blind assignment instead.
      # `<<-` targets field_boundaries_sf in this function's frame (the
      # handler closure's enclosing environment).
      tryCatch({
        st_crs(field_boundaries_sf) <<- 4326
      }, error = function(e2) {
        # Give up quietly - terra may still cope downstream.
        warning(paste("Could not set CRS:", e2$message))
      })
    })

    # Reorder attribute columns, accommodating the optional sub_area column.
    # dplyr::select on an sf object keeps the geometry column ("sticky");
    # st_set_geometry("geometry") re-activates it by name.
    # NOTE(review): this assumes the geometry column is literally named
    # "geometry" - a GeoJSON with e.g. "geom" would error here; confirm
    # against the project's boundary files.
    if ("sub_area" %in% names(field_boundaries_sf)) {
      field_boundaries_sf <- field_boundaries_sf %>%
        dplyr::select(field, sub_field, sub_area) %>%
        sf::st_set_geometry("geometry")
    } else {
      field_boundaries_sf <- field_boundaries_sf %>%
        dplyr::select(field, sub_field) %>%
        sf::st_set_geometry("geometry")
    }

    # Convert to a terra SpatVector when possible; some GeoJSON files (e.g.
    # complex MultiPolygons) hit GDAL/terra compatibility issues, in which
    # case the sf object is returned instead and downstream code must accept
    # either type.
    field_boundaries <- tryCatch({
      field_boundaries_terra <- terra::vect(field_boundaries_sf)

      # Ensure the terra object carries a CRS; crs() itself may error, so
      # fetch it defensively and normalise to a character string.
      crs_value <- tryCatch(terra::crs(field_boundaries_terra), error = function(e) NULL)
      crs_str <- if (!is.null(crs_value)) as.character(crs_value) else ""

      if (is.null(crs_value) || length(crs_value) == 0 || nchar(crs_str) == 0) {
        terra::crs(field_boundaries_terra) <- "EPSG:4326"
        warning("Terra object CRS was empty, assigned WGS84 (EPSG:4326)")
      }
      field_boundaries_terra

    }, error = function(e) {
      warning(paste("Terra conversion failed, using sf object instead:", e$message))
      # Fallback: hand back the sf object; callers handle both types.
      field_boundaries_sf
    })

    return(list(
      field_boundaries_sf = field_boundaries_sf,
      field_boundaries = field_boundaries
    ))
  }, error = function(e) {
    # Surface full context before rethrowing as a plain error.
    cat("[DEBUG] Error in load_field_boundaries:\n")
    cat(" Message:", e$message, "\n")
    cat(" Call:", deparse(e$call), "\n")
    stop(paste("Error loading field boundaries:", e$message))
  })
}
|
|
|
|
# 4. Load harvesting data
|
|
# ---------------------
|
|
load_harvesting_data <- function(data_dir) {
  # Read harvest.xlsx from data_dir and normalise it for downstream modelling.
  #
  # Returns a data frame with typed columns (field/sub_field/sub_area as
  # character; year/age/tonnage_ha numeric; season_start/season_end as Date),
  # with season_end capped at today (and defaulted to today when missing) and
  # `age` recomputed as whole weeks between season_start and season_end.
  # Returns NULL (with a warning) when the file is missing or unparseable.
  harvest_file <- here(data_dir, "harvest.xlsx")

  if (!file.exists(harvest_file)) {
    warning(paste("Harvest data file not found at path:", harvest_file))
    return(NULL)
  }

  # Parse one cell that may be a Date, POSIXct, Excel serial number, or a
  # date string in one of several common formats. Applied per-element via
  # sapply() below, so x is always scalar here.
  parse_flexible_date <- function(x) {
    # BUGFIX: check is.null() BEFORE is.na(). is.na(NULL) returns logical(0),
    # and `||` errors on zero-length operands (R >= 4.3), so the original
    # order crashed on NULL input.
    if (is.null(x) || is.na(x)) return(NA_real_)
    if (inherits(x, "Date")) return(x)
    if (inherits(x, "POSIXct")) return(as.Date(x))

    # Excel stores dates as day counts from 1899-12-30 (Windows epoch).
    if (is.numeric(x)) {
      return(as.Date(x, origin = "1899-12-30"))
    }

    # Fall back to string parsing with multiple format candidates.
    x_char <- as.character(x)

    # Try common formats: YYYY-MM-DD, DD/MM/YYYY, MM/DD/YYYY, with timestamp.
    formats <- c("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y-%m-%d %H:%M:%S")

    for (fmt in formats) {
      result <- suppressWarnings(as.Date(x_char, format = fmt))
      if (!is.na(result)) return(result)
    }

    # Unrecognised format.
    return(NA)
  }

  tryCatch({
    harvesting_data <- read_excel(harvest_file) %>%
      dplyr::select(
        c(
          "field",
          "sub_field",
          "year",
          "season_start",
          "season_end",
          "age",
          "sub_area",
          "tonnage_ha"
        )
      ) %>%
      mutate(
        field = as.character(field),
        sub_field = as.character(sub_field),
        year = as.numeric(year),
        # sapply() strips the Date class to numeric; re-cast from the Unix
        # epoch in the next two lines.
        season_start = sapply(season_start, parse_flexible_date),
        season_end = sapply(season_end, parse_flexible_date),
        season_start = as.Date(season_start, origin = "1970-01-01"),
        season_end = as.Date(season_end, origin = "1970-01-01"),
        age = as.numeric(age),
        sub_area = as.character(sub_area),
        tonnage_ha = as.numeric(tonnage_ha)
      ) %>%
      mutate(
        # Clamp future/missing season ends to today so age stays meaningful
        # for seasons still in progress.
        season_end = case_when(
          season_end > Sys.Date() ~ Sys.Date(),
          is.na(season_end) ~ Sys.Date(),
          TRUE ~ season_end
        ),
        # Recompute crop age in whole weeks from the season window.
        age = round(as.numeric(season_end - season_start) / 7, 0)
      )

    return(harvesting_data)
  }, error = function(e) {
    warning(paste("Error loading harvesting data:", e$message))
    return(NULL)
  })
}
|
|
|
|
# 5. Define logging functions globally first
|
|
# ---------------------------------------
|
|
# Create a simple default log function in case setup_logging hasn't been called yet
|
|
log_message <- function(message, level = "INFO") {
  # Minimal console logger used before setup_logging() installs the
  # file-backed replacement. Prints "[LEVEL] timestamp - message".
  stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
  cat(paste0("[", level, "] ", stamp, " - ", message), "\n")
}
|
|
|
|
log_head <- function(list, level = "INFO") {
  # Log the structure (str) of the first elements of `list` via log_message().
  preview <- capture.output(str(head(list)))
  log_message(paste(preview, collapse = "\n"), level)
}
|
|
|
|
# 6. Set up full logging system with file output
|
|
# -------------------------------------------
|
|
setup_logging <- function(log_dir) {
  # Install file-backed logging for the project.
  #
  # Builds a dated log file under log_dir, defines log_message()/log_head()
  # closures that append to it, and overwrites the console-only globals with
  # these versions so already-sourced code picks them up.
  #
  # Returns a list with the log file path and both logging functions.
  log_file <- here(log_dir, paste0(format(Sys.Date(), "%Y%m%d"), ".log"))

  log_message <- function(message, level = "INFO") {
    stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
    line <- paste0("[", level, "] ", stamp, " - ", message)
    cat(line, "\n", file = log_file, append = TRUE)

    # Echo only problems to the console so routine runs stay quiet.
    if (level %in% c("ERROR", "WARNING")) {
      cat(line, "\n")
    }
  }

  log_head <- function(list, level = "INFO") {
    log_message(paste(capture.output(str(head(list))), collapse = "\n"), level)
  }

  # Replace the console-only fallbacks defined at load time.
  assign("log_message", log_message, envir = .GlobalEnv)
  assign("log_head", log_head, envir = .GlobalEnv)

  list(
    log_file = log_file,
    log_message = log_message,
    log_head = log_head
  )
}
|
|
|
|
# 7. Initialize the project
|
|
# ----------------------
|
|
# Export project directories and settings
|
|
initialize_project <- function(project_dir, data_source = "merged_tif_8b") {
  # One-stop project bootstrap: directory layout, logging, field boundaries,
  # and harvest data, returned as a single flat list.
  #
  # Args:
  #   project_dir: project folder name (forwarded to setup_project_directories)
  #   data_source: TIF source folder selection (default: 8-band merged data)

  # Directory structure first - logging and data loading need the paths.
  project_dirs <- setup_project_directories(project_dir, data_source = data_source)

  # File-backed logging into the project's log directory.
  logging <- setup_logging(project_dirs$log_dir)

  # Spatial boundaries and harvest records from the project's Data folder.
  boundaries <- load_field_boundaries(project_dirs$data_dir)
  harvesting_data <- load_harvesting_data(project_dirs$data_dir)

  # Flatten everything into one list for list2env() at the call site.
  c(
    project_dirs,
    list(
      logging = logging,
      field_boundaries = boundaries$field_boundaries,
      field_boundaries_sf = boundaries$field_boundaries_sf,
      harvesting_data = harvesting_data
    )
  )
}
|
|
|
|
# When script is sourced, initialize with the global project_dir variable if it exists
|
|
# Guard clause: refuse to initialize unless the caller set project_dir.
if (!exists("project_dir")) {
  warning("project_dir variable not found. Please set project_dir before sourcing parameters_project.R")
} else {
  # Safe to log before initialization - the fallback log_message exists.
  log_message(paste("Initializing project with directory:", project_dir))

  # data_source may be injected by 02_ci_extraction.R; otherwise use default.
  data_src <- if (exists("data_source")) data_source else "merged_tif_8b"
  log_message(paste("Using data source directory:", data_src))

  project_config <- initialize_project(project_dir, data_source = data_src)

  # Promote every config entry to the global environment for other scripts.
  list2env(project_config, envir = .GlobalEnv)

  # Report completion, including the tile-mode decision when available.
  log_message(paste("Project initialized with directory:", project_dir))
  if (exists("use_tile_mosaic")) {
    mosaic_mode <- if (use_tile_mosaic) "TILE-BASED" else "SINGLE-FILE"
    log_message(paste("Mosaic mode detected:", mosaic_mode))
    if (exists("tile_detection_info") && !is.null(tile_detection_info)) {
      log_message(paste(" - Detection source:", tile_detection_info$detected_source))
      log_message(paste(" - Grid size:", tile_detection_info$grid_size))
      log_message(paste(" - Detected files in storage:", tile_detection_info$detected_count))
      if (length(tile_detection_info$sample_tiles) > 0) {
        log_message(paste(" - Sample tile files:", paste(tile_detection_info$sample_tiles, collapse = ", ")))
      }
    }
  }
}
|