835 lines
29 KiB
R
835 lines
29 KiB
R
# ==============================================================================
|
|
# PARAMETERS_PROJECT_2.R (CLEANED VERSION)
|
|
# ==============================================================================
|
|
# PURPOSE:
|
|
# Project configuration, directory structure setup, and helper functions
|
|
# for centralized path management and project initialization.
|
|
#
|
|
# SECTION 1: Libraries & Dependencies
|
|
# SECTION 2: Client Type Mapping & Configuration
|
|
# SECTION 3: Directory Structure Setup
|
|
# SECTION 4: Date/Week Utility Functions (centralized)
|
|
# SECTION 5: Field Boundary & Harvest Data Loaders
|
|
# SECTION 6: Project Initialization & Logging
|
|
# SECTION 7: Mosaic & KPI Verification Helpers
|
|
#
|
|
# NOTE: Duplicate functions (safe_log, smartcane_debug, smartcane_warn,
|
|
# load_field_boundaries, date_list, repair_geojson_geometries)
|
|
# have been REMOVED - they belong in 00_common_utils.R.
|
|
# Source 00_common_utils.R after parameters_project.R to get those.
|
|
# ==============================================================================
|
|
|
|
# ==============================================================================
|
|
# SECTION 1: LIBRARIES & DEPENDENCIES
|
|
# ==============================================================================
|
|
suppressPackageStartupMessages({
|
|
library(here)
|
|
library(readxl)
|
|
library(sf)
|
|
library(dplyr)
|
|
library(tidyr)
|
|
library(lubridate)
|
|
library(jsonlite) # For reading tiling_config.json
|
|
})
|
|
|
|
# ==============================================================================
|
|
# SECTION 2: CLIENT TYPE MAPPING & CONFIGURATION
|
|
# ==============================================================================
|
|
# Maps project names to client types for pipeline control
|
|
# This determines which scripts run and what outputs they produce
|
|
|
|
CLIENT_TYPE_MAP <- list(
|
|
"angata" = "cane_supply",
|
|
"chemba" = "agronomic_support",
|
|
"xinavane" = "agronomic_support",
|
|
"esa" = "agronomic_support",
|
|
"simba" = "agronomic_support",
|
|
"john" = "agronomic_support"
|
|
)
|
|
|
|
#' Get client type for a project
|
|
#' @param project_name Character project name
|
|
#' @return Character client type ("cane_supply" or "agronomic_support")
|
|
get_client_type <- function(project_name) {
|
|
client_type <- CLIENT_TYPE_MAP[[project_name]]
|
|
if (is.null(client_type)) {
|
|
warning(paste0("Project '", project_name, "' not found in CLIENT_TYPE_MAP. Defaulting to 'agronomic_support'.", sep=""))
|
|
return("agronomic_support") # Default for all unlisted projects
|
|
}
|
|
return(client_type)
|
|
}
|
|
|
|
# Client-specific KPI configurations
|
|
# Defines which KPIs and outputs are required for each client type
|
|
CLIENT_TYPE_CONFIGS <- list(
|
|
|
|
# Aura (agronomic_support): Farm-level KPI summaries for agronomists
|
|
"agronomic_support" = list(
|
|
client_type = "agronomic_support",
|
|
description = "Farm-level KPI summaries for agronomic decision support",
|
|
kpi_calculations = c(
|
|
"field_uniformity",
|
|
"area_change",
|
|
"tch_forecasted",
|
|
"growth_decline",
|
|
"weed_presence",
|
|
"gap_filling"
|
|
),
|
|
outputs = c("kpi_summary_tables", "field_details"),
|
|
requires_harvest_data = FALSE,
|
|
script_90_compatible = TRUE,
|
|
script_91_compatible = FALSE
|
|
),
|
|
|
|
# Cane Supply (cane_supply): Per-field analysis with harvest prediction
|
|
"cane_supply" = list(
|
|
client_type = "cane_supply",
|
|
description = "Per-field analysis with harvest prediction and phase assignment",
|
|
kpi_calculations = c(
|
|
"per_field_analysis",
|
|
"phase_assignment",
|
|
"harvest_prediction",
|
|
"status_triggers"
|
|
),
|
|
outputs = c("field_analysis_excel", "field_analysis_summary"),
|
|
requires_harvest_data = TRUE,
|
|
script_90_compatible = FALSE,
|
|
script_91_compatible = TRUE
|
|
)
|
|
)
|
|
|
|
#' Get KPI configuration for a specific client type
|
|
#' @param client_type Character (e.g., "agronomic_support", "cane_supply")
|
|
#' @return List containing configuration for that client type
|
|
get_client_kpi_config <- function(client_type) {
|
|
config <- CLIENT_TYPE_CONFIGS[[client_type]]
|
|
if (is.null(config)) {
|
|
warning(sprintf("Client type '%s' not found - using cane_supply defaults", client_type))
|
|
return(CLIENT_TYPE_CONFIGS[["cane_supply"]])
|
|
}
|
|
return(config)
|
|
}
|
|
|
|
# ==============================================================================
|
|
# SECTION 3: DIRECTORY STRUCTURE SETUP
|
|
# ==============================================================================
|
|
# CENTRALIZED PATH MANAGEMENT: All file paths in the entire pipeline
|
|
# are derived from setup_project_directories().
|
|
# This is the single source of truth for 8 tiers of directories.
|
|
|
|
#' Setup complete project directory structure
|
|
#'
|
|
#' Creates all 8 tiers of directories and returns a comprehensive list
|
|
#' of paths for use throughout the pipeline.
|
|
#'
|
|
#' @param project_dir Character. Project name (e.g., "angata", "aura")
|
|
#' @param data_source Character. "merged_tif" (default) or "merged_tif_8b"
|
|
#' @return List containing all 8 tiers of paths
|
|
#'
|
|
#' @details
|
|
#' TIER 1: Raw data (merged_tif) - Python download output
|
|
#' TIER 2: Per-field TIFFs - Script 10 output
|
|
#' TIER 3: CI extraction - Script 20 output
|
|
#' TIER 4: Growth model - Script 30 output
|
|
#' TIER 5: Mosaics - Script 40 output
|
|
#' TIER 6: KPI & reports - Scripts 80/90/91 output
|
|
#' TIER 7: Support files - GeoJSON, Excel, logs
|
|
#' TIER 8: Metadata - Config, CRS info
|
|
setup_project_directories <- function(project_dir, data_source = "merged_tif") {
|
|
# BASE DIRECTORIES
|
|
laravel_storage_dir <- here("laravel_app", "storage", "app", project_dir)
|
|
|
|
# TIER 1: RAW DATA (Script 00 output - Python download)
|
|
merged_tif_folder <- here(laravel_storage_dir, "merged_tif")
|
|
|
|
# TIER 2: PER-FIELD TIFFS (Script 10 output)
|
|
field_tiles_dir <- here(laravel_storage_dir, "field_tiles")
|
|
field_tiles_ci_dir <- here(laravel_storage_dir, "field_tiles_CI")
|
|
daily_tiles_split_dir <- here(laravel_storage_dir, "daily_tiles_split")
|
|
|
|
# SUPPORT TIER: DATA DIRECTORY (define early for use in later tiers)
|
|
data_dir <- here(laravel_storage_dir, "Data")
|
|
|
|
# TIER 3: CI EXTRACTION (Script 20 output)
|
|
# Structure: Data/extracted_ci/daily_vals/{FIELD}/{DATE}.rds (per-field daily CI values)
|
|
extracted_ci_base_dir <- here(data_dir, "extracted_ci")
|
|
daily_ci_vals_dir <- here(extracted_ci_base_dir, "daily_vals") # Per-field structure
|
|
cumulative_ci_vals_dir <- here(extracted_ci_base_dir, "cumulative_vals")
|
|
ci_for_python_dir <- here(extracted_ci_base_dir, "ci_data_for_python")
|
|
|
|
# TIER 4: GROWTH MODEL (Script 30 output)
|
|
growth_model_interpolated_dir <- here(data_dir, "growth_model_interpolated")
|
|
|
|
# TIER 5: MOSAICS (Script 40 output)
|
|
weekly_mosaic_dir <- here(laravel_storage_dir, "weekly_mosaic")
|
|
weekly_tile_max_dir <- here(laravel_storage_dir, "weekly_tile_max")
|
|
|
|
# TIER 6: KPI & REPORTING (Scripts 80/90/91 output)
|
|
reports_dir <- here(laravel_storage_dir, "reports")
|
|
kpi_reports_dir <- here(reports_dir, "kpis", "field_level")
|
|
kpi_field_stats_dir <- here(reports_dir, "kpis", "field_stats")
|
|
kpi_field_analysis_dir <- here(reports_dir, "kpis", "field_analysis")
|
|
|
|
# TIER 7: SUPPORT (various scripts)
|
|
vrt_dir <- here(data_dir, "vrt") # Virtual Raster files created during CI extraction
|
|
harvest_dir <- here(data_dir, "harvest") # Harvest data directory
|
|
log_dir <- here(laravel_storage_dir, "logs")
|
|
|
|
# Create all directories
|
|
all_dirs <- c(
|
|
merged_tif_folder, field_tiles_dir, field_tiles_ci_dir, daily_tiles_split_dir,
|
|
extracted_ci_base_dir, daily_ci_vals_dir, cumulative_ci_vals_dir, ci_for_python_dir,
|
|
growth_model_interpolated_dir,
|
|
weekly_mosaic_dir, weekly_tile_max_dir,
|
|
reports_dir, kpi_reports_dir, kpi_field_stats_dir, kpi_field_analysis_dir,
|
|
data_dir, vrt_dir, harvest_dir, log_dir
|
|
)
|
|
|
|
for (dir_path in all_dirs) {
|
|
dir.create(dir_path, showWarnings = FALSE, recursive = TRUE)
|
|
}
|
|
|
|
# TIER 8: CONFIG & METADATA PATHS
|
|
field_boundaries_path <- here(data_dir, "pivot.geojson")
|
|
|
|
# Return comprehensive list
|
|
return(list(
|
|
# PROJECT ROOT
|
|
laravel_storage_dir = laravel_storage_dir,
|
|
|
|
# TIER 1: Raw data
|
|
merged_tif_folder = merged_tif_folder,
|
|
|
|
# TIER 2: Per-field TIFFs
|
|
field_tiles_dir = field_tiles_dir,
|
|
field_tiles_ci_dir = field_tiles_ci_dir,
|
|
daily_tiles_split_dir = daily_tiles_split_dir,
|
|
|
|
# TIER 3: CI extraction
|
|
extracted_ci_base_dir = extracted_ci_base_dir,
|
|
daily_ci_vals_dir = daily_ci_vals_dir,
|
|
cumulative_ci_vals_dir = cumulative_ci_vals_dir,
|
|
ci_for_python_dir = ci_for_python_dir,
|
|
|
|
# TIER 4: Growth model
|
|
growth_model_interpolated_dir = growth_model_interpolated_dir,
|
|
|
|
# TIER 5: Mosaics
|
|
weekly_mosaic_dir = weekly_mosaic_dir,
|
|
weekly_tile_max_dir = weekly_tile_max_dir,
|
|
|
|
# TIER 6: KPI & reporting
|
|
reports_dir = reports_dir,
|
|
kpi_reports_dir = kpi_reports_dir,
|
|
kpi_field_stats_dir = kpi_field_stats_dir,
|
|
kpi_field_analysis_dir = kpi_field_analysis_dir,
|
|
|
|
# TIER 7: Support
|
|
data_dir = data_dir,
|
|
vrt_dir = vrt_dir,
|
|
harvest_dir = harvest_dir,
|
|
log_dir = log_dir,
|
|
|
|
# TIER 8: Metadata
|
|
field_boundaries_path = field_boundaries_path
|
|
))
|
|
}
|
|
|
|
# ==============================================================================
|
|
# SECTION 4: DATE/WEEK UTILITY FUNCTIONS
|
|
# ==============================================================================
|
|
# ISO 8601 week/year functions for consistent date handling across scripts
|
|
|
|
#' Extract ISO week number from a date
|
|
#' @param date Date object or string convertible to Date
|
|
#' @return Numeric ISO week number (1-53)
|
|
get_iso_week <- function(date) {
|
|
as.numeric(format(date, "%V"))
|
|
}
|
|
|
|
#' Extract ISO year from a date
|
|
#' @param date Date object or string convertible to Date
|
|
#' @return Numeric ISO year
|
|
get_iso_year <- function(date) {
|
|
as.numeric(format(date, "%G"))
|
|
}
|
|
|
|
#' Extract both ISO week and year as a list
|
|
#' @param date Date object or string convertible to Date
|
|
#' @return List with elements: week (1-53), year
|
|
get_iso_week_year <- function(date) {
|
|
list(
|
|
week = as.numeric(format(date, "%V")),
|
|
year = as.numeric(format(date, "%G"))
|
|
)
|
|
}
|
|
|
|
#' Format date as a week/year label
|
|
#' @param date Date object or string convertible to Date
|
|
#' @param separator Character. Separator between week and year (default "_")
|
|
#' @return Character in format "week##_YYYY" (e.g., "week03_2025")
|
|
format_week_label <- function(date, separator = "_") {
|
|
wwy <- get_iso_week_year(date)
|
|
sprintf("week%02d%s%d", wwy$week, separator, wwy$year)
|
|
}
|
|
|
|
# ==============================================================================
|
|
# SECTION 5: FIELD BOUNDARY & HARVEST DATA LOADERS
|
|
# ==============================================================================
|
|
# IMPORTANT: These functions are also defined in 00_common_utils.R
|
|
# to avoid duplication. Source 00_common_utils.R AFTER parameters_project.R
|
|
# to override these stub definitions with the full implementations.
|
|
|
|
#' Load Field Boundaries from GeoJSON
|
|
#'
|
|
#' Loads field polygon geometries from GeoJSON file (pivot.geojson or pivot_2.geojson).
|
|
#' Handles CRS validation and column standardization.
|
|
#'
|
|
#' @param data_dir Directory containing GeoJSON file
|
|
#' @return List with elements:
|
|
#' - field_boundaries_sf: sf (Simple Features) object
|
|
#' - field_boundaries: terra SpatVect object (if conversion successful, else sf fallback)
|
|
#'
|
|
load_field_boundaries <- function(data_dir) {
|
|
field_boundaries_path <- file.path(data_dir, "pivot.geojson")
|
|
|
|
if (!file.exists(field_boundaries_path)) {
|
|
stop("Field boundaries file not found at:", field_boundaries_path)
|
|
}
|
|
|
|
tryCatch({
|
|
boundaries_sf <- sf::st_read(field_boundaries_path, quiet = TRUE)
|
|
|
|
# Filter out features with empty geometries
|
|
boundaries_sf <- boundaries_sf[!st_is_empty(boundaries_sf), ]
|
|
|
|
# Repair geometries if needed
|
|
if (!all(sf::st_is_valid(boundaries_sf))) {
|
|
boundaries_sf <- sf::st_make_valid(boundaries_sf)
|
|
}
|
|
|
|
# Convert to terra SpatVect
|
|
boundaries_spat <- terra::vect(boundaries_sf)
|
|
|
|
return(list(
|
|
field_boundaries_sf = boundaries_sf,
|
|
field_boundaries = boundaries_spat
|
|
))
|
|
}, error = function(e) {
|
|
stop("Error loading field boundaries:", e$message)
|
|
})
|
|
}
|
|
|
|
#' Create Date List
|
|
#'
|
|
#' Creates a sequence of dates from end_date going back offset days
|
|
#'
|
|
#' @param end_date End date (Date object)
|
|
#' @param offset Number of days to go back
|
|
#' @return Character vector of dates in YYYY-MM-DD format
|
|
date_list <- function(end_date, offset) {
|
|
start_date <- end_date - offset
|
|
date_seq <- seq(start_date, end_date, by = "day")
|
|
format(date_seq, "%Y-%m-%d")
|
|
}
|
|
|
|
#' Repair GeoJSON Geometries
|
|
#'
|
|
#' Validates and repairs invalid geometries in sf object
|
|
#'
|
|
#' @param sf_object sf object with potentially invalid geometries
|
|
#' @return sf object with repaired geometries
|
|
repair_geojson_geometries <- function(sf_object) {
|
|
if (!all(sf::st_is_valid(sf_object))) {
|
|
sf_object <- sf::st_make_valid(sf_object)
|
|
}
|
|
sf_object
|
|
}
|
|
|
|
#' Load harvesting data from Excel
|
|
#'
|
|
#' Loads crop harvest schedule from harvest.xlsx file.
|
|
#' Handles flexible date formats (numeric, YYYY-MM-DD, DD/MM/YYYY, etc.).
|
|
#'
|
|
#' @param data_dir Directory containing harvest.xlsx file
|
|
#' @return Data frame with columns: field, sub_field, year, season_start,
|
|
#' season_end, age (weeks), sub_area, tonnage_ha. Returns NULL if not found.
|
|
load_harvesting_data <- function(data_dir) {
|
|
harvest_file <- file.path(data_dir, "harvest.xlsx")
|
|
|
|
if (!file.exists(harvest_file)) {
|
|
warning(paste("Harvest data file not found at path:", harvest_file))
|
|
return(NULL)
|
|
}
|
|
|
|
# Helper function to parse dates with multiple format detection
|
|
parse_flexible_date <- function(x) {
|
|
if (is.na(x) || is.null(x)) return(NA_real_)
|
|
if (inherits(x, "Date")) return(x)
|
|
if (inherits(x, "POSIXct")) return(as.Date(x))
|
|
|
|
if (is.numeric(x)) {
|
|
return(as.Date(x, origin = "1899-12-30"))
|
|
}
|
|
|
|
x_char <- as.character(x)
|
|
formats <- c("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y-%m-%d %H:%M:%S")
|
|
|
|
for (fmt in formats) {
|
|
result <- suppressWarnings(as.Date(x_char, format = fmt))
|
|
if (!is.na(result)) return(result)
|
|
}
|
|
|
|
return(NA)
|
|
}
|
|
|
|
tryCatch({
|
|
harvesting_data <- read_excel(harvest_file) %>%
|
|
dplyr::select(c(
|
|
"field", "sub_field", "year",
|
|
"season_start", "season_end", "age", "sub_area", "tonnage_ha"
|
|
)) %>%
|
|
mutate(
|
|
field = as.character(field),
|
|
sub_field = as.character(sub_field),
|
|
year = as.numeric(year),
|
|
season_start = sapply(season_start, parse_flexible_date),
|
|
season_end = sapply(season_end, parse_flexible_date),
|
|
season_start = as.Date(season_start, origin = "1970-01-01"),
|
|
season_end = as.Date(season_end, origin = "1970-01-01"),
|
|
age = as.numeric(age),
|
|
sub_area = as.character(sub_area),
|
|
tonnage_ha = as.numeric(tonnage_ha)
|
|
) %>%
|
|
mutate(
|
|
season_end = case_when(
|
|
season_end > Sys.Date() ~ Sys.Date(),
|
|
is.na(season_end) ~ Sys.Date(),
|
|
TRUE ~ season_end
|
|
),
|
|
age = round(as.numeric(season_end - season_start) / 7, 0)
|
|
)
|
|
|
|
return(harvesting_data)
|
|
}, error = function(e) {
|
|
warning(paste("Error loading harvesting data:", e$message))
|
|
return(NULL)
|
|
})
|
|
}
|
|
|
|
# ==============================================================================
|
|
# SECTION 6: LOGGING SYSTEM SETUP
|
|
# ==============================================================================
|
|
# Create default logging functions
|
|
|
|
#' Safe Logging Function
|
|
#'
|
|
#' Generic logging with [LEVEL] prefix. Works standalone without any framework.
|
|
#' Consistent with SmartCane logging standard.
|
|
#'
|
|
#' @param message The message to log
|
|
#' @param level The log level (default: "INFO"). Options: "INFO", "WARNING", "ERROR", "DEBUG"
|
|
#' @return NULL (invisible, used for side effects)
|
|
#'
|
|
safe_log <- function(message, level = "INFO") {
|
|
prefix <- sprintf("[%s]", level)
|
|
cat(sprintf("%s %s\n", prefix, message))
|
|
}
|
|
|
|
#' SmartCane Debug Logging (Conditional)
|
|
#'
|
|
#' Logs DEBUG-level messages only if verbose=TRUE or SMARTCANE_DEBUG env var is set.
|
|
#'
|
|
#' @param message The message to log
|
|
#' @param verbose Whether to output regardless of SMARTCANE_DEBUG (default: FALSE)
|
|
#' @return NULL (invisible, used for side effects)
|
|
#'
|
|
smartcane_debug <- function(message, verbose = FALSE) {
|
|
if (!verbose && Sys.getenv("SMARTCANE_DEBUG") != "TRUE") {
|
|
return(invisible(NULL))
|
|
}
|
|
safe_log(message, level = "DEBUG")
|
|
}
|
|
|
|
#' SmartCane Warning Logging
|
|
#'
|
|
#' Logs WARN-level messages. Convenience wrapper around safe_log().
|
|
#'
|
|
#' @param message The message to log
|
|
#' @return NULL (invisible, used for side effects)
|
|
#'
|
|
smartcane_warn <- function(message) {
|
|
safe_log(message, level = "WARN")
|
|
}
|
|
|
|
log_message <- function(message, level = "INFO") {
|
|
prefix <- sprintf("[%s]", level)
|
|
cat(sprintf("%s %s\n", prefix, message))
|
|
}
|
|
|
|
log_head <- function(data, level = "INFO") {
|
|
log_message(paste(capture.output(str(head(data))), collapse = "\n"), level)
|
|
}
|
|
|
|
#' Setup full logging system with file output
|
|
#'
|
|
#' Creates log directory and returns logging functions that write to both
|
|
#' console and log file.
|
|
#'
|
|
#' @param log_dir Character. Directory for log files
|
|
#' @return List with log_file path, log_message function, and log_head function
|
|
setup_logging <- function(log_dir) {
|
|
dir.create(log_dir, showWarnings = FALSE, recursive = TRUE)
|
|
|
|
log_file <- file.path(log_dir, sprintf(
|
|
"smartcane_%s.log",
|
|
format(Sys.time(), "%Y%m%d_%H%M%S")
|
|
))
|
|
|
|
log_message <- function(message, level = "INFO") {
|
|
prefix <- sprintf("[%s] [%s]", level, format(Sys.time(), "%Y-%m-%d %H:%M:%S"))
|
|
formatted_message <- sprintf("%s %s", prefix, message)
|
|
cat(formatted_message, "\n")
|
|
cat(formatted_message, "\n", file = log_file, append = TRUE)
|
|
}
|
|
|
|
log_head <- function(list, level = "INFO") {
|
|
log_message(paste(capture.output(str(head(list))), collapse = "\n"), level)
|
|
}
|
|
|
|
assign("log_message", log_message, envir = .GlobalEnv)
|
|
assign("log_head", log_head, envir = .GlobalEnv)
|
|
|
|
return(list(
|
|
log_file = log_file,
|
|
log_message = log_message,
|
|
log_head = log_head
|
|
))
|
|
}
|
|
|
|
# ==============================================================================
|
|
# SECTION 6B: DATA SOURCE DETECTION
|
|
# ==============================================================================
|
|
|
|
#' Detect data source for project
|
|
#'
|
|
#' Returns the data source directory (always "merged_tif" for consistency)
|
|
#'
|
|
#' @param project_dir Character. Project name
|
|
#' @return Character. "merged_tif"
|
|
detect_data_source <- function(project_dir) {
|
|
return("merged_tif")
|
|
}
|
|
|
|
#' Detect tile structure from merged TIF directory
|
|
#'
|
|
#' Checks if tiles exist by looking for:
|
|
#' 1. tiling_config.json metadata file (most reliable)
|
|
#' 2. Grid subdirectories (5x5, 10x10, etc.)
|
|
#' 3. Tile-named files (*_XX.tif pattern)
|
|
#'
|
|
#' @param merged_final_tif_dir Character. Path to merged TIF directory
|
|
#' @param daily_tiles_split_dir Character. Optional path to tiles directory
|
|
#' @return List with has_tiles (logical), detected_tiles, total_files, source, grid_size
|
|
detect_tile_structure_from_merged_final <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) {
|
|
# PRIORITY 1: Check for tiling_config.json metadata file from script 10
|
|
if (!is.null(daily_tiles_split_dir) && dir.exists(daily_tiles_split_dir)) {
|
|
config_files <- list.files(daily_tiles_split_dir,
|
|
pattern = "tiling_config\\.json$",
|
|
recursive = TRUE,
|
|
full.names = TRUE)
|
|
|
|
if (length(config_files) > 0) {
|
|
config_file <- config_files[which.max(file.info(config_files)$mtime)]
|
|
|
|
tryCatch({
|
|
config_json <- jsonlite::read_json(config_file)
|
|
return(list(
|
|
has_tiles = config_json$has_tiles %||% TRUE,
|
|
detected_tiles = character(),
|
|
total_files = 0,
|
|
source = "tiling_config.json",
|
|
grid_size = config_json$grid_size %||% "unknown"
|
|
))
|
|
}, error = function(e) {
|
|
warning("Error reading tiling_config.json: ", e$message)
|
|
})
|
|
}
|
|
}
|
|
|
|
# PRIORITY 2: File-based detection (fallback)
|
|
if (!dir.exists(merged_final_tif_dir)) {
|
|
return(list(
|
|
has_tiles = FALSE,
|
|
detected_tiles = character(),
|
|
total_files = 0,
|
|
source = "directory_not_found"
|
|
))
|
|
}
|
|
|
|
# Check for grid-size subdirectories (5x5, 10x10, etc.)
|
|
grid_subfolders <- list.dirs(merged_final_tif_dir, full.names = FALSE, recursive = FALSE)
|
|
grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE)
|
|
|
|
if (length(grid_patterns) > 0) {
|
|
grid_size <- grid_patterns[1]
|
|
grid_dir <- file.path(merged_final_tif_dir, grid_size)
|
|
sample_tiles <- list.files(grid_dir, pattern = "\\.tif$", recursive = TRUE)[1:3]
|
|
|
|
return(list(
|
|
has_tiles = TRUE,
|
|
detected_tiles = sample_tiles,
|
|
total_files = length(sample_tiles),
|
|
source = "grid_subdirectory_detection",
|
|
grid_size = grid_size,
|
|
grid_path = grid_dir
|
|
))
|
|
}
|
|
|
|
# Check for tile-named files (*_XX.tif pattern)
|
|
tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE)
|
|
|
|
if (length(tif_files) == 0) {
|
|
return(list(
|
|
has_tiles = FALSE,
|
|
detected_tiles = character(),
|
|
total_files = 0,
|
|
source = "no_files_found"
|
|
))
|
|
}
|
|
|
|
tile_pattern <- "_(\\d{2})\\.tif$"
|
|
tile_files <- tif_files[grepl(tile_pattern, tif_files)]
|
|
has_tiles <- length(tile_files) > 0
|
|
|
|
return(list(
|
|
has_tiles = has_tiles,
|
|
detected_tiles = tile_files,
|
|
total_files = length(tif_files),
|
|
source = "file_pattern_detection"
|
|
))
|
|
}
|
|
|
|
# ==============================================================================
|
|
# SECTION 7: MOSAIC & KPI VERIFICATION HELPERS
|
|
# ==============================================================================
|
|
# Centralized helper functions for run_full_pipeline.R to avoid hardcoding paths
|
|
|
|
#' Detect mosaic mode from project structure
|
|
#'
|
|
#' Determines if project uses "tiled" (legacy) or "single-file" (per-field) mosaics
|
|
#'
|
|
#' @param project_dir Character. Project name
|
|
#' @return Character. "tiled" or "single-file"
|
|
detect_mosaic_mode <- function(project_dir) {
|
|
# Per-field architecture is standard - always return "single-file"
|
|
# unless weekly_tile_max directory exists with content
|
|
mosaic_tiled_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max")
|
|
|
|
if (dir.exists(mosaic_tiled_dir) && length(list.files(mosaic_tiled_dir)) > 0) {
|
|
return("tiled")
|
|
}
|
|
return("single-file")
|
|
}
|
|
|
|
#' Detect grid size from tile directory structure
|
|
#'
|
|
#' For per-field architecture, returns "unknown" (grid-based tiling is legacy)
|
|
#'
|
|
#' @param project_dir Character. Project name
|
|
#' @return Character. Grid size ("unknown" for per-field)
|
|
detect_grid_size <- function(project_dir) {
|
|
# Per-field architecture doesn't use grid-based organization
|
|
return("unknown")
|
|
}
|
|
|
|
#' Get project storage path
|
|
#'
|
|
#' @param project_dir Character. Project name
|
|
#' @param subdir Character. Optional subdirectory (default NULL)
|
|
#' @return Character. Full path
|
|
get_project_storage_path <- function(project_dir, subdir = NULL) {
|
|
path <- file.path("laravel_app", "storage", "app", project_dir)
|
|
if (!is.null(subdir)) {
|
|
path <- file.path(path, subdir)
|
|
}
|
|
return(path)
|
|
}
|
|
|
|
#' Get mosaic directory
|
|
#'
|
|
#' @param project_dir Character. Project name
|
|
#' @param mosaic_mode Character. "tiled" or "single-file"
|
|
#' @return Character. Full path to mosaic directory
|
|
get_mosaic_dir <- function(project_dir, mosaic_mode = "auto") {
|
|
if (mosaic_mode == "auto") {
|
|
mosaic_mode <- detect_mosaic_mode(project_dir)
|
|
}
|
|
|
|
if (mosaic_mode == "tiled") {
|
|
get_project_storage_path(project_dir, "weekly_tile_max")
|
|
} else {
|
|
get_project_storage_path(project_dir, "weekly_mosaic")
|
|
}
|
|
}
|
|
|
|
#' Get KPI directory based on client type
|
|
#'
|
|
#' @param project_dir Character. Project name
|
|
#' @param client_type Character. Client type
|
|
#' @return Character. Full path to KPI directory
|
|
get_kpi_dir <- function(project_dir, client_type) {
|
|
base_path <- get_project_storage_path(project_dir, "reports/kpis")
|
|
|
|
if (client_type == "agronomic_support") {
|
|
return(file.path(base_path, "field_level"))
|
|
} else {
|
|
return(file.path(base_path, "field_analysis"))
|
|
}
|
|
}
|
|
|
|
#' Get expected output path for harvest imminent file
|
|
#'
|
|
#' @param project_dir Character. Project name
|
|
#' @param week_num Integer. ISO week number
|
|
#' @param year_num Integer. Year
|
|
#' @return Character. Full path to expected harvest imminent CSV file
|
|
get_harvest_output_path <- function(project_dir, week_num, year_num) {
|
|
file.path(
|
|
"laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats",
|
|
sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, week_num, year_num)
|
|
)
|
|
}
|
|
|
|
#' Check if harvest output file exists for a specific week
|
|
#'
|
|
#' @param project_dir Character. Project name
|
|
#' @param week_num Integer. ISO week number
|
|
#' @param year_num Integer. Year
|
|
#' @return Logical. TRUE if file exists
|
|
check_harvest_output_exists <- function(project_dir, week_num, year_num) {
|
|
path <- get_harvest_output_path(project_dir, week_num, year_num)
|
|
file.exists(path)
|
|
}
|
|
|
|
#' Get mosaic verification directory
|
|
#'
|
|
#' @param project_dir Character. Project name
|
|
#' @param mosaic_mode Character. "tiled" or "single-file"
|
|
#' @return Character. Full path to mosaic directory
|
|
get_mosaic_verification_dir <- function(project_dir, mosaic_mode) {
|
|
base <- file.path("laravel_app", "storage", "app", project_dir)
|
|
|
|
if (mosaic_mode == "tiled") {
|
|
file.path(base, "weekly_tile_max")
|
|
} else {
|
|
file.path(base, "weekly_mosaic")
|
|
}
|
|
}
|
|
|
|
#' Check if mosaic files exist for a specific week
|
|
#'
|
|
#' @param project_dir Character. Project name
|
|
#' @param week_num Integer. ISO week number
|
|
#' @param year_num Integer. Year
|
|
#' @param mosaic_mode Character. "tiled" or "single-file"
|
|
#' @return List with created (logical), file_count (int), sample_files (char vector)
|
|
check_mosaic_exists <- function(project_dir, week_num, year_num, mosaic_mode) {
|
|
mosaic_dir <- get_mosaic_verification_dir(project_dir, mosaic_mode)
|
|
|
|
if (!dir.exists(mosaic_dir)) {
|
|
return(list(created = FALSE, file_count = 0, sample_files = character()))
|
|
}
|
|
|
|
week_pattern <- sprintf("week_%02d_%d\\.tif", week_num, year_num)
|
|
mosaic_files <- list.files(mosaic_dir, pattern = week_pattern, recursive = TRUE, full.names = FALSE)
|
|
|
|
list(
|
|
created = length(mosaic_files) > 0,
|
|
file_count = length(mosaic_files),
|
|
sample_files = head(mosaic_files, 3)
|
|
)
|
|
}
|
|
|
|
#' Check KPI completeness for reporting window
|
|
#'
|
|
#' @param project_dir Character. Project name
|
|
#' @param client_type Character. Client type
|
|
#' @param end_date Date. End date of reporting window
|
|
#' @param reporting_weeks Integer. Number of weeks to check
|
|
#' @return List with kpi_dir, kpis_df, missing_count
|
|
check_kpi_completeness <- function(project_dir, client_type, end_date, reporting_weeks = 1) {
|
|
kpi_dir <- get_kpi_dir(project_dir, client_type)
|
|
|
|
if (!dir.exists(kpi_dir)) {
|
|
dir.create(kpi_dir, showWarnings = FALSE, recursive = TRUE)
|
|
}
|
|
|
|
kpis_df <- data.frame()
|
|
missing_count <- 0
|
|
|
|
for (weeks_back in 0:(reporting_weeks - 1)) {
|
|
target_week <- end_date - lubridate::weeks(weeks_back)
|
|
wwy <- get_iso_week_year(target_week)
|
|
|
|
# Check for KPI file for this week
|
|
kpi_pattern <- sprintf("week_%02d_%d", wwy$week, wwy$year)
|
|
kpi_files <- list.files(kpi_dir, pattern = kpi_pattern)
|
|
has_kpis <- length(kpi_files) > 0
|
|
file_count <- length(kpi_files)
|
|
|
|
if (!has_kpis) missing_count <- missing_count + 1
|
|
|
|
kpis_df <- rbind(kpis_df, data.frame(
|
|
week = wwy$week,
|
|
year = wwy$year,
|
|
date = target_week,
|
|
has_kpis = has_kpis,
|
|
file_count = file_count
|
|
))
|
|
}
|
|
|
|
return(list(
|
|
kpi_dir = kpi_dir,
|
|
kpis_df = kpis_df,
|
|
missing_count = missing_count
|
|
))
|
|
}
|
|
|
|
# ==============================================================================
|
|
# SECTION 8: PROJECT INITIALIZATION
|
|
# ==============================================================================
|
|
|
|
#' Initialize the project
|
|
#'
|
|
#' Sets up directory structure, logging, and loads configuration
|
|
#'
|
|
#' @param project_dir Character. Project name
|
|
#' @param data_source Character. "merged_tif" or "merged_tif_8b"
|
|
#' @return List with all project directories and settings
|
|
initialize_project <- function(project_dir, data_source = "merged_tif") {
|
|
dirs <- setup_project_directories(project_dir, data_source = data_source)
|
|
logging <- setup_logging(dirs$log_dir)
|
|
|
|
return(list(
|
|
dirs = dirs,
|
|
logging = logging,
|
|
project_dir = project_dir,
|
|
client_type = get_client_type(project_dir)
|
|
))
|
|
}
|
|
|
|
# ==============================================================================
|
|
# AUTO-INITIALIZATION: Set project_dir global if not already set
|
|
# ==============================================================================
|
|
|
|
if (exists("project_dir")) {
|
|
assign("project_dir", project_dir, envir = .GlobalEnv)
|
|
} else {
|
|
warning("project_dir variable not found. Please set project_dir before sourcing parameters_project.R")
|
|
}
|
|
|
|
# ==============================================================================
|
|
# END PARAMETERS_PROJECT_2.R
|
|
# ==============================================================================
|