# File: SmartCane/r_app/parameters_project.R
# (841 lines, 29 KiB, R source)

# ==============================================================================
# PARAMETERS_PROJECT_2.R (CLEANED VERSION)
# ==============================================================================
# PURPOSE:
# Project configuration, directory structure setup, and helper functions
# for centralized path management and project initialization.
#
# SECTION 1: Libraries & Dependencies
# SECTION 2: Client Type Mapping & Configuration
# SECTION 3: Directory Structure Setup
# SECTION 4: Date/Week Utility Functions (centralized)
# SECTION 5: Field Boundary & Harvest Data Loaders
# SECTION 6: Project Initialization & Logging
# SECTION 7: Mosaic & KPI Verification Helpers
#
# NOTE: Duplicate functions (safe_log, smartcane_debug, smartcane_warn,
# load_field_boundaries, date_list, repair_geojson_geometries)
# have been REMOVED - they belong in 00_common_utils.R.
# Source 00_common_utils.R after parameters_project.R to get those.
# ==============================================================================
# ==============================================================================
# SECTION 1: LIBRARIES & DEPENDENCIES
# ==============================================================================
suppressPackageStartupMessages({
library(here)
library(readxl)
library(sf)
library(dplyr)
library(tidyr)
library(lubridate)
library(jsonlite) # For reading tiling_config.json
})
# ==============================================================================
# SECTION 2: GLOBAL AGRONOMIC THRESHOLDS & CLIENT TYPE MAPPING
# ==============================================================================
# Maturity threshold for yield prediction: crop age in Days After Harvest (DAH).
# Only fields >= DAH_MATURITY_THRESHOLD days old receive yield forecasts.
# ~240 days ≈ 8 months, a typical sugarcane maturity window.
DAH_MATURITY_THRESHOLD <- 240
# Maps project names to client types for pipeline control.
# The client type determines which scripts run and what outputs they produce
# (see CLIENT_TYPE_CONFIGS for the per-type configuration).
# Projects not listed here default to "agronomic_support" via get_client_type().
CLIENT_TYPE_MAP <- list(
"angata" = "cane_supply",
"chemba" = "agronomic_support",
"xinavane" = "agronomic_support",
"esa" = "agronomic_support",
"simba" = "agronomic_support",
"john" = "agronomic_support",
"huss" = "agronomic_support",
"aura" = "agronomic_support"
)
#' Get client type for a project
#'
#' Looks up `project_name` in the global `CLIENT_TYPE_MAP`. Unknown projects
#' fall back to "agronomic_support" with a warning.
#'
#' @param project_name Character project name (e.g., "angata")
#' @return Character client type ("cane_supply" or "agronomic_support")
get_client_type <- function(project_name) {
  client_type <- CLIENT_TYPE_MAP[[project_name]]
  if (is.null(client_type)) {
    # BUG FIX: the original passed `sep = ""` to paste0(), which has no `sep`
    # argument -- the "" was silently concatenated onto the message. sprintf()
    # makes the message construction explicit.
    warning(sprintf(
      "Project '%s' not found in CLIENT_TYPE_MAP. Defaulting to 'agronomic_support'.",
      project_name
    ), call. = FALSE)
    return("agronomic_support") # Default for all unlisted projects
  }
  client_type
}
# Client-specific KPI configurations.
# Defines which KPI calculations and outputs are required for each client
# type, plus which reporting scripts (90/91) the type is compatible with.
# Retrieved via get_client_kpi_config().
CLIENT_TYPE_CONFIGS <- list(
# agronomic_support: farm-level KPI summaries for agronomists.
# No harvest data required; consumed by script 90 only.
"agronomic_support" = list(
client_type = "agronomic_support",
description = "Farm-level KPI summaries for agronomic decision support",
kpi_calculations = c(
"field_uniformity",
"area_change",
"tch_forecasted",
"growth_decline",
"weed_presence",
"gap_filling"
),
outputs = c("kpi_summary_tables", "field_details"),
requires_harvest_data = FALSE,
script_90_compatible = TRUE,
script_91_compatible = FALSE
),
# cane_supply: per-field analysis with harvest prediction.
# Requires harvest data (harvest.xlsx); consumed by script 91 only.
"cane_supply" = list(
client_type = "cane_supply",
description = "Per-field analysis with harvest prediction and phase assignment",
kpi_calculations = c(
"per_field_analysis",
"phase_assignment",
"harvest_prediction",
"status_triggers"
),
outputs = c("field_analysis_excel", "field_analysis_summary"),
requires_harvest_data = TRUE,
script_90_compatible = FALSE,
script_91_compatible = TRUE
)
)
#' Get KPI configuration for a specific client type
#'
#' Looks up `client_type` in the global `CLIENT_TYPE_CONFIGS` table.
#'
#' @param client_type Character (e.g., "agronomic_support", "cane_supply")
#' @return List containing the configuration for that client type; unknown
#'   types fall back to the "cane_supply" configuration with a warning.
get_client_kpi_config <- function(client_type) {
  if (is.null(CLIENT_TYPE_CONFIGS[[client_type]])) {
    warning(sprintf("Client type '%s' not found - using cane_supply defaults", client_type))
    CLIENT_TYPE_CONFIGS[["cane_supply"]]
  } else {
    CLIENT_TYPE_CONFIGS[[client_type]]
  }
}
# ==============================================================================
# SECTION 3: DIRECTORY STRUCTURE SETUP
# ==============================================================================
# CENTRALIZED PATH MANAGEMENT: All file paths in the entire pipeline
# are derived from setup_project_directories().
# This is the single source of truth for 8 tiers of directories.
#' Setup complete project directory structure
#'
#' Creates all 8 tiers of directories under laravel_app/storage/app/<project>
#' and returns a comprehensive named list of paths for use throughout the
#' pipeline. This is the single source of truth for pipeline paths.
#'
#' @param project_dir Character. Project name (e.g., "angata", "aura")
#' @param data_source Character. "merged_tif" (default) or "merged_tif_8b".
#'   NOTE(review): currently unused in the body -- every path is built the
#'   same way regardless; confirm whether "merged_tif_8b" support is pending.
#' @return List containing all 8 tiers of paths
#'
#' @details
#' TIER 1: Raw data (merged_tif) - Python download output
#' TIER 2: Per-field TIFFs - Script 10 output
#' TIER 3: CI extraction - Script 20 output
#' TIER 4: Growth model - Script 30 output
#' TIER 5: Mosaics - Script 40 output
#' TIER 6: KPI & reports - Scripts 80/90/91 output
#' TIER 7: Support files - GeoJSON, Excel, logs
#' TIER 8: Metadata - Config, CRS info
setup_project_directories <- function(project_dir, data_source = "merged_tif") {
  # BASE DIRECTORY: everything lives under laravel_app/storage/app/<project>
  laravel_storage_dir <- here("laravel_app", "storage", "app", project_dir)
  # NOTE(review): subsequent here() calls receive this already-absolute path
  # as their first argument. rprojroot >= 2.0 returns absolute first path
  # components unchanged (so here() acts like file.path() below) -- confirm
  # the installed here/rprojroot version guarantees this behavior.
  # TIER 1: RAW DATA (Script 00 output - Python download)
  merged_tif_folder <- here(laravel_storage_dir, "merged_tif")
  # TIER 2: PER-FIELD TIFFS (Script 10 output)
  field_tiles_dir <- here(laravel_storage_dir, "field_tiles")
  field_tiles_ci_dir <- here(laravel_storage_dir, "field_tiles_CI")
  daily_tiles_split_dir <- here(laravel_storage_dir, "daily_tiles_split")
  # SUPPORT TIER: DATA DIRECTORY (defined early because tiers 3/4/7 nest in it)
  data_dir <- here(laravel_storage_dir, "Data")
  # TIER 3: CI EXTRACTION (Script 20 output)
  # Structure: Data/extracted_ci/daily_vals/{FIELD}/{DATE}.rds (per-field daily CI values)
  extracted_ci_base_dir <- here(data_dir, "extracted_ci")
  daily_ci_vals_dir <- here(extracted_ci_base_dir, "daily_vals") # Per-field structure
  cumulative_ci_vals_dir <- here(extracted_ci_base_dir, "cumulative_vals")
  ci_for_python_dir <- here(extracted_ci_base_dir, "ci_data_for_python")
  # TIER 4: GROWTH MODEL (Script 30 output)
  growth_model_interpolated_dir <- here(data_dir, "growth_model_interpolated")
  # TIER 5: MOSAICS (Script 40 output)
  weekly_mosaic_dir <- here(laravel_storage_dir, "weekly_mosaic")
  # TIER 6: KPI & REPORTING (Scripts 80/90/91 output)
  reports_dir <- here(laravel_storage_dir, "reports")
  kpi_reports_dir <- here(reports_dir, "kpis", "field_level")
  kpi_field_stats_dir <- here(reports_dir, "kpis", "field_stats")
  kpi_field_analysis_dir <- here(reports_dir, "kpis", "field_analysis")
  # TIER 7: SUPPORT (various scripts)
  vrt_dir <- here(data_dir, "vrt") # Virtual Raster files created during CI extraction
  harvest_dir <- here(data_dir, "harvest") # Harvest data directory
  log_dir <- here(laravel_storage_dir, "logs")
  # Create every directory up front so downstream scripts can write blindly.
  # dir.create() with showWarnings = FALSE is a no-op for existing dirs.
  all_dirs <- c(
    merged_tif_folder, field_tiles_dir, field_tiles_ci_dir, daily_tiles_split_dir,
    extracted_ci_base_dir, daily_ci_vals_dir, cumulative_ci_vals_dir, ci_for_python_dir,
    growth_model_interpolated_dir,
    weekly_mosaic_dir,
    reports_dir, kpi_reports_dir, kpi_field_stats_dir, kpi_field_analysis_dir,
    data_dir, vrt_dir, harvest_dir, log_dir
  )
  for (dir_path in all_dirs) {
    dir.create(dir_path, showWarnings = FALSE, recursive = TRUE)
  }
  # TIER 8: CONFIG & METADATA PATHS (file path only -- never created here)
  field_boundaries_path <- here(data_dir, "pivot.geojson")
  # Return comprehensive list of all paths, grouped by tier
  return(list(
    # PROJECT ROOT
    laravel_storage_dir = laravel_storage_dir,
    # TIER 1: Raw data
    merged_tif_folder = merged_tif_folder,
    # TIER 2: Per-field TIFFs
    field_tiles_dir = field_tiles_dir,
    field_tiles_ci_dir = field_tiles_ci_dir,
    daily_tiles_split_dir = daily_tiles_split_dir,
    # TIER 3: CI extraction
    extracted_ci_base_dir = extracted_ci_base_dir,
    daily_ci_vals_dir = daily_ci_vals_dir,
    cumulative_ci_vals_dir = cumulative_ci_vals_dir,
    ci_for_python_dir = ci_for_python_dir,
    # TIER 4: Growth model
    growth_model_interpolated_dir = growth_model_interpolated_dir,
    # TIER 5: Mosaics
    weekly_mosaic_dir = weekly_mosaic_dir,
    # TIER 6: KPI & reporting
    reports_dir = reports_dir,
    kpi_reports_dir = kpi_reports_dir,
    kpi_field_stats_dir = kpi_field_stats_dir,
    kpi_field_analysis_dir = kpi_field_analysis_dir,
    # TIER 7: Support
    data_dir = data_dir,
    vrt_dir = vrt_dir,
    harvest_dir = harvest_dir,
    log_dir = log_dir,
    # TIER 8: Metadata
    field_boundaries_path = field_boundaries_path
  ))
}
# ==============================================================================
# SECTION 4: DATE/WEEK UTILITY FUNCTIONS
# ==============================================================================
# ISO 8601 week/year functions for consistent date handling across scripts
#' Extract ISO 8601 week number from a date
#'
#' @param date Date object or string convertible to Date (e.g., "2025-01-15")
#' @return Numeric ISO week number (1-53)
get_iso_week <- function(date) {
  # Coerce first so the documented "string convertible to Date" contract
  # holds: format() on a character vector ignores "%V" and returns the
  # string unchanged, which yields NA after as.numeric().
  as.numeric(format(as.Date(date), "%V"))
}
#' Extract ISO 8601 week-based year from a date
#'
#' Note: the ISO year differs from the calendar year near year boundaries
#' (e.g., 2021-01-01 belongs to ISO year 2020, week 53).
#'
#' @param date Date object or string convertible to Date
#' @return Numeric ISO year
get_iso_year <- function(date) {
  # Coerce first so string input works (format() on character ignores "%G").
  as.numeric(format(as.Date(date), "%G"))
}
#' Extract both ISO week and year as a list
#'
#' @param date Date object or string convertible to Date
#' @return List with elements: week (numeric, 1-53), year (numeric ISO year)
get_iso_week_year <- function(date) {
  # Coerce once so string input works; format() on a character vector would
  # ignore the "%V"/"%G" specifiers and produce NA after as.numeric().
  d <- as.Date(date)
  list(
    week = as.numeric(format(d, "%V")),
    year = as.numeric(format(d, "%G"))
  )
}
#' Format date as a week/year label
#'
#' @param date Date object or string convertible to Date
#' @param separator Character. Separator between week and year (default "_")
#' @return Character in format "week##_YYYY" (e.g., "week03_2025")
format_week_label <- function(date, separator = "_") {
  # as.Date() here lets string input work even though get_iso_week_year()
  # historically only accepted Date objects (as.Date on a Date is a no-op).
  wwy <- get_iso_week_year(as.Date(date))
  sprintf("week%02d%s%d", wwy$week, separator, wwy$year)
}
# ==============================================================================
# SECTION 5: FIELD BOUNDARY & HARVEST DATA LOADERS
# ==============================================================================
# IMPORTANT: These functions are also defined in 00_common_utils.R
# to avoid duplication. Source 00_common_utils.R AFTER parameters_project.R
# to override these stub definitions with the full implementations.
#' Load Field Boundaries from GeoJSON
#'
#' Loads field polygon geometries from Data/pivot.geojson, drops features
#' with empty geometries, repairs invalid geometries, and converts the
#' result to a terra SpatVector.
#'
#' @param data_dir Directory containing the GeoJSON file
#' @return List with elements:
#'   - field_boundaries_sf: sf (Simple Features) object
#'   - field_boundaries: terra SpatVector built from the sf object
#'
load_field_boundaries <- function(data_dir) {
  field_boundaries_path <- file.path(data_dir, "pivot.geojson")
  if (!file.exists(field_boundaries_path)) {
    # BUG FIX: stop() concatenates its arguments with no separator, so the
    # original message rendered as "...at:/path" -- add the explicit space.
    stop("Field boundaries file not found at: ", field_boundaries_path)
  }
  tryCatch({
    boundaries_sf <- sf::st_read(field_boundaries_path, quiet = TRUE)
    # Drop features with empty geometries (sf:: qualified for consistency
    # with the other sf calls in this function; the original used a bare
    # st_is_empty)
    boundaries_sf <- boundaries_sf[!sf::st_is_empty(boundaries_sf), ]
    # Repair geometries only when at least one is invalid
    if (!all(sf::st_is_valid(boundaries_sf))) {
      boundaries_sf <- sf::st_make_valid(boundaries_sf)
    }
    # Convert to terra SpatVector for raster extraction workflows
    boundaries_spat <- terra::vect(boundaries_sf)
    return(list(
      field_boundaries_sf = boundaries_sf,
      field_boundaries = boundaries_spat
    ))
  }, error = function(e) {
    stop("Error loading field boundaries: ", e$message)
  })
}
#' Create Date List
#'
#' Builds the sequence of calendar days from (end_date - offset) through
#' end_date, inclusive, as "YYYY-MM-DD" strings.
#'
#' @param end_date End date (Date object)
#' @param offset Number of days to go back
#' @return Character vector of dates in YYYY-MM-DD format
date_list <- function(end_date, offset) {
  format(seq(end_date - offset, end_date, by = "day"), "%Y-%m-%d")
}
#' Repair GeoJSON Geometries
#'
#' Runs sf::st_make_valid() on the object only when at least one geometry
#' fails validation; otherwise returns the input untouched.
#'
#' @param sf_object sf object with potentially invalid geometries
#' @return sf object with repaired geometries
repair_geojson_geometries <- function(sf_object) {
  all_valid <- all(sf::st_is_valid(sf_object))
  if (all_valid) {
    sf_object
  } else {
    sf::st_make_valid(sf_object)
  }
}
#' Load harvesting data from Excel
#'
#' Loads the crop harvest schedule from harvest.xlsx. Date columns may arrive
#' as Excel serial numbers, Date/POSIXct values, or strings in several common
#' formats; all are normalized to Date. Future or missing season_end values
#' are capped at today, and `age` is recomputed in whole weeks.
#'
#' @param data_dir Directory containing harvest.xlsx
#' @return Data frame with columns: field, sub_field, year, season_start,
#'   season_end, age (weeks), sub_area, tonnage_ha. Returns NULL (with a
#'   warning) if the file is missing or unreadable.
load_harvesting_data <- function(data_dir) {
  harvest_file <- file.path(data_dir, "harvest.xlsx")
  if (!file.exists(harvest_file)) {
    warning(paste("Harvest data file not found at path:", harvest_file))
    return(NULL)
  }
  # Parse a single scalar date value of unknown type/format into a Date.
  parse_flexible_date <- function(x) {
    # BUG FIX: is.null() must be tested before is.na() -- is.na(NULL) yields
    # logical(0), which makes `if (is.na(x) || ...)` error on NULL input.
    if (is.null(x) || length(x) == 0 || is.na(x)) return(as.Date(NA))
    if (inherits(x, "Date")) return(x)
    if (inherits(x, "POSIXct")) return(as.Date(x))
    if (is.numeric(x)) {
      # Excel serial day number (Windows "1900 date system" origin)
      return(as.Date(x, origin = "1899-12-30"))
    }
    x_char <- as.character(x)
    formats <- c("%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%Y-%m-%d %H:%M:%S")
    for (fmt in formats) {
      result <- suppressWarnings(as.Date(x_char, format = fmt))
      if (!is.na(result)) return(result)
    }
    as.Date(NA)
  }
  tryCatch({
    harvesting_data <- read_excel(harvest_file) %>%
      dplyr::select(c(
        "field", "sub_field", "year",
        "season_start", "season_end", "age", "sub_area", "tonnage_ha"
      )) %>%
      mutate(
        field = as.character(field),
        sub_field = as.character(sub_field),
        year = as.numeric(year),
        # sapply() unlists Dates to numeric day counts; restore the Date
        # class explicitly with the epoch origin afterwards.
        season_start = sapply(season_start, parse_flexible_date),
        season_end = sapply(season_end, parse_flexible_date),
        season_start = as.Date(season_start, origin = "1970-01-01"),
        season_end = as.Date(season_end, origin = "1970-01-01"),
        age = as.numeric(age),
        sub_area = as.character(sub_area),
        tonnage_ha = as.numeric(tonnage_ha)
      ) %>%
      mutate(
        # Future or missing season_end means the season is still running:
        # cap at today so the recomputed age reflects elapsed time only.
        season_end = case_when(
          season_end > Sys.Date() ~ Sys.Date(),
          is.na(season_end) ~ Sys.Date(),
          TRUE ~ season_end
        ),
        # Age in whole weeks between season start and (capped) season end
        age = round(as.numeric(season_end - season_start) / 7, 0)
      )
    return(harvesting_data)
  }, error = function(e) {
    warning(paste("Error loading harvesting data:", e$message))
    return(NULL)
  })
}
# ==============================================================================
# SECTION 6: LOGGING SYSTEM SETUP
# ==============================================================================
# Create default logging functions
#' Safe Logging Function
#'
#' Writes "[LEVEL] message" to stdout. Works standalone without any logging
#' framework; consistent with the SmartCane logging standard.
#'
#' @param message The message to log
#' @param level The log level (default: "INFO"). Options: "INFO", "WARNING", "ERROR", "DEBUG"
#' @return NULL (used for its printing side effect)
#'
safe_log <- function(message, level = "INFO") {
  cat(sprintf("[%s] %s\n", level, message))
}
#' SmartCane Debug Logging (Conditional)
#'
#' Emits a DEBUG-level message via safe_log(), but only when `verbose` is
#' TRUE or the SMARTCANE_DEBUG environment variable equals "TRUE".
#'
#' @param message The message to log
#' @param verbose Whether to output regardless of SMARTCANE_DEBUG (default: FALSE)
#' @return NULL (invisible when suppressed; used for side effects)
#'
smartcane_debug <- function(message, verbose = FALSE) {
  debug_enabled <- verbose || Sys.getenv("SMARTCANE_DEBUG") == "TRUE"
  if (debug_enabled) {
    safe_log(message, level = "DEBUG")
  } else {
    invisible(NULL)
  }
}
#' SmartCane Warning Logging
#'
#' Emits a WARN-level message. Thin convenience wrapper around safe_log().
#'
#' @param message The message to log
#' @return NULL (used for side effects)
#'
smartcane_warn <- function(message) {
  safe_log(level = "WARN", message = message)
}
# Default console-only logger: prints "[LEVEL] message" to stdout.
# Overwritten in the global environment by setup_logging(), which installs a
# version that also writes to a timestamped log file.
log_message <- function(message, level = "INFO") {
  cat(sprintf("[%s] %s\n", level, message))
}
# Default structure preview logger: logs str() of head(data) via log_message().
# Overwritten by setup_logging() with a file-writing version.
log_head <- function(data, level = "INFO") {
  preview <- capture.output(str(head(data)))
  log_message(paste(preview, collapse = "\n"), level)
}
#' Setup full logging system with file output
#'
#' Creates the log directory, opens a timestamped log file, and builds
#' logging closures that write to both console and file. The closures are
#' also installed into the global environment so downstream scripts can call
#' log_message()/log_head() directly.
#'
#' @param log_dir Character. Directory for log files
#' @return List with log_file path, log_message function, and log_head function
setup_logging <- function(log_dir) {
  dir.create(log_dir, showWarnings = FALSE, recursive = TRUE)
  run_stamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
  log_file <- file.path(log_dir, sprintf("smartcane_%s.log", run_stamp))
  # Console + file logger; timestamps each line at write time
  log_message <- function(message, level = "INFO") {
    now <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
    formatted_message <- sprintf("[%s] [%s] %s", level, now, message)
    cat(formatted_message, "\n")
    cat(formatted_message, "\n", file = log_file, append = TRUE)
  }
  # Logs a str() preview of the head of any object
  log_head <- function(list, level = "INFO") {
    log_message(paste(capture.output(str(head(list))), collapse = "\n"), level)
  }
  # Deliberate global side effect: export the closures so scripts sourced
  # later can log without threading the returned list through.
  assign("log_message", log_message, envir = .GlobalEnv)
  assign("log_head", log_head, envir = .GlobalEnv)
  list(
    log_file = log_file,
    log_message = log_message,
    log_head = log_head
  )
}
# ==============================================================================
# SECTION 6B: DATA SOURCE DETECTION
# ==============================================================================
#' Detect data source for project
#'
#' The pipeline standardizes on the "merged_tif" layout, so this always
#' returns "merged_tif" regardless of project.
#'
#' @param project_dir Character. Project name (unused)
#' @return Character. "merged_tif"
detect_data_source <- function(project_dir) {
  "merged_tif"
}
#' Detect tile structure from merged TIF directory
#'
#' Checks if tiles exist by looking for, in order of reliability:
#' 1. tiling_config.json metadata file written by script 10
#' 2. Grid subdirectories (5x5, 10x10, etc.)
#' 3. Tile-named files (*_XX.tif pattern)
#'
#' @param merged_final_tif_dir Character. Path to merged TIF directory
#' @param daily_tiles_split_dir Character. Optional path to tiles directory
#' @return List with has_tiles (logical), detected_tiles, total_files, source,
#'   and (for config/grid detection) grid_size
detect_tile_structure_from_merged_final <- function(merged_final_tif_dir, daily_tiles_split_dir = NULL) {
  # PRIORITY 1: tiling_config.json metadata written by script 10
  if (!is.null(daily_tiles_split_dir) && dir.exists(daily_tiles_split_dir)) {
    config_files <- list.files(daily_tiles_split_dir,
                               pattern = "tiling_config\\.json$",
                               recursive = TRUE,
                               full.names = TRUE)
    if (length(config_files) > 0) {
      # Several configs may exist; trust the most recently modified one
      config_file <- config_files[which.max(file.info(config_files)$mtime)]
      tryCatch({
        config_json <- jsonlite::read_json(config_file)
        # ROBUSTNESS FIX: avoid `%||%`, which only exists in base R >= 4.4
        # (or via rlang) -- neither is guaranteed by this file's imports.
        has_tiles <- if (is.null(config_json$has_tiles)) TRUE else config_json$has_tiles
        grid_size <- if (is.null(config_json$grid_size)) "unknown" else config_json$grid_size
        return(list(
          has_tiles = has_tiles,
          detected_tiles = character(),
          total_files = 0,
          source = "tiling_config.json",
          grid_size = grid_size
        ))
      }, error = function(e) {
        # A corrupt/unreadable config falls through to file-based detection
        warning("Error reading tiling_config.json: ", e$message)
      })
    }
  }
  # PRIORITY 2: file-based detection (fallback)
  if (!dir.exists(merged_final_tif_dir)) {
    return(list(
      has_tiles = FALSE,
      detected_tiles = character(),
      total_files = 0,
      source = "directory_not_found"
    ))
  }
  # Grid-size subdirectories (5x5, 10x10, etc.)
  grid_subfolders <- list.dirs(merged_final_tif_dir, full.names = FALSE, recursive = FALSE)
  grid_patterns <- grep("^\\d+x\\d+$", grid_subfolders, value = TRUE)
  if (length(grid_patterns) > 0) {
    grid_size <- grid_patterns[1]
    grid_dir <- file.path(merged_final_tif_dir, grid_size)
    grid_tifs <- list.files(grid_dir, pattern = "\\.tif$", recursive = TRUE)
    return(list(
      has_tiles = TRUE,
      # BUG FIX: the original used `[1:3]`, which pads with NA when fewer
      # than three files exist, and then reported length(sample) (always 3)
      # as total_files instead of the true count.
      detected_tiles = head(grid_tifs, 3),
      total_files = length(grid_tifs),
      source = "grid_subdirectory_detection",
      grid_size = grid_size,
      grid_path = grid_dir
    ))
  }
  # Tile-named files (*_XX.tif pattern)
  tif_files <- list.files(merged_final_tif_dir, pattern = "\\.tif$", full.names = FALSE)
  if (length(tif_files) == 0) {
    return(list(
      has_tiles = FALSE,
      detected_tiles = character(),
      total_files = 0,
      source = "no_files_found"
    ))
  }
  tile_pattern <- "_(\\d{2})\\.tif$"
  tile_files <- tif_files[grepl(tile_pattern, tif_files)]
  list(
    has_tiles = length(tile_files) > 0,
    detected_tiles = tile_files,
    total_files = length(tif_files),
    source = "file_pattern_detection"
  )
}
# ==============================================================================
# SECTION 7: MOSAIC & KPI VERIFICATION HELPERS
# ==============================================================================
# Centralized helper functions for run_full_pipeline.R to avoid hardcoding paths
#' Detect mosaic mode from project structure
#'
#' Per-field architecture is standard, so this returns "single-file" unless
#' a non-empty legacy weekly_tile_max directory exists for the project.
#'
#' @param project_dir Character. Project name
#' @return Character. "tiled" or "single-file"
detect_mosaic_mode <- function(project_dir) {
  legacy_dir <- file.path("laravel_app", "storage", "app", project_dir, "weekly_tile_max")
  legacy_present <- dir.exists(legacy_dir) && length(list.files(legacy_dir)) > 0
  if (legacy_present) "tiled" else "single-file"
}
#' Detect grid size from tile directory structure
#'
#' Grid-based tiling is legacy; the per-field architecture has no grid
#' dimension, so this always reports "unknown".
#'
#' @param project_dir Character. Project name (unused)
#' @return Character. Grid size ("unknown" for per-field)
detect_grid_size <- function(project_dir) {
  "unknown"
}
#' Get project storage path
#'
#' Builds the relative Laravel storage path for a project, optionally with a
#' subdirectory appended.
#'
#' @param project_dir Character. Project name
#' @param subdir Character. Optional subdirectory (default NULL)
#' @return Character. Full path
get_project_storage_path <- function(project_dir, subdir = NULL) {
  base <- file.path("laravel_app", "storage", "app", project_dir)
  if (is.null(subdir)) base else file.path(base, subdir)
}
#' Get mosaic directory
#'
#' Resolves "auto" via detect_mosaic_mode(), then maps the mode to its
#' storage subdirectory ("weekly_tile_max" for tiled, "weekly_mosaic"
#' otherwise).
#'
#' @param project_dir Character. Project name
#' @param mosaic_mode Character. "tiled", "single-file", or "auto" (default)
#' @return Character. Full path to mosaic directory
get_mosaic_dir <- function(project_dir, mosaic_mode = "auto") {
  resolved_mode <- if (mosaic_mode == "auto") detect_mosaic_mode(project_dir) else mosaic_mode
  subdir <- if (resolved_mode == "tiled") "weekly_tile_max" else "weekly_mosaic"
  get_project_storage_path(project_dir, subdir)
}
#' Get KPI directory based on client type
#'
#' agronomic_support clients use reports/kpis/field_level; every other
#' client type uses reports/kpis/field_analysis.
#'
#' @param project_dir Character. Project name
#' @param client_type Character. Client type
#' @return Character. Full path to KPI directory
get_kpi_dir <- function(project_dir, client_type) {
  base_path <- get_project_storage_path(project_dir, "reports/kpis")
  leaf <- if (client_type == "agronomic_support") "field_level" else "field_analysis"
  file.path(base_path, leaf)
}
#' Get expected output path for harvest imminent file
#'
#' Builds the path to the weekly
#' "<project>_harvest_imminent_week_WW_YYYY.csv" file under
#' reports/kpis/field_stats.
#'
#' @param project_dir Character. Project name
#' @param week_num Integer. ISO week number
#' @param year_num Integer. Year
#' @return Character. Full path to expected harvest imminent CSV file
get_harvest_output_path <- function(project_dir, week_num, year_num) {
  file_name <- sprintf("%s_harvest_imminent_week_%02d_%d.csv", project_dir, week_num, year_num)
  stats_dir <- file.path(
    "laravel_app", "storage", "app", project_dir, "reports", "kpis", "field_stats"
  )
  file.path(stats_dir, file_name)
}
#' Check if harvest output file exists for a specific week
#'
#' @param project_dir Character. Project name
#' @param week_num Integer. ISO week number
#' @param year_num Integer. Year
#' @return Logical. TRUE if the expected harvest imminent CSV exists
check_harvest_output_exists <- function(project_dir, week_num, year_num) {
  file.exists(get_harvest_output_path(project_dir, week_num, year_num))
}
#' Get mosaic verification directory
#'
#' Maps the mosaic mode to its storage subdirectory under the project root:
#' "weekly_tile_max" for tiled, "weekly_mosaic" otherwise.
#'
#' @param project_dir Character. Project name
#' @param mosaic_mode Character. "tiled" or "single-file"
#' @return Character. Full path to mosaic directory
get_mosaic_verification_dir <- function(project_dir, mosaic_mode) {
  subdir <- if (mosaic_mode == "tiled") "weekly_tile_max" else "weekly_mosaic"
  file.path("laravel_app", "storage", "app", project_dir, subdir)
}
#' Check if mosaic files exist for a specific week
#'
#' Searches the mosaic directory (recursively) for files matching
#' "week_WW_YYYY.tif".
#'
#' @param project_dir Character. Project name
#' @param week_num Integer. ISO week number
#' @param year_num Integer. Year
#' @param mosaic_mode Character. "tiled" or "single-file"
#' @return List with created (logical), file_count (int), sample_files (char vector)
check_mosaic_exists <- function(project_dir, week_num, year_num, mosaic_mode) {
  mosaic_dir <- get_mosaic_verification_dir(project_dir, mosaic_mode)
  if (!dir.exists(mosaic_dir)) {
    return(list(created = FALSE, file_count = 0, sample_files = character()))
  }
  pattern <- sprintf("week_%02d_%d\\.tif", week_num, year_num)
  matches <- list.files(mosaic_dir, pattern = pattern, recursive = TRUE, full.names = FALSE)
  list(
    created = length(matches) > 0,
    file_count = length(matches),
    sample_files = head(matches, 3)
  )
}
#' Check KPI completeness for reporting window
#'
#' For each of the last `reporting_weeks` weeks (stepping back from
#' `end_date`), checks whether any KPI file named "week_WW_YYYY*" exists in
#' the client-type-specific KPI directory. Creates the KPI directory if it
#' does not exist yet.
#'
#' @param project_dir Character. Project name
#' @param client_type Character. Client type
#' @param end_date Date. End date of reporting window
#' @param reporting_weeks Integer. Number of weeks to check (>= 1)
#' @return List with kpi_dir, kpis_df (one row per week: week, year, date,
#'   has_kpis, file_count), missing_count
check_kpi_completeness <- function(project_dir, client_type, end_date, reporting_weeks = 1) {
  kpi_dir <- get_kpi_dir(project_dir, client_type)
  if (!dir.exists(kpi_dir)) {
    dir.create(kpi_dir, showWarnings = FALSE, recursive = TRUE)
  }
  # IDIOM FIX: build one row per week, then bind once -- the original grew a
  # data.frame with rbind() inside the loop.
  weekly_rows <- lapply(0:(reporting_weeks - 1), function(weeks_back) {
    target_week <- end_date - lubridate::weeks(weeks_back)
    wwy <- get_iso_week_year(target_week)
    # Any file whose name contains "week_WW_YYYY" counts as a KPI for the week
    kpi_pattern <- sprintf("week_%02d_%d", wwy$week, wwy$year)
    kpi_files <- list.files(kpi_dir, pattern = kpi_pattern)
    data.frame(
      week = wwy$week,
      year = wwy$year,
      date = target_week,
      has_kpis = length(kpi_files) > 0,
      file_count = length(kpi_files)
    )
  })
  kpis_df <- do.call(rbind, weekly_rows)
  list(
    kpi_dir = kpi_dir,
    kpis_df = kpis_df,
    missing_count = sum(!kpis_df$has_kpis)
  )
}
# ==============================================================================
# SECTION 8: PROJECT INITIALIZATION
# ==============================================================================
#' Initialize the project
#'
#' Creates the full directory structure, wires up file logging, and resolves
#' the project's client type.
#'
#' @param project_dir Character. Project name
#' @param data_source Character. "merged_tif" or "merged_tif_8b"
#' @return List with dirs (all paths), logging (log file + functions),
#'   project_dir, and client_type
initialize_project <- function(project_dir, data_source = "merged_tif") {
  dirs <- setup_project_directories(project_dir, data_source = data_source)
  list(
    dirs = dirs,
    logging = setup_logging(dirs$log_dir),
    project_dir = project_dir,
    client_type = get_client_type(project_dir)
  )
}
# ==============================================================================
# AUTO-INITIALIZATION: Set project_dir global if not already set
# ==============================================================================
# NOTE(review): despite the header, this does NOT provide a default. It
# promotes an existing `project_dir` (e.g., defined by the sourcing script)
# into the global environment; when the variable is absent it only warns.
# Confirm this "promote or warn" contract is intended.
if (exists("project_dir")) {
assign("project_dir", project_dir, envir = .GlobalEnv)
} else {
warning("project_dir variable not found. Please set project_dir before sourcing parameters_project.R")
}
# ==============================================================================
# END PARAMETERS_PROJECT_2.R
# ==============================================================================