SmartCane/r_app/growth_model_utils.R
2026-01-06 14:17:37 +01:00

236 lines
8.6 KiB
R

# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\growth_model_utils.R
#
# GROWTH_MODEL_UTILS.R
# ===================
# Utility functions for growth model interpolation and manipulation.
# These functions support the creation of continuous growth models from point measurements.
#' Safe logging function that works whether log_message exists or not
#'
#' @param message The message to log
#' @param level The log level (default: "INFO")
#' @return NULL (used for side effects)
#'
safe_log <- function(message, level = "INFO") {
if (exists("log_message")) {
log_message(message, level)
} else {
if (level %in% c("ERROR", "WARNING")) {
warning(message)
} else {
message(message)
}
}
}
#' Load and prepare the combined CI data
#'
#' @param data_dir Directory containing the combined CI data
#' @return Long-format dataframe with CI values by date
#'
load_combined_ci_data <- function(data_dir) {
file_path <- here::here(data_dir, "combined_CI_data.rds")
if (!file.exists(file_path)) {
stop(paste("Combined CI data file not found:", file_path))
}
safe_log(paste("Loading CI data from:", file_path))
# Load and transform the data to long format
pivot_stats <- readRDS(file_path) %>%
dplyr::ungroup() %>%
dplyr::group_by(field, sub_field) %>%
dplyr::summarise(dplyr::across(everything(), ~ first(stats::na.omit(.))), .groups = "drop")
pivot_stats_long <- pivot_stats %>%
tidyr::pivot_longer(cols = -c(field, sub_field), names_to = "Date", values_to = "value") %>%
dplyr::mutate(
Date = lubridate::ymd(Date),
value = as.numeric(value)
) %>%
tidyr::drop_na(c("value", "Date")) %>%
dplyr::filter(!is.na(sub_field), !is.na(field)) %>% # Filter out NA field names
dplyr::filter(!is.infinite(value)) %>%
dplyr::distinct()
safe_log(paste("Loaded", nrow(pivot_stats_long), "CI data points"))
return(pivot_stats_long)
}
#' Extract and interpolate CI data for a specific field and season
#'
#' @param field_name Name of the field or sub-field
#' @param harvesting_data Dataframe with harvesting information
#' @param field_CI_data Dataframe with CI measurements
#' @param season Year of the growing season
#' @return Dataframe with interpolated daily CI values
#'
extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season) {
# Filter harvesting data for the given season and field name
filtered_harvesting_data <- harvesting_data %>%
dplyr::filter(year == season, sub_field == field_name)
if (nrow(filtered_harvesting_data) == 0) {
safe_log(paste("No harvesting data found for field:", field_name, "in season:", season), "WARNING")
return(data.frame())
}
# Filter field CI data for the given field name
filtered_field_CI_data <- field_CI_data %>%
dplyr::filter(sub_field == field_name)
# Return an empty data frame if no CI data is found
if (nrow(filtered_field_CI_data) == 0) {
safe_log(paste("No CI data found for field:", field_name, "in season:", season), "WARNING")
return(data.frame())
}
# Log season dates
season_start <- filtered_harvesting_data$season_start[1]
season_end <- filtered_harvesting_data$season_end[1]
ci_date_range <- paste(format(min(filtered_field_CI_data$Date), "%Y-%m-%d"),
"to",
format(max(filtered_field_CI_data$Date), "%Y-%m-%d"))
# Create a linear interpolation function for the CI data
tryCatch({
ApproxFun <- stats::approxfun(x = filtered_field_CI_data$Date, y = filtered_field_CI_data$value)
Dates <- seq.Date(min(filtered_field_CI_data$Date), max(filtered_field_CI_data$Date), by = 1)
LinearFit <- ApproxFun(Dates)
# Combine interpolated data with the original CI data
CI <- data.frame(Date = Dates, FitData = LinearFit) %>%
dplyr::left_join(filtered_field_CI_data, by = "Date") %>%
dplyr::filter(Date > filtered_harvesting_data$season_start & Date < filtered_harvesting_data$season_end)
# If CI is empty after filtering, return an empty dataframe
if (nrow(CI) == 0) {
safe_log(paste0("No CI data within season dates for field: ", field_name,
" (Season: ", season, ", dates: ",
format(season_start, "%Y-%m-%d"), " to ",
format(season_end, "%Y-%m-%d"),
"). Available CI data range: ", ci_date_range),
"WARNING")
return(data.frame())
}
# Add additional columns
CI <- CI %>%
dplyr::mutate(
DOY = seq(1, n(), 1),
model = paste0("Data", season, " : ", field_name),
season = season,
subField = field_name
)
# Log successful interpolation
safe_log(paste0("Successfully interpolated CI data for field: ", field_name,
" (Season: ", season, ", dates: ",
format(season_start, "%Y-%m-%d"), " to ",
format(season_end, "%Y-%m-%d"),
"). ", nrow(CI), " data points created."))
return(CI)
}, error = function(e) {
safe_log(paste0("Error interpolating CI data for field ", field_name,
" in season ", season,
" (", format(season_start, "%Y-%m-%d"), " to ",
format(season_end, "%Y-%m-%d"),
"): ", e$message), "ERROR")
return(data.frame())
})
}
#' Generate interpolated CI data for all fields and seasons
#'
#' @param years Vector of years to process
#' @param harvesting_data Dataframe with harvesting information
#' @param ci_data Long-format dataframe with CI measurements
#' @return Dataframe with interpolated daily CI values for all fields/seasons
#'
generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) {
safe_log("Starting CI data interpolation for all fields")
# Process each year
result <- purrr::map_df(years, function(yr) {
safe_log(paste("Processing year:", yr))
# Get the fields harvested in this year with valid season start dates
sub_fields <- harvesting_data %>%
dplyr::filter(year == yr, !is.na(season_start)) %>%
dplyr::pull(sub_field)
if (length(sub_fields) == 0) {
safe_log(paste("No fields with valid season data for year:", yr), "WARNING")
return(data.frame())
}
# Filter sub_fields to only include those with value data in ci_data
valid_sub_fields <- sub_fields %>%
purrr::keep(~ any(ci_data$sub_field == .x))
if (length(valid_sub_fields) == 0) {
safe_log(paste("No fields with CI data for year:", yr), "WARNING")
return(data.frame())
}
# Extract and interpolate data for each valid field
safe_log(paste("Processing", length(valid_sub_fields), "fields for year:", yr))
result <- purrr::map(valid_sub_fields, ~ extract_CI_data(.x,
harvesting_data = harvesting_data,
field_CI_data = ci_data,
season = yr)) %>%
purrr::list_rbind()
safe_log(paste("Generated", nrow(result), "interpolated data points for year:", yr))
return(result)
})
safe_log(paste("Total interpolated data points:", nrow(result)))
return(result)
}
#' Calculate growth metrics for interpolated CI data
#'
#' @param interpolated_data Dataframe with interpolated CI values
#' @return Dataframe with added growth metrics (CI_per_day and cumulative_CI)
#'
calculate_growth_metrics <- function(interpolated_data) {
if (nrow(interpolated_data) == 0) {
safe_log("No data provided to calculate growth metrics", "WARNING")
return(interpolated_data)
}
result <- interpolated_data %>%
dplyr::group_by(model) %>%
dplyr::mutate(
CI_per_day = FitData - dplyr::lag(FitData),
cumulative_CI = cumsum(FitData)
)
return(result)
}
#' Save interpolated growth model data
#'
#' @param data Dataframe with interpolated growth data
#' @param output_dir Directory to save the output
#' @param file_name Filename for the output (default: "All_pivots_Cumulative_CI_quadrant_year_v2.rds")
#' @return Path to the saved file
#'
save_growth_model <- function(data, output_dir, file_name = "All_pivots_Cumulative_CI_quadrant_year_v2.rds") {
# Create output directory if it doesn't exist
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
# Create full file path
file_path <- here::here(output_dir, file_name)
# Save the data
saveRDS(data, file_path)
safe_log(paste("Interpolated CI data saved to:", file_path))
return(file_path)
}