# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\growth_model_utils.R # # GROWTH_MODEL_UTILS.R # =================== # Utility functions for growth model interpolation and manipulation. # These functions support the creation of continuous growth models from point measurements. #' Safe logging function that works whether log_message exists or not #' #' @param message The message to log #' @param level The log level (default: "INFO") #' @return NULL (used for side effects) #' safe_log <- function(message, level = "INFO") { if (exists("log_message")) { log_message(message, level) } else { if (level %in% c("ERROR", "WARNING")) { warning(message) } else { message(message) } } } #' Load and prepare the combined CI data #' #' @param data_dir Directory containing the combined CI data #' @return Long-format dataframe with CI values by date #' load_combined_ci_data <- function(data_dir) { file_path <- here::here(data_dir, "combined_CI_data.rds") if (!file.exists(file_path)) { stop(paste("Combined CI data file not found:", file_path)) } safe_log(paste("Loading CI data from:", file_path)) # Load and transform the data to long format pivot_stats <- readRDS(file_path) %>% dplyr::ungroup() %>% dplyr::group_by(field, sub_field) %>% dplyr::summarise(dplyr::across(everything(), ~ first(stats::na.omit(.))), .groups = "drop") pivot_stats_long <- pivot_stats %>% tidyr::pivot_longer(cols = -c(field, sub_field), names_to = "Date", values_to = "value") %>% dplyr::mutate( Date = lubridate::ymd(Date), value = as.numeric(value) ) %>% tidyr::drop_na(c("value", "Date")) %>% dplyr::filter(!is.na(sub_field), !is.na(field)) %>% # Filter out NA field names dplyr::filter(!is.infinite(value)) %>% dplyr::distinct() safe_log(paste("Loaded", nrow(pivot_stats_long), "CI data points")) return(pivot_stats_long) } #' Extract and interpolate CI data for a specific field and season #' #' @param field_name Name of the field or sub-field #' @param harvesting_data Dataframe with harvesting information #' @param field_CI_data Dataframe with CI measurements #' @param season Year of the growing season #' @return Dataframe with interpolated daily CI values #' extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season) { # Filter harvesting data for the given season and field name filtered_harvesting_data <- harvesting_data %>% dplyr::filter(year == season, sub_field == field_name) if (nrow(filtered_harvesting_data) == 0) { safe_log(paste("No harvesting data found for field:", field_name, "in season:", season), "WARNING") return(data.frame()) } # Filter field CI data for the given field name filtered_field_CI_data <- field_CI_data %>% dplyr::filter(sub_field == field_name) # Return an empty data frame if no CI data is found if (nrow(filtered_field_CI_data) == 0) { safe_log(paste("No CI data found for field:", field_name, "in season:", season), "WARNING") return(data.frame()) } # Log season dates season_start <- filtered_harvesting_data$season_start[1] season_end <- filtered_harvesting_data$season_end[1] ci_date_range <- paste(format(min(filtered_field_CI_data$Date), "%Y-%m-%d"), "to", format(max(filtered_field_CI_data$Date), "%Y-%m-%d")) # Create a linear interpolation function for the CI data tryCatch({ ApproxFun <- stats::approxfun(x = filtered_field_CI_data$Date, y = filtered_field_CI_data$value) Dates <- seq.Date(min(filtered_field_CI_data$Date), max(filtered_field_CI_data$Date), by = 1) LinearFit <- ApproxFun(Dates) # Combine interpolated data with the original CI data CI <- data.frame(Date = Dates, FitData = LinearFit) %>% dplyr::left_join(filtered_field_CI_data, by = "Date") %>% dplyr::filter(Date > filtered_harvesting_data$season_start & Date < filtered_harvesting_data$season_end) # If CI is empty after filtering, return an empty dataframe if (nrow(CI) == 0) { safe_log(paste0("No CI data within season dates for field: ", field_name, " (Season: ", season, ", dates: ", format(season_start, "%Y-%m-%d"), " to ", format(season_end, "%Y-%m-%d"), "). Available CI data range: ", ci_date_range), "WARNING") return(data.frame()) } # Add additional columns CI <- CI %>% dplyr::mutate( DOY = seq(1, n(), 1), model = paste0("Data", season, " : ", field_name), season = season, subField = field_name ) # Log successful interpolation safe_log(paste0("Successfully interpolated CI data for field: ", field_name, " (Season: ", season, ", dates: ", format(season_start, "%Y-%m-%d"), " to ", format(season_end, "%Y-%m-%d"), "). ", nrow(CI), " data points created.")) return(CI) }, error = function(e) { safe_log(paste0("Error interpolating CI data for field ", field_name, " in season ", season, " (", format(season_start, "%Y-%m-%d"), " to ", format(season_end, "%Y-%m-%d"), "): ", e$message), "ERROR") return(data.frame()) }) } #' Generate interpolated CI data for all fields and seasons #' #' @param years Vector of years to process #' @param harvesting_data Dataframe with harvesting information #' @param ci_data Long-format dataframe with CI measurements #' @return Dataframe with interpolated daily CI values for all fields/seasons #' generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) { safe_log("Starting CI data interpolation for all fields") # Process each year result <- purrr::map_df(years, function(yr) { safe_log(paste("Processing year:", yr)) # Get the fields harvested in this year with valid season start dates sub_fields <- harvesting_data %>% dplyr::filter(year == yr, !is.na(season_start)) %>% dplyr::pull(sub_field) if (length(sub_fields) == 0) { safe_log(paste("No fields with valid season data for year:", yr), "WARNING") return(data.frame()) } # Filter sub_fields to only include those with value data in ci_data valid_sub_fields <- sub_fields %>% purrr::keep(~ any(ci_data$sub_field == .x)) if (length(valid_sub_fields) == 0) { safe_log(paste("No fields with CI data for year:", yr), "WARNING") return(data.frame()) } # Extract and interpolate data for each valid field safe_log(paste("Processing", length(valid_sub_fields), "fields for year:", yr)) result <- purrr::map(valid_sub_fields, ~ extract_CI_data(.x, harvesting_data = harvesting_data, field_CI_data = ci_data, season = yr)) %>% purrr::list_rbind() safe_log(paste("Generated", nrow(result), "interpolated data points for year:", yr)) return(result) }) safe_log(paste("Total interpolated data points:", nrow(result))) return(result) } #' Calculate growth metrics for interpolated CI data #' #' @param interpolated_data Dataframe with interpolated CI values #' @return Dataframe with added growth metrics (CI_per_day and cumulative_CI) #' calculate_growth_metrics <- function(interpolated_data) { if (nrow(interpolated_data) == 0) { safe_log("No data provided to calculate growth metrics", "WARNING") return(interpolated_data) } result <- interpolated_data %>% dplyr::group_by(model) %>% dplyr::mutate( CI_per_day = FitData - dplyr::lag(FitData), cumulative_CI = cumsum(FitData) ) return(result) } #' Save interpolated growth model data #' #' @param data Dataframe with interpolated growth data #' @param output_dir Directory to save the output #' @param file_name Filename for the output (default: "All_pivots_Cumulative_CI_quadrant_year_v2.rds") #' @return Path to the saved file #' save_growth_model <- function(data, output_dir, file_name = "All_pivots_Cumulative_CI_quadrant_year_v2.rds") { # Create output directory if it doesn't exist dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) # Create full file path file_path <- here::here(output_dir, file_name) # Save the data saveRDS(data, file_path) safe_log(paste("Interpolated CI data saved to:", file_path)) return(file_path) }