# filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\growth_model_utils.R # # GROWTH_MODEL_UTILS.R # =================== # Utility functions for growth model interpolation and manipulation. # These functions support the creation of continuous growth models from point measurements. #' Load and prepare the combined CI data (Per-Field Architecture) #' #' @param daily_vals_dir Directory containing per-field daily RDS files (Data/extracted_ci/daily_vals) #' @return Long-format dataframe with CI values by date and field #' load_combined_ci_data <- function(daily_vals_dir) { # For per-field architecture: daily_vals_dir = Data/extracted_ci/daily_vals # Structure: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds if (!dir.exists(daily_vals_dir)) { stop(paste("Daily values directory not found:", daily_vals_dir)) } safe_log(paste("Loading per-field CI data from:", daily_vals_dir)) # Find all daily RDS files recursively (per-field structure) # IMPORTANT: Only load files matching the per-field format YYYY-MM-DD.rds in field subdirectories all_daily_files <- list.files( path = daily_vals_dir, pattern = "^\\d{4}-\\d{2}-\\d{2}\\.rds$", # Only YYYY-MM-DD.rds format full.names = TRUE, recursive = TRUE ) # Further filter: only keep files that are in a subdirectory (per-field structure) # Exclude legacy files at the root level like "extracted_2024-02-29_whole_field.rds" all_daily_files <- all_daily_files[basename(dirname(all_daily_files)) != "daily_vals"] if (length(all_daily_files) == 0) { stop(paste("No per-field daily RDS files found in:", daily_vals_dir)) } safe_log(sprintf("Found %d per-field daily RDS files to load (filtered from legacy format)", length(all_daily_files))) # Rebuild with explicit date and field tracking # File structure: daily_vals/{FIELD_NAME}/{YYYY-MM-DD}.rds combined_long <- data.frame() for (file in all_daily_files) { tryCatch({ # Extract date from filename: {YYYY-MM-DD}.rds filename <- basename(file) date_str <- tools::file_path_sans_ext(filename) # Parse date - handle various formats parsed_date <- NA if (nchar(date_str) == 10 && grepl("^\\d{4}-\\d{2}-\\d{2}$", date_str)) { parsed_date <- as.Date(date_str, format = "%Y-%m-%d") } else { safe_log(sprintf("Warning: Could not parse date from filename: %s", filename), "WARNING") next } if (is.na(parsed_date)) { safe_log(sprintf("Warning: Invalid date parsed from: %s", filename), "WARNING") next } # Read RDS file rds_data <- tryCatch({ readRDS(file) }, error = function(e) { safe_log(sprintf("Error reading RDS file %s: %s", file, e$message), "WARNING") return(NULL) }) if (is.null(rds_data) || nrow(rds_data) == 0) { next } # Add date column to the data rds_data <- rds_data %>% dplyr::mutate(Date = parsed_date) combined_long <- rbind(combined_long, rds_data) }, error = function(e) { safe_log(sprintf("Error processing file %s: %s", file, e$message), "WARNING") }) } if (nrow(combined_long) == 0) { safe_log("Warning: No valid CI data loaded from daily files", "WARNING") return(data.frame()) } # Reshape to long format using ci_mean as the main CI value # Only keep rows where ci_mean has valid data pivot_stats_long <- combined_long %>% dplyr::select(field, sub_field, ci_mean, Date) %>% dplyr::rename(value = ci_mean) %>% dplyr::mutate(value = as.numeric(value)) %>% # Keep rows even if ci_mean is NA or 0 (might be valid), but drop if Date is missing tidyr::drop_na(Date) %>% dplyr::filter(!is.na(sub_field), !is.na(field)) %>% dplyr::filter(!is.infinite(value)) %>% dplyr::distinct() safe_log(sprintf("Loaded %d CI data points from %d daily files", nrow(pivot_stats_long), length(all_daily_files))) return(pivot_stats_long) } #' Extract and interpolate CI data for a specific field and season #' #' @param field_name Name of the field or sub-field #' @param harvesting_data Dataframe with harvesting information #' @param field_CI_data Dataframe with CI measurements #' @param season Year of the growing season #' @param verbose Logical: whether to log warnings/info (default TRUE). Set to FALSE during progress bar iteration. #' @return Dataframe with interpolated daily CI values #' extract_CI_data <- function(field_name, harvesting_data, field_CI_data, season, verbose = TRUE) { # Filter harvesting data for the given season and field name filtered_harvesting_data <- harvesting_data %>% dplyr::filter(year == season, sub_field == field_name) if (nrow(filtered_harvesting_data) == 0) { if (verbose) safe_log(paste("No harvesting data found for field:", field_name, "in season:", season), "WARNING") return(data.frame()) } # Filter field CI data for the given field name filtered_field_CI_data <- field_CI_data %>% dplyr::filter(sub_field == field_name) # Return an empty data frame if no CI data is found if (nrow(filtered_field_CI_data) == 0) { if (verbose) safe_log(paste("No CI data found for field:", field_name, "in season:", season), "WARNING") return(data.frame()) } # Log season dates season_start <- filtered_harvesting_data$season_start[1] season_end <- filtered_harvesting_data$season_end[1] ci_date_range <- paste(format(min(filtered_field_CI_data$Date), "%Y-%m-%d"), "to", format(max(filtered_field_CI_data$Date), "%Y-%m-%d")) # Create a linear interpolation function for the CI data tryCatch({ ApproxFun <- stats::approxfun(x = filtered_field_CI_data$Date, y = filtered_field_CI_data$value) Dates <- seq.Date(min(filtered_field_CI_data$Date), max(filtered_field_CI_data$Date), by = 1) LinearFit <- ApproxFun(Dates) # Combine interpolated data with the original CI data CI <- data.frame(Date = Dates, FitData = LinearFit) %>% dplyr::left_join(filtered_field_CI_data, by = "Date") %>% dplyr::filter(Date > filtered_harvesting_data$season_start & Date < filtered_harvesting_data$season_end) # If CI is empty after filtering, return an empty dataframe if (nrow(CI) == 0) { if (verbose) { safe_log(paste0("No CI data within season dates for field: ", field_name, " (Season: ", season, ", dates: ", format(season_start, "%Y-%m-%d"), " to ", format(season_end, "%Y-%m-%d"), "). Available CI data range: ", ci_date_range), "WARNING") } return(data.frame()) } # Add additional columns CI <- CI %>% dplyr::mutate( DOY = seq(1, n(), 1), model = paste0("Data", season, " : ", field_name), season = season, subField = field_name ) # Return data with success status return(CI) }, error = function(e) { # Return empty dataframe on error (will be tracked separately) if (verbose) { safe_log(paste0("Error interpolating CI data for field ", field_name, " in season ", season, " (", format(season_start, "%Y-%m-%d"), " to ", format(season_end, "%Y-%m-%d"), "): ", e$message), "ERROR") } return(data.frame()) }) } #' Generate interpolated CI data for all fields and seasons #' #' @param years Vector of years to process #' @param harvesting_data Dataframe with harvesting information #' @param ci_data Long-format dataframe with CI measurements #' @return Dataframe with interpolated daily CI values for all fields/seasons #' generate_interpolated_ci_data <- function(years, harvesting_data, ci_data) { safe_log("Starting CI data interpolation for all fields") # Track failed fields for end-of-run summary failed_fields <- list() total_fields <- 0 successful_fields <- 0 # Process each year result <- purrr::map_df(years, function(yr) { # Get the fields harvested in this year with valid season start dates sub_fields <- harvesting_data %>% dplyr::filter(year == yr, !is.na(season_start)) %>% dplyr::pull(sub_field) if (length(sub_fields) == 0) { return(data.frame()) } # Filter sub_fields to only include those with value data in ci_data valid_sub_fields <- sub_fields %>% purrr::keep(~ any(ci_data$sub_field == .x)) if (length(valid_sub_fields) == 0) { return(data.frame()) } # Initialize progress bar for this year total_fields <<- total_fields + length(valid_sub_fields) pb <- txtProgressBar(min = 0, max = length(valid_sub_fields), style = 3, width = 50) counter <- 0 # Extract and interpolate data for each valid field with progress bar result_list <- list() for (field in valid_sub_fields) { counter <- counter + 1 setTxtProgressBar(pb, counter) # Call with verbose=FALSE to suppress warnings during progress bar iteration field_result <- extract_CI_data(field, harvesting_data = harvesting_data, field_CI_data = ci_data, season = yr, verbose = FALSE) if (nrow(field_result) > 0) { successful_fields <<- successful_fields + 1 result_list[[field]] <- field_result } else { # Track failed field failed_fields[[length(failed_fields) + 1]] <<- list( field = field, season = yr, reason = "Unable to generate interpolated data" ) } } close(pb) cat("\n") # Newline after progress bar # Combine all results for this year if (length(result_list) > 0) { purrr::list_rbind(result_list) } else { data.frame() } }) # Print summary safe_log(sprintf("\n=== Interpolation Summary ===")) safe_log(sprintf("Successfully interpolated: %d/%d fields", successful_fields, total_fields)) if (length(failed_fields) > 0) { safe_log(sprintf("Failed to interpolate: %d fields", length(failed_fields))) for (failure in failed_fields) { safe_log(sprintf(" - Field %s (Season %d): %s", failure$field, failure$season, failure$reason), "WARNING") } } safe_log(sprintf("Total interpolated data points: %d", nrow(result))) return(result) } #' Calculate growth metrics for interpolated CI data #' #' @param interpolated_data Dataframe with interpolated CI values #' @return Dataframe with added growth metrics (CI_per_day and cumulative_CI) #' calculate_growth_metrics <- function(interpolated_data) { if (nrow(interpolated_data) == 0) { safe_log("No data provided to calculate growth metrics", "WARNING") return(interpolated_data) } result <- interpolated_data %>% dplyr::group_by(model) %>% dplyr::mutate( CI_per_day = FitData - dplyr::lag(FitData), cumulative_CI = cumsum(FitData) ) return(result) } #' Save interpolated growth model data #' #' @param data Dataframe with interpolated growth data #' @param output_dir Directory to save the output #' @param file_name Filename for the output (default: "All_pivots_Cumulative_CI_quadrant_year_v2.rds") #' @return Path to the saved file #' save_growth_model <- function(data, output_dir, file_name = "All_pivots_Cumulative_CI_quadrant_year_v2.rds") { # Validate input if (is.null(output_dir) || !is.character(output_dir) || length(output_dir) == 0) { stop("output_dir must be a non-empty character string") } # Normalize path separators for Windows compatibility output_dir <- normalizePath(output_dir, winslash = "/", mustWork = FALSE) # Create output directory if it doesn't exist dir.create(output_dir, recursive = TRUE, showWarnings = FALSE) # Create full file path using file.path (more robust than here::here for absolute paths) file_path <- file.path(output_dir, file_name) # Save the data saveRDS(data, file_path) safe_log(paste("Interpolated CI data saved to:", file_path)) return(file_path) }