- Updated all CI maps to use tm_scale_continuous() for proper tmap v4 compatibility - Added fixed color scale limits (1-8 for CI, -3 to +3 for differences) for consistent field comparison - Fixed YAML header formatting issues in CI_report_dashboard_planet.Rmd - Positioned RGB map before CI overview map as requested - Removed all obsolete use_breaks parameter references - Enhanced error handling and logging throughout the pipeline - Added new experimental analysis scripts and improvements to mosaic creation
422 lines
15 KiB
R
422 lines
15 KiB
R
# Optimal CI Analysis - Day 30 CI values with quadratic fitting
|
|
# Author: SmartCane Analysis Team
|
|
# Date: 2025-06-12
|
|
|
|
# Load required libraries
|
|
library(ggplot2)
|
|
library(dplyr)
|
|
library(readr)
|
|
|
|
# Set file path
|
|
rds_file_path <- "C:/Users/timon/Resilience BV/4020 SCane ESA DEMO - Documenten/General/4020 SCDEMO Team/4020 TechnicalData/WP3/smartcane/laravel_app/storage/app/chemba/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds"
|
|
|
|
# Check if file exists
|
|
if (!file.exists(rds_file_path)) {
|
|
stop("RDS file not found at specified path: ", rds_file_path)
|
|
}
|
|
|
|
# Load the data
|
|
cat("Loading RDS file...\n")
|
|
ci_data <- readRDS(rds_file_path)
|
|
|
|
# Display structure of the data to understand it better
|
|
cat("Data structure:\n")
|
|
str(ci_data)
|
|
cat("\nFirst few rows:\n")
|
|
head(ci_data)
|
|
cat("\nColumn names:\n")
|
|
print(colnames(ci_data))
|
|
|
|
# Filter data based on requirements
|
|
cat("\nApplying data filters...\n")
|
|
|
|
# 1. Filter out models that don't reach at least DOY 300
|
|
model_doy_max <- ci_data %>%
|
|
group_by(model) %>%
|
|
summarise(max_doy = max(DOY, na.rm = TRUE), .groups = 'drop')
|
|
|
|
valid_models <- model_doy_max %>%
|
|
filter(max_doy >= 300) %>%
|
|
pull(model)
|
|
|
|
cat(paste("Models before filtering:", n_distinct(ci_data$model), "\n"))
|
|
cat(paste("Models reaching at least DOY 300:", length(valid_models), "\n"))
|
|
|
|
ci_data <- ci_data %>%
|
|
filter(model %in% valid_models)
|
|
|
|
# 2. Apply IQR filtering per DOY (remove outliers outside IQR)
|
|
cat("Applying IQR filtering per DOY...\n")
|
|
original_rows <- nrow(ci_data)
|
|
|
|
ci_data <- ci_data %>%
|
|
group_by(DOY) %>%
|
|
mutate(
|
|
Q1 = quantile(FitData, 0.25, na.rm = TRUE),
|
|
Q3 = quantile(FitData, 0.75, na.rm = TRUE),
|
|
IQR = Q3 - Q1,
|
|
lower_bound = Q1 - 1.5 * IQR,
|
|
upper_bound = Q3 + 1.5 * IQR
|
|
) %>%
|
|
filter(FitData >= lower_bound & FitData <= upper_bound) %>%
|
|
select(-Q1, -Q3, -IQR, -lower_bound, -upper_bound) %>%
|
|
ungroup()
|
|
|
|
filtered_rows <- nrow(ci_data)
|
|
cat(paste("Rows before IQR filtering:", original_rows, "\n"))
|
|
cat(paste("Rows after IQR filtering:", filtered_rows, "\n"))
|
|
cat(paste("Removed", original_rows - filtered_rows, "outliers (",
|
|
round(100 * (original_rows - filtered_rows) / original_rows, 1), "%)\n"))
|
|
|
|
# Check what day values are available after filtering
|
|
if ("DOY" %in% colnames(ci_data)) {
|
|
cat("\nUnique DOY values after filtering:\n")
|
|
print(sort(unique(ci_data$DOY)))
|
|
} else {
|
|
cat("\nNo 'DOY' column found. Available columns:\n")
|
|
print(colnames(ci_data))
|
|
}
|
|
|
|
# Extract CI values at day 30 for each field
|
|
cat("\nExtracting day 30 CI values...\n")
|
|
|
|
# Set column names based on known structure
|
|
day_col <- "DOY"
|
|
ci_col <- "FitData"
|
|
field_col <- "model"
|
|
|
|
# Try different possible field column name combinations
|
|
if ("field" %in% colnames(ci_data)) {
|
|
field_col <- "field"
|
|
} else if ("Field" %in% colnames(ci_data)) {
|
|
field_col <- "Field"
|
|
} else if ("pivot" %in% colnames(ci_data)) {
|
|
field_col <- "pivot"
|
|
} else if ("Pivot" %in% colnames(ci_data)) {
|
|
field_col <- "Pivot"
|
|
} else if ("field_id" %in% colnames(ci_data)) {
|
|
field_col <- "field_id"
|
|
} else if ("Field_ID" %in% colnames(ci_data)) {
|
|
field_col <- "Field_ID"
|
|
}
|
|
|
|
# Check if we found the required columns
|
|
if (!("DOY" %in% colnames(ci_data))) {
|
|
stop("DOY column not found in data")
|
|
}
|
|
if (!("FitData" %in% colnames(ci_data))) {
|
|
stop("FitData column not found in data")
|
|
}
|
|
if (is.null(field_col)) {
|
|
cat("Could not automatically identify field column. Please check column names.\n")
|
|
cat("Available columns: ", paste(colnames(ci_data), collapse = ", "), "\n")
|
|
stop("Manual field column identification required")
|
|
}
|
|
|
|
cat(paste("Using columns - DOY:", day_col, "Field:", field_col, "FitData:", ci_col, "\n"))
|
|
|
|
# Extract day 30 data
|
|
day_30_data <- ci_data %>%
|
|
filter(DOY %% 30 == 0) %>%
|
|
select(field = !!sym(field_col), ci = !!sym(ci_col), DOY) %>%
|
|
na.omit() %>%
|
|
arrange(field)
|
|
|
|
cat(paste("Found", nrow(day_30_data), "fields with day 30 CI values\n"))
|
|
|
|
if (nrow(day_30_data) == 0) {
|
|
stop("No data found for day 30. Check if day 30 exists in the dataset.")
|
|
}
|
|
|
|
# Display summary of day 30 data
|
|
cat("\nSummary of day 30 CI values:\n")
|
|
print(summary(day_30_data))
|
|
|
|
# Add field index for plotting (assuming fields represent some spatial or sequential order)
|
|
#day_30_data$field_index <- 1:nrow(day_30_data)
|
|
|
|
# Create scatter plot
|
|
p1 <- ggplot(day_30_data, aes(x = DOY, y = ci)) +
|
|
geom_point(color = "blue", size = 3, alpha = 0.7) +
|
|
labs(
|
|
title = "CI Values at Day 30 for All Fields",
|
|
x = "Day of Year (DOY)",
|
|
y = "CI Value",
|
|
subtitle = paste("Total fields:", nrow(day_30_data))
|
|
) +
|
|
theme_minimal() +
|
|
theme(
|
|
plot.title = element_text(size = 14, face = "bold"),
|
|
axis.title = element_text(size = 12),
|
|
axis.text = element_text(size = 10)
|
|
)
|
|
|
|
print(p1)
|
|
|
|
# Try multiple curve fitting approaches
|
|
cat("\nTrying different curve fitting approaches...\n")
|
|
|
|
# Aggregate data by DOY (take mean CI for each DOY to reduce noise)
|
|
aggregated_data <- day_30_data %>%
|
|
group_by(DOY) %>%
|
|
summarise(
|
|
mean_ci = mean(ci, na.rm = TRUE),
|
|
median_ci = median(ci, na.rm = TRUE),
|
|
count = n(),
|
|
.groups = 'drop'
|
|
) %>%
|
|
filter(count >= 5) # Only keep DOY values with sufficient data points
|
|
|
|
cat(paste("Aggregated to", nrow(aggregated_data), "DOY points with sufficient data\n"))
|
|
|
|
# Create prediction data for smooth curves
|
|
x_smooth <- seq(min(aggregated_data$DOY), max(aggregated_data$DOY), length.out = 100)
|
|
|
|
# 1. Quadratic model (as requested)
|
|
cat("\n1. Fitting quadratic model...\n")
|
|
quad_model <- lm(mean_ci ~ poly(DOY, 2, raw = TRUE), data = aggregated_data)
|
|
quad_pred <- predict(quad_model, newdata = data.frame(DOY = x_smooth))
|
|
quad_r2 <- summary(quad_model)$r.squared
|
|
cat(paste("Quadratic R² =", round(quad_r2, 3), "\n"))
|
|
|
|
# 2. Logistic growth model: y = K / (1 + exp(-r*(x-x0))) - biologically realistic
|
|
cat("\n2. Fitting logistic growth model...\n")
|
|
tryCatch({
|
|
# Estimate starting values
|
|
K_start <- max(aggregated_data$mean_ci) * 1.1 # Carrying capacity
|
|
r_start <- 0.05 # Growth rate
|
|
x0_start <- mean(aggregated_data$DOY) # Inflection point
|
|
|
|
logistic_model <- nls(mean_ci ~ K / (1 + exp(-r * (DOY - x0))),
|
|
data = aggregated_data,
|
|
start = list(K = K_start, r = r_start, x0 = x0_start),
|
|
control = nls.control(maxiter = 1000))
|
|
|
|
logistic_pred <- predict(logistic_model, newdata = data.frame(DOY = x_smooth))
|
|
logistic_r2 <- 1 - sum(residuals(logistic_model)^2) / sum((aggregated_data$mean_ci - mean(aggregated_data$mean_ci))^2)
|
|
cat(paste("Logistic growth R² =", round(logistic_r2, 3), "\n"))
|
|
}, error = function(e) {
|
|
cat("Logistic growth model failed to converge\n")
|
|
logistic_model <- NULL
|
|
logistic_pred <- NULL
|
|
logistic_r2 <- NA
|
|
})
|
|
|
|
# 3. Beta function model: good for crop growth curves with clear peak
|
|
cat("\n3. Fitting Beta function model...\n")
|
|
tryCatch({
|
|
# Normalize DOY to 0-1 range for Beta function
|
|
doy_min <- min(aggregated_data$DOY)
|
|
doy_max <- max(aggregated_data$DOY)
|
|
aggregated_data$doy_norm <- (aggregated_data$DOY - doy_min) / (doy_max - doy_min)
|
|
|
|
# Beta function: y = a * (x^(p-1)) * ((1-x)^(q-1)) + c
|
|
beta_model <- nls(mean_ci ~ a * (doy_norm^(p-1)) * ((1-doy_norm)^(q-1)) + c,
|
|
data = aggregated_data,
|
|
start = list(a = max(aggregated_data$mean_ci) * 20, p = 2, q = 3, c = min(aggregated_data$mean_ci)),
|
|
control = nls.control(maxiter = 1000))
|
|
|
|
# Predict on normalized scale then convert back
|
|
x_smooth_norm <- (x_smooth - doy_min) / (doy_max - doy_min)
|
|
beta_pred <- predict(beta_model, newdata = data.frame(doy_norm = x_smooth_norm))
|
|
beta_r2 <- 1 - sum(residuals(beta_model)^2) / sum((aggregated_data$mean_ci - mean(aggregated_data$mean_ci))^2)
|
|
cat(paste("Beta function R² =", round(beta_r2, 3), "\n"))
|
|
}, error = function(e) {
|
|
cat("Beta function model failed to converge\n")
|
|
beta_model <- NULL
|
|
beta_pred <- NULL
|
|
beta_r2 <- NA
|
|
})
|
|
|
|
# 4. Gaussian (normal) curve: good for symmetric growth patterns
|
|
cat("\n4. Fitting Gaussian curve...\n")
|
|
tryCatch({
|
|
# Gaussian: y = a * exp(-((x-mu)^2)/(2*sigma^2)) + c
|
|
gaussian_model <- nls(mean_ci ~ a * exp(-((DOY - mu)^2)/(2 * sigma^2)) + c,
|
|
data = aggregated_data,
|
|
start = list(a = max(aggregated_data$mean_ci),
|
|
mu = aggregated_data$DOY[which.max(aggregated_data$mean_ci)],
|
|
sigma = 50,
|
|
c = min(aggregated_data$mean_ci)),
|
|
control = nls.control(maxiter = 1000))
|
|
|
|
gaussian_pred <- predict(gaussian_model, newdata = data.frame(DOY = x_smooth))
|
|
gaussian_r2 <- 1 - sum(residuals(gaussian_model)^2) / sum((aggregated_data$mean_ci - mean(aggregated_data$mean_ci))^2)
|
|
cat(paste("Gaussian R² =", round(gaussian_r2, 3), "\n"))
|
|
}, error = function(e) {
|
|
cat("Gaussian model failed to converge\n")
|
|
gaussian_model <- NULL
|
|
gaussian_pred <- NULL
|
|
gaussian_r2 <- NA
|
|
})
|
|
|
|
# 5. LOESS (local regression) - for comparison
|
|
cat("\n5. Fitting LOESS smoothing...\n")
|
|
loess_model <- loess(mean_ci ~ DOY, data = aggregated_data, span = 0.5)
|
|
loess_pred <- predict(loess_model, newdata = data.frame(DOY = x_smooth))
|
|
loess_r2 <- 1 - sum(residuals(loess_model)^2) / sum((aggregated_data$mean_ci - mean(aggregated_data$mean_ci))^2)
|
|
cat(paste("LOESS R² =", round(loess_r2, 3), "\n"))
|
|
|
|
# Calculate confidence intervals for both models
|
|
cat("\nCalculating confidence intervals...\n")
|
|
|
|
# Function to calculate confidence intervals using residual-based method
|
|
calculate_ci <- function(model, data, newdata, alpha = 0.5) {
|
|
# Get model predictions
|
|
pred_vals <- predict(model, newdata = newdata)
|
|
|
|
# For parametric models (lm), use built-in prediction intervals
|
|
if(class(model)[1] == "lm") {
|
|
pred_intervals <- predict(model, newdata = newdata, interval = "confidence", level = 1 - alpha)
|
|
return(list(
|
|
lower = pred_intervals[, "lwr"],
|
|
upper = pred_intervals[, "upr"]
|
|
))
|
|
}
|
|
|
|
# For LOESS, calculate confidence intervals using residual bootstrap
|
|
if(class(model)[1] == "loess") {
|
|
# Calculate residuals from the original model
|
|
fitted_vals <- fitted(model)
|
|
residuals_vals <- residuals(model)
|
|
residual_sd <- sd(residuals_vals, na.rm = TRUE)
|
|
|
|
# Use normal approximation for confidence intervals
|
|
# For 50% CI, use 67% quantile (approximately 0.67 standard deviations)
|
|
margin <- qnorm(1 - alpha/2) * residual_sd
|
|
|
|
return(list(
|
|
lower = pred_vals - margin,
|
|
upper = pred_vals + margin
|
|
))
|
|
}
|
|
|
|
# Fallback method
|
|
residual_sd <- sd(residuals(model), na.rm = TRUE)
|
|
margin <- qnorm(1 - alpha/2) * residual_sd
|
|
return(list(
|
|
lower = pred_vals - margin,
|
|
upper = pred_vals + margin
|
|
))
|
|
}# Calculate CIs for quadratic model using aggregated data (same as model fitting)
|
|
quad_ci <- calculate_ci(quad_model, aggregated_data, data.frame(DOY = x_smooth))
|
|
|
|
# Calculate CIs for LOESS model using aggregated data (same as model fitting)
|
|
loess_ci <- calculate_ci(loess_model, aggregated_data, data.frame(DOY = x_smooth))
|
|
|
|
# Create separate plots for LOESS and Quadratic models
|
|
cat("\nCreating LOESS plot with confidence intervals...\n")
|
|
|
|
# LOESS plot
|
|
p_loess <- ggplot(day_30_data, aes(x = DOY, y = ci)) +
|
|
geom_point(color = "lightblue", size = 1.5, alpha = 0.4) +
|
|
geom_point(data = aggregated_data, aes(x = DOY, y = mean_ci),
|
|
color = "darkblue", size = 3, alpha = 0.8) +
|
|
geom_ribbon(data = data.frame(DOY = x_smooth,
|
|
lower = loess_ci$lower,
|
|
upper = loess_ci$upper),
|
|
aes(x = DOY, ymin = lower, ymax = upper),
|
|
alpha = 0.3, fill = "purple", inherit.aes = FALSE) +
|
|
geom_line(data = data.frame(DOY = x_smooth, loess = loess_pred),
|
|
aes(x = DOY, y = loess),
|
|
color = "purple", size = 1.5) +
|
|
labs(
|
|
title = "LOESS Model - CI Values Over Growing Season",
|
|
x = "Day of Year (DOY)",
|
|
y = "CI Value",
|
|
subtitle = paste("LOESS R² =", round(loess_r2, 3), "| 50% Confidence Interval")
|
|
) +
|
|
theme_minimal() +
|
|
theme(
|
|
plot.title = element_text(size = 14, face = "bold"),
|
|
axis.title = element_text(size = 12),
|
|
axis.text = element_text(size = 10)
|
|
)
|
|
|
|
# Find optimal point for LOESS
|
|
loess_max_idx <- which.max(loess_pred)
|
|
loess_optimal_doy <- x_smooth[loess_max_idx]
|
|
loess_optimal_ci <- loess_pred[loess_max_idx]
|
|
|
|
p_loess <- p_loess +
|
|
geom_point(aes(x = loess_optimal_doy, y = loess_optimal_ci),
|
|
color = "red", size = 5, shape = 8) +
|
|
annotate("text",
|
|
x = loess_optimal_doy + 30,
|
|
y = loess_optimal_ci,
|
|
label = paste("Optimal: DOY", round(loess_optimal_doy, 1), "\nCI =", round(loess_optimal_ci, 3)),
|
|
color = "red", size = 4, fontface = "bold")
|
|
|
|
print(p_loess)
|
|
|
|
cat("\nCreating Quadratic plot with confidence intervals...\n")
|
|
|
|
# Quadratic plot
|
|
p_quadratic <- ggplot(day_30_data, aes(x = DOY, y = ci)) +
|
|
geom_point(color = "lightcoral", size = 1.5, alpha = 0.4) +
|
|
geom_point(data = aggregated_data, aes(x = DOY, y = mean_ci),
|
|
color = "darkred", size = 3, alpha = 0.8) +
|
|
geom_ribbon(data = data.frame(DOY = x_smooth,
|
|
lower = quad_ci$lower,
|
|
upper = quad_ci$upper),
|
|
aes(x = DOY, ymin = lower, ymax = upper),
|
|
alpha = 0.3, fill = "red", inherit.aes = FALSE) +
|
|
geom_line(data = data.frame(DOY = x_smooth, quadratic = quad_pred),
|
|
aes(x = DOY, y = quadratic),
|
|
color = "red", size = 1.5) +
|
|
labs(
|
|
title = "Quadratic Model - CI Values Over Growing Season",
|
|
x = "Day of Year (DOY)",
|
|
y = "CI Value",
|
|
subtitle = paste("Quadratic R² =", round(quad_r2, 3), "| 50% Confidence Interval")
|
|
) +
|
|
theme_minimal() +
|
|
theme(
|
|
plot.title = element_text(size = 14, face = "bold"),
|
|
axis.title = element_text(size = 12),
|
|
axis.text = element_text(size = 10)
|
|
)
|
|
|
|
# Find optimal point for Quadratic
|
|
quad_coeffs <- coef(quad_model)
|
|
a <- quad_coeffs[3]
|
|
b <- quad_coeffs[2]
|
|
|
|
if(a != 0) {
|
|
quad_optimal_doy <- -b / (2*a)
|
|
doy_range <- range(aggregated_data$DOY)
|
|
if(quad_optimal_doy >= doy_range[1] && quad_optimal_doy <= doy_range[2]) {
|
|
quad_optimal_ci <- predict(quad_model, newdata = data.frame(DOY = quad_optimal_doy))
|
|
} else {
|
|
quad_optimal_doy <- x_smooth[which.max(quad_pred)]
|
|
quad_optimal_ci <- max(quad_pred)
|
|
}
|
|
} else {
|
|
quad_optimal_doy <- x_smooth[which.max(quad_pred)]
|
|
quad_optimal_ci <- max(quad_pred)
|
|
}
|
|
|
|
p_quadratic <- p_quadratic +
|
|
geom_point(aes(x = quad_optimal_doy, y = quad_optimal_ci),
|
|
color = "darkred", size = 5, shape = 8) +
|
|
annotate("text",
|
|
x = quad_optimal_doy + 30,
|
|
y = quad_optimal_ci,
|
|
label = paste("Optimal: DOY", round(quad_optimal_doy, 1), "\nCI =", round(quad_optimal_ci, 3)),
|
|
color = "darkred", size = 4, fontface = "bold")
|
|
|
|
print(p_quadratic)
|
|
print(p_loess)
|
|
|
|
# Print results summary
|
|
cat("\n=== RESULTS SUMMARY ===\n")
|
|
cat(paste("LOESS Model - R² =", round(loess_r2, 3), "\n"))
|
|
cat(paste(" Optimal DOY:", round(loess_optimal_doy, 1), "\n"))
|
|
cat(paste(" Optimal CI:", round(loess_optimal_ci, 4), "\n\n"))
|
|
|
|
cat(paste("Quadratic Model - R² =", round(quad_r2, 3), "\n"))
|
|
cat(paste(" Optimal DOY:", round(quad_optimal_doy, 1), "\n"))
|
|
cat(paste(" Optimal CI:", round(quad_optimal_ci, 4), "\n"))
|
|
|
|
|