SmartCane/r_app/CI_report_dashboard_planet.Rmd
Timon bb2a599075 Enhanced SmartCane executive summary report with explanatory text and fixed priority map coloring
Added explanatory text for all visualizations
Fixed priority map color scheme (red=high priority, green=low priority)
Improved error handling in farm health data calculations
Added fallback mechanisms for missing data
2025-04-23 09:47:19 +02:00

568 lines
21 KiB
Plaintext

---
params:
ref: "word-styles-reference-var1.docx"
output_file: CI_report.docx
report_date: "2024-08-28"
data_dir: "Chemba"
mail_day: "Wednesday"
borders: TRUE
use_breaks: FALSE
output:
# html_document:
# toc: yes
# df_print: paged
word_document:
reference_docx: !expr file.path("word-styles-reference-var1.docx")
toc: yes
editor_options:
chunk_output_type: console
---
```{r setup_parameters, include=FALSE}
# Set up basic report parameters from input values
report_date <- params$report_date
mail_day <- params$mail_day
borders <- params$borders
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
# Environment setup notes (commented out)
# # Activeer de renv omgeving
# renv::activate()
# renv::deactivate()
# # Optioneel: Herstel de omgeving als dat nodig is
# # Je kunt dit commentaar geven als je het normaal niet wilt uitvoeren
# renv::restore()
```
```{r load_libraries, message=FALSE, warning=FALSE, include=FALSE}
# Configure knitr options
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Path management
library(here)
# Spatial data libraries
library(sf)
library(terra)
library(exactextractr)
# library(raster) - Removed as it's no longer maintained
# Data manipulation and visualization
library(tidyverse) # Includes dplyr, ggplot2, etc.
library(tmap)
library(lubridate)
library(zoo)
# Machine learning
library(rsample)
library(caret)
library(randomForest)
library(CAST)
# Load custom utility functions
tryCatch({
source("report_utils.R")
}, error = function(e) {
message(paste("Error loading report_utils.R:", e$message))
# Try alternative path if the first one fails
tryCatch({
source(here::here("r_app", "report_utils.R"))
}, error = function(e) {
stop("Could not load report_utils.R from either location: ", e$message)
})
})
```
```{r initialize_project_config, message=FALSE, warning=FALSE, include=FALSE}
# Set the project directory from parameters
project_dir <- params$data_dir
# Source project parameters with error handling
tryCatch({
source(here::here("r_app", "parameters_project.R"))
}, error = function(e) {
stop("Error loading parameters_project.R: ", e$message)
})
# Log initial configuration
safe_log("Starting the R Markdown script")
safe_log(paste("mail_day params:", params$mail_day))
safe_log(paste("report_date params:", params$report_date))
safe_log(paste("mail_day variable:", mail_day))
```
```{r calculate_dates_and_weeks, message=FALSE, warning=FALSE, include=FALSE}
# Set locale for consistent date formatting
Sys.setlocale("LC_TIME", "C")
# Initialize date variables from parameters
today <- as.character(report_date)
mail_day_as_character <- as.character(mail_day)
# Calculate week days
report_date_as_week_day <- weekdays(lubridate::ymd(today))
days_of_week <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
# Calculate initial week number
week <- lubridate::week(today)
safe_log(paste("Initial week calculation:", week, "today:", today))
# Calculate previous dates for comparisons
today_minus_1 <- as.character(lubridate::ymd(today) - 7)
today_minus_2 <- as.character(lubridate::ymd(today) - 14)
today_minus_3 <- as.character(lubridate::ymd(today) - 21)
# Log the weekday calculations for debugging
safe_log(paste("Report date weekday:", report_date_as_week_day))
safe_log(paste("Weekday index:", which(days_of_week == report_date_as_week_day)))
safe_log(paste("Mail day:", mail_day_as_character))
safe_log(paste("Mail day index:", which(days_of_week == mail_day_as_character)))
# Adjust week calculation based on mail day
if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_day_as_character)) {
safe_log("Adjusting weeks because of mail day")
week <- lubridate::week(today) + 1
today_minus_1 <- as.character(lubridate::ymd(today))
today_minus_2 <- as.character(lubridate::ymd(today) - 7)
today_minus_3 <- as.character(lubridate::ymd(today) - 14)
}
# Generate subtitle for report
subtitle_var <- paste("Report generated on", Sys.Date())
# Calculate week numbers for previous weeks
week_minus_1 <- week - 1
week_minus_2 <- week - 2
week_minus_3 <- week - 3
# Format current week with leading zeros
week <- sprintf("%02d", week)
# Get years for each date
year <- lubridate::year(today)
year_1 <- lubridate::year(today_minus_1)
year_2 <- lubridate::year(today_minus_2)
year_3 <- lubridate::year(today_minus_3)
```
```{r data, message=TRUE, warning=TRUE, include=FALSE}
# Load CI index data with error handling
tryCatch({
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
safe_log("Successfully loaded CI quadrant data")
}, error = function(e) {
stop("Error loading CI quadrant data: ", e$message)
})
# Get file paths for different weeks using the utility function
tryCatch({
path_to_week_current = get_week_path(weekly_CI_mosaic, today, 0)
path_to_week_minus_1 = get_week_path(weekly_CI_mosaic, today, -1)
path_to_week_minus_2 = get_week_path(weekly_CI_mosaic, today, -2)
path_to_week_minus_3 = get_week_path(weekly_CI_mosaic, today, -3)
# Log the calculated paths
safe_log("Required mosaic paths:")
safe_log(paste("Path to current week:", path_to_week_current))
safe_log(paste("Path to week minus 1:", path_to_week_minus_1))
safe_log(paste("Path to week minus 2:", path_to_week_minus_2))
safe_log(paste("Path to week minus 3:", path_to_week_minus_3))
# Validate that files exist
if (!file.exists(path_to_week_current)) warning("Current week mosaic file does not exist: ", path_to_week_current)
if (!file.exists(path_to_week_minus_1)) warning("Week minus 1 mosaic file does not exist: ", path_to_week_minus_1)
if (!file.exists(path_to_week_minus_2)) warning("Week minus 2 mosaic file does not exist: ", path_to_week_minus_2)
if (!file.exists(path_to_week_minus_3)) warning("Week minus 3 mosaic file does not exist: ", path_to_week_minus_3)
# Load raster data with terra functions
CI <- terra::rast(path_to_week_current)$CI
CI_m1 <- terra::rast(path_to_week_minus_1)$CI
CI_m2 <- terra::rast(path_to_week_minus_2)$CI
CI_m3 <- terra::rast(path_to_week_minus_3)$CI
}, error = function(e) {
stop("Error loading raster data: ", e$message)
})
```
```{r calculate_difference_rasters, message=TRUE, warning=TRUE, include=FALSE}
# Calculate difference rasters for comparisons
tryCatch({
# Calculate weekly difference
last_week_dif_raster_abs <- (CI - CI_m1)
safe_log("Calculated weekly difference raster")
# Calculate three-week difference
three_week_dif_raster_abs <- (CI - CI_m3)
safe_log("Calculated three-week difference raster")
}, error = function(e) {
safe_log(paste("Error calculating difference rasters:", e$message), "ERROR")
# Create placeholder rasters if calculations fail
if (!exists("last_week_dif_raster_abs")) {
last_week_dif_raster_abs <- CI * 0
}
if (!exists("three_week_dif_raster_abs")) {
three_week_dif_raster_abs <- CI * 0
}
})
```
```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE}
# Load field boundaries from parameters
tryCatch({
AllPivots0 <- field_boundaries_sf
safe_log("Successfully loaded field boundaries")
}, error = function(e) {
stop("Error loading field boundaries: ", e$message)
})
```
`r subtitle_var`
\pagebreak
# Explanation of the Report
This report provides a detailed analysis of your sugarcane fields based on satellite imagery, helping you monitor crop health and development throughout the growing season. The data is processed weekly to give you timely insights for optimal farm management decisions.
## What is the Chlorophyll Index (CI)?
The **Chlorophyll Index (CI)** is a vegetation index that measures the relative amount of chlorophyll in plant leaves. Chlorophyll is the green pigment responsible for photosynthesis in plants. Higher CI values indicate:
* Greater photosynthetic activity
* Healthier plant tissue
* Better nitrogen uptake
* More vigorous crop growth
CI values typically range from 0 (bare soil or severely stressed vegetation) to 7+ (very healthy, dense vegetation). For sugarcane, values between 3-7 generally indicate good crop health, depending on the growth stage.
## What You'll Find in This Report:
1. **Chlorophyll Index Overview Map**: A comprehensive view of all your fields showing current CI values. This helps identify which fields are performing well and which might need attention.
2. **Weekly Difference Map**: Shows changes in CI values over the past week. Positive values (green) indicate improving crop health, while negative values (red) may signal stress or decline.
3. **Field-by-Field Analysis**: Detailed maps for each field showing:
* CI values for the current week and two previous weeks
* Week-to-week changes in CI values
* Three-week change in CI values to track longer-term trends
4. **Growth Trend Graphs**: Time-series visualizations showing how CI values have changed throughout the growing season for each section of your fields.
5. **Yield Prediction**: For mature crops (over 300 days), we provide estimated yield predictions based on historical data and current CI measurements.
Use these insights to identify areas that may need irrigation, fertilization, or other interventions, and to track the effectiveness of your management practices over time.
\pagebreak
# Chlorophyll Index (CI) Overview Map - Current Week
```{r render_ci_overview_map, echo=FALSE, fig.height=6.8, fig.width=9, message=FALSE, warning=FALSE}
# Create overview chlorophyll index map
tryCatch({
# Base shape
map <- tmap::tm_shape(CI, unit = "m")
# Add raster layer with either breaks or continuous spectrum based on parameter
if (use_breaks) {
map <- map + tmap::tm_raster(breaks = c(0,0.5,1,2,3,4,5,6,7,Inf),
palette = "RdYlGn",
midpoint = NA,
legend.is.portrait = FALSE,
title = "Chlorophyll Index (CI)")
} else {
map <- map + tmap::tm_raster(palette = "RdYlGn",
style = "cont",
midpoint = NA,
legend.is.portrait = FALSE,
title = "Chlorophyll Index (CI)")
}
# Complete the map with layout and other elements
map <- map + tmap::tm_layout(legend.outside = TRUE,
legend.outside.position = "bottom",
legend.show = TRUE) +
tmap::tm_scale_bar(position = tm_pos_out("right", "bottom"), text.color = "black") +
tmap::tm_compass(position = tm_pos_out("right", "bottom"), text.color = "black") +
tmap::tm_shape(AllPivots0) +
tmap::tm_borders(col = "black") +
tmap::tm_text("sub_field", size = 0.6, col = "black")
# Print the map
print(map)
}, error = function(e) {
safe_log(paste("Error creating CI overview map:", e$message), "ERROR")
plot(1, type="n", axes=FALSE, xlab="", ylab="")
text(1, 1, "Error creating CI overview map", cex=1.5)
})
```
\newpage
# Weekly Chlorophyll Index Difference Map
```{r render_ci_difference_map, echo=FALSE, fig.height=6.8, fig.width=9, message=FALSE, warning=FALSE}
# Create chlorophyll index difference map
tryCatch({
# Base shape
map <- tmap::tm_shape(last_week_dif_raster_abs, unit = "m")
# Add raster layer with either breaks or continuous spectrum based on parameter
if (use_breaks) {
map <- map + tmap::tm_raster(breaks = c(-3,-2,-1,0,1,2,3),
palette = "RdYlGn",
midpoint = 0,
legend.is.portrait = FALSE,
title = "Chlorophyll Index (CI) Change")
} else {
map <- map + tmap::tm_raster(palette = "RdYlGn",
style = "cont",
midpoint = 0,
legend.is.portrait = FALSE,
title = "Chlorophyll Index (CI) Change")
}
# Complete the map with layout and other elements
map <- map + tmap::tm_layout(legend.outside = TRUE,
legend.outside.position = "bottom",
legend.show = TRUE) +
tmap::tm_scale_bar(position = tm_pos_out("right", "bottom"), text.color = "black") +
tmap::tm_compass(position = tm_pos_out("right", "bottom"), text.color = "black") +
tmap::tm_shape(AllPivots0) +
tmap::tm_borders(col = "black") +
tmap::tm_text("sub_field", size = 0.6, col = "black")
# Print the map
print(map)
}, error = function(e) {
safe_log(paste("Error creating CI difference map:", e$message), "ERROR")
plot(1, type="n", axes=FALSE, xlab="", ylab="")
text(1, 1, "Error creating CI difference map", cex=1.5)
})
```
\newpage
\newpage
```{r generate_field_visualizations, eval=TRUE, fig.height=3.8, fig.width=10, message=FALSE,echo=FALSE, warning=FALSE, include=TRUE, results='asis'}
# Generate detailed visualizations for each field
tryCatch({
# Merge field polygons for processing
AllPivots_merged <- AllPivots0 %>%
dplyr::group_by(field) %>%
dplyr::summarise(.groups = 'drop')
# Generate plots for each field
purrr::walk(AllPivots_merged$field, function(field_name) {
tryCatch({
cat("\n") # Add an empty line for better spacing
# Call ci_plot with explicit parameters
ci_plot(
pivotName = field_name,
field_boundaries = AllPivots0,
current_ci = CI,
ci_minus_1 = CI_m1,
ci_minus_2 = CI_m2,
last_week_diff = last_week_dif_raster_abs,
three_week_diff = three_week_dif_raster_abs,
harvesting_data = harvesting_data,
week = week,
week_minus_1 = week_minus_1,
week_minus_2 = week_minus_2,
week_minus_3 = week_minus_3,
use_breaks = use_breaks,
borders = borders
)
cat("\n")
# Call cum_ci_plot with explicit parameters
cum_ci_plot(
pivotName = field_name,
ci_quadrant_data = CI_quadrant,
plot_type = "value",
facet_on = FALSE
)
}, error = function(e) {
safe_log(paste("Error generating plots for field", field_name, ":", e$message), "ERROR")
cat(paste("## Error generating plots for field", field_name, "\n"))
cat(paste(e$message, "\n"))
})
})
}, error = function(e) {
safe_log(paste("Error in field visualization section:", e$message), "ERROR")
cat("Error generating field plots. See log for details.\n")
})
```
```{r generate_subarea_visualizations, echo=FALSE, fig.height=3.8, fig.width=10, message=FALSE, warning=FALSE, results='asis', eval=FALSE}
# Alternative visualization grouped by sub-area (disabled by default)
tryCatch({
# Group pivots by sub-area
pivots_grouped <- AllPivots0
# Iterate over each subgroup
for (subgroup in unique(pivots_grouped$sub_area)) {
# Add subgroup heading
cat("\n")
cat("## Subgroup: ", subgroup, "\n")
# Filter data for current subgroup
subset_data <- dplyr::filter(pivots_grouped, sub_area == subgroup)
# Generate visualizations for each field in the subgroup
purrr::walk(subset_data$field, function(field_name) {
cat("\n")
ci_plot(field_name)
cat("\n")
cum_ci_plot(field_name)
cat("\n")
})
# Add page break after each subgroup
cat("\\pagebreak\n")
}
}, error = function(e) {
safe_log(paste("Error in subarea visualization section:", e$message), "ERROR")
cat("Error generating subarea plots. See log for details.\n")
})
```
# Yield prediction
The below table shows estimates of the biomass if you would harvest them now.
```{r yield_data_training, message=FALSE, warning=FALSE, include=FALSE}
# Load and prepare yield prediction data with error handling
tryCatch({
# Load CI quadrant data and fill missing values
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds")) %>%
dplyr::group_by(model) %>%
tidyr::fill(field, sub_field, .direction = "downup") %>%
dplyr::ungroup()
# Check if tonnage_ha is empty
if (all(is.na(harvesting_data$tonnage_ha))) {
safe_log("Lacking historic harvest data, please provide for yield prediction calculation", "WARNING")
knitr::knit_exit() # Exit the chunk if tonnage_ha is empty
}
# Rename year column to season for consistency
harvesting_data <- harvesting_data %>% dplyr::rename(season = year)
# Join CI and yield data
CI_and_yield <- dplyr::left_join(CI_quadrant, harvesting_data, by = c("field", "sub_field", "season")) %>%
dplyr::group_by(sub_field, season) %>%
dplyr::slice(which.max(DOY)) %>%
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>%
dplyr::mutate(CI_per_day = cumulative_CI / DOY)
# Define predictors and response variables
predictors <- c("cumulative_CI", "DOY", "CI_per_day")
response <- "tonnage_ha"
# Prepare test and validation datasets
CI_and_yield_test <- CI_and_yield %>%
as.data.frame() %>%
dplyr::filter(!is.na(tonnage_ha))
CI_and_yield_validation <- CI_and_yield_test
# Prepare prediction dataset (fields without harvest data)
prediction_yields <- CI_and_yield %>%
as.data.frame() %>%
dplyr::filter(is.na(tonnage_ha))
# Configure model training parameters
ctrl <- caret::trainControl(
method = "cv",
savePredictions = TRUE,
allowParallel = TRUE,
number = 5,
verboseIter = TRUE
)
# Train the model with feature selection
set.seed(202) # For reproducibility
model_ffs_rf <- CAST::ffs(
CI_and_yield_test[, predictors],
CI_and_yield_test[, response],
method = "rf",
trControl = ctrl,
importance = TRUE,
withinSE = TRUE,
tuneLength = 5,
na.rm = TRUE
)
# Function to prepare predictions with consistent naming and formatting
prepare_predictions <- function(predictions, newdata) {
return(predictions %>%
as.data.frame() %>%
dplyr::rename(predicted_Tcha = ".") %>%
dplyr::mutate(
sub_field = newdata$sub_field,
field = newdata$field,
Age_days = newdata$DOY,
total_CI = round(newdata$cumulative_CI, 0),
predicted_Tcha = round(predicted_Tcha, 0),
season = newdata$season
) %>%
dplyr::select(field, sub_field, Age_days, total_CI, predicted_Tcha, season) %>%
dplyr::left_join(., newdata, by = c("field", "sub_field", "season"))
)
}
# Predict yields for the validation dataset
pred_ffs_rf <- prepare_predictions(stats::predict(model_ffs_rf, newdata = CI_and_yield_validation), CI_and_yield_validation)
# Predict yields for the current season (focus on mature fields over 300 days)
pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>%
dplyr::filter(Age_days > 300) %>%
dplyr::mutate(CI_per_day = round(total_CI / Age_days, 1))
safe_log("Successfully completed yield prediction calculations")
}, error = function(e) {
safe_log(paste("Error in yield prediction:", e$message), "ERROR")
# Create empty dataframes to prevent errors in subsequent chunks
pred_ffs_rf <- data.frame()
pred_rf_current_season <- data.frame()
})
```
```{r plotting_yield_data, echo=FALSE, fig.height=5, fig.width=8, message=FALSE, warning=FALSE}
# Display yield prediction visualizations with error handling
tryCatch({
if (nrow(pred_ffs_rf) > 0) {
# Plot model performance (predicted vs actual)
ggplot2::ggplot(pred_ffs_rf, ggplot2::aes(y = predicted_Tcha, x = tonnage_ha)) +
ggplot2::geom_point(size = 2, alpha = 0.6) +
ggplot2::geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") +
ggplot2::scale_x_continuous(limits = c(0, 200)) +
ggplot2::scale_y_continuous(limits = c(0, 200)) +
ggplot2::labs(title = "Model Performance: \nPredicted vs Actual Tonnage/ha",
x = "Actual tonnage/ha (Tcha)",
y = "Predicted tonnage/ha (Tcha)") +
ggplot2::theme_minimal()
}
if (nrow(pred_rf_current_season) > 0) {
# Plot predicted yields by age
ggplot2::ggplot(pred_rf_current_season, ggplot2::aes(x = Age_days, y = predicted_Tcha)) +
ggplot2::geom_point(size = 2, alpha = 0.6) +
ggplot2::labs(title = "Predicted Yields for Fields Over 300 Days \nOld Yet to Be Harvested",
x = "Age (days)",
y = "Predicted tonnage/ha (Tcha)") +
ggplot2::scale_y_continuous(limits = c(0, 200)) +
ggplot2::theme_minimal()
# Display prediction table
knitr::kable(pred_rf_current_season,
digits = 0,
caption = "Predicted Tonnage/ha for Fields Over 300 Days Old")
} else {
cat("No fields over 300 days old without harvest data available for yield prediction.")
}
}, error = function(e) {
safe_log(paste("Error in yield prediction visualization:", e$message), "ERROR")
cat("Error generating yield prediction visualizations. See log for details.")
})
```