Fix CI report pipeline: update tmap v4 syntax, add continuous color scales, fix formatting
- Updated all CI maps to use tm_scale_continuous() for proper tmap v4 compatibility - Added fixed color scale limits (1-8 for CI, -3 to +3 for differences) for consistent field comparison - Fixed YAML header formatting issues in CI_report_dashboard_planet.Rmd - Positioned RGB map before CI overview map as requested - Removed all obsolete use_breaks parameter references - Enhanced error handling and logging throughout the pipeline - Added new experimental analysis scripts and improvements to mosaic creation
This commit is contained in:
parent
bb2a599075
commit
6efcc8cfec
513
.Rhistory
513
.Rhistory
|
|
@ -1 +1,512 @@
|
|||
install.packages("xfun")
|
||||
})
|
||||
# Verify farm_health_data exists and has content
|
||||
if (!exists("farm_health_data") || nrow(farm_health_data) == 0) {
|
||||
safe_log("farm_health_data not found or empty, generating default data", "WARNING")
|
||||
# Create minimal fallback data
|
||||
tryCatch({
|
||||
# Get fields from boundaries
|
||||
fields <- unique(AllPivots0$field)
|
||||
# Create basic data frame with just field names
|
||||
farm_health_data <- data.frame(
|
||||
field = fields,
|
||||
mean_ci = rep(NA, length(fields)),
|
||||
ci_change = rep(NA, length(fields)),
|
||||
ci_uniformity = rep(NA, length(fields)),
|
||||
status = rep("Unknown", length(fields)),
|
||||
anomaly_type = rep("Unknown", length(fields)),
|
||||
priority_level = rep(5, length(fields)), # Low priority
|
||||
age_weeks = rep(NA, length(fields)),
|
||||
harvest_readiness = rep("Unknown", length(fields)),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
safe_log("Created fallback farm_health_data with basic field information")
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error creating fallback farm_health_data:", e$message), "ERROR")
|
||||
farm_health_data <<- data.frame(
|
||||
field = character(),
|
||||
mean_ci = numeric(),
|
||||
ci_change = numeric(),
|
||||
ci_uniformity = numeric(),
|
||||
status = character(),
|
||||
anomaly_type = character(),
|
||||
priority_level = numeric(),
|
||||
age_weeks = numeric(),
|
||||
harvest_readiness = character(),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
})
|
||||
}
|
||||
# Calculate farm health summary metrics
|
||||
tryCatch({
|
||||
# Generate farm health summary data
|
||||
farm_health_data <- generate_farm_health_summary(
|
||||
field_boundaries = AllPivots0,
|
||||
ci_current = CI,
|
||||
ci_previous = CI_m1,
|
||||
harvesting_data = harvesting_data
|
||||
)
|
||||
# Log the summary data
|
||||
safe_log(paste("Generated farm health summary with", nrow(farm_health_data), "fields"))
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error in farm health calculation:", e$message), "ERROR")
|
||||
# Create empty dataframe if calculation failed
|
||||
farm_health_data <- data.frame(
|
||||
field = character(),
|
||||
mean_ci = numeric(),
|
||||
ci_change = numeric(),
|
||||
ci_uniformity = numeric(),
|
||||
status = character(),
|
||||
anomaly_type = character(),
|
||||
priority_level = numeric(),
|
||||
age_weeks = numeric(),
|
||||
harvest_readiness = character(),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
})
|
||||
# Render the velocity and acceleration indicators
|
||||
tryCatch({
|
||||
# Create and display the indicators using the imported utility function
|
||||
velocity_plot <- create_velocity_acceleration_indicator(
|
||||
health_data = farm_health_data,
|
||||
ci_current = CI,
|
||||
ci_prev1 = CI_m1,
|
||||
ci_prev2 = CI_m2,
|
||||
ci_prev3 = CI_m3,
|
||||
field_boundaries = AllPivots0
|
||||
)
|
||||
# Print the visualization
|
||||
print(velocity_plot)
|
||||
# Create a table of fields with significant velocity changes
|
||||
field_ci_metrics <- list()
|
||||
# Process each field to get metrics
|
||||
fields <- unique(AllPivots0$field)
|
||||
for (field_name in fields) {
|
||||
tryCatch({
|
||||
# Get field boundary
|
||||
field_shape <- AllPivots0 %>% dplyr::filter(field == field_name)
|
||||
if (nrow(field_shape) == 0) next
|
||||
# Extract CI values
|
||||
ci_curr_values <- terra::extract(CI, field_shape)
|
||||
ci_prev1_values <- terra::extract(CI_m1, field_shape)
|
||||
# Calculate metrics
|
||||
mean_ci_curr <- mean(ci_curr_values$CI, na.rm = TRUE)
|
||||
mean_ci_prev1 <- mean(ci_prev1_values$CI, na.rm = TRUE)
|
||||
velocity <- mean_ci_curr - mean_ci_prev1
|
||||
# Store in list
|
||||
field_ci_metrics[[field_name]] <- list(
|
||||
field = field_name,
|
||||
ci_current = mean_ci_curr,
|
||||
ci_prev1 = mean_ci_prev1,
|
||||
velocity = velocity
|
||||
)
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error processing field", field_name, "for velocity table:", e$message), "WARNING")
|
||||
})
|
||||
}
|
||||
# Convert list to data frame
|
||||
velocity_df <- do.call(rbind, lapply(field_ci_metrics, function(x) {
|
||||
data.frame(
|
||||
field = x$field,
|
||||
ci_current = round(x$ci_current, 2),
|
||||
ci_prev1 = round(x$ci_prev1, 2),
|
||||
velocity = round(x$velocity, 2),
|
||||
direction = ifelse(x$velocity >= 0, "Improving", "Declining")
|
||||
)
|
||||
}))
|
||||
# Select top 5 positive and top 5 negative velocity fields
|
||||
top_positive <- velocity_df %>%
|
||||
dplyr::filter(velocity > 0) %>%
|
||||
dplyr::arrange(desc(velocity)) %>%
|
||||
dplyr::slice_head(n = 5)
|
||||
top_negative <- velocity_df %>%
|
||||
dplyr::filter(velocity < 0) %>%
|
||||
dplyr::arrange(velocity) %>%
|
||||
dplyr::slice_head(n = 5)
|
||||
# Display the tables if we have data
|
||||
if (nrow(top_positive) > 0) {
|
||||
cat("<h4>Fields with Fastest Improvement</h4>")
|
||||
knitr::kable(top_positive %>%
|
||||
dplyr::select(Field = field,
|
||||
`Current CI` = ci_current,
|
||||
`Previous CI` = ci_prev1,
|
||||
`Weekly Change` = velocity))
|
||||
}
|
||||
if (nrow(top_negative) > 0) {
|
||||
cat("<h4>Fields with Fastest Decline</h4>")
|
||||
knitr::kable(top_negative %>%
|
||||
dplyr::select(Field = field,
|
||||
`Current CI` = ci_current,
|
||||
`Previous CI` = ci_prev1,
|
||||
`Weekly Change` = velocity))
|
||||
}
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error rendering velocity visualization:", e$message), "ERROR")
|
||||
cat("<div class='alert alert-danger'>Error generating velocity visualization.</div>")
|
||||
})
|
||||
# Generate anomaly timeline visualization
|
||||
tryCatch({
|
||||
# Use the imported function to create the anomaly timeline
|
||||
anomaly_timeline <- create_anomaly_timeline(
|
||||
field_boundaries = AllPivots0,
|
||||
ci_data = CI_quadrant,
|
||||
days_to_include = 90 # Show last 90 days of data
|
||||
)
|
||||
# Display the timeline
|
||||
print(anomaly_timeline)
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error generating anomaly timeline:", e$message), "ERROR")
|
||||
cat("<div class='alert alert-danger'>Error generating anomaly timeline visualization.</div>")
|
||||
})
|
||||
# Verify farm_health_data exists and has content
|
||||
if (!exists("farm_health_data") || nrow(farm_health_data) == 0) {
|
||||
safe_log("farm_health_data not found or empty, generating default data", "WARNING")
|
||||
# Create minimal fallback data
|
||||
tryCatch({
|
||||
# Get fields from boundaries
|
||||
fields <- unique(AllPivots0$field)
|
||||
# Create basic data frame with just field names
|
||||
farm_health_data <- data.frame(
|
||||
field = fields,
|
||||
mean_ci = rep(NA, length(fields)),
|
||||
ci_change = rep(NA, length(fields)),
|
||||
ci_uniformity = rep(NA, length(fields)),
|
||||
status = rep("Unknown", length(fields)),
|
||||
anomaly_type = rep("Unknown", length(fields)),
|
||||
priority_level = rep(5, length(fields)), # Low priority
|
||||
age_weeks = rep(NA, length(fields)),
|
||||
harvest_readiness = rep("Unknown", length(fields)),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
safe_log("Created fallback farm_health_data with basic field information")
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error creating fallback farm_health_data:", e$message), "ERROR")
|
||||
farm_health_data <<- data.frame(
|
||||
field = character(),
|
||||
mean_ci = numeric(),
|
||||
ci_change = numeric(),
|
||||
ci_uniformity = numeric(),
|
||||
status = character(),
|
||||
anomaly_type = character(),
|
||||
priority_level = numeric(),
|
||||
age_weeks = numeric(),
|
||||
harvest_readiness = character(),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
})
|
||||
}
|
||||
# ADVANCED ANALYTICS FUNCTIONS
|
||||
# Note: These functions are now imported from executive_report_utils.R
|
||||
# The utility file contains functions for velocity/acceleration indicators,
|
||||
# anomaly timeline creation, age cohort mapping, and cohort performance charts
|
||||
safe_log("Using analytics functions from executive_report_utils.R")
|
||||
# Chunk 1: setup_parameters
|
||||
# Set up basic report parameters from input values
|
||||
report_date <- params$report_date
|
||||
mail_day <- params$mail_day
|
||||
borders <- params$borders
|
||||
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
|
||||
# Environment setup notes (commented out)
|
||||
# # Activeer de renv omgeving
|
||||
# renv::activate()
|
||||
# renv::deactivate()
|
||||
# # Optioneel: Herstel de omgeving als dat nodig is
|
||||
# # Je kunt dit commentaar geven als je het normaal niet wilt uitvoeren
|
||||
# renv::restore()
|
||||
# Chunk 2: load_libraries
|
||||
# Configure knitr options
|
||||
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
|
||||
# Path management
|
||||
library(here)
|
||||
# Spatial data libraries
|
||||
library(sf)
|
||||
library(terra)
|
||||
library(exactextractr)
|
||||
# library(raster) - Removed as it's no longer maintained
|
||||
# Data manipulation and visualization
|
||||
library(tidyverse) # Includes dplyr, ggplot2, etc.
|
||||
library(tmap)
|
||||
library(lubridate)
|
||||
library(zoo)
|
||||
# Machine learning
|
||||
library(rsample)
|
||||
library(caret)
|
||||
library(randomForest)
|
||||
library(CAST)
|
||||
# Load custom utility functions
|
||||
tryCatch({
|
||||
source("report_utils.R")
|
||||
}, error = function(e) {
|
||||
message(paste("Error loading report_utils.R:", e$message))
|
||||
# Try alternative path if the first one fails
|
||||
tryCatch({
|
||||
source(here::here("r_app", "report_utils.R"))
|
||||
}, error = function(e) {
|
||||
stop("Could not load report_utils.R from either location: ", e$message)
|
||||
})
|
||||
})
|
||||
# Load executive report utilities
|
||||
tryCatch({
|
||||
source("executive_report_utils.R")
|
||||
}, error = function(e) {
|
||||
message(paste("Error loading executive_report_utils.R:", e$message))
|
||||
# Try alternative path if the first one fails
|
||||
tryCatch({
|
||||
source(here::here("r_app", "executive_report_utils.R"))
|
||||
}, error = function(e) {
|
||||
stop("Could not load executive_report_utils.R from either location: ", e$message)
|
||||
})
|
||||
})
|
||||
# Chunk 1: setup_parameters
|
||||
# Set up basic report parameters from input values
|
||||
report_date <- params$report_date
|
||||
mail_day <- params$mail_day
|
||||
borders <- params$borders
|
||||
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
|
||||
# Environment setup notes (commented out)
|
||||
# # Activeer de renv omgeving
|
||||
# renv::activate()
|
||||
# renv::deactivate()
|
||||
# # Optioneel: Herstel de omgeving als dat nodig is
|
||||
# # Je kunt dit commentaar geven als je het normaal niet wilt uitvoeren
|
||||
# renv::restore()
|
||||
# Chunk 2: load_libraries
|
||||
# Configure knitr options
|
||||
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
|
||||
# Path management
|
||||
library(here)
|
||||
# Spatial data libraries
|
||||
library(sf)
|
||||
library(terra)
|
||||
library(exactextractr)
|
||||
# library(raster) - Removed as it's no longer maintained
|
||||
# Data manipulation and visualization
|
||||
library(tidyverse) # Includes dplyr, ggplot2, etc.
|
||||
library(tmap)
|
||||
library(lubridate)
|
||||
library(zoo)
|
||||
# Machine learning
|
||||
library(rsample)
|
||||
library(caret)
|
||||
library(randomForest)
|
||||
library(CAST)
|
||||
# Load custom utility functions
|
||||
tryCatch({
|
||||
source("report_utils.R")
|
||||
}, error = function(e) {
|
||||
message(paste("Error loading report_utils.R:", e$message))
|
||||
# Try alternative path if the first one fails
|
||||
tryCatch({
|
||||
source(here::here("r_app", "report_utils.R"))
|
||||
}, error = function(e) {
|
||||
stop("Could not load report_utils.R from either location: ", e$message)
|
||||
})
|
||||
})
|
||||
# Load executive report utilities
|
||||
tryCatch({
|
||||
source("executive_report_utils.R")
|
||||
}, error = function(e) {
|
||||
message(paste("Error loading executive_report_utils.R:", e$message))
|
||||
# Try alternative path if the first one fails
|
||||
tryCatch({
|
||||
source(here::here("r_app", "executive_report_utils.R"))
|
||||
}, error = function(e) {
|
||||
stop("Could not load executive_report_utils.R from either location: ", e$message)
|
||||
})
|
||||
})
|
||||
# Chunk 1: setup_parameters
|
||||
# Set up basic report parameters from input values
|
||||
report_date <- params$report_date
|
||||
mail_day <- params$mail_day
|
||||
borders <- params$borders
|
||||
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
|
||||
# Environment setup notes (commented out)
|
||||
# # Activeer de renv omgeving
|
||||
# renv::activate()
|
||||
# renv::deactivate()
|
||||
# # Optioneel: Herstel de omgeving als dat nodig is
|
||||
# # Je kunt dit commentaar geven als je het normaal niet wilt uitvoeren
|
||||
# renv::restore()
|
||||
# Chunk 2: load_libraries
|
||||
# Configure knitr options
|
||||
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
|
||||
# Path management
|
||||
library(here)
|
||||
# Spatial data libraries
|
||||
library(sf)
|
||||
library(terra)
|
||||
library(exactextractr)
|
||||
# library(raster) - Removed as it's no longer maintained
|
||||
# Data manipulation and visualization
|
||||
library(tidyverse) # Includes dplyr, ggplot2, etc.
|
||||
library(tmap)
|
||||
library(lubridate)
|
||||
library(zoo)
|
||||
# Machine learning
|
||||
library(rsample)
|
||||
library(caret)
|
||||
library(randomForest)
|
||||
library(CAST)
|
||||
# Load custom utility functions
|
||||
# tryCatch({
|
||||
# source("report_utils.R")
|
||||
# }, error = function(e) {
|
||||
# message(paste("Error loading report_utils.R:", e$message))
|
||||
# # Try alternative path if the first one fails
|
||||
# tryCatch({
|
||||
source(here::here("r_app", "report_utils.R"))
|
||||
# }, error = function(e) {
|
||||
# stop("Could not load report_utils.R from either location: ", e$message)
|
||||
# })
|
||||
# })
|
||||
# Load executive report utilities
|
||||
# tryCatch({
|
||||
# source("executive_report_utils.R")
|
||||
# }, error = function(e) {
|
||||
# message(paste("Error loading executive_report_utils.R:", e$message))
|
||||
# # Try alternative path if the first one fails
|
||||
# tryCatch({
|
||||
source(here::here("r_app", "executive_report_utils.R"))
|
||||
# Chunk 1: setup_parameters
|
||||
# Set up basic report parameters from input values
|
||||
report_date <- params$report_date
|
||||
mail_day <- params$mail_day
|
||||
borders <- params$borders
|
||||
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
|
||||
# Environment setup notes (commented out)
|
||||
# # Activeer de renv omgeving
|
||||
# renv::activate()
|
||||
# renv::deactivate()
|
||||
# # Optioneel: Herstel de omgeving als dat nodig is
|
||||
# # Je kunt dit commentaar geven als je het normaal niet wilt uitvoeren
|
||||
# renv::restore()
|
||||
# Chunk 2: load_libraries
|
||||
# Configure knitr options
|
||||
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
|
||||
# Path management
|
||||
library(here)
|
||||
# Spatial data libraries
|
||||
library(sf)
|
||||
library(terra)
|
||||
library(exactextractr)
|
||||
# library(raster) - Removed as it's no longer maintained
|
||||
# Data manipulation and visualization
|
||||
library(tidyverse) # Includes dplyr, ggplot2, etc.
|
||||
library(tmap)
|
||||
library(lubridate)
|
||||
library(zoo)
|
||||
# Machine learning
|
||||
library(rsample)
|
||||
library(caret)
|
||||
library(randomForest)
|
||||
library(CAST)
|
||||
# Load custom utility functions
|
||||
# tryCatch({
|
||||
# source("report_utils.R")
|
||||
# }, error = function(e) {
|
||||
# message(paste("Error loading report_utils.R:", e$message))
|
||||
# # Try alternative path if the first one fails
|
||||
# tryCatch({
|
||||
source(here::here("r_app", "report_utils.R"))
|
||||
# }, error = function(e) {
|
||||
# stop("Could not load report_utils.R from either location: ", e$message)
|
||||
# })
|
||||
# })
|
||||
# Load executive report utilities
|
||||
# tryCatch({
|
||||
# source("executive_report_utils.R")
|
||||
# }, error = function(e) {
|
||||
# message(paste("Error loading executive_report_utils.R:", e$message))
|
||||
# # Try alternative path if the first one fails
|
||||
# tryCatch({
|
||||
source(here::here("r_app", "exec_dashboard", "executive_report_utils.R"))
|
||||
# }, error = function(e) {
|
||||
# stop("Could not load executive_report_utils.R from either location: ", e$message)
|
||||
# })
|
||||
# })
|
||||
# Chunk 3: initialize_project_config
|
||||
# Set the project directory from parameters
|
||||
project_dir <- params$data_dir
|
||||
# Source project parameters with error handling
|
||||
# tryCatch({
|
||||
source(here::here("r_app", "parameters_project.R"))
|
||||
# }, error = function(e) {
|
||||
# stop("Error loading parameters_project.R: ", e$message)
|
||||
# })
|
||||
# Log initial configuration
|
||||
safe_log("Starting the R Markdown script")
|
||||
safe_log(paste("mail_day params:", params$mail_day))
|
||||
safe_log(paste("report_date params:", params$report_date))
|
||||
safe_log(paste("mail_day variable:", mail_day))
|
||||
# Set locale for consistent date formatting
|
||||
Sys.setlocale("LC_TIME", "C")
|
||||
# Initialize date variables from parameters
|
||||
today <- as.character(report_date)
|
||||
mail_day_as_character <- as.character(mail_day)
|
||||
# Calculate week days
|
||||
report_date_as_week_day <- weekdays(lubridate::ymd(today))
|
||||
days_of_week <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
|
||||
# Calculate initial week number
|
||||
week <- lubridate::week(today)
|
||||
safe_log(paste("Initial week calculation:", week, "today:", today))
|
||||
# Calculate previous dates for comparisons
|
||||
today_minus_1 <- as.character(lubridate::ymd(today) - 7)
|
||||
today_minus_2 <- as.character(lubridate::ymd(today) - 14)
|
||||
today_minus_3 <- as.character(lubridate::ymd(today) - 21)
|
||||
# Log the weekday calculations for debugging
|
||||
safe_log(paste("Report date weekday:", report_date_as_week_day))
|
||||
safe_log(paste("Weekday index:", which(days_of_week == report_date_as_week_day)))
|
||||
safe_log(paste("Mail day:", mail_day_as_character))
|
||||
safe_log(paste("Mail day index:", which(days_of_week == mail_day_as_character)))
|
||||
# Adjust week calculation based on mail day
|
||||
if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_day_as_character)) {
|
||||
safe_log("Adjusting weeks because of mail day")
|
||||
week <- lubridate::week(today) + 1
|
||||
today_minus_1 <- as.character(lubridate::ymd(today))
|
||||
today_minus_2 <- as.character(lubridate::ymd(today) - 7)
|
||||
today_minus_3 <- as.character(lubridate::ymd(today) - 14)
|
||||
}
|
||||
# Generate subtitle for report
|
||||
subtitle_var <- paste("Report generated on", Sys.Date())
|
||||
# Calculate week numbers for previous weeks
|
||||
week_minus_1 <- week - 1
|
||||
week_minus_2 <- week - 2
|
||||
week_minus_3 <- week - 3
|
||||
# Format current week with leading zeros
|
||||
week <- sprintf("%02d", week)
|
||||
# Get years for each date
|
||||
year <- lubridate::year(today)
|
||||
year_1 <- lubridate::year(today_minus_1)
|
||||
year_2 <- lubridate::year(today_minus_2)
|
||||
year_3 <- lubridate::year(today_minus_3)
|
||||
# Load CI index data with error handling
|
||||
tryCatch({
|
||||
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
||||
safe_log("Successfully loaded CI quadrant data")
|
||||
}, error = function(e) {
|
||||
stop("Error loading CI quadrant data: ", e$message)
|
||||
})
|
||||
# Get file paths for different weeks using the utility function
|
||||
tryCatch({
|
||||
path_to_week_current = get_week_path(weekly_CI_mosaic, today, 0)
|
||||
path_to_week_minus_1 = get_week_path(weekly_CI_mosaic, today, -1)
|
||||
path_to_week_minus_2 = get_week_path(weekly_CI_mosaic, today, -2)
|
||||
path_to_week_minus_3 = get_week_path(weekly_CI_mosaic, today, -3)
|
||||
# Log the calculated paths
|
||||
safe_log("Required mosaic paths:")
|
||||
safe_log(paste("Path to current week:", path_to_week_current))
|
||||
safe_log(paste("Path to week minus 1:", path_to_week_minus_1))
|
||||
safe_log(paste("Path to week minus 2:", path_to_week_minus_2))
|
||||
safe_log(paste("Path to week minus 3:", path_to_week_minus_3))
|
||||
# Validate that files exist
|
||||
if (!file.exists(path_to_week_current)) warning("Current week mosaic file does not exist: ", path_to_week_current)
|
||||
if (!file.exists(path_to_week_minus_1)) warning("Week minus 1 mosaic file does not exist: ", path_to_week_minus_1)
|
||||
if (!file.exists(path_to_week_minus_2)) warning("Week minus 2 mosaic file does not exist: ", path_to_week_minus_2)
|
||||
if (!file.exists(path_to_week_minus_3)) warning("Week minus 3 mosaic file does not exist: ", path_to_week_minus_3)
|
||||
# Load raster data with terra functions
|
||||
CI <- terra::rast(path_to_week_current)$CI
|
||||
CI_m1 <- terra::rast(path_to_week_minus_1)$CI
|
||||
CI_m2 <- terra::rast(path_to_week_minus_2)$CI
|
||||
CI_m3 <- terra::rast(path_to_week_minus_3)$CI
|
||||
}, error = function(e) {
|
||||
stop("Error loading raster data: ", e$message)
|
||||
})
|
||||
|
|
|
|||
3
.vscode/settings.json
vendored
Normal file
3
.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
{
|
||||
"python.REPL.enableREPLSmartSend": false
|
||||
}
|
||||
Binary file not shown.
BIN
Rplots.pdf
BIN
Rplots.pdf
Binary file not shown.
BIN
figure/sub_chunk_8433-1.png
Normal file
BIN
figure/sub_chunk_8433-1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 7.9 KiB |
File diff suppressed because one or more lines are too long
1097
python_app/planet_download_with_ocm.ipynb
Normal file
1097
python_app/planet_download_with_ocm.ipynb
Normal file
File diff suppressed because it is too large
Load diff
319
python_app/planet_ocm_processor.py
Normal file
319
python_app/planet_ocm_processor.py
Normal file
|
|
@ -0,0 +1,319 @@
|
|||
import os
|
||||
import argparse
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from osgeo import gdal
|
||||
import rasterio as rio
|
||||
from rasterio.enums import Resampling
|
||||
from rasterio.warp import reproject
|
||||
from osgeo import osr
|
||||
|
||||
# Attempt to import OmniCloudMask and set a flag
|
||||
try:
|
||||
from omnicloudmask import predict_from_array, load_multiband
|
||||
HAS_OCM = True
|
||||
except ImportError:
|
||||
HAS_OCM = False
|
||||
|
||||
def calculate_utm_zone_and_hemisphere(longitude, latitude):
|
||||
"""
|
||||
Calculate the UTM zone and hemisphere based on longitude and latitude.
|
||||
"""
|
||||
utm_zone = int((longitude + 180) / 6) + 1
|
||||
is_southern = latitude < 0
|
||||
return utm_zone, is_southern
|
||||
|
||||
def reproject_to_projected_crs(input_path, output_path):
|
||||
"""
|
||||
Reprojects a raster to a projected coordinate system (e.g., UTM).
|
||||
"""
|
||||
input_ds = gdal.Open(str(input_path))
|
||||
if not input_ds:
|
||||
raise ValueError(f"Failed to open input raster: {input_path}")
|
||||
|
||||
# Get the source spatial reference
|
||||
source_srs = osr.SpatialReference()
|
||||
source_srs.ImportFromWkt(input_ds.GetProjection())
|
||||
|
||||
# Get the geographic coordinates of the image's center
|
||||
geo_transform = input_ds.GetGeoTransform()
|
||||
width = input_ds.RasterXSize
|
||||
height = input_ds.RasterYSize
|
||||
center_x = geo_transform[0] + (width / 2) * geo_transform[1]
|
||||
center_y = geo_transform[3] + (height / 2) * geo_transform[5]
|
||||
|
||||
# Calculate the UTM zone and hemisphere dynamically
|
||||
utm_zone, is_southern = calculate_utm_zone_and_hemisphere(center_x, center_y)
|
||||
|
||||
# Define the target spatial reference
|
||||
target_srs = osr.SpatialReference()
|
||||
target_srs.SetWellKnownGeogCS("WGS84")
|
||||
target_srs.SetUTM(utm_zone, is_southern)
|
||||
|
||||
# Create the warp options
|
||||
warp_options = gdal.WarpOptions(
|
||||
dstSRS=target_srs.ExportToWkt(),
|
||||
format="GTiff"
|
||||
)
|
||||
|
||||
# Perform the reprojection
|
||||
gdal.Warp(str(output_path), input_ds, options=warp_options)
|
||||
input_ds = None # Close the dataset
|
||||
print(f"Reprojected raster saved to: {output_path}")
|
||||
return output_path
|
||||
|
||||
def resample_image(input_path, output_path, resolution=(10, 10), resample_alg="bilinear"):
|
||||
"""
|
||||
Resamples a raster to a specified resolution using gdal.Translate.
|
||||
"""
|
||||
print(f"Resampling {input_path} to {resolution}m resolution -> {output_path}")
|
||||
|
||||
# Reproject the input image to a projected CRS
|
||||
reprojected_path = str(Path(output_path).with_name(f"{Path(output_path).stem}_reprojected.tif"))
|
||||
reproject_to_projected_crs(input_path, reprojected_path)
|
||||
|
||||
# Open the reprojected dataset
|
||||
input_ds = gdal.Open(reprojected_path)
|
||||
if not input_ds:
|
||||
raise ValueError(f"Failed to open reprojected raster: {reprojected_path}")
|
||||
|
||||
# Perform the resampling
|
||||
result = gdal.Translate(
|
||||
str(output_path),
|
||||
input_ds,
|
||||
xRes=resolution[0],
|
||||
yRes=resolution[1],
|
||||
resampleAlg=resample_alg
|
||||
)
|
||||
input_ds = None # Explicitly dereference the GDAL dataset
|
||||
if result is None:
|
||||
raise ValueError(f"Failed to resample image to {output_path}")
|
||||
print(f"Successfully resampled image saved to: {output_path}")
|
||||
return output_path
|
||||
|
||||
def run_ocm_on_image(image_path_10m, ocm_output_dir, save_mask=True):
|
||||
"""
|
||||
Processes a 10m resolution image with OmniCloudMask.
|
||||
Adapted from process_with_ocm in the notebook.
|
||||
"""
|
||||
if not HAS_OCM:
|
||||
print("OmniCloudMask not available. Please install with: pip install omnicloudmask")
|
||||
return None, None
|
||||
|
||||
image_path_10m = Path(image_path_10m)
|
||||
ocm_output_dir = Path(ocm_output_dir)
|
||||
ocm_output_dir.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
mask_10m_path = ocm_output_dir / f"{image_path_10m.stem}_ocm_mask_10m.tif"
|
||||
|
||||
try:
|
||||
# Open the image to check dimensions
|
||||
with rio.open(image_path_10m) as src:
|
||||
width, height = src.width, src.height
|
||||
|
||||
# Check if the image is too small for OmniCloudMask
|
||||
if width < 50 or height < 50:
|
||||
print(f"Warning: Image {image_path_10m} is too small for OmniCloudMask (width: {width}, height: {height}). Skipping.")
|
||||
return None, None
|
||||
|
||||
# PlanetScope 4-band images are typically [B,G,R,NIR]
|
||||
# OCM expects [R,G,NIR] for its default model.
|
||||
# Band numbers for load_multiband are 1-based.
|
||||
# If original is B(1),G(2),R(3),NIR(4), then R=3, G=2, NIR=4
|
||||
band_order = [3, 2, 4]
|
||||
|
||||
print(f"Loading 10m image for OCM: {image_path_10m}")
|
||||
# load_multiband resamples if resample_res is different from source,
|
||||
# but here image_path_10m is already 10m.
|
||||
# We pass resample_res=None to use the image's own resolution.
|
||||
rgn_data, profile = load_multiband(
|
||||
input_path=str(image_path_10m),
|
||||
resample_res=10, # Explicitly set target resolution for OCM
|
||||
band_order=band_order
|
||||
)
|
||||
|
||||
print("Applying OmniCloudMask...")
|
||||
prediction = predict_from_array(rgn_data)
|
||||
|
||||
if save_mask:
|
||||
profile.update(count=1, dtype='uint8')
|
||||
with rio.open(mask_10m_path, 'w', **profile) as dst:
|
||||
dst.write(prediction.astype('uint8'), 1)
|
||||
print(f"Saved 10m OCM mask to: {mask_10m_path}")
|
||||
|
||||
# Summary (optional, can be removed for cleaner script output)
|
||||
n_total = prediction.size
|
||||
n_clear = np.sum(prediction == 0)
|
||||
n_thick = np.sum(prediction == 1)
|
||||
n_thin = np.sum(prediction == 2)
|
||||
n_shadow = np.sum(prediction == 3)
|
||||
print(f" OCM: Clear: {100*n_clear/n_total:.1f}%, Thick: {100*n_thick/n_total:.1f}%, Thin: {100*n_thin/n_total:.1f}%, Shadow: {100*n_shadow/n_total:.1f}%")
|
||||
|
||||
return str(mask_10m_path), profile
|
||||
except Exception as e:
|
||||
print(f"Error processing 10m image with OmniCloudMask: {str(e)}")
|
||||
return None, None
|
||||
|
||||
|
||||
def upsample_mask_to_3m(mask_10m_path, target_3m_image_path, output_3m_mask_path):
|
||||
"""
|
||||
Upsamples a 10m OCM mask to match the 3m target image.
|
||||
Adapted from upsample_mask_to_highres in the notebook.
|
||||
"""
|
||||
print(f"Upsampling 10m mask {mask_10m_path} to 3m, referencing {target_3m_image_path}")
|
||||
with rio.open(mask_10m_path) as src_mask, rio.open(target_3m_image_path) as src_img_3m:
|
||||
mask_data_10m = src_mask.read(1)
|
||||
|
||||
img_shape_3m = (src_img_3m.height, src_img_3m.width)
|
||||
img_transform_3m = src_img_3m.transform
|
||||
img_crs_3m = src_img_3m.crs
|
||||
|
||||
upsampled_mask_3m_data = np.zeros(img_shape_3m, dtype=mask_data_10m.dtype)
|
||||
|
||||
reproject(
|
||||
source=mask_data_10m,
|
||||
destination=upsampled_mask_3m_data,
|
||||
src_transform=src_mask.transform,
|
||||
src_crs=src_mask.crs,
|
||||
dst_transform=img_transform_3m,
|
||||
dst_crs=img_crs_3m,
|
||||
resampling=Resampling.nearest
|
||||
)
|
||||
|
||||
profile_3m_mask = src_img_3m.profile.copy()
|
||||
profile_3m_mask.update({
|
||||
'count': 1,
|
||||
'dtype': upsampled_mask_3m_data.dtype
|
||||
})
|
||||
|
||||
with rio.open(output_3m_mask_path, 'w', **profile_3m_mask) as dst:
|
||||
dst.write(upsampled_mask_3m_data, 1)
|
||||
print(f"Upsampled 3m OCM mask saved to: {output_3m_mask_path}")
|
||||
return str(output_3m_mask_path)
|
||||
|
||||
|
||||
def apply_3m_mask_to_3m_image(image_3m_path, mask_3m_path, final_masked_output_path):
|
||||
"""
|
||||
Applies an upsampled 3m OCM mask to the original 3m image.
|
||||
Adapted from apply_upsampled_mask_to_highres in the notebook.
|
||||
"""
|
||||
print(f"Applying 3m mask {mask_3m_path} to 3m image {image_3m_path}")
|
||||
image_3m_path = Path(image_3m_path)
|
||||
mask_3m_path = Path(mask_3m_path)
|
||||
final_masked_output_path = Path(final_masked_output_path)
|
||||
final_masked_output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
try:
|
||||
with rio.open(image_3m_path) as src_img_3m, rio.open(mask_3m_path) as src_mask_3m:
|
||||
img_data_3m = src_img_3m.read()
|
||||
img_profile_3m = src_img_3m.profile.copy()
|
||||
mask_data_3m = src_mask_3m.read(1)
|
||||
|
||||
if img_data_3m.shape[1:] != mask_data_3m.shape:
|
||||
print(f"Warning: 3m image shape {img_data_3m.shape[1:]} and 3m mask shape {mask_data_3m.shape} do not match.")
|
||||
# This should ideally not happen if upsampling was correct.
|
||||
|
||||
# OCM: 0=clear, 1=thick cloud, 2=thin cloud, 3=shadow
|
||||
# We want to mask out (set to nodata) pixels where OCM is > 0
|
||||
binary_mask = np.ones_like(mask_data_3m, dtype=np.uint8)
|
||||
binary_mask[mask_data_3m > 0] = 0 # 0 for cloud/shadow, 1 for clear
|
||||
|
||||
masked_img_data_3m = img_data_3m.copy()
|
||||
nodata_val = img_profile_3m.get('nodata', 0) # Use existing nodata or 0
|
||||
|
||||
for i in range(img_profile_3m['count']):
|
||||
masked_img_data_3m[i][binary_mask == 0] = nodata_val
|
||||
|
||||
# Ensure dtype of profile matches data to be written
|
||||
# If original image was float, but nodata is int (0), rasterio might complain
|
||||
# It's safer to use the original image's dtype for the output.
|
||||
img_profile_3m.update(dtype=img_data_3m.dtype)
|
||||
|
||||
with rio.open(final_masked_output_path, 'w', **img_profile_3m) as dst:
|
||||
dst.write(masked_img_data_3m)
|
||||
|
||||
print(f"Final masked 3m image saved to: {final_masked_output_path}")
|
||||
return str(final_masked_output_path)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error applying 3m mask to 3m image: {str(e)}")
|
||||
return None
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Process PlanetScope 3m imagery with OmniCloudMask.")
|
||||
parser.add_argument("input_3m_image", type=str, help="Path to the input merged 3m PlanetScope GeoTIFF image.")
|
||||
parser.add_argument("output_dir", type=str, help="Directory to save processed files (10m image, masks, final 3m masked image).")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
# Resolve paths to absolute paths immediately
|
||||
input_3m_path = Path(args.input_3m_image).resolve(strict=True)
|
||||
# output_base_dir is the directory where outputs will be saved.
|
||||
# It should exist when the script is called (created by the notebook).
|
||||
output_base_dir = Path(args.output_dir).resolve(strict=True)
|
||||
except FileNotFoundError as e:
|
||||
print(f"Error: Path resolution failed. Input image or output base directory may not exist or is not accessible: {e}")
|
||||
return
|
||||
except Exception as e:
|
||||
print(f"Error resolving paths: {e}")
|
||||
return
|
||||
|
||||
# The check for input_3m_path.exists() is now covered by resolve(strict=True)
|
||||
|
||||
# Define intermediate and final file paths using absolute base paths
|
||||
intermediate_dir = output_base_dir / "intermediate_ocm_files"
|
||||
intermediate_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
image_10m_path = intermediate_dir / f"{input_3m_path.stem}_10m.tif"
|
||||
# OCM mask (10m) will be saved inside run_ocm_on_image, in a subdir of intermediate_dir
|
||||
ocm_mask_output_dir = intermediate_dir / "ocm_10m_mask_output"
|
||||
|
||||
# Upsampled OCM mask (3m)
|
||||
mask_3m_upsampled_path = intermediate_dir / f"{input_3m_path.stem}_ocm_mask_3m_upsampled.tif"
|
||||
|
||||
# Final masked image (3m)
|
||||
final_masked_3m_path = output_base_dir / f"{input_3m_path.stem}_ocm_masked_3m.tif"
|
||||
|
||||
print(f"--- Starting OCM processing for {input_3m_path.name} ---")
|
||||
print(f"Input 3m image (absolute): {input_3m_path}")
|
||||
print(f"Output base directory (absolute): {output_base_dir}")
|
||||
print(f"Intermediate 10m image path: {image_10m_path}")
|
||||
|
||||
# 1. Resample 3m input to 10m for OCM
|
||||
try:
|
||||
resample_image(input_3m_path, image_10m_path, resolution=(10, 10))
|
||||
except Exception as e:
|
||||
print(f"Failed to resample to 10m: {e}")
|
||||
return
|
||||
|
||||
# 2. Run OCM on the 10m image
|
||||
mask_10m_generated_path, _ = run_ocm_on_image(image_10m_path, ocm_mask_output_dir)
|
||||
if not mask_10m_generated_path:
|
||||
print("OCM processing failed. Exiting.")
|
||||
return
|
||||
|
||||
# 3. Upsample the 10m OCM mask to 3m
|
||||
try:
|
||||
upsample_mask_to_3m(mask_10m_generated_path, input_3m_path, mask_3m_upsampled_path)
|
||||
except Exception as e:
|
||||
print(f"Failed to upsample 10m OCM mask to 3m: {e}")
|
||||
return
|
||||
|
||||
# 4. Apply the 3m upsampled mask to the original 3m image
|
||||
try:
|
||||
apply_3m_mask_to_3m_image(input_3m_path, mask_3m_upsampled_path, final_masked_3m_path)
|
||||
except Exception as e:
|
||||
print(f"Failed to apply 3m mask to 3m image: {e}")
|
||||
return
|
||||
|
||||
print(f"--- Successfully completed OCM processing for {input_3m_path.name} ---")
|
||||
print(f"Final 3m masked output: {final_masked_3m_path}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
if not HAS_OCM:
|
||||
print("OmniCloudMask library is not installed. Please install it to run this script.")
|
||||
print("You can typically install it using: pip install omnicloudmask")
|
||||
else:
|
||||
main()
|
||||
|
|
@ -2,18 +2,17 @@
|
|||
params:
|
||||
ref: "word-styles-reference-var1.docx"
|
||||
output_file: CI_report.docx
|
||||
report_date: "2024-08-28"
|
||||
data_dir: "Chemba"
|
||||
report_date: "2024-07-18"
|
||||
data_dir: "chemba"
|
||||
mail_day: "Wednesday"
|
||||
borders: TRUE
|
||||
use_breaks: FALSE
|
||||
output:
|
||||
# html_document:
|
||||
# toc: yes
|
||||
# df_print: paged
|
||||
word_document:
|
||||
reference_docx: !expr file.path("word-styles-reference-var1.docx")
|
||||
toc: yes
|
||||
toc: no
|
||||
editor_options:
|
||||
chunk_output_type: console
|
||||
---
|
||||
|
|
@ -23,7 +22,6 @@ editor_options:
|
|||
report_date <- params$report_date
|
||||
mail_day <- params$mail_day
|
||||
borders <- params$borders
|
||||
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
|
||||
|
||||
# Environment setup notes (commented out)
|
||||
# # Activeer de renv omgeving
|
||||
|
|
@ -38,26 +36,21 @@ use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum
|
|||
# Configure knitr options
|
||||
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
|
||||
|
||||
# Path management
|
||||
library(here)
|
||||
|
||||
# Spatial data libraries
|
||||
library(sf)
|
||||
library(terra)
|
||||
library(exactextractr)
|
||||
# library(raster) - Removed as it's no longer maintained
|
||||
|
||||
# Data manipulation and visualization
|
||||
library(tidyverse) # Includes dplyr, ggplot2, etc.
|
||||
library(tmap)
|
||||
library(lubridate)
|
||||
library(zoo)
|
||||
|
||||
# Machine learning
|
||||
library(rsample)
|
||||
library(caret)
|
||||
library(randomForest)
|
||||
library(CAST)
|
||||
# Load all packages at once with suppressPackageStartupMessages
|
||||
suppressPackageStartupMessages({
|
||||
library(here)
|
||||
library(sf)
|
||||
library(terra)
|
||||
library(exactextractr)
|
||||
library(tidyverse)
|
||||
library(tmap)
|
||||
library(lubridate)
|
||||
library(zoo)
|
||||
library(rsample)
|
||||
library(caret)
|
||||
library(randomForest)
|
||||
library(CAST)
|
||||
})
|
||||
|
||||
# Load custom utility functions
|
||||
tryCatch({
|
||||
|
|
@ -127,9 +120,6 @@ if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_
|
|||
today_minus_3 <- as.character(lubridate::ymd(today) - 14)
|
||||
}
|
||||
|
||||
# Generate subtitle for report
|
||||
subtitle_var <- paste("Report generated on", Sys.Date())
|
||||
|
||||
# Calculate week numbers for previous weeks
|
||||
week_minus_1 <- week - 1
|
||||
week_minus_2 <- week - 2
|
||||
|
|
@ -217,7 +207,95 @@ tryCatch({
|
|||
})
|
||||
```
|
||||
|
||||
`r subtitle_var`
|
||||
```{r create_front_page_variables, include=FALSE}
|
||||
# Create variables for the front page
|
||||
farm_name <- stringr::str_to_title(gsub("_", " ", project_dir))
|
||||
|
||||
# Format dates for display
|
||||
report_date_formatted <- format(as.Date(report_date), "%B %d, %Y")
|
||||
current_year <- format(Sys.Date(), "%Y")
|
||||
|
||||
# Get total field count and area if available
|
||||
tryCatch({
|
||||
total_fields <- length(unique(AllPivots0$field))
|
||||
total_area_ha <- round(sum(sf::st_area(AllPivots0)) / 10000, 1) # Convert to hectares
|
||||
}, error = function(e) {
|
||||
total_fields <- "N/A"
|
||||
total_area_ha <- "N/A"
|
||||
})
|
||||
```
|
||||
|
||||
---
|
||||
title: ""
|
||||
---
|
||||
|
||||
```{=openxml}
|
||||
<w:p>
|
||||
<w:pPr>
|
||||
<w:jc w:val="center"/>
|
||||
<w:spacing w:after="720"/>
|
||||
</w:pPr>
|
||||
<w:r>
|
||||
<w:rPr>
|
||||
<w:sz w:val="48"/>
|
||||
<w:b/>
|
||||
</w:rPr>
|
||||
<w:t>SUGARCANE CROP MONITORING REPORT</w:t>
|
||||
</w:r>
|
||||
</w:p>
|
||||
```
|
||||
|
||||
<div style="text-align: center; margin-top: 2cm; margin-bottom: 2cm;">
|
||||
|
||||
**`r farm_name`**
|
||||
|
||||
**Chlorophyll Index Analysis**
|
||||
|
||||
Report Date: **`r report_date_formatted`**
|
||||
|
||||
---
|
||||
|
||||
</div>
|
||||
|
||||
<div style="margin-top: 3cm; margin-bottom: 2cm;">
|
||||
|
||||
## Report Summary
|
||||
|
||||
**Farm Location:** `r farm_name`
|
||||
**Report Period:** Week `r week` of `r current_year`
|
||||
**Data Source:** Planet Labs Satellite Imagery
|
||||
**Analysis Type:** Chlorophyll Index (CI) Monitoring
|
||||
|
||||
**Field Coverage:**
|
||||
- Total Fields Monitored: `r total_fields`
|
||||
- Total Area: `r total_area_ha` hectares
|
||||
|
||||
**Report Generated:** `r format(Sys.Date(), "%B %d, %Y at %H:%M")`
|
||||
|
||||
---
|
||||
|
||||
## About This Report
|
||||
|
||||
This automated report provides weekly analysis of sugarcane crop health using satellite-derived Chlorophyll Index (CI) measurements. The analysis helps identify:
|
||||
|
||||
- Field-level crop health variations
|
||||
- Weekly changes in crop vigor
|
||||
- Areas requiring agricultural attention
|
||||
- Growth patterns across different field sections
|
||||
|
||||
**Key Features:**
|
||||
- High-resolution satellite imagery analysis
|
||||
- Week-over-week change detection
|
||||
- Individual field performance metrics
|
||||
- Actionable insights for crop management
|
||||
|
||||
</div>
|
||||
|
||||
\pagebreak
|
||||
|
||||
<!-- Original content starts here -->
|
||||
|
||||
|
||||
|
||||
\pagebreak
|
||||
# Explanation of the Report
|
||||
|
|
@ -253,34 +331,104 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
|
|||
Use these insights to identify areas that may need irrigation, fertilization, or other interventions, and to track the effectiveness of your management practices over time.
|
||||
|
||||
\pagebreak
|
||||
# Chlorophyll Index (CI) Overview Map - Current Week
|
||||
```{r render_ci_overview_map, echo=FALSE, fig.height=6.8, fig.width=9, message=FALSE, warning=FALSE}
|
||||
# Create overview chlorophyll index map
|
||||
# RGB Satellite Image - Current Week (if available)
|
||||
```{r render_rgb_map, echo=FALSE, fig.height=6.9, fig.width=9, message=FALSE, warning=FALSE}
|
||||
# Check if RGB bands are available and create RGB map
|
||||
tryCatch({
|
||||
# Base shape
|
||||
map <- tmap::tm_shape(CI, unit = "m")
|
||||
# Load the full raster to check available bands
|
||||
full_raster <- terra::rast(path_to_week_current)
|
||||
available_bands <- names(full_raster)
|
||||
|
||||
# Add raster layer with either breaks or continuous spectrum based on parameter
|
||||
if (use_breaks) {
|
||||
map <- map + tmap::tm_raster(breaks = c(0,0.5,1,2,3,4,5,6,7,Inf),
|
||||
palette = "RdYlGn",
|
||||
midpoint = NA,
|
||||
legend.is.portrait = FALSE,
|
||||
title = "Chlorophyll Index (CI)")
|
||||
} else {
|
||||
map <- map + tmap::tm_raster(palette = "RdYlGn",
|
||||
style = "cont",
|
||||
midpoint = NA,
|
||||
legend.is.portrait = FALSE,
|
||||
title = "Chlorophyll Index (CI)")
|
||||
# Check if RGB bands are available (look for red, green, blue or similar naming)
|
||||
rgb_bands_available <- any(grepl("red|Red|RED", available_bands, ignore.case = TRUE)) &&
|
||||
any(grepl("green|Green|GREEN", available_bands, ignore.case = TRUE)) &&
|
||||
any(grepl("blue|Blue|BLUE", available_bands, ignore.case = TRUE))
|
||||
|
||||
# Alternative check for numbered bands that might be RGB (e.g., band_1, band_2, band_3)
|
||||
if (!rgb_bands_available && length(available_bands) >= 3) {
|
||||
# Check if we have at least 3 bands that could potentially be RGB
|
||||
potential_rgb_bands <- grep("band_[1-3]|B[1-3]|[1-3]", available_bands, ignore.case = TRUE)
|
||||
rgb_bands_available <- length(potential_rgb_bands) >= 3
|
||||
}
|
||||
|
||||
if (rgb_bands_available) {
|
||||
safe_log("RGB bands detected - creating RGB visualization")
|
||||
|
||||
# Try to extract RGB bands (prioritize named bands first)
|
||||
red_band <- NULL
|
||||
green_band <- NULL
|
||||
blue_band <- NULL
|
||||
|
||||
# Look for named RGB bands first
|
||||
red_candidates <- grep("red|Red|RED", available_bands, ignore.case = TRUE, value = TRUE)
|
||||
green_candidates <- grep("green|Green|GREEN", available_bands, ignore.case = TRUE, value = TRUE)
|
||||
blue_candidates <- grep("blue|Blue|BLUE", available_bands, ignore.case = TRUE, value = TRUE)
|
||||
|
||||
if (length(red_candidates) > 0) red_band <- red_candidates[1]
|
||||
if (length(green_candidates) > 0) green_band <- green_candidates[1]
|
||||
if (length(blue_candidates) > 0) blue_band <- blue_candidates[1]
|
||||
|
||||
# Fallback to numbered bands if named bands not found
|
||||
if (is.null(red_band) || is.null(green_band) || is.null(blue_band)) {
|
||||
if (length(available_bands) >= 3) {
|
||||
# Assume first 3 bands are RGB (common convention)
|
||||
red_band <- available_bands[1]
|
||||
green_band <- available_bands[2]
|
||||
blue_band <- available_bands[3]
|
||||
}
|
||||
}
|
||||
|
||||
if (!is.null(red_band) && !is.null(green_band) && !is.null(blue_band)) {
|
||||
# Extract RGB bands
|
||||
rgb_raster <- c(full_raster[[red_band]], full_raster[[green_band]], full_raster[[blue_band]])
|
||||
names(rgb_raster) <- c("red", "green", "blue")
|
||||
|
||||
# Create RGB map
|
||||
map <- tmap::tm_shape(rgb_raster, unit = "m") +
|
||||
tmap::tm_rgb() +
|
||||
tmap::tm_scalebar(position = c("right", "bottom"), text.color = "white") +
|
||||
tmap::tm_compass(position = c("right", "bottom"), text.color = "white") +
|
||||
tmap::tm_shape(AllPivots0) +
|
||||
tmap::tm_borders(col = "white", lwd = 2) +
|
||||
tmap::tm_text("sub_field", size = 0.6, col = "white") +
|
||||
tmap::tm_layout(main.title = paste0("RGB Satellite Image - Week ", week),
|
||||
main.title.size = 0.8,
|
||||
main.title.color = "black")
|
||||
|
||||
# Print the map
|
||||
print(map)
|
||||
|
||||
safe_log("RGB map created successfully")
|
||||
} else {
|
||||
safe_log("Could not identify RGB bands despite detection", "WARNING")
|
||||
cat("RGB bands detected but could not be properly identified. Skipping RGB visualization.\n")
|
||||
}
|
||||
} else {
|
||||
safe_log("No RGB bands available in the current week mosaic")
|
||||
cat("**Note:** RGB satellite imagery is not available for this week. Only spectral index data is available.\n\n")
|
||||
}
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error creating RGB map:", e$message), "ERROR")
|
||||
cat("**Note:** Could not create RGB visualization for this week.\n\n")
|
||||
})
|
||||
```
|
||||
|
||||
\pagebreak
|
||||
# Chlorophyll Index (CI) Overview Map - Current Week
|
||||
```{r render_ci_overview_map, echo=FALSE, fig.height=6.9, fig.width=9, message=FALSE, warning=FALSE}
|
||||
# Create overview chlorophyll index map
|
||||
tryCatch({ # Base shape
|
||||
map <- tmap::tm_shape(CI, unit = "m") # Add raster layer with continuous spectrum (fixed scale 1-8 for consistent comparison)
|
||||
map <- map + tmap::tm_raster(col.scale = tm_scale_continuous(values = "brewer.rd_yl_gn",
|
||||
limits = c(1, 8)),
|
||||
col.legend = tm_legend(title = "Chlorophyll Index (CI)",
|
||||
orientation = "landscape",
|
||||
position = tm_pos_out("center", "bottom")))
|
||||
|
||||
# Complete the map with layout and other elements
|
||||
map <- map + tmap::tm_layout(legend.outside = TRUE,
|
||||
legend.outside.position = "bottom",
|
||||
legend.show = TRUE) +
|
||||
tmap::tm_scale_bar(position = tm_pos_out("right", "bottom"), text.color = "black") +
|
||||
tmap::tm_compass(position = tm_pos_out("right", "bottom"), text.color = "black") +
|
||||
map <- map +
|
||||
tmap::tm_scalebar(position = c("right", "bottom"), text.color = "black") +
|
||||
tmap::tm_compass(position = c("right", "bottom"), text.color = "black") +
|
||||
tmap::tm_shape(AllPivots0) +
|
||||
tmap::tm_borders(col = "black") +
|
||||
tmap::tm_text("sub_field", size = 0.6, col = "black")
|
||||
|
|
@ -293,36 +441,24 @@ tryCatch({
|
|||
text(1, 1, "Error creating CI overview map", cex=1.5)
|
||||
})
|
||||
```
|
||||
\newpage
|
||||
\pagebreak
|
||||
|
||||
# Weekly Chlorophyll Index Difference Map
|
||||
```{r render_ci_difference_map, echo=FALSE, fig.height=6.8, fig.width=9, message=FALSE, warning=FALSE}
|
||||
```{r render_ci_difference_map, echo=FALSE, fig.height=6.9, fig.width=9, message=FALSE, warning=FALSE}
|
||||
# Create chlorophyll index difference map
|
||||
tryCatch({
|
||||
# Base shape
|
||||
map <- tmap::tm_shape(last_week_dif_raster_abs, unit = "m")
|
||||
|
||||
# Add raster layer with either breaks or continuous spectrum based on parameter
|
||||
if (use_breaks) {
|
||||
map <- map + tmap::tm_raster(breaks = c(-3,-2,-1,0,1,2,3),
|
||||
palette = "RdYlGn",
|
||||
tryCatch({ # Base shape
|
||||
map <- tmap::tm_shape(last_week_dif_raster_abs, unit = "m") # Add raster layer with continuous spectrum (centered at 0 for difference maps, fixed scale)
|
||||
map <- map + tmap::tm_raster(col.scale = tm_scale_continuous(values = "brewer.rd_yl_gn",
|
||||
midpoint = 0,
|
||||
legend.is.portrait = FALSE,
|
||||
title = "Chlorophyll Index (CI) Change")
|
||||
} else {
|
||||
map <- map + tmap::tm_raster(palette = "RdYlGn",
|
||||
style = "cont",
|
||||
midpoint = 0,
|
||||
legend.is.portrait = FALSE,
|
||||
title = "Chlorophyll Index (CI) Change")
|
||||
}
|
||||
limits = c(-3, 3)),
|
||||
col.legend = tm_legend(title = "Chlorophyll Index (CI) Change",
|
||||
orientation = "landscape",
|
||||
position = tm_pos_out("center", "bottom")))
|
||||
|
||||
# Complete the map with layout and other elements
|
||||
map <- map + tmap::tm_layout(legend.outside = TRUE,
|
||||
legend.outside.position = "bottom",
|
||||
legend.show = TRUE) +
|
||||
tmap::tm_scale_bar(position = tm_pos_out("right", "bottom"), text.color = "black") +
|
||||
tmap::tm_compass(position = tm_pos_out("right", "bottom"), text.color = "black") +
|
||||
map <- map +
|
||||
tmap::tm_scalebar(position = c("right", "bottom"), text.color = "black") +
|
||||
tmap::tm_compass(position = c("right", "bottom"), text.color = "black") +
|
||||
tmap::tm_shape(AllPivots0) +
|
||||
tmap::tm_borders(col = "black") +
|
||||
tmap::tm_text("sub_field", size = 0.6, col = "black")
|
||||
|
|
@ -335,8 +471,8 @@ tryCatch({
|
|||
text(1, 1, "Error creating CI difference map", cex=1.5)
|
||||
})
|
||||
```
|
||||
\newpage
|
||||
\newpage
|
||||
\pagebreak
|
||||
|
||||
|
||||
```{r generate_field_visualizations, eval=TRUE, fig.height=3.8, fig.width=10, message=FALSE,echo=FALSE, warning=FALSE, include=TRUE, results='asis'}
|
||||
# Generate detailed visualizations for each field
|
||||
|
|
@ -347,7 +483,7 @@ tryCatch({
|
|||
dplyr::summarise(.groups = 'drop')
|
||||
|
||||
# Generate plots for each field
|
||||
purrr::walk(AllPivots_merged$field, function(field_name) {
|
||||
purrr::walk(AllPivots_merged$field[1:5], function(field_name) {
|
||||
tryCatch({
|
||||
cat("\n") # Add an empty line for better spacing
|
||||
|
||||
|
|
@ -358,14 +494,12 @@ tryCatch({
|
|||
current_ci = CI,
|
||||
ci_minus_1 = CI_m1,
|
||||
ci_minus_2 = CI_m2,
|
||||
last_week_diff = last_week_dif_raster_abs,
|
||||
three_week_diff = three_week_dif_raster_abs,
|
||||
last_week_diff = last_week_dif_raster_abs, three_week_diff = three_week_dif_raster_abs,
|
||||
harvesting_data = harvesting_data,
|
||||
week = week,
|
||||
week_minus_1 = week_minus_1,
|
||||
week_minus_2 = week_minus_2,
|
||||
week_minus_3 = week_minus_3,
|
||||
use_breaks = use_breaks,
|
||||
borders = borders
|
||||
)
|
||||
|
||||
|
|
@ -513,7 +647,7 @@ tryCatch({
|
|||
|
||||
# Predict yields for the current season (focus on mature fields over 300 days)
|
||||
pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>%
|
||||
dplyr::filter(Age_days > 300) %>%
|
||||
dplyr::filter(Age_days > 1) %>%
|
||||
dplyr::mutate(CI_per_day = round(total_CI / Age_days, 1))
|
||||
|
||||
safe_log("Successfully completed yield prediction calculations")
|
||||
|
|
@ -565,3 +699,7 @@ tryCatch({
|
|||
})
|
||||
```
|
||||
|
||||
|
||||
|
||||
\pagebreak
|
||||
|
||||
|
|
|
|||
Binary file not shown.
|
Before Width: | Height: | Size: 152 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 180 KiB |
File diff suppressed because it is too large
Load diff
BIN
r_app/Rplots.pdf
BIN
r_app/Rplots.pdf
Binary file not shown.
|
|
@ -32,12 +32,12 @@ main <- function() {
|
|||
end_date <- as.Date(args[1])
|
||||
if (is.na(end_date)) {
|
||||
warning("Invalid end_date provided. Using default (current date).")
|
||||
#end_date <- Sys.Date()
|
||||
end_date <- "2023-10-01"
|
||||
end_date <- Sys.Date()
|
||||
#end_date <- "2023-10-01"
|
||||
}
|
||||
} else {
|
||||
#end_date <- Sys.Date()
|
||||
end_date <- "2023-10-01"
|
||||
end_date <- Sys.Date()
|
||||
#end_date <- "2023-10-01"
|
||||
}
|
||||
|
||||
# Process offset argument
|
||||
|
|
@ -58,6 +58,10 @@ main <- function() {
|
|||
project_dir <- "chemba"
|
||||
}
|
||||
|
||||
# Make project_dir available globally so parameters_project.R can use it
|
||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||
|
||||
|
||||
# 3. Initialize project configuration
|
||||
# --------------------------------
|
||||
new_project_question <- FALSE
|
||||
|
|
@ -68,8 +72,10 @@ main <- function() {
|
|||
}, error = function(e) {
|
||||
warning("Default source files not found. Attempting to source from 'r_app' directory.")
|
||||
tryCatch({
|
||||
source("r_app/parameters_project.R")
|
||||
source("r_app/ci_extraction_utils.R")
|
||||
source(here::here("r_app", "parameters_project.R"))
|
||||
source(here::here("r_app", "ci_extraction_utils.R"))
|
||||
warning(paste("Successfully sourced files from 'r_app' directory."))
|
||||
|
||||
}, error = function(e) {
|
||||
stop("Failed to source required files from both default and 'r_app' directories.")
|
||||
})
|
||||
|
|
@ -106,6 +112,6 @@ main <- function() {
|
|||
})
|
||||
}
|
||||
|
||||
# Run the main function if the script is executed directly
|
||||
main()
|
||||
|
||||
if (sys.nframe() == 0) {
|
||||
main()
|
||||
}
|
||||
572
r_app/experiments/ci_extraction_and_yield_prediction.R
Normal file
572
r_app/experiments/ci_extraction_and_yield_prediction.R
Normal file
|
|
@ -0,0 +1,572 @@
|
|||
# CI_EXTRACTION_AND_YIELD_PREDICTION.R
|
||||
# =====================================
|
||||
#
|
||||
# This standalone script demonstrates:
|
||||
# 1. How Chlorophyll Index (CI) is extracted from satellite imagery
|
||||
# 2. How yield prediction is performed based on CI values
|
||||
#
|
||||
# Created for sharing with colleagues to illustrate the core functionality
|
||||
# of the SmartCane monitoring system.
|
||||
#
|
||||
|
||||
# -----------------------------
|
||||
# PART 1: LIBRARY DEPENDENCIES
|
||||
# -----------------------------
|
||||
|
||||
suppressPackageStartupMessages({
|
||||
# Spatial data processing
|
||||
library(sf)
|
||||
library(terra)
|
||||
library(exactextractr)
|
||||
|
||||
# Data manipulation
|
||||
library(tidyverse)
|
||||
library(lubridate)
|
||||
library(here)
|
||||
|
||||
# Machine learning for yield prediction
|
||||
library(rsample)
|
||||
library(caret)
|
||||
library(randomForest)
|
||||
library(CAST)
|
||||
})
|
||||
|
||||
# ----------------------------------
|
||||
# PART 2: LOGGING & UTILITY FUNCTIONS
|
||||
# ----------------------------------
|
||||
|
||||
#' Safe logging function that works in any environment
|
||||
#'
|
||||
#' @param message The message to log
|
||||
#' @param level The log level (default: "INFO")
|
||||
#' @return NULL (used for side effects)
|
||||
#'
|
||||
safe_log <- function(message, level = "INFO") {
|
||||
timestamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
|
||||
formatted_msg <- paste0("[", timestamp, "][", level, "] ", message)
|
||||
|
||||
if (level %in% c("ERROR", "WARNING")) {
|
||||
warning(formatted_msg)
|
||||
} else {
|
||||
message(formatted_msg)
|
||||
}
|
||||
}
|
||||
|
||||
#' Generate a sequence of dates for processing
|
||||
#'
|
||||
#' @param end_date The end date for the sequence (Date object)
|
||||
#' @param offset Number of days to look back from end_date
|
||||
#' @return A list containing week number, year, and a sequence of dates for filtering
|
||||
#'
|
||||
date_list <- function(end_date, offset) {
|
||||
# Input validation
|
||||
if (!lubridate::is.Date(end_date)) {
|
||||
end_date <- as.Date(end_date)
|
||||
if (is.na(end_date)) {
|
||||
stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.")
|
||||
}
|
||||
}
|
||||
|
||||
offset <- as.numeric(offset)
|
||||
if (is.na(offset) || offset < 1) {
|
||||
stop("Invalid offset provided. Expected a positive number.")
|
||||
}
|
||||
|
||||
# Calculate date range
|
||||
offset <- offset - 1 # Adjust offset to include end_date
|
||||
start_date <- end_date - lubridate::days(offset)
|
||||
|
||||
# Extract week and year information
|
||||
week <- lubridate::week(start_date)
|
||||
year <- lubridate::year(start_date)
|
||||
|
||||
# Generate sequence of dates
|
||||
days_filter <- seq(from = start_date, to = end_date, by = "day")
|
||||
days_filter <- format(days_filter, "%Y-%m-%d") # Format for consistent filtering
|
||||
|
||||
# Log the date range
|
||||
safe_log(paste("Date range generated from", start_date, "to", end_date))
|
||||
|
||||
return(list(
|
||||
"week" = week,
|
||||
"year" = year,
|
||||
"days_filter" = days_filter,
|
||||
"start_date" = start_date,
|
||||
"end_date" = end_date
|
||||
))
|
||||
}
|
||||
|
||||
# -----------------------------
|
||||
# PART 3: CI EXTRACTION PROCESS
|
||||
# -----------------------------
|
||||
|
||||
#' Find satellite imagery files within a specific date range
|
||||
#'
|
||||
#' @param image_folder Path to the folder containing satellite images
|
||||
#' @param date_filter Vector of dates to filter by (in YYYY-MM-DD format)
|
||||
#' @return Vector of file paths matching the date filter
|
||||
#'
|
||||
find_satellite_images <- function(image_folder, date_filter) {
|
||||
# Validate inputs
|
||||
if (!dir.exists(image_folder)) {
|
||||
stop(paste("Image folder not found:", image_folder))
|
||||
}
|
||||
|
||||
# List all files in the directory
|
||||
all_files <- list.files(image_folder, pattern = "\\.tif$", full.names = TRUE, recursive = TRUE)
|
||||
|
||||
if (length(all_files) == 0) {
|
||||
safe_log("No TIF files found in the specified directory", "WARNING")
|
||||
return(character(0))
|
||||
}
|
||||
|
||||
# Filter files by date pattern in filename
|
||||
filtered_files <- character(0)
|
||||
|
||||
for (date in date_filter) {
|
||||
# Format date for matching (remove dashes)
|
||||
date_pattern <- gsub("-", "", date)
|
||||
|
||||
# Find files with matching date pattern
|
||||
matching_files <- all_files[grepl(date_pattern, all_files)]
|
||||
|
||||
if (length(matching_files) > 0) {
|
||||
filtered_files <- c(filtered_files, matching_files)
|
||||
safe_log(paste("Found", length(matching_files), "files for date", date))
|
||||
}
|
||||
}
|
||||
|
||||
return(filtered_files)
|
||||
}
|
||||
|
||||
#' Create a Chlorophyll Index (CI) from satellite imagery
|
||||
#'
|
||||
#' @param raster_obj A SpatRaster object with Red, Green, Blue, and NIR bands
|
||||
#' @return A SpatRaster object with a CI band
|
||||
#'
|
||||
calculate_ci <- function(raster_obj) {
|
||||
# Validate input has required bands
|
||||
if (terra::nlyr(raster_obj) < 4) {
|
||||
stop("Raster must have at least 4 bands (Red, Green, Blue, NIR)")
|
||||
}
|
||||
|
||||
# Extract bands (assuming standard order: B, G, R, NIR)
|
||||
blue_band <- raster_obj[[1]]
|
||||
green_band <- raster_obj[[2]]
|
||||
red_band <- raster_obj[[3]]
|
||||
nir_band <- raster_obj[[4]]
|
||||
|
||||
# CI formula: (NIR / Red) - 1
|
||||
# This highlights chlorophyll content in vegetation
|
||||
ci_raster <- (nir_band / red_band) - 1
|
||||
|
||||
# Filter extreme values that may result from division operations
|
||||
ci_raster[ci_raster > 10] <- 10 # Cap max value
|
||||
ci_raster[ci_raster < 0] <- 0 # Cap min value
|
||||
|
||||
# Name the layer
|
||||
names(ci_raster) <- "CI"
|
||||
|
||||
return(ci_raster)
|
||||
}
|
||||
|
||||
#' Create a mask for cloudy pixels and shadows using thresholds
|
||||
#'
|
||||
#' @param raster_obj A SpatRaster object with multiple bands
|
||||
#' @return A binary mask where 1=clear pixel, 0=cloudy or shadow pixel
|
||||
#'
|
||||
create_cloud_mask <- function(raster_obj) {
|
||||
# Extract bands
|
||||
blue_band <- raster_obj[[1]]
|
||||
green_band <- raster_obj[[2]]
|
||||
red_band <- raster_obj[[3]]
|
||||
nir_band <- raster_obj[[4]]
|
||||
|
||||
# Create initial mask (all pixels valid)
|
||||
mask <- blue_band * 0 + 1
|
||||
|
||||
# Calculate indices used for detection
|
||||
ndvi <- (nir_band - red_band) / (nir_band + red_band)
|
||||
brightness <- (blue_band + green_band + red_band) / 3
|
||||
|
||||
# CLOUD DETECTION CRITERIA
|
||||
# ------------------------
|
||||
# Clouds are typically very bright in all bands
|
||||
bright_pixels <- (blue_band > 0.3) & (green_band > 0.3) & (red_band > 0.3)
|
||||
|
||||
# Snow/high reflectance clouds have high blue values
|
||||
blue_dominant <- blue_band > (red_band * 1.2)
|
||||
|
||||
# Low NDVI areas that are bright are likely clouds
|
||||
low_ndvi <- ndvi < 0.1
|
||||
|
||||
# Combine cloud criteria
|
||||
cloud_pixels <- bright_pixels & (blue_dominant | low_ndvi)
|
||||
|
||||
# SHADOW DETECTION CRITERIA
|
||||
# ------------------------
|
||||
# Shadows typically have:
|
||||
# 1. Low overall brightness across all bands
|
||||
# 2. Lower NIR reflectance
|
||||
# 3. Can still have reasonable NDVI (if over vegetation)
|
||||
|
||||
# Dark pixels in visible spectrum
|
||||
dark_pixels <- brightness < 0.1
|
||||
|
||||
# Low NIR reflectance
|
||||
low_nir <- nir_band < 0.15
|
||||
|
||||
# Shadows often have higher blue proportion relative to NIR
|
||||
blue_nir_ratio <- blue_band / (nir_band + 0.01) # Add small constant to avoid division by zero
|
||||
blue_enhanced <- blue_nir_ratio > 0.8
|
||||
|
||||
# Combine shadow criteria
|
||||
shadow_pixels <- dark_pixels & (low_nir | blue_enhanced)
|
||||
|
||||
# Update mask (0 for cloud or shadow pixels)
|
||||
mask[cloud_pixels | shadow_pixels] <- 0
|
||||
|
||||
# Optional: create different values for clouds vs shadows for visualization
|
||||
# mask[cloud_pixels] <- 0 # Clouds
|
||||
# mask[shadow_pixels] <- 0 # Shadows
|
||||
|
||||
return(mask)
|
||||
}
|
||||
|
||||
#' Process satellite image, calculate CI, and crop to field boundaries
|
||||
#'
|
||||
#' @param file Path to the satellite image file
|
||||
#' @param field_boundaries Field boundaries vector object
|
||||
#' @param output_dir Directory to save the processed raster
|
||||
#' @return Path to the processed raster file
|
||||
#'
|
||||
process_satellite_image <- function(file, field_boundaries, output_dir) {
|
||||
# Validate inputs
|
||||
if (!file.exists(file)) {
|
||||
stop(paste("File not found:", file))
|
||||
}
|
||||
|
||||
if (is.null(field_boundaries)) {
|
||||
stop("Field boundaries are required but were not provided")
|
||||
}
|
||||
|
||||
# Create output filename
|
||||
basename_no_ext <- tools::file_path_sans_ext(basename(file))
|
||||
output_file <- here::here(output_dir, paste0(basename_no_ext, "_CI.tif"))
|
||||
|
||||
# Process with error handling
|
||||
tryCatch({
|
||||
# Load and prepare raster
|
||||
loaded_raster <- terra::rast(file)
|
||||
|
||||
# Calculate CI
|
||||
ci_raster <- calculate_ci(loaded_raster)
|
||||
|
||||
# Create cloud mask
|
||||
cloud_mask <- create_cloud_mask(loaded_raster)
|
||||
|
||||
# Apply cloud mask to CI
|
||||
ci_masked <- ci_raster * cloud_mask
|
||||
|
||||
# Crop to field boundaries extent (for efficiency)
|
||||
field_extent <- terra::ext(field_boundaries)
|
||||
ci_cropped <- terra::crop(ci_masked, field_extent)
|
||||
|
||||
# Write output
|
||||
terra::writeRaster(ci_cropped, output_file, overwrite = TRUE)
|
||||
|
||||
safe_log(paste("Successfully processed", basename(file)))
|
||||
|
||||
return(output_file)
|
||||
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error processing", basename(file), ":", e$message), "ERROR")
|
||||
return(NULL)
|
||||
})
|
||||
}
|
||||
|
||||
#' Extract CI statistics for each field
|
||||
#'
|
||||
#' @param ci_raster A SpatRaster with CI values
|
||||
#' @param field_boundaries An sf object with field polygons
|
||||
#' @return A data frame with CI statistics by field
|
||||
#'
|
||||
extract_ci_by_field <- function(ci_raster, field_boundaries) {
|
||||
# Validate inputs
|
||||
if (is.null(ci_raster)) {
|
||||
stop("CI raster is required but was NULL")
|
||||
}
|
||||
|
||||
if (is.null(field_boundaries) || nrow(field_boundaries) == 0) {
|
||||
stop("Field boundaries are required but were empty")
|
||||
}
|
||||
|
||||
# Extract statistics using exact extraction (weighted by coverage)
|
||||
ci_stats <- exactextractr::exact_extract(
|
||||
ci_raster,
|
||||
field_boundaries,
|
||||
fun = c("mean", "median", "min", "max", "stdev", "count"),
|
||||
progress = FALSE
|
||||
)
|
||||
|
||||
# Add field identifiers
|
||||
ci_stats$field <- field_boundaries$field
|
||||
if ("sub_field" %in% names(field_boundaries)) {
|
||||
ci_stats$sub_field <- field_boundaries$sub_field
|
||||
} else {
|
||||
ci_stats$sub_field <- field_boundaries$field
|
||||
}
|
||||
|
||||
# Add date info
|
||||
ci_stats$date <- Sys.Date()
|
||||
|
||||
# Clean up names
|
||||
names(ci_stats) <- gsub("CI\\.", "", names(ci_stats))
|
||||
|
||||
return(ci_stats)
|
||||
}
|
||||
|
||||
# -----------------------------------------
|
||||
# PART 4: YIELD PREDICTION IMPLEMENTATION
|
||||
# -----------------------------------------
|
||||
|
||||
#' Prepare data for yield prediction model
|
||||
#'
|
||||
#' @param ci_data Data frame with cumulative CI values
|
||||
#' @param harvest_data Data frame with harvest information
|
||||
#' @return Data frame ready for modeling
|
||||
#'
|
||||
prepare_yield_prediction_data <- function(ci_data, harvest_data) {
|
||||
# Join CI and yield data
|
||||
ci_and_yield <- dplyr::left_join(ci_data, harvest_data, by = c("field", "sub_field", "season")) %>%
|
||||
dplyr::group_by(sub_field, season) %>%
|
||||
dplyr::slice(which.max(DOY)) %>%
|
||||
dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>%
|
||||
dplyr::mutate(CI_per_day = cumulative_CI / DOY)
|
||||
|
||||
# Split into training and prediction sets
|
||||
ci_and_yield_train <- ci_and_yield %>%
|
||||
as.data.frame() %>%
|
||||
dplyr::filter(!is.na(tonnage_ha))
|
||||
|
||||
prediction_yields <- ci_and_yield %>%
|
||||
as.data.frame() %>%
|
||||
dplyr::filter(is.na(tonnage_ha))
|
||||
|
||||
return(list(
|
||||
train = ci_and_yield_train,
|
||||
predict = prediction_yields
|
||||
))
|
||||
}
|
||||
|
||||
#' Train a random forest model for yield prediction
|
||||
#'
|
||||
#' @param training_data Data frame with training data
|
||||
#' @param predictors Vector of predictor variable names
|
||||
#' @param response Name of the response variable
|
||||
#' @return Trained model
|
||||
#'
|
||||
train_yield_model <- function(training_data, predictors = c("cumulative_CI", "DOY", "CI_per_day"), response = "tonnage_ha") {
|
||||
# Configure model training parameters
|
||||
ctrl <- caret::trainControl(
|
||||
method = "cv",
|
||||
savePredictions = TRUE,
|
||||
allowParallel = TRUE,
|
||||
number = 5,
|
||||
verboseIter = TRUE
|
||||
)
|
||||
|
||||
# Train the model with feature selection
|
||||
set.seed(202) # For reproducibility
|
||||
model_ffs_rf <- CAST::ffs(
|
||||
training_data[, predictors],
|
||||
training_data[, response],
|
||||
method = "rf",
|
||||
trControl = ctrl,
|
||||
importance = TRUE,
|
||||
withinSE = TRUE,
|
||||
tuneLength = 5,
|
||||
na.rm = TRUE
|
||||
)
|
||||
|
||||
return(model_ffs_rf)
|
||||
}
|
||||
|
||||
#' Format predictions into a clean data frame
|
||||
#'
|
||||
#' @param predictions Raw prediction results
|
||||
#' @param newdata Original data frame with field information
|
||||
#' @return Formatted predictions data frame
|
||||
#'
|
||||
prepare_predictions <- function(predictions, newdata) {
|
||||
return(predictions %>%
|
||||
as.data.frame() %>%
|
||||
dplyr::rename(predicted_Tcha = ".") %>%
|
||||
dplyr::mutate(
|
||||
sub_field = newdata$sub_field,
|
||||
field = newdata$field,
|
||||
Age_days = newdata$DOY,
|
||||
total_CI = round(newdata$cumulative_CI, 0),
|
||||
predicted_Tcha = round(predicted_Tcha, 0),
|
||||
season = newdata$season
|
||||
) %>%
|
||||
dplyr::select(field, sub_field, Age_days, total_CI, predicted_Tcha, season) %>%
|
||||
dplyr::left_join(., newdata, by = c("field", "sub_field", "season"))
|
||||
)
|
||||
}
|
||||
|
||||
#' Predict yields for mature fields
|
||||
#'
|
||||
#' @param model Trained model
|
||||
#' @param prediction_data Data frame with fields to predict
|
||||
#' @param min_age Minimum age in days to qualify as mature (default: 300)
|
||||
#' @return Data frame with yield predictions
|
||||
#'
|
||||
predict_yields <- function(model, prediction_data, min_age = 300) {
|
||||
# Make predictions
|
||||
predictions <- stats::predict(model, newdata = prediction_data)
|
||||
|
||||
# Format predictions
|
||||
pred_formatted <- prepare_predictions(predictions, prediction_data) %>%
|
||||
dplyr::filter(Age_days > min_age) %>%
|
||||
dplyr::mutate(CI_per_day = round(total_CI / Age_days, 1))
|
||||
|
||||
return(pred_formatted)
|
||||
}
|
||||
|
||||
# ------------------------------
|
||||
# PART 5: DEMONSTRATION WORKFLOW
|
||||
# ------------------------------
|
||||
|
||||
#' Demonstration workflow showing how to use the functions
|
||||
#'
|
||||
#' @param end_date The end date for processing satellite images
|
||||
#' @param offset Number of days to look back
|
||||
#' @param image_folder Path to the folder containing satellite images
|
||||
#' @param field_boundaries_path Path to field boundaries shapefile
|
||||
#' @param output_dir Path to save processed outputs
|
||||
#' @param harvest_data_path Path to historical harvest data
|
||||
#'
|
||||
demo_workflow <- function(end_date = Sys.Date(), offset = 7,
|
||||
image_folder = "path/to/satellite/images",
|
||||
field_boundaries_path = "path/to/field_boundaries.shp",
|
||||
output_dir = "path/to/output",
|
||||
harvest_data_path = "path/to/harvest_data.csv") {
|
||||
|
||||
# Step 1: Generate date list for processing
|
||||
dates <- date_list(end_date, offset)
|
||||
safe_log(paste("Processing data for week", dates$week, "of", dates$year))
|
||||
|
||||
# Step 2: Load field boundaries
|
||||
field_boundaries <- sf::read_sf(field_boundaries_path)
|
||||
safe_log(paste("Loaded", nrow(field_boundaries), "field boundaries"))
|
||||
|
||||
# Step 3: Find satellite images for the specified date range
|
||||
image_files <- find_satellite_images(image_folder, dates$days_filter)
|
||||
safe_log(paste("Found", length(image_files), "satellite images for processing"))
|
||||
|
||||
# Step 4: Process each satellite image and calculate CI
|
||||
ci_files <- list()
|
||||
for (file in image_files) {
|
||||
ci_file <- process_satellite_image(file, field_boundaries, output_dir)
|
||||
if (!is.null(ci_file)) {
|
||||
ci_files <- c(ci_files, ci_file)
|
||||
}
|
||||
}
|
||||
|
||||
# Step 5: Extract CI statistics for each field
|
||||
ci_stats_list <- list()
|
||||
for (ci_file in ci_files) {
|
||||
ci_raster <- terra::rast(ci_file)
|
||||
ci_stats <- extract_ci_by_field(ci_raster, field_boundaries)
|
||||
ci_stats_list[[basename(ci_file)]] <- ci_stats
|
||||
}
|
||||
|
||||
# Combine all stats
|
||||
all_ci_stats <- dplyr::bind_rows(ci_stats_list)
|
||||
safe_log(paste("Extracted CI statistics for", nrow(all_ci_stats), "field-date combinations"))
|
||||
|
||||
# Step 6: Prepare for yield prediction
|
||||
if (file.exists(harvest_data_path)) {
|
||||
# Load harvest data
|
||||
harvest_data <- read.csv(harvest_data_path)
|
||||
safe_log("Loaded harvest data for yield prediction")
|
||||
|
||||
# Make up cumulative_CI data for demonstration purposes
|
||||
# In a real scenario, this would come from accumulating CI values over time
|
||||
ci_data <- all_ci_stats %>%
|
||||
dplyr::group_by(field, sub_field) %>%
|
||||
dplyr::summarise(
|
||||
cumulative_CI = sum(mean, na.rm = TRUE),
|
||||
DOY = n(), # Days of year as the count of observations
|
||||
season = lubridate::year(max(date, na.rm = TRUE)),
|
||||
.groups = "drop"
|
||||
)
|
||||
|
||||
# Prepare data for modeling
|
||||
modeling_data <- prepare_yield_prediction_data(ci_data, harvest_data)
|
||||
|
||||
if (nrow(modeling_data$train) > 0) {
|
||||
# Train yield prediction model
|
||||
yield_model <- train_yield_model(modeling_data$train)
|
||||
safe_log("Trained yield prediction model")
|
||||
|
||||
# Predict yields for mature fields
|
||||
yield_predictions <- predict_yields(yield_model, modeling_data$predict)
|
||||
safe_log(paste("Generated yield predictions for", nrow(yield_predictions), "fields"))
|
||||
|
||||
# Return results
|
||||
return(list(
|
||||
ci_stats = all_ci_stats,
|
||||
yield_predictions = yield_predictions,
|
||||
model = yield_model
|
||||
))
|
||||
} else {
|
||||
safe_log("No training data available for yield prediction", "WARNING")
|
||||
return(list(ci_stats = all_ci_stats))
|
||||
}
|
||||
} else {
|
||||
safe_log("Harvest data not found, skipping yield prediction", "WARNING")
|
||||
return(list(ci_stats = all_ci_stats))
|
||||
}
|
||||
}
|
||||
|
||||
# ------------------------------
|
||||
# PART 6: USAGE EXAMPLE
|
||||
# ------------------------------
|
||||
|
||||
# Uncomment and modify paths to run the demo workflow
|
||||
# results <- demo_workflow(
|
||||
# end_date = "2023-10-01",
|
||||
# offset = 7,
|
||||
# image_folder = "data/satellite_images",
|
||||
# field_boundaries_path = "data/field_boundaries.shp",
|
||||
# output_dir = "output/processed",
|
||||
# harvest_data_path = "data/harvest_history.csv"
|
||||
# )
|
||||
#
|
||||
# # Access results
|
||||
# ci_stats <- results$ci_stats
|
||||
# yield_predictions <- results$yield_predictions
|
||||
#
|
||||
# # Example: Plot CI distribution by field
|
||||
# if (require(ggplot2)) {
|
||||
# ggplot(ci_stats, aes(x = field, y = mean, fill = field)) +
|
||||
# geom_boxplot() +
|
||||
# labs(title = "CI Distribution by Field",
|
||||
# x = "Field",
|
||||
# y = "Mean CI") +
|
||||
# theme_minimal() +
|
||||
# theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
||||
# }
|
||||
#
|
||||
# # Example: Plot predicted yield vs age
|
||||
# if (exists("yield_predictions") && require(ggplot2)) {
|
||||
# ggplot(yield_predictions, aes(x = Age_days, y = predicted_Tcha, color = field)) +
|
||||
# geom_point(size = 3) +
|
||||
# geom_text(aes(label = field), hjust = -0.2, vjust = -0.2) +
|
||||
# labs(title = "Predicted Yield by Field Age",
|
||||
# x = "Age (Days)",
|
||||
# y = "Predicted Yield (Tonnes/ha)") +
|
||||
# theme_minimal()
|
||||
# }
|
||||
556
r_app/experiments/delete_cloud_exploratoin
Normal file
556
r_app/experiments/delete_cloud_exploratoin
Normal file
|
|
@ -0,0 +1,556 @@
|
|||
# Cloud and Shadow Detection Analysis
|
||||
# This script analyzes cloud and shadow detection parameters using the diagnostic GeoTIFF files
|
||||
# and polygon-based classification to help optimize the detection algorithms
|
||||
|
||||
# Load required packages
|
||||
library(terra)
|
||||
library(sf)
|
||||
library(dplyr)
|
||||
library(ggplot2)
|
||||
library(reshape2)
|
||||
library(exactextractr) # For accurate polygon extraction
|
||||
|
||||
# Define diagnostic directory
|
||||
diagnostic_dir <- "C:/Users/timon/Resilience BV/4020 SCane ESA DEMO - Documenten/General/4020 SCDEMO Team/4020 TechnicalData/WP3/smartcane/cloud_mask_diagnostics_20250515-164357"
|
||||
|
||||
# Simple logging function for this standalone script
|
||||
safe_log <- function(message, level = "INFO") {
|
||||
cat(paste0("[", level, "] ", message, "\n"))
|
||||
}
|
||||
|
||||
safe_log("Starting cloud detection analysis on diagnostic rasters")
|
||||
|
||||
# Load all diagnostic rasters
|
||||
safe_log("Loading diagnostic raster files...")
|
||||
|
||||
|
||||
# Load original bands
|
||||
red_band <- terra::rast(file.path(diagnostic_dir, "diagnostic_red_band.tif"))
|
||||
green_band <- terra::rast(file.path(diagnostic_dir, "diagnostic_green_band.tif"))
|
||||
blue_band <- terra::rast(file.path(diagnostic_dir, "diagnostic_blue_band.tif"))
|
||||
nir_band <- terra::rast(file.path(diagnostic_dir, "diagnostic_nir_band.tif"))
|
||||
|
||||
# Load derived indices
|
||||
brightness <- terra::rast(file.path(diagnostic_dir, "diagnostic_brightness.tif"))
|
||||
ndvi <- terra::rast(file.path(diagnostic_dir, "diagnostic_ndvi.tif"))
|
||||
blue_ratio <- terra::rast(file.path(diagnostic_dir, "diagnostic_blue_ratio.tif"))
|
||||
green_nir_ratio <- terra::rast(file.path(diagnostic_dir, "diagnostic_green_nir_ratio.tif"))
|
||||
ndwi <- terra::rast(file.path(diagnostic_dir, "diagnostic_ndwi.tif"))
|
||||
|
||||
# Load cloud detection parameters
|
||||
bright_pixels <- terra::rast(file.path(diagnostic_dir, "param_bright_pixels.tif"))
|
||||
very_bright_pixels <- terra::rast(file.path(diagnostic_dir, "param_very_bright_pixels.tif"))
|
||||
blue_dominant <- terra::rast(file.path(diagnostic_dir, "param_blue_dominant.tif"))
|
||||
low_ndvi <- terra::rast(file.path(diagnostic_dir, "param_low_ndvi.tif"))
|
||||
green_dominant_nir <- terra::rast(file.path(diagnostic_dir, "param_green_dominant_nir.tif"))
|
||||
high_ndwi <- terra::rast(file.path(diagnostic_dir, "param_high_ndwi.tif"))
|
||||
|
||||
# Load shadow detection parameters
|
||||
dark_pixels <- terra::rast(file.path(diagnostic_dir, "param_dark_pixels.tif"))
|
||||
very_dark_pixels <- terra::rast(file.path(diagnostic_dir, "param_very_dark_pixels.tif"))
|
||||
low_nir <- terra::rast(file.path(diagnostic_dir, "param_low_nir.tif"))
|
||||
shadow_ndvi <- terra::rast(file.path(diagnostic_dir, "param_shadow_ndvi.tif"))
|
||||
low_red_to_blue <- terra::rast(file.path(diagnostic_dir, "param_low_red_to_blue.tif"))
|
||||
high_blue_to_nir_ratio <- terra::rast(file.path(diagnostic_dir, "param_high_blue_to_nir_ratio.tif"))
|
||||
blue_nir_ratio_raw <- terra::rast(file.path(diagnostic_dir, "param_blue_nir_ratio_raw.tif"))
|
||||
red_blue_ratio_raw <- terra::rast(file.path(diagnostic_dir, "param_red_blue_ratio_raw.tif"))
|
||||
|
||||
# Load edge detection parameters
|
||||
brightness_focal_sd <- terra::rast(file.path(diagnostic_dir, "param_brightness_focal_sd.tif"))
|
||||
edge_pixels <- terra::rast(file.path(diagnostic_dir, "param_edge_pixels.tif"))
|
||||
|
||||
# Load final masks
|
||||
cloud_mask <- terra::rast(file.path(diagnostic_dir, "mask_cloud.tif"))
|
||||
shadow_mask <- terra::rast(file.path(diagnostic_dir, "mask_shadow.tif"))
|
||||
combined_mask <- terra::rast(file.path(diagnostic_dir, "mask_combined.tif"))
|
||||
dilated_mask <- terra::rast(file.path(diagnostic_dir, "mask_dilated.tif"))
|
||||
|
||||
safe_log("Raster data loaded successfully")
|
||||
|
||||
# Try to read the classification polygons if they exist
|
||||
tryCatch({
|
||||
# Check if the classes.geojson file exists in the diagnostic directory
|
||||
classes_file <- file.path(diagnostic_dir, "classes.geojson")
|
||||
|
||||
# If no classes file in this directory, look for the most recent one
|
||||
if (!file.exists(classes_file)) {
|
||||
# Look in parent directory for most recent cloud_mask_diagnostics folder
|
||||
potential_dirs <- list.dirs(path = dirname(diagnostic_dir),
|
||||
full.names = TRUE,
|
||||
recursive = FALSE)
|
||||
|
||||
# Filter for diagnostic directories and find the most recent one that has classes.geojson
|
||||
diagnostic_dirs <- potential_dirs[grepl("cloud_mask_diagnostics_", potential_dirs)]
|
||||
|
||||
for (dir in rev(sort(diagnostic_dirs))) { # Reverse sort to get newest first
|
||||
potential_file <- file.path(dir, "classes.geojson")
|
||||
if (file.exists(potential_file)) {
|
||||
classes_file <- potential_file
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Check if we found a classes file
|
||||
if (file.exists(classes_file)) {
|
||||
safe_log(paste("Using classification polygons from:", classes_file))
|
||||
|
||||
# Load the classification polygons
|
||||
classifications <- sf::st_read(classes_file, quiet = TRUE) %>% rename(class = type)
|
||||
# Remove empty polygons
|
||||
classifications <- classifications[!sf::st_is_empty(classifications), ]
|
||||
|
||||
# Create a list to store all rasters we want to extract values from
|
||||
extraction_rasters <- list(
|
||||
# Original bands
|
||||
red = red_band,
|
||||
green = green_band,
|
||||
blue = blue_band,
|
||||
nir = nir_band,
|
||||
|
||||
# Derived indices
|
||||
brightness = brightness,
|
||||
ndvi = ndvi,
|
||||
blue_ratio = blue_ratio,
|
||||
green_nir_ratio = green_nir_ratio,
|
||||
ndwi = ndwi,
|
||||
|
||||
# Cloud detection parameters
|
||||
bright_pixels = terra::ifel(bright_pixels, 1, 0),
|
||||
very_bright_pixels = terra::ifel(very_bright_pixels, 1, 0),
|
||||
blue_dominant = terra::ifel(blue_dominant, 1, 0),
|
||||
low_ndvi = terra::ifel(low_ndvi, 1, 0),
|
||||
green_dominant_nir = terra::ifel(green_dominant_nir, 1, 0),
|
||||
high_ndwi = terra::ifel(high_ndwi, 1, 0),
|
||||
|
||||
# Shadow detection parameters
|
||||
dark_pixels = terra::ifel(dark_pixels, 1, 0),
|
||||
very_dark_pixels = terra::ifel(very_dark_pixels, 1, 0),
|
||||
low_nir = terra::ifel(low_nir, 1, 0),
|
||||
shadow_ndvi = terra::ifel(shadow_ndvi, 1, 0),
|
||||
low_red_to_blue = terra::ifel(low_red_to_blue, 1, 0),
|
||||
high_blue_to_nir_ratio = terra::ifel(high_blue_to_nir_ratio, 1, 0),
|
||||
blue_nir_ratio_raw = (blue_band / (nir_band + 0.01)),
|
||||
red_blue_ratio_raw = (red_band / (blue_band + 0.01)),
|
||||
|
||||
# Edge detection parameters
|
||||
brightness_focal_sd = brightness_focal_sd,
|
||||
edge_pixels = terra::ifel(edge_pixels, 1, 0),
|
||||
|
||||
# Final masks
|
||||
cloud_mask = terra::ifel(cloud_mask, 1, 0),
|
||||
shadow_mask = terra::ifel(shadow_mask, 1, 0),
|
||||
combined_mask = terra::ifel(combined_mask, 1, 0),
|
||||
dilated_mask = terra::ifel(dilated_mask, 1, 0)
|
||||
)
|
||||
|
||||
# Create a stack of all rasters
|
||||
extraction_stack <- terra::rast(extraction_rasters)
|
||||
|
||||
# User-provided simplified extraction for mean statistics per polygon
|
||||
pivot_stats_sf <- cbind(
|
||||
classifications,
|
||||
round(exactextractr::exact_extract(extraction_stack, classifications, fun = "mean", progress = FALSE), 2)
|
||||
) %>%
|
||||
sf::st_drop_geometry()
|
||||
|
||||
# Convert to a regular data frame for easier downstream processing
|
||||
all_stats <- sf::st_drop_geometry(pivot_stats_sf)
|
||||
|
||||
# Ensure 'class_name' column exists, if not, use 'class' as 'class_name'
|
||||
if (!("class_name" %in% colnames(all_stats)) && ("class" %in% colnames(all_stats))) {
|
||||
all_stats$class_name <- all_stats$class
|
||||
|
||||
if (length(valid_class_ids) == 0) {
|
||||
safe_log("No valid (non-NA) class IDs found for exactextractr processing.", "WARNING")
|
||||
}
|
||||
|
||||
for (class_id in valid_class_ids) {
|
||||
# Subset polygons for this class
|
||||
class_polygons_sf <- classifications[which(classifications$class == class_id), ] # Use which for NA-safe subsetting
|
||||
|
||||
if (nrow(class_polygons_sf) == 0) {
|
||||
safe_log(paste("Skipping empty class (no polygons after filtering):", class_id), "WARNING")
|
||||
next
|
||||
}
|
||||
|
||||
tryCatch({
|
||||
safe_log(paste("Processing class:", class_id))
|
||||
|
||||
# Check if the polygon overlaps with the raster extent (check based on the combined extent of class polygons)
|
||||
rast_extent <- terra::ext(extraction_stack)
|
||||
poly_extent <- sf::st_bbox(class_polygons_sf)
|
||||
|
||||
if (poly_extent["xmin"] > rast_extent["xmax"] ||
|
||||
poly_extent["xmax"] < rast_extent["xmin"] ||
|
||||
poly_extent["ymin"] > rast_extent["ymax"] ||
|
||||
poly_extent["ymax"] < rast_extent["ymin"]) {
|
||||
safe_log(paste("Skipping class that doesn't overlap with raster:", class_id), "WARNING")
|
||||
next
|
||||
}
|
||||
|
||||
# exact_extract will process each feature in class_polygons_sf
|
||||
# and return a list of data frames (one per feature)
|
||||
per_polygon_stats_list <- exactextractr::exact_extract(
|
||||
extraction_stack,
|
||||
class_polygons_sf,
|
||||
function(values, coverage_fraction) {
|
||||
# Filter pixels by coverage (e.g., >50% of the pixel is covered by the polygon)
|
||||
valid_pixels_idx <- coverage_fraction > 0.5
|
||||
df_filtered <- values[valid_pixels_idx, , drop = FALSE]
|
||||
|
||||
if (nrow(df_filtered) == 0) {
|
||||
# If no pixels meet coverage, return a data frame with NAs
|
||||
# to maintain structure, matching expected column names.
|
||||
# Column names are derived from the extraction_stack
|
||||
stat_cols <- paste0(names(extraction_stack), "_mean")
|
||||
na_df <- as.data.frame(matrix(NA_real_, nrow = 1, ncol = length(stat_cols)))
|
||||
names(na_df) <- stat_cols
|
||||
return(na_df)
|
||||
}
|
||||
|
||||
# Calculate mean for each band (column in df_filtered)
|
||||
stats_per_band <- lapply(names(df_filtered), function(band_name) {
|
||||
col_data <- df_filtered[[band_name]]
|
||||
if (length(col_data) > 0 && sum(!is.na(col_data)) > 0) {
|
||||
mean_val <- mean(col_data, na.rm = TRUE)
|
||||
return(setNames(mean_val, paste0(band_name, "_mean")))
|
||||
} else {
|
||||
return(setNames(NA_real_, paste0(band_name, "_mean")))
|
||||
}
|
||||
})
|
||||
|
||||
# Combine all stats (named values) into a single named vector then data frame
|
||||
return(as.data.frame(t(do.call(c, stats_per_band))))
|
||||
},
|
||||
summarize_df = FALSE, # Important: get a list of DFs, one per polygon
|
||||
force_df = TRUE # Ensure the output of the summary function is treated as a DF
|
||||
)
|
||||
|
||||
# Combine all stats for this class if we have any
|
||||
if (length(per_polygon_stats_list) > 0) {
|
||||
# per_polygon_stats_list is now a list of single-row data.frames
|
||||
class_stats_df <- do.call(rbind, per_polygon_stats_list)
|
||||
|
||||
# Remove rows that are all NA (from polygons with no valid pixels)
|
||||
class_stats_df <- class_stats_df[rowSums(is.na(class_stats_df)) < ncol(class_stats_df), ]
|
||||
|
||||
if (nrow(class_stats_df) > 0) {
|
||||
# Add class information
|
||||
class_stats_df$class <- class_id
|
||||
# Get class_name from the first polygon (assuming it's consistent for the class_id)
|
||||
# Ensure class_polygons_sf is not empty before accessing class_name
|
||||
if ("class_name" %in% names(class_polygons_sf) && nrow(class_polygons_sf) > 0) {
|
||||
class_stats_df$class_name <- as.character(class_polygons_sf$class_name[1])
|
||||
} else {
|
||||
class_stats_df$class_name <- as.character(class_id) # Fallback
|
||||
}
|
||||
|
||||
# Add to overall results
|
||||
all_stats <- rbind(all_stats, class_stats_df)
|
||||
safe_log(paste("Successfully extracted data for", nrow(class_stats_df), "polygons in class", class_id))
|
||||
} else {
|
||||
safe_log(paste("No valid data extracted for class (after NA removal):", class_id), "WARNING")
|
||||
}
|
||||
} else {
|
||||
safe_log(paste("No data frames returned by exact_extract for class:", class_id), "WARNING")
|
||||
}
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error processing class", class_id, "with exact_extract:", e$message), "ERROR")
|
||||
})
|
||||
}
|
||||
# Save the extracted statistics to a CSV file
|
||||
if (nrow(all_stats) > 0) {
|
||||
stats_file <- file.path(diagnostic_dir, "class_spectral_stats_mean.csv") # New filename
|
||||
write.csv(all_stats, stats_file, row.names = FALSE)
|
||||
safe_log(paste("Saved MEAN spectral statistics by class to:", stats_file))
|
||||
} else {
|
||||
safe_log("No statistics were generated to save.", "WARNING")
|
||||
}
|
||||
|
||||
# Calculate optimized thresholds for cloud/shadow detection (using only _mean columns)
|
||||
if (nrow(all_stats) > 0 && ncol(all_stats) > 2) { # Check if all_stats has data and parameter columns
|
||||
threshold_results <- data.frame(
|
||||
parameter = character(),
|
||||
best_threshold = numeric(),
|
||||
direction = character(),
|
||||
target_class = character(),
|
||||
vs_class = character(),
|
||||
accuracy = numeric(),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
|
||||
# Define class pairs to analyze
|
||||
class_pairs <- list(
|
||||
# Cloud vs various surfaces
|
||||
c("cloud", "crop"),
|
||||
c("cloud", "bare_soil_dry"),
|
||||
c("cloud", "bare_soil_wet"),
|
||||
|
||||
# Shadow vs various surfaces
|
||||
c("shadow_over_crop", "crop"),
|
||||
c("shadow_over_bare_soil", "bare_soil_dry"),
|
||||
c("shadow_over_bare_soil", "bare_soil_wet")
|
||||
)
|
||||
|
||||
# For now, let's assume all _mean parameters derived from extraction_rasters are relevant for clouds/shadows
|
||||
# This part might need more specific logic if you want to distinguish cloud/shadow params cloud_params <- grep("_mean$", names(extraction_rasters), value = TRUE)
|
||||
params logic
|
||||
# Parameters to analyze for shadows (now only _mean versions)tatistics by class to:", stats_file))
|
||||
shadow_params <- cloud_params # Simplified: using the same set for now, adjust if specific shadow params are needed
|
||||
lds for cloud/shadow detection
|
||||
# Find optimal thresholdsframe(
|
||||
if (length(class_pairs) > 0 && (length(cloud_params) > 0 || length(shadow_params) > 0)) {
|
||||
for (pair in class_pairs) {c(),
|
||||
target_class <- pair[1](),
|
||||
vs_class <- pair[2](),
|
||||
vs_class = character(),
|
||||
# Select appropriate parameters based on whether we're analyzing clouds or shadows accuracy = numeric(),
|
||||
if (grepl("cloud", target_class)) {
|
||||
params_to_check <- cloud_params
|
||||
} else {
|
||||
params_to_check <- shadow_paramsto analyze
|
||||
}
|
||||
|
||||
# For each parameter, find the best threshold to separate the classesc("cloud", "crop"),
|
||||
for (param in params_to_check) {
|
||||
if (param %in% colnames(all_stats)) {
|
||||
# Get values for both classes
|
||||
target_values <- all_stats[all_stats$class_name == target_class, param]
|
||||
vs_values <- all_stats[all_stats$class_name == vs_class, param] c("shadow_over_crop", "crop"),
|
||||
c("shadow_over_bare_soil", "bare_soil_dry"),
|
||||
if (length(target_values) > 0 && length(vs_values) > 0) {_soil_wet")
|
||||
# Calculate mean and sd for both classes
|
||||
target_mean <- mean(target_values, na.rm = TRUE)
|
||||
target_sd <- sd(target_values, na.rm = TRUE)# Parameters to analyze for clouds
|
||||
vs_mean <- mean(vs_values, na.rm = TRUE), "blue_ratio_mean", "ndvi_mean",
|
||||
vs_sd <- sd(vs_values, na.rm = TRUE)
|
||||
|
||||
|
||||
# Determine if higher or lower values indicate the target classshadow_params <- c("brightness_mean", "dark_pixels_mean", "very_dark_pixels_mean",
|
||||
if (target_mean > vs_mean) {r_mean", "shadow_ndvi_mean", "blue_nir_ratio_raw_mean",
|
||||
direction <- ">"_ratio_raw_mean", "low_red_to_blue_mean")
|
||||
# Try different thresholds
|
||||
potential_thresholds <- seq(olds
|
||||
min(min(target_values, na.rm = TRUE), vs_mean + 0.5 * vs_sd),r (pair in class_pairs) {
|
||||
max(max(vs_values, na.rm = TRUE), target_mean - 0.5 * target_sd),
|
||||
length.out = 20
|
||||
)
|
||||
} else { appropriate parameters based on whether we're analyzing clouds or shadows
|
||||
direction <- "<"{
|
||||
# Try different thresholds params_to_check <- cloud_params
|
||||
potential_thresholds <- seq(} else {
|
||||
min(min(vs_values, na.rm = TRUE), target_mean + 0.5 * target_sd),
|
||||
max(max(target_values, na.rm = TRUE), vs_mean - 0.5 * vs_sd),
|
||||
length.out = 20
|
||||
)st threshold to separate the classes
|
||||
}
|
||||
|
||||
# Calculate accuracy for each threshold# Get values for both classes
|
||||
best_accuracy <- 0_class, param]
|
||||
best_threshold <- ifelse(direction == ">", min(potential_thresholds), max(potential_thresholds))e == vs_class, param]
|
||||
|
||||
for (threshold in potential_thresholds) {ues) > 0) {
|
||||
if (direction == ">") {
|
||||
correct_target <- sum(target_values > threshold, na.rm = TRUE)a.rm = TRUE)
|
||||
correct_vs <- sum(vs_values <= threshold, na.rm = TRUE)target_sd <- sd(target_values, na.rm = TRUE)
|
||||
} else { vs_mean <- mean(vs_values, na.rm = TRUE)
|
||||
correct_target <- sum(target_values < threshold, na.rm = TRUE)
|
||||
correct_vs <- sum(vs_values >= threshold, na.rm = TRUE)
|
||||
}
|
||||
er values indicate the target class
|
||||
total_target <- length(target_values)
|
||||
total_vs <- length(vs_values)
|
||||
|
||||
accuracy <- (correct_target + correct_vs) / (total_target + total_vs)lds <- seq(
|
||||
min(min(target_values, na.rm = TRUE), vs_mean + 0.5 * vs_sd),
|
||||
if (accuracy > best_accuracy) {max(vs_values, na.rm = TRUE), target_mean - 0.5 * target_sd),
|
||||
best_accuracy <- accuracy0
|
||||
best_threshold <- threshold
|
||||
}
|
||||
}
|
||||
|
||||
# Add to resultslds <- seq(
|
||||
threshold_results <- rbind(threshold_results, data.frame( min(min(vs_values, na.rm = TRUE), target_mean + 0.5 * target_sd),
|
||||
parameter = gsub("_mean", "", param), max(max(target_values, na.rm = TRUE), vs_mean - 0.5 * vs_sd),
|
||||
best_threshold = best_threshold, length.out = 20
|
||||
direction = direction,
|
||||
target_class = target_class,
|
||||
vs_class = vs_class,
|
||||
accuracy = best_accuracy,# Calculate accuracy for each threshold
|
||||
stringsAsFactors = FALSE
|
||||
))direction == ">", min(potential_thresholds), max(potential_thresholds))
|
||||
}
|
||||
}
|
||||
}ction == ">") {
|
||||
}
|
||||
}
|
||||
else {
|
||||
# Save threshold results correct_target <- sum(target_values < threshold, na.rm = TRUE)
|
||||
thresholds_file <- file.path(diagnostic_dir, "optimal_thresholds.csv")shold, na.rm = TRUE)
|
||||
write.csv(threshold_results, thresholds_file, row.names = FALSE)
|
||||
safe_log(paste("Saved optimal threshold recommendations to:", thresholds_file))
|
||||
|
||||
# Generate box plots for key parameters to visualize class differencestotal_vs <- length(vs_values)
|
||||
if (requireNamespace("ggplot2", quietly = TRUE) && nrow(all_stats) > 0) {
|
||||
# Reshape data for plotting (only _mean columns) + correct_vs) / (total_target + total_vs)
|
||||
mean_cols <- grep("_mean$", colnames(all_stats), value = TRUE)
|
||||
if (length(mean_cols) > 0) {f (accuracy > best_accuracy) {
|
||||
plot_data <- reshape2::melt(all_stats, best_accuracy <- accuracy
|
||||
id.vars = c("class", "class_name"), best_threshold <- threshold
|
||||
measure.vars = mean_cols, # Use only _mean columns
|
||||
variable.name = "parameter",
|
||||
value.name = "value")
|
||||
|
||||
# Create directory for plotsnd(threshold_results, data.frame(
|
||||
plots_dir <- file.path(diagnostic_dir, "class_plots"), param),
|
||||
dir.create(plots_dir, showWarnings = FALSE, recursive = TRUE)t_threshold,
|
||||
|
||||
# Create plots for selected key parameters (ensure they are _mean versions)ass,
|
||||
# Adjust key_params to reflect the new column names (e.g., "brightness_mean")vs_class = vs_class,
|
||||
key_params_plot <- intersect(c( accuracy = best_accuracy,
|
||||
"brightness_mean", "ndvi_mean", "blue_ratio_mean", "ndwi_mean", stringsAsFactors = FALSE
|
||||
"blue_nir_ratio_raw_mean", "red_blue_ratio_raw_mean" ))
|
||||
), mean_cols) # Ensure these params exist }
|
||||
}
|
||||
for (param in key_params_plot) {
|
||||
# param_data <- plot_data[plot_data$parameter == param,] # Exact match for parameter
|
||||
# No, grepl was fine if plot_data only contains _mean parameters now.
|
||||
# Let's ensure plot_data only has the _mean parameters for simplicity here.
|
||||
param_data <- plot_data[plot_data$parameter == param, ]thresholds_file <- file.path(diagnostic_dir, "optimal_thresholds.csv")
|
||||
|
||||
if (nrow(param_data) > 0) {file))
|
||||
param_name <- gsub("_mean", "", param)
|
||||
o visualize class differences
|
||||
p <- ggplot2::ggplot(param_data, ggplot2::aes(x = class_name, y = value, fill = class_name)) +s) > 0) {
|
||||
ggplot2::geom_boxplot() +
|
||||
ggplot2::theme_minimal() +
|
||||
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) + id.vars = c("class", "class_name"),
|
||||
ggplot2::labs(ariable.name = "parameter",
|
||||
title = paste("Distribution of", param_name, "by Land Cover Class"),
|
||||
x = "Class",
|
||||
y = param_name,# Create directory for plots
|
||||
fill = "Class"ass_plots")
|
||||
)_dir, showWarnings = FALSE, recursive = TRUE)
|
||||
|
||||
# Save the plot
|
||||
plot_file <- file.path(plots_dir, paste0("boxplot_", param_name, ".png"))ey_params <- c(
|
||||
ggplot2::ggsave(plot_file, p, width = 10, height = 6, dpi = 150) "brightness_mean", "ndvi_mean", "blue_ratio_mean", "ndwi_mean",
|
||||
}, "red_blue_ratio_raw_mean"
|
||||
}
|
||||
|
||||
# Create a summary plot showing multiple parameters
|
||||
summary_data <- plot_data[plot_data$parameter %in% ram_data <- plot_data[grepl(param, plot_data$parameter),]
|
||||
c("brightness_mean", "ndvi_mean",
|
||||
"blue_nir_ratio_raw_mean", "red_blue_ratio_raw_mean"),] "", param)
|
||||
|
||||
if (nrow(summary_data) > 0) {= class_name)) +
|
||||
# Clean up parameter names for displayboxplot() +
|
||||
summary_data$parameter <- gsub("_mean$", "", summary_data$parameter) # Remove _mean suffix for display
|
||||
summary_data$parameter <- gsub("_raw$", "", summary_data$parameter) # Keep this if _raw_mean was a thing(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
|
||||
|
||||
# Create faceted plot"Distribution of", param_name, "by Land Cover Class"),
|
||||
p <- ggplot2::ggplot(summary_data, x = "Class",
|
||||
ggplot2::aes(x = class_name, y = value, fill = class_name)) + y = param_name,
|
||||
ggplot2::geom_boxplot() +ss"
|
||||
ggplot2::facet_wrap(~parameter, scales = "free_y") +
|
||||
ggplot2::theme_minimal() +
|
||||
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) + # Save the plot
|
||||
ggplot2::labs( plot_file <- file.path(plots_dir, paste0("boxplot_", param_name, ".png"))
|
||||
title = "Key Spectral Parameters by Land Cover Class", ggplot2::ggsave(plot_file, p, width = 10, height = 6, dpi = 150)
|
||||
x = "Class",
|
||||
y = "Value",
|
||||
fill = "Class"
|
||||
)
|
||||
summary_data <- plot_data[plot_data$parameter %in%
|
||||
# Save the summary plot"brightness_mean", "ndvi_mean",
|
||||
summary_file <- file.path(plots_dir, "spectral_parameters_summary.png")atio_raw_mean", "red_blue_ratio_raw_mean"),]
|
||||
ggplot2::ggsave(summary_file, p, width = 12, height = 8, dpi = 150)
|
||||
}
|
||||
# Clean up parameter names for display
|
||||
safe_log(paste("Generated spectral parameter plots in:", plots_dir))r <- gsub("_mean", "", summary_data$parameter)
|
||||
}w", "", summary_data$parameter)
|
||||
} else {
|
||||
safe_log("Package 'exactextractr' not available. Install it for more accurate polygon extraction.", "WARNING")
|
||||
|
||||
# Fall back to simple extraction using terra (calculating only mean)::aes(x = class_name, y = value, fill = class_name)) +
|
||||
class_stats <- data.frame()
|
||||
_wrap(~parameter, scales = "free_y") +
|
||||
valid_class_names_fallback <- unique(classifications$class_name)
|
||||
valid_class_names_fallback <- valid_class_names_fallback[!is.na(valid_class_names_fallback)](axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
|
||||
|
||||
if (length(valid_class_names_fallback) == 0) {pectral Parameters by Land Cover Class",
|
||||
safe_log("No valid (non-NA) class names found for fallback terra::extract processing.", "WARNING") x = "Class",
|
||||
} y = "Value",
|
||||
|
||||
for (class_name_fb in valid_class_names_fallback) {
|
||||
class_polygons_fb <- classifications[which(classifications$class_name == class_name_fb), ]
|
||||
# Save the summary plot
|
||||
if(nrow(class_polygons_fb) == 0) next summary_file <- file.path(plots_dir, "spectral_parameters_summary.png")
|
||||
)
|
||||
class_vect_fb <- terra::vect(class_polygons_fb) }
|
||||
|
||||
# Extract values for each raster
|
||||
for (i in seq_along(extraction_rasters)) {}
|
||||
raster_name <- names(extraction_rasters)[i]
|
||||
# terra::extract returns a data.frame with ID and layer valuesractr' not available. Install it for more accurate polygon extraction.", "WARNING")
|
||||
# For multiple polygons, it will have multiple rows per polygon if ID is not unique
|
||||
# We need to aggregate per polygon, then per class if not already handled by exact_extract style
|
||||
# However, for simplicity here, let's assume terra::extract gives one value per polygon for the mean
|
||||
# This part of fallback might need more robust aggregation if polygons are complex
|
||||
r (class_name in unique(classifications$class_name)) {
|
||||
# A more robust terra::extract approach for means per polygon:s[classifications$class_name == class_name, ]
|
||||
extracted_values_list <- terra::extract(extraction_rasters[[i]], class_vect_fb, fun = mean, na.rm = TRUE, ID = FALSE)
|
||||
# extracted_values_list will be a data.frame with one column (the layer) and rows corresponding to polygons
|
||||
|
||||
if (nrow(extracted_values_list) > 0 && ncol(extracted_values_list) > 0) {r (i in seq_along(extraction_rasters)) {
|
||||
# Average over all polygons in this class for this rastertraction_rasters)[i]
|
||||
mean_val_for_class <- mean(extracted_values_list[[1]], na.rm = TRUE)ct(extraction_rasters[[i]], class_vect)
|
||||
|
||||
if (!is.na(mean_val_for_class)) {
|
||||
stats_row <- data.frame(
|
||||
class_name = class_name_fb, # Using class_name as the identifier here
|
||||
parameter = paste0(raster_name, "_mean"),
|
||||
value = mean_val_for_class),
|
||||
) TRUE),
|
||||
class_stats <- rbind(class_stats, stats_row) sd = sd(values[,2], na.rm = TRUE),
|
||||
} min = min(values[,2], na.rm = TRUE),
|
||||
}
|
||||
} )
|
||||
}
|
||||
class_stats <- rbind(class_stats, stats)
|
||||
# Save the statistics (if any were generated) }
|
||||
if(nrow(class_stats) > 0) {
|
||||
# Reshape class_stats from long to wide for consistency if needed, or save as is.
|
||||
# For now, save as long format.
|
||||
stats_file <- file.path(diagnostic_dir, "class_spectral_stats_simple_mean_long.csv")
|
||||
write.csv(class_stats, stats_file, row.names = FALSE) stats_file <- file.path(diagnostic_dir, "class_spectral_stats_simple.csv")
|
||||
safe_log(paste("Saved simple MEAN (long format) spectral statistics by class to:", stats_file)) write.csv(class_stats, stats_file, row.names = FALSE)
|
||||
} else {e spectral statistics by class to:", stats_file))
|
||||
safe_log("No statistics generated by fallback method.", "WARNING")
|
||||
}
|
||||
}ve RMarkdown generation
|
||||
|
||||
# Remove RMarkdown generation
|
||||
# safe_log("RMarkdown report generation has been removed as per user request.")
|
||||
NING")
|
||||
} else {}
|
||||
safe_log("No classification polygons file (classes.geojson) found. Skipping spectral analysis.", "WARNING")}, error = function(e) {
|
||||
}cessing or spectral analysis:", e$message), "ERROR")
|
||||
}, error = function(e) {})
|
||||
safe_log(paste("Error in classification polygon processing or spectral analysis:", e$message), "ERROR")
|
||||
}) detection analysis script finished.")
|
||||
|
||||
safe_log("Cloud detection analysis script finished.")# Clean up workspace
|
||||
rm(list = ls())
|
||||
# Clean up workspace
|
||||
rm(list = ls())
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
191
r_app/experiments/delete_cloud_exploratoin.R
Normal file
191
r_app/experiments/delete_cloud_exploratoin.R
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
```r
|
||||
# Cloud detection analysis script
|
||||
|
||||
# Load necessary libraries
|
||||
library(terra)
|
||||
library(exactextractr)
|
||||
library(sf)
|
||||
library(dplyr)
|
||||
library(ggplot2)
|
||||
library(tidyr)
|
||||
library(reshape2)
|
||||
|
||||
# Define file paths (these should be set to your actual file locations)
|
||||
classes_file <- "path/to/classes.geojson"
|
||||
rasters_dir <- "path/to/rasters"
|
||||
diagnostic_dir <- "path/to/diagnostics"
|
||||
|
||||
# Helper function for logging
|
||||
safe_log <- function(message, level = "INFO") {
|
||||
timestamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
|
||||
cat(paste0("[", timestamp, "] [", level, "] ", message, "\n"))
|
||||
}
|
||||
|
||||
# Main processing block
|
||||
# Load classification polygons
|
||||
safe_log(paste("Loading classification polygons from:", classes_file))
|
||||
classifications <- sf::st_read(classes_file, quiet = TRUE)
|
||||
# Ensure the CRS is set (assuming WGS84 here, adjust if necessary)
|
||||
safe_log("No CRS found for the classifications. Setting to WGS84 (EPSG:4326).", "WARNING")
|
||||
sf::st_crs(classifications) <- 4326
|
||||
# List all raster files in the directory
|
||||
raster_files <- list.files(rasters_dir, pattern = "\\.tif$", full.names = TRUE)
|
||||
# Create a named vector for extraction_rasters based on base names
|
||||
extraction_rasters <- setNames(raster_files, tools::file_path_sans_ext(basename(raster_files)))
|
||||
# Create a stack of all rasters
|
||||
extraction_stack <- terra::rast(extraction_rasters)
|
||||
# User-provided simplified extraction for mean statistics per polygon
|
||||
safe_log("Extracting mean statistics per polygon using exactextractr...")
|
||||
all_stats <- cbind(
|
||||
classifications,
|
||||
round(exactextractr::exact_extract(extraction_stack, classifications, fun = "mean", progress = FALSE), 2)
|
||||
) %>%
|
||||
sf::st_drop_geometry() # Ensures all_stats is a data frame
|
||||
# Ensure 'class_name' column exists, if not, use 'class' as 'class_name'
|
||||
all_stats$class_name <- all_stats$class
|
||||
|
||||
# Save the extracted statistics to a CSV file
|
||||
stats_file <- file.path(diagnostic_dir, "polygon_mean_spectral_stats.csv")
|
||||
write.csv(all_stats, stats_file, row.names = FALSE)
|
||||
|
||||
|
||||
|
||||
safe_log(paste("Saved mean spectral statistics per polygon to:", stats_file))
|
||||
# Calculate optimized thresholds for cloud/shadow detection
|
||||
threshold_results <- data.frame(
|
||||
parameter = character(),
|
||||
best_threshold = numeric(),
|
||||
direction = character(),
|
||||
target_class = character(),
|
||||
vs_class = character(),
|
||||
accuracy = numeric(),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
class_pairs <- list(
|
||||
c("cloud", "crop"),
|
||||
c("cloud", "bare_soil_dry"),
|
||||
c("cloud", "bare_soil_wet"),
|
||||
c("shadow_over_crop", "crop"),
|
||||
c("shadow_over_bare_soil", "bare_soil_dry"),
|
||||
c("shadow_over_bare_soil", "bare_soil_wet")
|
||||
)
|
||||
cloud_detection_params_for_threshold <- intersect(
|
||||
c("mean.brightness", "mean.very_bright_pixels", "mean.blue_dominant", "mean.low_ndvi", "mean.green_dominant_nir", "mean.high_ndwi", "mean.blue_ratio", "mean.ndvi"),
|
||||
colnames(all_stats)
|
||||
)
|
||||
shadow_detection_params_for_threshold <- intersect(
|
||||
c("mean.brightness", "mean.dark_pixels", "mean.very_dark_pixels", "mean.low_nir", "mean.shadow_ndvi", "mean.low_red_to_blue", "mean.high_blue_to_nir_ratio", "mean.blue_nir_ratio_raw", "mean.red_blue_ratio_raw"),
|
||||
colnames(all_stats)
|
||||
)
|
||||
for (pair in class_pairs) {
|
||||
target_class <- pair[1]
|
||||
vs_class <- pair[2]
|
||||
params_to_check <- c(cloud_detection_params_for_threshold, shadow_detection_params_for_threshold)
|
||||
for (param in params_to_check) {
|
||||
target_values <- all_stats[all_stats$class_name == target_class, param]
|
||||
vs_values <- all_stats[all_stats$class_name == vs_class, param]
|
||||
target_values <- target_values[!is.na(target_values)]
|
||||
vs_values <- vs_values[!is.na(vs_values)]
|
||||
# Only proceed if both groups have at least one value
|
||||
if (length(target_values) > 0 && length(vs_values) > 0) {
|
||||
target_mean <- mean(target_values)
|
||||
target_sd <- sd(target_values)
|
||||
vs_mean <- mean(vs_values)
|
||||
vs_sd <- sd(vs_values)
|
||||
target_sd[is.na(target_sd)] <- 0
|
||||
vs_sd[is.na(vs_sd)] <- 0
|
||||
direction <- ifelse(target_mean > vs_mean, ">", "<")
|
||||
all_values <- c(target_values, vs_values)
|
||||
min_val <- min(all_values)
|
||||
max_val <- max(all_values)
|
||||
# Only proceed if min and max are finite and not equal
|
||||
if (is.finite(min_val) && is.finite(max_val) && min_val != max_val) {
|
||||
potential_thresholds <- seq(min_val, max_val, length.out = 20)
|
||||
best_accuracy <- -1
|
||||
best_threshold <- ifelse(direction == ">", min(potential_thresholds), max(potential_thresholds))
|
||||
for (threshold in potential_thresholds) {
|
||||
if (direction == ">") {
|
||||
correct_target <- sum(target_values > threshold)
|
||||
correct_vs <- sum(vs_values <= threshold)
|
||||
} else {
|
||||
correct_target <- sum(target_values < threshold)
|
||||
correct_vs <- sum(vs_values >= threshold)
|
||||
}
|
||||
accuracy <- (correct_target + correct_vs) / (length(target_values) + length(vs_values))
|
||||
if (accuracy > best_accuracy) {
|
||||
best_accuracy <- accuracy
|
||||
best_threshold <- threshold
|
||||
}
|
||||
}
|
||||
threshold_results <- rbind(threshold_results, data.frame(
|
||||
parameter = param,
|
||||
best_threshold = best_threshold,
|
||||
direction = direction,
|
||||
target_class = target_class,
|
||||
vs_class = vs_class,
|
||||
accuracy = best_accuracy,
|
||||
stringsAsFactors = FALSE
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
thresholds_file <- file.path(diagnostic_dir, "optimal_thresholds.csv")
|
||||
write.csv(threshold_results, thresholds_file, row.names = FALSE)
|
||||
|
||||
safe_log(paste("Saved optimal threshold recommendations to:", thresholds_file))
|
||||
|
||||
# Fix: get plot_measure_cols by matching raster base names to all_stats columns with 'mean.' prefix
|
||||
plot_measure_cols <- intersect(names(extraction_rasters), gsub('^mean\\.', '', colnames(all_stats)))
|
||||
plot_data <- reshape2::melt(
|
||||
all_stats,
|
||||
id.vars = c("class", "class_name"),
|
||||
measure.vars = paste0("mean.", plot_measure_cols),
|
||||
variable.name = "parameter",
|
||||
value.name = "value"
|
||||
)
|
||||
# Remove 'mean.' prefix from parameter column for clarity
|
||||
plot_data$parameter <- sub("^mean\\.", "", plot_data$parameter)
|
||||
|
||||
plots_dir <- file.path(diagnostic_dir, "class_plots")
|
||||
dir.create(plots_dir, showWarnings = FALSE, recursive = TRUE)
|
||||
key_params_for_plot_list <- c("brightness", "ndvi", "blue_ratio", "ndwi",
|
||||
"blue_nir_ratio_raw", "red_blue_ratio_raw")
|
||||
key_params_to_plot <- intersect(key_params_for_plot_list, plot_measure_cols)
|
||||
for (param_to_plot in key_params_to_plot) {
|
||||
param_data_subset <- plot_data[plot_data$parameter == param_to_plot, ]
|
||||
p <- ggplot2::ggplot(param_data_subset, ggplot2::aes(x = class_name, y = value, fill = class_name)) +
|
||||
ggplot2::geom_boxplot() +
|
||||
ggplot2::theme_minimal() +
|
||||
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
|
||||
ggplot2::labs(
|
||||
title = paste("Distribution of", param_to_plot, "by Land Cover Class"),
|
||||
x = "Class",
|
||||
y = param_to_plot,
|
||||
fill = "Class"
|
||||
)
|
||||
plot_file <- file.path(plots_dir, paste0("boxplot_", param_to_plot, ".png"))
|
||||
ggplot2::ggsave(plot_file, p, width = 10, height = 6, dpi = 150)
|
||||
}
|
||||
summary_params_for_plot_list <- c("brightness", "ndvi",
|
||||
"blue_nir_ratio_raw", "red_blue_ratio_raw")
|
||||
summary_params_to_plot <- intersect(summary_params_for_plot_list, plot_measure_cols)
|
||||
summary_data_subset <- plot_data[plot_data$parameter %in% summary_params_to_plot,]
|
||||
p_summary <- ggplot2::ggplot(summary_data_subset, ggplot2::aes(x = class_name, y = value, fill = class_name)) +
|
||||
ggplot2::geom_boxplot() +
|
||||
ggplot2::facet_wrap(~parameter, scales = "free_y") +
|
||||
ggplot2::theme_minimal() +
|
||||
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
|
||||
strip.text = ggplot2::element_text(size = 8)) +
|
||||
ggplot2::labs(
|
||||
title = "Summary of Key Spectral Parameters by Land Cover Class",
|
||||
x = "Class",
|
||||
y = "Value",
|
||||
fill = "Class"
|
||||
)
|
||||
summary_file <- file.path(plots_dir, "spectral_parameters_summary.png")
|
||||
ggplot2::ggsave(summary_file, p_summary, width = 12, height = 8, dpi = 150)
|
||||
safe_log(paste("Generated spectral parameter plots in:", plots_dir))
|
||||
safe_log("Cloud detection analysis script finished.")
|
||||
```
|
||||
|
|
@ -0,0 +1,718 @@
|
|||
---
|
||||
params:
|
||||
ref: "word-styles-reference-var1.docx"
|
||||
output_file: CI_report.docx
|
||||
report_date: "2024-08-28"
|
||||
data_dir: "Chemba"
|
||||
mail_day: "Wednesday"
|
||||
borders: TRUE
|
||||
use_breaks: FALSE
|
||||
output:
|
||||
# html_document:
|
||||
# toc: yes
|
||||
# df_print: paged
|
||||
word_document:
|
||||
reference_docx: !expr file.path("word-styles-reference-var1.docx")
|
||||
toc: yes
|
||||
editor_options:
|
||||
chunk_output_type: console
|
||||
---
|
||||
|
||||
```{r setup_parameters, include=FALSE}
|
||||
# Set up basic report parameters from input values
|
||||
report_date <- params$report_date
|
||||
mail_day <- params$mail_day
|
||||
borders <- params$borders
|
||||
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
|
||||
|
||||
# Environment setup notes (commented out)
|
||||
# # Activeer de renv omgeving
|
||||
# renv::activate()
|
||||
# renv::deactivate()
|
||||
# # Optioneel: Herstel de omgeving als dat nodig is
|
||||
# # Je kunt dit commentaar geven als je het normaal niet wilt uitvoeren
|
||||
# renv::restore()
|
||||
```
|
||||
|
||||
```{r load_libraries, message=FALSE, warning=FALSE, include=FALSE}
|
||||
# Configure knitr options
|
||||
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
|
||||
|
||||
# Path management
|
||||
library(here)
|
||||
|
||||
# Spatial data libraries
|
||||
library(sf)
|
||||
library(terra)
|
||||
library(exactextractr)
|
||||
# library(raster) - Removed as it's no longer maintained
|
||||
|
||||
# Data manipulation and visualization
|
||||
library(tidyverse) # Includes dplyr, ggplot2, etc.
|
||||
library(tmap)
|
||||
library(lubridate)
|
||||
library(zoo)
|
||||
|
||||
# Machine learning
|
||||
library(rsample)
|
||||
library(caret)
|
||||
library(randomForest)
|
||||
library(CAST)
|
||||
|
||||
# Load custom utility functions
|
||||
tryCatch({
|
||||
source("report_utils.R")
|
||||
}, error = function(e) {
|
||||
message(paste("Error loading report_utils.R:", e$message))
|
||||
# Try alternative path if the first one fails
|
||||
tryCatch({
|
||||
source(here::here("r_app", "report_utils.R"))
|
||||
}, error = function(e) {
|
||||
stop("Could not load report_utils.R from either location: ", e$message)
|
||||
})
|
||||
})
|
||||
|
||||
# Load executive report utilities
|
||||
tryCatch({
|
||||
source("executive_report_utils.R")
|
||||
}, error = function(e) {
|
||||
message(paste("Error loading executive_report_utils.R:", e$message))
|
||||
# Try alternative path if the first one fails
|
||||
tryCatch({
|
||||
source(here::here("r_app", "executive_report_utils.R"))
|
||||
}, error = function(e) {
|
||||
stop("Could not load executive_report_utils.R from either location: ", e$message)
|
||||
})
|
||||
})
|
||||
|
||||
safe_log("Successfully loaded utility functions")
|
||||
```
|
||||
|
||||
```{r initialize_project_config, message=FALSE, warning=FALSE, include=FALSE}
|
||||
# Set the project directory from parameters
|
||||
project_dir <- params$data_dir
|
||||
|
||||
# Source project parameters with error handling
|
||||
tryCatch({
|
||||
source(here::here("r_app", "parameters_project.R"))
|
||||
}, error = function(e) {
|
||||
stop("Error loading parameters_project.R: ", e$message)
|
||||
})
|
||||
|
||||
# Log initial configuration
|
||||
safe_log("Starting the R Markdown script")
|
||||
safe_log(paste("mail_day params:", params$mail_day))
|
||||
safe_log(paste("report_date params:", params$report_date))
|
||||
safe_log(paste("mail_day variable:", mail_day))
|
||||
```
|
||||
|
||||
```{r calculate_dates_and_weeks, message=FALSE, warning=FALSE, include=FALSE}
|
||||
# Set locale for consistent date formatting
|
||||
Sys.setlocale("LC_TIME", "C")
|
||||
|
||||
# Initialize date variables from parameters
|
||||
today <- as.character(report_date)
|
||||
mail_day_as_character <- as.character(mail_day)
|
||||
|
||||
# Calculate week days
|
||||
report_date_as_week_day <- weekdays(lubridate::ymd(today))
|
||||
days_of_week <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
|
||||
|
||||
# Calculate initial week number
|
||||
week <- lubridate::week(today)
|
||||
safe_log(paste("Initial week calculation:", week, "today:", today))
|
||||
|
||||
# Calculate previous dates for comparisons
|
||||
today_minus_1 <- as.character(lubridate::ymd(today) - 7)
|
||||
today_minus_2 <- as.character(lubridate::ymd(today) - 14)
|
||||
today_minus_3 <- as.character(lubridate::ymd(today) - 21)
|
||||
|
||||
# Log the weekday calculations for debugging
|
||||
safe_log(paste("Report date weekday:", report_date_as_week_day))
|
||||
safe_log(paste("Weekday index:", which(days_of_week == report_date_as_week_day)))
|
||||
safe_log(paste("Mail day:", mail_day_as_character))
|
||||
safe_log(paste("Mail day index:", which(days_of_week == mail_day_as_character)))
|
||||
|
||||
# Adjust week calculation based on mail day
|
||||
if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_day_as_character)) {
|
||||
safe_log("Adjusting weeks because of mail day")
|
||||
week <- lubridate::week(today) + 1
|
||||
today_minus_1 <- as.character(lubridate::ymd(today))
|
||||
today_minus_2 <- as.character(lubridate::ymd(today) - 7)
|
||||
today_minus_3 <- as.character(lubridate::ymd(today) - 14)
|
||||
}
|
||||
|
||||
# Generate subtitle for report
|
||||
subtitle_var <- paste("Report generated on", Sys.Date())
|
||||
|
||||
# Calculate week numbers for previous weeks
|
||||
week_minus_1 <- week - 1
|
||||
week_minus_2 <- week - 2
|
||||
week_minus_3 <- week - 3
|
||||
|
||||
# Format current week with leading zeros
|
||||
week <- sprintf("%02d", week)
|
||||
|
||||
# Get years for each date
|
||||
year <- lubridate::year(today)
|
||||
year_1 <- lubridate::year(today_minus_1)
|
||||
year_2 <- lubridate::year(today_minus_2)
|
||||
year_3 <- lubridate::year(today_minus_3)
|
||||
```
|
||||
|
||||
```{r data, message=TRUE, warning=TRUE, include=FALSE}
|
||||
# Load CI index data with error handling
|
||||
tryCatch({
|
||||
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
|
||||
safe_log("Successfully loaded CI quadrant data")
|
||||
}, error = function(e) {
|
||||
stop("Error loading CI quadrant data: ", e$message)
|
||||
})
|
||||
|
||||
# Get file paths for different weeks using the utility function
|
||||
tryCatch({
|
||||
path_to_week_current = get_week_path(weekly_CI_mosaic, today, 0)
|
||||
path_to_week_minus_1 = get_week_path(weekly_CI_mosaic, today, -1)
|
||||
path_to_week_minus_2 = get_week_path(weekly_CI_mosaic, today, -2)
|
||||
path_to_week_minus_3 = get_week_path(weekly_CI_mosaic, today, -3)
|
||||
|
||||
# Log the calculated paths
|
||||
safe_log("Required mosaic paths:")
|
||||
safe_log(paste("Path to current week:", path_to_week_current))
|
||||
safe_log(paste("Path to week minus 1:", path_to_week_minus_1))
|
||||
safe_log(paste("Path to week minus 2:", path_to_week_minus_2))
|
||||
safe_log(paste("Path to week minus 3:", path_to_week_minus_3))
|
||||
|
||||
# Validate that files exist
|
||||
if (!file.exists(path_to_week_current)) warning("Current week mosaic file does not exist: ", path_to_week_current)
|
||||
if (!file.exists(path_to_week_minus_1)) warning("Week minus 1 mosaic file does not exist: ", path_to_week_minus_1)
|
||||
if (!file.exists(path_to_week_minus_2)) warning("Week minus 2 mosaic file does not exist: ", path_to_week_minus_2)
|
||||
if (!file.exists(path_to_week_minus_3)) warning("Week minus 3 mosaic file does not exist: ", path_to_week_minus_3)
|
||||
|
||||
# Load raster data with terra functions
|
||||
CI <- terra::rast(path_to_week_current)$CI
|
||||
CI_m1 <- terra::rast(path_to_week_minus_1)$CI
|
||||
CI_m2 <- terra::rast(path_to_week_minus_2)$CI
|
||||
CI_m3 <- terra::rast(path_to_week_minus_3)$CI
|
||||
|
||||
}, error = function(e) {
|
||||
stop("Error loading raster data: ", e$message)
|
||||
})
|
||||
```
|
||||
|
||||
```{r calculate_difference_rasters, message=TRUE, warning=TRUE, include=FALSE}
|
||||
# Calculate difference rasters for comparisons
|
||||
tryCatch({
|
||||
# Calculate weekly difference
|
||||
last_week_dif_raster_abs <- (CI - CI_m1)
|
||||
safe_log("Calculated weekly difference raster")
|
||||
|
||||
# Calculate three-week difference
|
||||
three_week_dif_raster_abs <- (CI - CI_m3)
|
||||
safe_log("Calculated three-week difference raster")
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error calculating difference rasters:", e$message), "ERROR")
|
||||
# Create placeholder rasters if calculations fail
|
||||
if (!exists("last_week_dif_raster_abs")) {
|
||||
last_week_dif_raster_abs <- CI * 0
|
||||
}
|
||||
if (!exists("three_week_dif_raster_abs")) {
|
||||
three_week_dif_raster_abs <- CI * 0
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE}
|
||||
# Load field boundaries from parameters
|
||||
tryCatch({
|
||||
AllPivots0 <- field_boundaries_sf
|
||||
safe_log("Successfully loaded field boundaries")
|
||||
}, error = function(e) {
|
||||
stop("Error loading field boundaries: ", e$message)
|
||||
})
|
||||
```
|
||||
|
||||
```{r create_farm_health_data, message=FALSE, warning=FALSE, include=FALSE}
|
||||
# Create farm health summary data from scratch
|
||||
tryCatch({
|
||||
# Ensure we have the required data
|
||||
if (!exists("AllPivots0") || !exists("CI") || !exists("CI_m1") || !exists("harvesting_data")) {
|
||||
stop("Required input data (field boundaries, CI data, or harvesting data) not available")
|
||||
}
|
||||
|
||||
safe_log("Starting to calculate farm health data")
|
||||
|
||||
# Get unique field names
|
||||
fields <- unique(AllPivots0$field)
|
||||
safe_log(paste("Found", length(fields), "unique fields"))
|
||||
|
||||
# Initialize result dataframe
|
||||
farm_health_data <- data.frame(
|
||||
field = character(),
|
||||
mean_ci = numeric(),
|
||||
ci_change = numeric(),
|
||||
ci_uniformity = numeric(),
|
||||
status = character(),
|
||||
anomaly_type = character(),
|
||||
priority_level = numeric(),
|
||||
age_weeks = numeric(),
|
||||
harvest_readiness = character(),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
|
||||
# Process each field with robust error handling
|
||||
for (field_name in fields) {
|
||||
tryCatch({
|
||||
safe_log(paste("Processing field:", field_name))
|
||||
|
||||
# Get field boundary
|
||||
field_shape <- AllPivots0 %>% dplyr::filter(field == field_name)
|
||||
|
||||
# Skip if field shape is empty
|
||||
if (nrow(field_shape) == 0) {
|
||||
safe_log(paste("Empty field shape for", field_name), "WARNING")
|
||||
next
|
||||
}
|
||||
|
||||
# Get field age from harvesting data - use direct filtering to avoid dplyr errors
|
||||
field_age_data <- NULL
|
||||
if (exists("harvesting_data") && !is.null(harvesting_data) && nrow(harvesting_data) > 0) {
|
||||
field_age_data <- harvesting_data[harvesting_data$field == field_name, ]
|
||||
if (nrow(field_age_data) > 0) {
|
||||
field_age_data <- field_age_data[order(field_age_data$season_start, decreasing = TRUE), ][1, ]
|
||||
}
|
||||
}
|
||||
|
||||
# Default age if not available
|
||||
field_age_weeks <- if (!is.null(field_age_data) && nrow(field_age_data) > 0 && !is.na(field_age_data$age)) {
|
||||
field_age_data$age
|
||||
} else {
|
||||
10 # Default age
|
||||
}
|
||||
|
||||
# Extract CI values using terra's extract function which is more robust
|
||||
ci_values <- terra::extract(CI, field_shape)
|
||||
ci_prev_values <- terra::extract(CI_m1, field_shape)
|
||||
|
||||
# Check if we got valid data
|
||||
if (nrow(ci_values) == 0 || nrow(ci_prev_values) == 0) {
|
||||
safe_log(paste("No CI data extracted for field", field_name), "WARNING")
|
||||
# Add a placeholder row with Unknown status
|
||||
farm_health_data <- rbind(farm_health_data, data.frame(
|
||||
field = field_name,
|
||||
mean_ci = NA,
|
||||
ci_change = NA,
|
||||
ci_uniformity = NA,
|
||||
status = "Unknown",
|
||||
anomaly_type = "Unknown",
|
||||
priority_level = 5, # Low priority
|
||||
age_weeks = field_age_weeks,
|
||||
harvest_readiness = "Unknown",
|
||||
stringsAsFactors = FALSE
|
||||
))
|
||||
next
|
||||
}
|
||||
|
||||
# Calculate metrics - Handle NA values properly
|
||||
ci_column <- if ("CI" %in% names(ci_values)) "CI" else colnames(ci_values)[1]
|
||||
ci_prev_column <- if ("CI" %in% names(ci_prev_values)) "CI" else colnames(ci_prev_values)[1]
|
||||
|
||||
mean_ci <- mean(ci_values[[ci_column]], na.rm=TRUE)
|
||||
mean_ci_prev <- mean(ci_prev_values[[ci_prev_column]], na.rm=TRUE)
|
||||
ci_change <- mean_ci - mean_ci_prev
|
||||
ci_sd <- sd(ci_values[[ci_column]], na.rm=TRUE)
|
||||
ci_uniformity <- ci_sd / max(0.1, mean_ci) # Avoid division by zero
|
||||
|
||||
# Handle NaN or Inf results
|
||||
if (is.na(mean_ci) || is.na(ci_change) || is.na(ci_uniformity) ||
|
||||
is.nan(mean_ci) || is.nan(ci_change) || is.nan(ci_uniformity) ||
|
||||
is.infinite(mean_ci) || is.infinite(ci_change) || is.infinite(ci_uniformity)) {
|
||||
safe_log(paste("Invalid calculation results for field", field_name), "WARNING")
|
||||
# Add a placeholder row with Unknown status
|
||||
farm_health_data <- rbind(farm_health_data, data.frame(
|
||||
field = field_name,
|
||||
mean_ci = NA,
|
||||
ci_change = NA,
|
||||
ci_uniformity = NA,
|
||||
status = "Unknown",
|
||||
anomaly_type = "Unknown",
|
||||
priority_level = 5, # Low priority
|
||||
age_weeks = field_age_weeks,
|
||||
harvest_readiness = "Unknown",
|
||||
stringsAsFactors = FALSE
|
||||
))
|
||||
next
|
||||
}
|
||||
|
||||
# Determine field status
|
||||
status <- dplyr::case_when(
|
||||
mean_ci >= 5 ~ "Excellent",
|
||||
mean_ci >= 3.5 ~ "Good",
|
||||
mean_ci >= 2 ~ "Fair",
|
||||
mean_ci >= 1 ~ "Poor",
|
||||
TRUE ~ "Critical"
|
||||
)
|
||||
|
||||
# Determine anomaly type
|
||||
anomaly_type <- dplyr::case_when(
|
||||
ci_change > 2 ~ "Potential Weed Growth",
|
||||
ci_change < -2 ~ "Potential Weeding/Harvesting",
|
||||
ci_uniformity > 0.5 ~ "High Variability",
|
||||
mean_ci < 1 ~ "Low Vigor",
|
||||
TRUE ~ "None"
|
||||
)
|
||||
|
||||
# Calculate priority level (1-5, with 1 being highest priority)
|
||||
priority_score <- dplyr::case_when(
|
||||
mean_ci < 1 ~ 1, # Critical - highest priority
|
||||
anomaly_type == "Potential Weed Growth" ~ 2,
|
||||
anomaly_type == "High Variability" ~ 3,
|
||||
ci_change < -1 ~ 4,
|
||||
TRUE ~ 5 # No urgent issues
|
||||
)
|
||||
|
||||
# Determine harvest readiness
|
||||
harvest_readiness <- dplyr::case_when(
|
||||
field_age_weeks >= 52 & mean_ci >= 4 ~ "Ready for harvest",
|
||||
field_age_weeks >= 48 & mean_ci >= 3.5 ~ "Approaching harvest",
|
||||
field_age_weeks >= 40 & mean_ci >= 3 ~ "Mid-maturity",
|
||||
field_age_weeks >= 12 ~ "Growing",
|
||||
TRUE ~ "Early stage"
|
||||
)
|
||||
|
||||
# Add to summary data
|
||||
farm_health_data <- rbind(farm_health_data, data.frame(
|
||||
field = field_name,
|
||||
mean_ci = round(mean_ci, 2),
|
||||
ci_change = round(ci_change, 2),
|
||||
ci_uniformity = round(ci_uniformity, 2),
|
||||
status = status,
|
||||
anomaly_type = anomaly_type,
|
||||
priority_level = priority_score,
|
||||
age_weeks = field_age_weeks,
|
||||
harvest_readiness = harvest_readiness,
|
||||
stringsAsFactors = FALSE
|
||||
))
|
||||
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error processing field", field_name, ":", e$message), "ERROR")
|
||||
# Add a placeholder row with Error status
|
||||
farm_health_data <<- rbind(farm_health_data, data.frame(
|
||||
field = field_name,
|
||||
mean_ci = NA,
|
||||
ci_change = NA,
|
||||
ci_uniformity = NA,
|
||||
status = "Unknown",
|
||||
anomaly_type = "Unknown",
|
||||
priority_level = 5, # Low priority since we don't know the status
|
||||
age_weeks = NA,
|
||||
harvest_readiness = "Unknown",
|
||||
stringsAsFactors = FALSE
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
# Make sure we have data for all fields
|
||||
if (nrow(farm_health_data) == 0) {
|
||||
safe_log("No farm health data was created", "ERROR")
|
||||
stop("Failed to create farm health data")
|
||||
}
|
||||
|
||||
# Sort by priority level
|
||||
farm_health_data <- farm_health_data %>% dplyr::arrange(priority_level, field)
|
||||
|
||||
safe_log(paste("Successfully created farm health data for", nrow(farm_health_data), "fields"))
|
||||
|
||||
}, error = function(e) {
|
||||
safe_log(paste("Error creating farm health data:", e$message), "ERROR")
|
||||
# Create an empty dataframe that can be filled by the verification chunk
|
||||
})
|
||||
```
|
||||
|
||||
```{r verify_farm_health_data, message=FALSE, warning=FALSE, include=FALSE}
|
||||
# Confirm farm_health_data is available before later report sections use it.
# If the generation step failed (object missing or zero rows), build a
# placeholder table so downstream chunks can still render.
if (!exists("farm_health_data") || nrow(farm_health_data) == 0) {
  safe_log("farm_health_data not found or empty, generating default data", "WARNING")

  tryCatch({
    # Derive the field list from the boundary data and fill every metric
    # column with a neutral "unknown" value.
    field_names <- unique(AllPivots0$field)
    n_fields <- length(field_names)

    farm_health_data <- data.frame(
      field = field_names,
      mean_ci = rep(NA, n_fields),
      ci_change = rep(NA, n_fields),
      ci_uniformity = rep(NA, n_fields),
      status = rep("Unknown", n_fields),
      anomaly_type = rep("Unknown", n_fields),
      priority_level = rep(5, n_fields), # Low priority
      age_weeks = rep(NA, n_fields),
      harvest_readiness = rep("Unknown", n_fields),
      stringsAsFactors = FALSE
    )

    safe_log("Created fallback farm_health_data with basic field information")
  }, error = function(e) {
    safe_log(paste("Error creating fallback farm_health_data:", e$message), "ERROR")
    # Last resort: an empty, correctly-typed frame. Handlers evaluate in
    # their own frame, so `<<-` is needed to reach the report environment.
    farm_health_data <<- data.frame(
      field = character(),
      mean_ci = numeric(),
      ci_change = numeric(),
      ci_uniformity = numeric(),
      status = character(),
      anomaly_type = character(),
      priority_level = numeric(),
      age_weeks = numeric(),
      harvest_readiness = character(),
      stringsAsFactors = FALSE
    )
  })
}
|
||||
```
|
||||
|
||||
```{r calculate_farm_health, message=FALSE, warning=FALSE, include=FALSE}
|
||||
# Calculate farm health summary metrics.
tryCatch({
  # Generate the per-field farm health summary from the current and
  # previous CI rasters plus the harvesting schedule.
  farm_health_data <- generate_farm_health_summary(
    field_boundaries = AllPivots0,
    ci_current = CI,
    ci_previous = CI_m1,
    harvesting_data = harvesting_data
  )

  # Log the summary data
  safe_log(paste("Generated farm health summary with", nrow(farm_health_data), "fields"))

}, error = function(e) {
  safe_log(paste("Error in farm health calculation:", e$message), "ERROR")
  # BUG FIX: this fallback previously used `<-`, which bound the empty
  # frame only inside the handler's own environment and discarded it, so a
  # failed calculation left farm_health_data undefined for later chunks.
  # `<<-` assigns into the report environment, matching the verification
  # chunk's handler.
  farm_health_data <<- data.frame(
    field = character(),
    mean_ci = numeric(),
    ci_change = numeric(),
    ci_uniformity = numeric(),
    status = character(),
    anomaly_type = character(),
    priority_level = numeric(),
    age_weeks = numeric(),
    harvest_readiness = character(),
    stringsAsFactors = FALSE
  )
})
|
||||
```
|
||||
|
||||
```{r advanced_analytics_functions, message=FALSE, warning=FALSE, include=FALSE}
|
||||
# ADVANCED ANALYTICS FUNCTIONS
|
||||
# Note: These functions are now imported from executive_report_utils.R
|
||||
# The utility file contains functions for velocity/acceleration indicators,
|
||||
# anomaly timeline creation, age cohort mapping, and cohort performance charts
|
||||
safe_log("Using analytics functions from executive_report_utils.R")
|
||||
```
|
||||
|
||||
\pagebreak
|
||||
# Advanced Analytics
|
||||
|
||||
## Field Health Velocity and Acceleration
|
||||
|
||||
This visualization shows the rate of change in field health (velocity) and whether that change is speeding up or slowing down (acceleration). These metrics help identify if farm conditions are improving, stable, or deteriorating.
|
||||
|
||||
**How to interpret:**
|
||||
- **Velocity gauge:** Shows the average weekly change in CI values across all fields
|
||||
- Positive values (green/right side): Farm health improving week-to-week
|
||||
- Negative values (red/left side): Farm health declining week-to-week
|
||||
|
||||
- **Acceleration gauge:** Shows whether the rate of change is increasing or decreasing
|
||||
- Positive values (green/right side): Change is accelerating or improving faster
|
||||
- Negative values (red/left side): Change is decelerating or slowing down
|
||||
|
||||
- **4-Week Trend:** Shows the overall CI value trajectory for the past month
|
||||
|
||||
```{r render_velocity_acceleration, echo=FALSE, fig.height=8, fig.width=10, message=FALSE, warning=FALSE}
|
||||
# Build and print the farm-wide velocity/acceleration dashboard, then list
# the fields with the largest week-over-week CI movements.
tryCatch({
  # Gauges + 4-week trend come from the shared utility function.
  velocity_plot <- create_velocity_acceleration_indicator(
    health_data = farm_health_data,
    ci_current = CI,
    ci_prev1 = CI_m1,
    ci_prev2 = CI_m2,
    ci_prev3 = CI_m3,
    field_boundaries = AllPivots0
  )

  print(velocity_plot)

  # Per-field velocity: mean CI this week minus mean CI last week,
  # collected into a list keyed by field name.
  field_ci_metrics <- list()
  fields <- unique(AllPivots0$field)
  for (field_name in fields) {
    tryCatch({
      field_shape <- AllPivots0 %>% dplyr::filter(field == field_name)
      if (nrow(field_shape) == 0) next

      # Sample both weekly rasters inside the field boundary.
      curr_vals <- terra::extract(CI, field_shape)
      prev_vals <- terra::extract(CI_m1, field_shape)

      curr_mean <- mean(curr_vals$CI, na.rm = TRUE)
      prev_mean <- mean(prev_vals$CI, na.rm = TRUE)

      field_ci_metrics[[field_name]] <- list(
        field = field_name,
        ci_current = curr_mean,
        ci_prev1 = prev_mean,
        velocity = curr_mean - prev_mean
      )

    }, error = function(e) {
      safe_log(paste("Error processing field", field_name, "for velocity table:", e$message), "WARNING")
    })
  }

  # Flatten the per-field list into a table with a readable direction label.
  velocity_df <- do.call(rbind, lapply(field_ci_metrics, function(m) {
    data.frame(
      field = m$field,
      ci_current = round(m$ci_current, 2),
      ci_prev1 = round(m$ci_prev1, 2),
      velocity = round(m$velocity, 2),
      direction = ifelse(m$velocity >= 0, "Improving", "Declining")
    )
  }))

  # Five fastest improvers and five fastest decliners.
  top_positive <- velocity_df %>%
    dplyr::filter(velocity > 0) %>%
    dplyr::arrange(desc(velocity)) %>%
    dplyr::slice_head(n = 5)

  top_negative <- velocity_df %>%
    dplyr::filter(velocity < 0) %>%
    dplyr::arrange(velocity) %>%
    dplyr::slice_head(n = 5)

  if (nrow(top_positive) > 0) {
    cat("<h4>Fields with Fastest Improvement</h4>")
    knitr::kable(top_positive %>%
                   dplyr::select(Field = field,
                                 `Current CI` = ci_current,
                                 `Previous CI` = ci_prev1,
                                 `Weekly Change` = velocity))
  }

  if (nrow(top_negative) > 0) {
    cat("<h4>Fields with Fastest Decline</h4>")
    knitr::kable(top_negative %>%
                   dplyr::select(Field = field,
                                 `Current CI` = ci_current,
                                 `Previous CI` = ci_prev1,
                                 `Weekly Change` = velocity))
  }

}, error = function(e) {
  safe_log(paste("Error rendering velocity visualization:", e$message), "ERROR")
  cat("<div class='alert alert-danger'>Error generating velocity visualization.</div>")
})
|
||||
```
|
||||
|
||||
\pagebreak
|
||||
## Field Anomaly Timeline
|
||||
|
||||
This visualization shows the history of detected anomalies in fields across the monitoring period. It helps identify persistent issues or improvements over time.
|
||||
|
||||
**How to interpret:**
|
||||
- **X-axis**: Dates of satellite observations
|
||||
- **Y-axis**: Fields grouped by similar characteristics
|
||||
- **Colors**: Red indicates negative anomalies, green indicates positive anomalies
|
||||
- **Size**: Larger markers indicate stronger anomalies
|
||||
|
||||
```{r anomaly_timeline, echo=FALSE, fig.height=8, fig.width=10, message=FALSE, warning=FALSE}
|
||||
# Build and display the field anomaly timeline.
tryCatch({
  # The timeline comes from the shared executive_report_utils helper.
  anomaly_timeline <- create_anomaly_timeline(
    field_boundaries = AllPivots0,
    ci_data = CI_quadrant,
    days_to_include = 90 # show the most recent 90 days of data
  )

  print(anomaly_timeline)

}, error = function(e) {
  safe_log(paste("Error generating anomaly timeline:", e$message), "ERROR")
  cat("<div class='alert alert-danger'>Error generating anomaly timeline visualization.</div>")
})
|
||||
```
|
||||
|
||||
\pagebreak
|
||||
## Field Age Cohorts Map
|
||||
|
||||
This map shows fields grouped by their crop age (weeks since planting). Understanding the distribution of crop ages helps interpret performance metrics and plan harvest scheduling.
|
||||
|
||||
**How to interpret:**
|
||||
- **Colors**: Different colors represent different age groups (in weeks since planting)
|
||||
- **Labels**: Each field is labeled with its name for easy reference
|
||||
- **Legend**: Shows the age ranges in weeks and their corresponding colors
|
||||
|
||||
```{r age_cohort_map, echo=FALSE, fig.height=8, fig.width=10, message=FALSE, warning=FALSE}
|
||||
# Build and display the field age-cohort map.
tryCatch({
  # The map comes from the shared executive_report_utils helper.
  age_cohort_map <- create_age_cohort_map(
    field_boundaries = AllPivots0,
    harvesting_data = harvesting_data
  )

  print(age_cohort_map)

}, error = function(e) {
  safe_log(paste("Error generating age cohort map:", e$message), "ERROR")
  cat("<div class='alert alert-danger'>Error generating age cohort map visualization.</div>")
})
|
||||
```
|
||||
|
||||
\pagebreak
|
||||
## Cohort Performance Comparison
|
||||
|
||||
This visualization compares chlorophyll index (CI) performance across different age groups of fields. This helps identify if certain age groups are performing better or worse than expected.
|
||||
|
||||
**How to interpret:**
|
||||
- **X-axis**: Field age groups in weeks since planting
|
||||
- **Y-axis**: Average CI value for fields in that age group
|
||||
- **Box plots**: Show the distribution of CI values within each age group
|
||||
- **Line**: Shows the expected CI trajectory based on historical data
|
||||
|
||||
```{r cohort_performance_chart, echo=FALSE, fig.height=8, fig.width=10, message=FALSE, warning=FALSE}
|
||||
# Build and display the cohort performance comparison chart.
tryCatch({
  # The chart comes from the shared executive_report_utils helper.
  cohort_chart <- create_cohort_performance_chart(
    field_boundaries = AllPivots0,
    ci_current = CI,
    harvesting_data = harvesting_data
  )

  print(cohort_chart)

}, error = function(e) {
  safe_log(paste("Error generating cohort performance chart:", e$message), "ERROR")
  cat("<div class='alert alert-danger'>Error generating cohort performance visualization.</div>")
})
|
||||
```
|
||||
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load diff
437
r_app/experiments/executive_summary/executive_report_utils.R
Normal file
437
r_app/experiments/executive_summary/executive_report_utils.R
Normal file
|
|
@ -0,0 +1,437 @@
|
|||
# EXECUTIVE REPORT UTILITIES
|
||||
# This file contains functions for creating advanced visualizations for the executive summary report
|
||||
|
||||
#' Create a velocity and acceleration indicator for CI change
#'
#' Builds a three-panel figure: a gauge of farm-wide weekly CI change
#' (velocity), a gauge of the change in that change (acceleration), and a
#' 4-week trend line of the average CI value.
#'
#' @param health_data Current farm health data (one row per field)
#' @param ci_current Current CI raster
#' @param ci_prev1 CI raster from 1 week ago
#' @param ci_prev2 CI raster from 2 weeks ago
#' @param ci_prev3 CI raster from 3 weeks ago
#' @param field_boundaries Field boundaries spatial data (sf object) with a
#'   `field` column naming each pivot
#' @return A grid object combining the gauges and trend chart, or an
#'   annotated placeholder plot if rendering fails
#'
create_velocity_acceleration_indicator <- function(health_data, ci_current, ci_prev1, ci_prev2, ci_prev3, field_boundaries) {
  tryCatch({
    # Per-field mean CI for the current week and the three preceding weeks.
    # Rows are collected in a list and bound once at the end instead of
    # growing a data frame with rbind() inside the loop (quadratic copies).
    fields <- unique(field_boundaries$field)
    metric_rows <- list()
    for (field_name in fields) {
      tryCatch({
        field_shape <- field_boundaries %>% dplyr::filter(field == field_name)
        if (nrow(field_shape) == 0) next

        # Sample all four weekly rasters inside this field's boundary.
        # NOTE(review): assumes each raster's value layer is named "CI" —
        # confirm against the raster construction upstream.
        ci_curr_values <- terra::extract(ci_current, field_shape)
        ci_prev1_values <- terra::extract(ci_prev1, field_shape)
        ci_prev2_values <- terra::extract(ci_prev2, field_shape)
        ci_prev3_values <- terra::extract(ci_prev3, field_shape)

        metric_rows[[field_name]] <- data.frame(
          field = field_name,
          week_current = mean(ci_curr_values$CI, na.rm = TRUE),
          week_minus_1 = mean(ci_prev1_values$CI, na.rm = TRUE),
          week_minus_2 = mean(ci_prev2_values$CI, na.rm = TRUE),
          week_minus_3 = mean(ci_prev3_values$CI, na.rm = TRUE),
          stringsAsFactors = FALSE
        )
      }, error = function(e) {
        message(paste("Error processing field", field_name, "for velocity indicator:", e$message))
      })
    }
    field_ci_metrics <- do.call(rbind, metric_rows)

    # Farm-wide weekly averages across fields.
    farm_avg <- colMeans(field_ci_metrics[, c("week_current", "week_minus_1", "week_minus_2", "week_minus_3")], na.rm = TRUE)

    # Velocity = this week vs last week; acceleration = change in velocity.
    velocity <- farm_avg["week_current"] - farm_avg["week_minus_1"]
    prev_velocity <- farm_avg["week_minus_1"] - farm_avg["week_minus_2"]
    acceleration <- velocity - prev_velocity

    velocity_data <- data.frame(label = "Weekly CI Change", value = velocity)
    acceleration_data <- data.frame(label = "Change Acceleration", value = acceleration)

    # Four points for the 4-week trend line.
    trend_data <- data.frame(
      week = c(-3, -2, -1, 0),
      ci_value = c(farm_avg["week_minus_3"], farm_avg["week_minus_2"],
                   farm_avg["week_minus_1"], farm_avg["week_current"])
    )

    # BUG FIX: geom_arc_bar() is exported by ggforce, not ggplot2.
    # ggplot2::geom_arc_bar() errors at runtime, and because the whole
    # function body is wrapped in tryCatch, only the error placeholder ever
    # rendered. The ggforce package must be installed for these gauges.
    # (Also removed two unused locals: layout_matrix, top_velocity_fields.)
    velocity_gauge <- ggplot2::ggplot(velocity_data, ggplot2::aes(x = 0, y = 0)) +
      # Grey semicircular background arc.
      ggforce::geom_arc_bar(ggplot2::aes(
        x0 = 0, y0 = 0,
        r0 = 0.5, r = 1,
        start = -pi/2, end = pi/2,
        fill = "background"
      ), fill = "#f0f0f0") +
      # Filled arc: half-span at velocity 0, full span at +/-1 per week.
      ggforce::geom_arc_bar(ggplot2::aes(
        x0 = 0, y0 = 0,
        r0 = 0.5, r = 1,
        start = -pi/2,
        end = -pi/2 + (pi * (0.5 + (velocity / 2))), # Scale to range -1 to +1
        fill = "velocity"
      ), fill = ifelse(velocity >= 0, "#1a9850", "#d73027")) +
      ggplot2::geom_text(ggplot2::aes(label = sprintf("%.2f", velocity)),
                         size = 8, fontface = "bold") +
      ggplot2::geom_text(ggplot2::aes(label = "Velocity"), y = -0.3, size = 4) +
      ggplot2::coord_fixed() +
      ggplot2::theme_void() +
      ggplot2::scale_fill_manual(values = c("background" = "#f0f0f0", "velocity" = "steelblue"),
                                 guide = "none") +
      ggplot2::annotate("text", x = -0.85, y = 0, label = "Declining",
                        angle = 90, size = 3.5) +
      ggplot2::annotate("text", x = 0.85, y = 0, label = "Improving",
                        angle = -90, size = 3.5) +
      ggplot2::labs(title = "Farm Health Velocity",
                    subtitle = "CI change per week") +
      ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 14, face = "bold"),
                     plot.subtitle = ggplot2::element_text(hjust = 0.5, size = 12))

    # Acceleration gauge: same construction, tighter scale (+/-0.5).
    acceleration_gauge <- ggplot2::ggplot(acceleration_data, ggplot2::aes(x = 0, y = 0)) +
      ggforce::geom_arc_bar(ggplot2::aes(
        x0 = 0, y0 = 0,
        r0 = 0.5, r = 1,
        start = -pi/2, end = pi/2,
        fill = "background"
      ), fill = "#f0f0f0") +
      ggforce::geom_arc_bar(ggplot2::aes(
        x0 = 0, y0 = 0,
        r0 = 0.5, r = 1,
        start = -pi/2,
        end = -pi/2 + (pi * (0.5 + (acceleration / 1))), # Scale to range -0.5 to +0.5
        fill = "acceleration"
      ), fill = ifelse(acceleration >= 0, "#1a9850", "#d73027")) +
      ggplot2::geom_text(ggplot2::aes(label = sprintf("%.2f", acceleration)),
                         size = 8, fontface = "bold") +
      ggplot2::geom_text(ggplot2::aes(label = "Acceleration"), y = -0.3, size = 4) +
      ggplot2::coord_fixed() +
      ggplot2::theme_void() +
      ggplot2::scale_fill_manual(values = c("background" = "#f0f0f0", "acceleration" = "steelblue"),
                                 guide = "none") +
      ggplot2::annotate("text", x = -0.85, y = 0, label = "Slowing",
                        angle = 90, size = 3.5) +
      ggplot2::annotate("text", x = 0.85, y = 0, label = "Accelerating",
                        angle = -90, size = 3.5) +
      ggplot2::labs(title = "Change Acceleration",
                    subtitle = "Increasing or decreasing trend") +
      ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 14, face = "bold"),
                     plot.subtitle = ggplot2::element_text(hjust = 0.5, size = 12))

    # 4-week trend line; dashed reference at the oldest week's value.
    trend_chart <- ggplot2::ggplot(trend_data, ggplot2::aes(x = week, y = ci_value)) +
      ggplot2::geom_line(size = 1.5, color = "steelblue") +
      ggplot2::geom_point(size = 3, color = "steelblue") +
      ggplot2::geom_hline(yintercept = trend_data$ci_value[1], linetype = "dashed", color = "gray50") +
      ggplot2::labs(
        title = "4-Week CI Trend",
        x = "Weeks from current",
        y = "Average CI Value"
      ) +
      ggplot2::theme_minimal() +
      ggplot2::scale_x_continuous(breaks = c(-3, -2, -1, 0))

    # Combine gauges (top row) and trend chart (bottom) into one figure.
    main_plot <- gridExtra::grid.arrange(
      gridExtra::grid.arrange(velocity_gauge, acceleration_gauge, ncol = 2),
      trend_chart,
      heights = c(1.5, 1),
      nrow = 2
    )

    return(main_plot)

  }, error = function(e) {
    message(paste("Error in create_velocity_acceleration_indicator:", e$message))
    # Return a placeholder plot carrying the error text instead of failing.
    return(ggplot2::ggplot() +
             ggplot2::annotate("text", x = 0, y = 0, label = paste("Error creating velocity indicator:", e$message)) +
             ggplot2::theme_void())
  })
}
|
||||
|
||||
#' Generate a field health score based on CI values and trends
#'
#' Combines three components into a 0-10 score: closeness of the mean CI to
#' an age-based expectation (0-5), recent CI change (0-3), and within-field
#' uniformity (0-2), then maps the total onto a status label.
#'
#' @param ci_current Current CI raster
#' @param ci_change CI change raster
#' @param field_age_weeks Field age in weeks
#' @return List containing score, status, and component scores
#'
generate_field_health_score <- function(ci_current, ci_change, field_age_weeks) {
  # Field-wide summaries of the two rasters.
  mean_ci <- terra::global(ci_current, "mean", na.rm=TRUE)[[1]]
  mean_change <- terra::global(ci_change, "mean", na.rm=TRUE)[[1]]

  # Coefficient of variation as a uniformity proxy; fields with a
  # non-positive mean are treated as maximally non-uniform.
  ci_sd <- terra::global(ci_current, "sd", na.rm=TRUE)[[1]]
  ci_uniformity <- ifelse(mean_ci > 0, ci_sd / mean_ci, 1)

  # Component 1 (0-5): distance of the mean CI from an age-adjusted
  # expectation that grows linearly with crop age and saturates at 5.
  expected_ci <- min(5, field_age_weeks / 10) # simple linear model
  ci_score <- max(0, min(5, 5 - 2 * abs(mean_ci - expected_ci)))

  # Component 2 (0-3): reward positive week-over-week change.
  change_score <- max(0, min(3, 1 + mean_change))

  # Component 3 (0-2): reward uniform fields.
  uniformity_score <- max(0, min(2, 2 * (1 - ci_uniformity)))

  # Total score on a 0-10 scale.
  total_score <- ci_score + change_score + uniformity_score

  # Bucket the total into a human-readable status label.
  status <- dplyr::case_when(
    total_score >= 8 ~ "Excellent",
    total_score >= 6 ~ "Good",
    total_score >= 4 ~ "Fair",
    total_score >= 2 ~ "Needs Attention",
    TRUE ~ "Critical"
  )

  list(
    score = round(total_score, 1),
    status = status,
    components = list(
      ci = round(ci_score, 1),
      change = round(change_score, 1),
      uniformity = round(uniformity_score, 1)
    )
  )
}
|
||||
|
||||
#' Create an irrigation recommendation map
#'
#' Classifies every raster cell into one of four irrigation-priority zones
#' from its current CI value and recent CI change, then renders the zones
#' over the field boundary.
#'
#' Zone coding:
#'   3 = High priority (low CI, negative trend)
#'   2 = Medium priority (low CI but stable, or good CI with strong decline)
#'   1 = Low priority / watch (good CI with mild decline)
#'   0 = No action needed (good CI, stable/positive trend)
#'
#' @param ci_current Current CI raster
#' @param ci_change CI change raster
#' @param field_shape Field boundary shape
#' @param title Map title
#' @return A tmap object with irrigation recommendations
#'
create_irrigation_map <- function(ci_current, ci_change, field_shape, title = "Irrigation Priority Zones") {
  # Start from a raster with the same geometry as ci_current, all zeros.
  irrigation_priority <- ci_current * 0

  # Cell-wise values for classification.
  ci_values <- terra::values(ci_current)
  change_values <- terra::values(ci_change)

  # Cells where both inputs are present; everything else stays NA.
  valid <- !is.na(ci_values) & !is.na(change_values)
  priority_values <- rep(NA, length(ci_values))

  # High priority: Low CI (< 2) and negative change (< 0)
  priority_values[which(valid & ci_values < 2 & change_values < 0)] <- 3

  # Medium priority: Low CI (< 2) with stable/positive change, or good CI
  # (>= 2) with significant negative change (< -1).
  # BUG FIX: the original capped the second clause at CI < 4, which left
  # cells with CI >= 4 and change < -1 matching no branch at all (NA holes
  # in the map). The zone description ("good CI with negative trend")
  # covers those cells, so the upper bound is removed.
  priority_values[which(valid & ((ci_values < 2 & change_values >= 0) |
                                   (ci_values >= 2 & change_values < -1)))] <- 2

  # Low priority (watch): good CI (>= 2) with mild negative change (-1 to 0).
  priority_values[which(valid & ci_values >= 2 & change_values < 0 & change_values >= -1)] <- 1

  # No action needed: good CI (>= 2) with stable/positive change (>= 0).
  priority_values[which(valid & ci_values >= 2 & change_values >= 0)] <- 0

  terra::values(irrigation_priority) <- priority_values

  # NOTE(review): `style`/`palette` are tmap v3 arguments, deprecated in
  # tmap v4 (tm_scale_categorical()/col.scale) — confirm the target tmap
  # version before migrating this call.
  tm_shape(irrigation_priority) +
    tm_raster(
      style = "cat",
      palette = c("#1a9850", "#91cf60", "#fc8d59", "#d73027"),
      labels = c("No Action", "Watch", "Medium Priority", "High Priority"),
      title = "Irrigation Need"
    ) +
    tm_shape(field_shape) +
    tm_borders(lwd = 2) +
    tm_layout(
      main.title = title,
      legend.outside = FALSE,
      legend.position = c("left", "bottom")
    )
}
|
||||
|
||||
#' Simple mock function to get weather data for a field
#' In a real implementation, this would fetch data from a weather API
#'
#' @param start_date Start date for weather data
#' @param end_date End date for weather data
#' @param lat Latitude of the field center (currently unused by the mock)
#' @param lon Longitude of the field center (currently unused by the mock)
#' @return A data frame with one row per day: date, rainfall_mm,
#'   temp_max_c, temp_min_c, temp_mean_c
#'
get_weather_data <- function(start_date, end_date, lat = -16.1, lon = 34.7) {
  # This is a mock implementation - in production, you'd replace with actual API call
  # to a service like OpenWeatherMap, NOAA, or other weather data provider

  # One row per calendar day in the requested range.
  dates <- seq.Date(from = as.Date(start_date), to = as.Date(end_date), by = "day")
  n_days <- length(dates)

  # Seasonal pattern: Southern-hemisphere rainy season Nov-Apr.
  month_nums <- as.numeric(format(dates, "%m"))
  is_rainy_season <- month_nums %in% c(11, 12, 1, 2, 3, 4)

  # Rainfall: heavier and more frequent during the rainy season.
  rainfall <- numeric(n_days)
  rainfall[is_rainy_season] <- pmax(0, rnorm(sum(is_rainy_season), mean = 4, sd = 8))
  rainfall[!is_rainy_season] <- pmax(0, rnorm(sum(!is_rainy_season), mean = 0.5, sd = 2))

  # Occasional heavy rainfall events on ~10% of rainy-season days.
  # BUG FIX: the original `sample(which(is_rainy_season), size = max(1, ...))`
  # crashed when the range had no rainy-season days (sampling 1 item from an
  # empty population), and hit the base-R length-1 footgun when exactly one
  # rainy day existed: sample(k) draws from 1:k, not from the single index k.
  # sample.int over the index vector avoids both.
  rainy_idx <- which(is_rainy_season)
  if (length(rainy_idx) > 0) {
    n_heavy <- max(1, round(length(rainy_idx) * 0.1))
    heavy_rain_days <- rainy_idx[sample.int(length(rainy_idx), size = n_heavy)]
    rainfall[heavy_rain_days] <- rainfall[heavy_rain_days] + runif(length(heavy_rain_days), 20, 50)
  }

  # Temperatures: sinusoidal seasonal cycle peaking in January (month 1).
  temp_mean <- 18 + 8 * sin((month_nums - 1) * pi/6) # Peak in January (month 1)
  temp_max <- temp_mean + rnorm(n_days, mean = 5, sd = 1)
  temp_min <- temp_mean - rnorm(n_days, mean = 5, sd = 1)

  data.frame(
    date = dates,
    rainfall_mm = round(rainfall, 1),
    temp_max_c = round(temp_max, 1),
    temp_min_c = round(temp_min, 1),
    temp_mean_c = round((temp_max + temp_min) / 2, 1)
  )
}
|
||||
|
||||
#' Creates a weather summary visualization integrated with CI data
#'
#' Plots daily rainfall (bars), the daily mean CI scaled onto the same axis
#' (line + points, x10, with a secondary axis dividing back by 10), and the
#' min/max temperature band (ribbon) for the most recent observation window
#' of one field. Weather values come from the mock get_weather_data().
#'
#' @param pivotName Name of the pivot field
#' @param ci_data CI quadrant data (defaults to the global CI_quadrant)
#' @param days_to_show Number of days of weather to show
#' @return ggplot object
#'
create_weather_ci_plot <- function(pivotName, ci_data = CI_quadrant, days_to_show = 30) {
  # Observations for this field, oldest first, with missing values dropped.
  field_data <- ci_data %>%
    dplyr::filter(field == pivotName) %>%
    dplyr::arrange(Date) %>%
    dplyr::filter(!is.na(value))

  # CONSISTENCY FIX: every other function in this file namespaces its
  # ggplot2 calls; the bare calls here failed whenever the file was sourced
  # without library(ggplot2) attached.
  if (nrow(field_data) == 0) {
    return(ggplot2::ggplot() +
             ggplot2::annotate("text", x = 0, y = 0, label = "No data available") +
             ggplot2::theme_void())
  }

  # Window: from the most recent observation back days_to_show days.
  latest_date <- max(field_data$Date, na.rm = TRUE)
  start_date <- latest_date - days_to_show

  recent_field_data <- field_data %>%
    dplyr::filter(Date >= start_date)

  # Get center point coordinates for the field (would be calculated from geometry in production)
  # This is mocked for simplicity
  lat <- -16.1 # Mock latitude
  lon <- 34.7 # Mock longitude

  weather_data <- get_weather_data(start_date, latest_date, lat, lon)

  # Mean CI per day across subfields/quadrants.
  daily_ci <- recent_field_data %>%
    dplyr::group_by(Date) %>%
    dplyr::summarize(mean_ci = mean(value, na.rm = TRUE))

  # Combined chart with a dual y-axis.
  g <- ggplot2::ggplot() +
    # Rainfall as bars
    ggplot2::geom_col(data = weather_data, ggplot2::aes(x = date, y = rainfall_mm),
                      fill = "#1565C0", alpha = 0.7, width = 0.7) +
    # CI as a line
    ggplot2::geom_line(data = daily_ci, ggplot2::aes(x = Date, y = mean_ci * 10),
                       color = "#2E7D32", size = 1) +
    ggplot2::geom_point(data = daily_ci, ggplot2::aes(x = Date, y = mean_ci * 10),
                        color = "#2E7D32", size = 2) +
    # Temperature range as ribbon
    ggplot2::geom_ribbon(data = weather_data,
                         ggplot2::aes(x = date, ymin = temp_min_c, ymax = temp_max_c),
                         fill = "#FF9800", alpha = 0.2) +
    # Primary y-axis (rainfall); secondary axis undoes the x10 CI scaling.
    ggplot2::scale_y_continuous(
      name = "Rainfall (mm)",
      sec.axis = ggplot2::sec_axis(~./10, name = "Chlorophyll Index & Temperature (°C)")
    ) +
    ggplot2::labs(
      title = paste("Field", pivotName, "- Weather and CI Relationship"),
      subtitle = paste("Last", days_to_show, "days"),
      x = "Date"
    ) +
    ggplot2::theme_minimal() +
    ggplot2::theme(
      axis.title.y.left = ggplot2::element_text(color = "#1565C0"),
      axis.title.y.right = ggplot2::element_text(color = "#2E7D32"),
      legend.position = "bottom"
    )

  return(g)
}
|
||||
0
r_app/experiments/mosaic_creation_fixed.R
Normal file
0
r_app/experiments/mosaic_creation_fixed.R
Normal file
421
r_app/experiments/optimal_ci_analysis.R
Normal file
421
r_app/experiments/optimal_ci_analysis.R
Normal file
|
|
@ -0,0 +1,421 @@
|
|||
# Optimal CI Analysis - Day 30 CI values with quadratic fitting
|
||||
# Author: SmartCane Analysis Team
|
||||
# Date: 2025-06-12
|
||||
|
||||
# Load required libraries
|
||||
library(ggplot2)
|
||||
library(dplyr)
|
||||
library(readr)
|
||||
|
||||
# Set file path
|
||||
rds_file_path <- "C:/Users/timon/Resilience BV/4020 SCane ESA DEMO - Documenten/General/4020 SCDEMO Team/4020 TechnicalData/WP3/smartcane/laravel_app/storage/app/chemba/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds"
|
||||
|
||||
# Check if file exists
|
||||
if (!file.exists(rds_file_path)) {
|
||||
stop("RDS file not found at specified path: ", rds_file_path)
|
||||
}
|
||||
|
||||
# Load the data
|
||||
cat("Loading RDS file...\n")
|
||||
ci_data <- readRDS(rds_file_path)
|
||||
|
||||
# Display structure of the data to understand it better
|
||||
cat("Data structure:\n")
|
||||
str(ci_data)
|
||||
cat("\nFirst few rows:\n")
|
||||
head(ci_data)
|
||||
cat("\nColumn names:\n")
|
||||
print(colnames(ci_data))
|
||||
|
||||
# Filter data based on requirements
|
||||
cat("\nApplying data filters...\n")
|
||||
|
||||
# 1. Filter out models that don't reach at least DOY 300
|
||||
model_doy_max <- ci_data %>%
|
||||
group_by(model) %>%
|
||||
summarise(max_doy = max(DOY, na.rm = TRUE), .groups = 'drop')
|
||||
|
||||
valid_models <- model_doy_max %>%
|
||||
filter(max_doy >= 300) %>%
|
||||
pull(model)
|
||||
|
||||
cat(paste("Models before filtering:", n_distinct(ci_data$model), "\n"))
|
||||
cat(paste("Models reaching at least DOY 300:", length(valid_models), "\n"))
|
||||
|
||||
ci_data <- ci_data %>%
|
||||
filter(model %in% valid_models)
|
||||
|
||||
# 2. Apply IQR filtering per DOY (remove outliers outside IQR)
|
||||
cat("Applying IQR filtering per DOY...\n")
|
||||
original_rows <- nrow(ci_data)
|
||||
|
||||
ci_data <- ci_data %>%
|
||||
group_by(DOY) %>%
|
||||
mutate(
|
||||
Q1 = quantile(FitData, 0.25, na.rm = TRUE),
|
||||
Q3 = quantile(FitData, 0.75, na.rm = TRUE),
|
||||
IQR = Q3 - Q1,
|
||||
lower_bound = Q1 - 1.5 * IQR,
|
||||
upper_bound = Q3 + 1.5 * IQR
|
||||
) %>%
|
||||
filter(FitData >= lower_bound & FitData <= upper_bound) %>%
|
||||
select(-Q1, -Q3, -IQR, -lower_bound, -upper_bound) %>%
|
||||
ungroup()
|
||||
|
||||
filtered_rows <- nrow(ci_data)
|
||||
cat(paste("Rows before IQR filtering:", original_rows, "\n"))
|
||||
cat(paste("Rows after IQR filtering:", filtered_rows, "\n"))
|
||||
cat(paste("Removed", original_rows - filtered_rows, "outliers (",
|
||||
round(100 * (original_rows - filtered_rows) / original_rows, 1), "%)\n"))
|
||||
|
||||
# Check what day values are available after filtering
|
||||
if ("DOY" %in% colnames(ci_data)) {
|
||||
cat("\nUnique DOY values after filtering:\n")
|
||||
print(sort(unique(ci_data$DOY)))
|
||||
} else {
|
||||
cat("\nNo 'DOY' column found. Available columns:\n")
|
||||
print(colnames(ci_data))
|
||||
}
|
||||
|
||||
# Extract CI values at day 30 for each field
|
||||
cat("\nExtracting day 30 CI values...\n")
|
||||
|
||||
# Set column names based on known structure
|
||||
day_col <- "DOY"
|
||||
ci_col <- "FitData"
|
||||
field_col <- "model"
|
||||
|
||||
# Try different possible field column name combinations
|
||||
if ("field" %in% colnames(ci_data)) {
|
||||
field_col <- "field"
|
||||
} else if ("Field" %in% colnames(ci_data)) {
|
||||
field_col <- "Field"
|
||||
} else if ("pivot" %in% colnames(ci_data)) {
|
||||
field_col <- "pivot"
|
||||
} else if ("Pivot" %in% colnames(ci_data)) {
|
||||
field_col <- "Pivot"
|
||||
} else if ("field_id" %in% colnames(ci_data)) {
|
||||
field_col <- "field_id"
|
||||
} else if ("Field_ID" %in% colnames(ci_data)) {
|
||||
field_col <- "Field_ID"
|
||||
}
|
||||
|
||||
# Check if we found the required columns
|
||||
if (!("DOY" %in% colnames(ci_data))) {
|
||||
stop("DOY column not found in data")
|
||||
}
|
||||
if (!("FitData" %in% colnames(ci_data))) {
|
||||
stop("FitData column not found in data")
|
||||
}
|
||||
if (is.null(field_col)) {
|
||||
cat("Could not automatically identify field column. Please check column names.\n")
|
||||
cat("Available columns: ", paste(colnames(ci_data), collapse = ", "), "\n")
|
||||
stop("Manual field column identification required")
|
||||
}
|
||||
|
||||
cat(paste("Using columns - DOY:", day_col, "Field:", field_col, "FitData:", ci_col, "\n"))
|
||||
|
||||
# Extract day 30 data
|
||||
day_30_data <- ci_data %>%
|
||||
filter(DOY %% 30 == 0) %>%
|
||||
select(field = !!sym(field_col), ci = !!sym(ci_col), DOY) %>%
|
||||
na.omit() %>%
|
||||
arrange(field)
|
||||
|
||||
cat(paste("Found", nrow(day_30_data), "fields with day 30 CI values\n"))
|
||||
|
||||
if (nrow(day_30_data) == 0) {
|
||||
stop("No data found for day 30. Check if day 30 exists in the dataset.")
|
||||
}
|
||||
|
||||
# Display summary of day 30 data
|
||||
cat("\nSummary of day 30 CI values:\n")
|
||||
print(summary(day_30_data))
|
||||
|
||||
# Add field index for plotting (assuming fields represent some spatial or sequential order)
|
||||
#day_30_data$field_index <- 1:nrow(day_30_data)
|
||||
|
||||
# Create scatter plot
|
||||
p1 <- ggplot(day_30_data, aes(x = DOY, y = ci)) +
|
||||
geom_point(color = "blue", size = 3, alpha = 0.7) +
|
||||
labs(
|
||||
title = "CI Values at Day 30 for All Fields",
|
||||
x = "Day of Year (DOY)",
|
||||
y = "CI Value",
|
||||
subtitle = paste("Total fields:", nrow(day_30_data))
|
||||
) +
|
||||
theme_minimal() +
|
||||
theme(
|
||||
plot.title = element_text(size = 14, face = "bold"),
|
||||
axis.title = element_text(size = 12),
|
||||
axis.text = element_text(size = 10)
|
||||
)
|
||||
|
||||
print(p1)
|
||||
|
||||
# Try multiple curve fitting approaches
|
||||
cat("\nTrying different curve fitting approaches...\n")
|
||||
|
||||
# Aggregate data by DOY (take mean CI for each DOY to reduce noise)
|
||||
aggregated_data <- day_30_data %>%
|
||||
group_by(DOY) %>%
|
||||
summarise(
|
||||
mean_ci = mean(ci, na.rm = TRUE),
|
||||
median_ci = median(ci, na.rm = TRUE),
|
||||
count = n(),
|
||||
.groups = 'drop'
|
||||
) %>%
|
||||
filter(count >= 5) # Only keep DOY values with sufficient data points
|
||||
|
||||
cat(paste("Aggregated to", nrow(aggregated_data), "DOY points with sufficient data\n"))
|
||||
|
||||
# Create prediction data for smooth curves
|
||||
x_smooth <- seq(min(aggregated_data$DOY), max(aggregated_data$DOY), length.out = 100)
|
||||
|
||||
# 1. Quadratic model (as requested)
|
||||
cat("\n1. Fitting quadratic model...\n")
|
||||
quad_model <- lm(mean_ci ~ poly(DOY, 2, raw = TRUE), data = aggregated_data)
|
||||
quad_pred <- predict(quad_model, newdata = data.frame(DOY = x_smooth))
|
||||
quad_r2 <- summary(quad_model)$r.squared
|
||||
cat(paste("Quadratic R² =", round(quad_r2, 3), "\n"))
|
||||
|
||||
# 2. Logistic growth model: y = K / (1 + exp(-r*(x-x0))) - biologically realistic
|
||||
cat("\n2. Fitting logistic growth model...\n")
|
||||
tryCatch({
|
||||
# Estimate starting values
|
||||
K_start <- max(aggregated_data$mean_ci) * 1.1 # Carrying capacity
|
||||
r_start <- 0.05 # Growth rate
|
||||
x0_start <- mean(aggregated_data$DOY) # Inflection point
|
||||
|
||||
logistic_model <- nls(mean_ci ~ K / (1 + exp(-r * (DOY - x0))),
|
||||
data = aggregated_data,
|
||||
start = list(K = K_start, r = r_start, x0 = x0_start),
|
||||
control = nls.control(maxiter = 1000))
|
||||
|
||||
logistic_pred <- predict(logistic_model, newdata = data.frame(DOY = x_smooth))
|
||||
logistic_r2 <- 1 - sum(residuals(logistic_model)^2) / sum((aggregated_data$mean_ci - mean(aggregated_data$mean_ci))^2)
|
||||
cat(paste("Logistic growth R² =", round(logistic_r2, 3), "\n"))
|
||||
}, error = function(e) {
|
||||
cat("Logistic growth model failed to converge\n")
|
||||
logistic_model <- NULL
|
||||
logistic_pred <- NULL
|
||||
logistic_r2 <- NA
|
||||
})
|
||||
|
||||
# 3. Beta function model: good for crop growth curves with clear peak
|
||||
cat("\n3. Fitting Beta function model...\n")
|
||||
tryCatch({
|
||||
# Normalize DOY to 0-1 range for Beta function
|
||||
doy_min <- min(aggregated_data$DOY)
|
||||
doy_max <- max(aggregated_data$DOY)
|
||||
aggregated_data$doy_norm <- (aggregated_data$DOY - doy_min) / (doy_max - doy_min)
|
||||
|
||||
# Beta function: y = a * (x^(p-1)) * ((1-x)^(q-1)) + c
|
||||
beta_model <- nls(mean_ci ~ a * (doy_norm^(p-1)) * ((1-doy_norm)^(q-1)) + c,
|
||||
data = aggregated_data,
|
||||
start = list(a = max(aggregated_data$mean_ci) * 20, p = 2, q = 3, c = min(aggregated_data$mean_ci)),
|
||||
control = nls.control(maxiter = 1000))
|
||||
|
||||
# Predict on normalized scale then convert back
|
||||
x_smooth_norm <- (x_smooth - doy_min) / (doy_max - doy_min)
|
||||
beta_pred <- predict(beta_model, newdata = data.frame(doy_norm = x_smooth_norm))
|
||||
beta_r2 <- 1 - sum(residuals(beta_model)^2) / sum((aggregated_data$mean_ci - mean(aggregated_data$mean_ci))^2)
|
||||
cat(paste("Beta function R² =", round(beta_r2, 3), "\n"))
|
||||
}, error = function(e) {
|
||||
cat("Beta function model failed to converge\n")
|
||||
beta_model <- NULL
|
||||
beta_pred <- NULL
|
||||
beta_r2 <- NA
|
||||
})
|
||||
|
||||
# 4. Gaussian (normal) curve: good for symmetric growth patterns
|
||||
cat("\n4. Fitting Gaussian curve...\n")
|
||||
tryCatch({
|
||||
# Gaussian: y = a * exp(-((x-mu)^2)/(2*sigma^2)) + c
|
||||
gaussian_model <- nls(mean_ci ~ a * exp(-((DOY - mu)^2)/(2 * sigma^2)) + c,
|
||||
data = aggregated_data,
|
||||
start = list(a = max(aggregated_data$mean_ci),
|
||||
mu = aggregated_data$DOY[which.max(aggregated_data$mean_ci)],
|
||||
sigma = 50,
|
||||
c = min(aggregated_data$mean_ci)),
|
||||
control = nls.control(maxiter = 1000))
|
||||
|
||||
gaussian_pred <- predict(gaussian_model, newdata = data.frame(DOY = x_smooth))
|
||||
gaussian_r2 <- 1 - sum(residuals(gaussian_model)^2) / sum((aggregated_data$mean_ci - mean(aggregated_data$mean_ci))^2)
|
||||
cat(paste("Gaussian R² =", round(gaussian_r2, 3), "\n"))
|
||||
}, error = function(e) {
|
||||
cat("Gaussian model failed to converge\n")
|
||||
gaussian_model <- NULL
|
||||
gaussian_pred <- NULL
|
||||
gaussian_r2 <- NA
|
||||
})
|
||||
|
||||
# 5. LOESS (local regression) - for comparison
|
||||
cat("\n5. Fitting LOESS smoothing...\n")
|
||||
loess_model <- loess(mean_ci ~ DOY, data = aggregated_data, span = 0.5)
|
||||
loess_pred <- predict(loess_model, newdata = data.frame(DOY = x_smooth))
|
||||
loess_r2 <- 1 - sum(residuals(loess_model)^2) / sum((aggregated_data$mean_ci - mean(aggregated_data$mean_ci))^2)
|
||||
cat(paste("LOESS R² =", round(loess_r2, 3), "\n"))
|
||||
|
||||
# Calculate confidence intervals for both models
|
||||
cat("\nCalculating confidence intervals...\n")
|
||||
|
||||
# Function to calculate confidence intervals using residual-based method
|
||||
calculate_ci <- function(model, data, newdata, alpha = 0.5) {
|
||||
# Get model predictions
|
||||
pred_vals <- predict(model, newdata = newdata)
|
||||
|
||||
# For parametric models (lm), use built-in prediction intervals
|
||||
if(class(model)[1] == "lm") {
|
||||
pred_intervals <- predict(model, newdata = newdata, interval = "confidence", level = 1 - alpha)
|
||||
return(list(
|
||||
lower = pred_intervals[, "lwr"],
|
||||
upper = pred_intervals[, "upr"]
|
||||
))
|
||||
}
|
||||
|
||||
# For LOESS, calculate confidence intervals using residual bootstrap
|
||||
if(class(model)[1] == "loess") {
|
||||
# Calculate residuals from the original model
|
||||
fitted_vals <- fitted(model)
|
||||
residuals_vals <- residuals(model)
|
||||
residual_sd <- sd(residuals_vals, na.rm = TRUE)
|
||||
|
||||
# Use normal approximation for confidence intervals
|
||||
# For 50% CI, use 67% quantile (approximately 0.67 standard deviations)
|
||||
margin <- qnorm(1 - alpha/2) * residual_sd
|
||||
|
||||
return(list(
|
||||
lower = pred_vals - margin,
|
||||
upper = pred_vals + margin
|
||||
))
|
||||
}
|
||||
|
||||
# Fallback method
|
||||
residual_sd <- sd(residuals(model), na.rm = TRUE)
|
||||
margin <- qnorm(1 - alpha/2) * residual_sd
|
||||
return(list(
|
||||
lower = pred_vals - margin,
|
||||
upper = pred_vals + margin
|
||||
))
|
||||
}# Calculate CIs for quadratic model using aggregated data (same as model fitting)
|
||||
quad_ci <- calculate_ci(quad_model, aggregated_data, data.frame(DOY = x_smooth))
|
||||
|
||||
# Calculate CIs for LOESS model using aggregated data (same as model fitting)
|
||||
loess_ci <- calculate_ci(loess_model, aggregated_data, data.frame(DOY = x_smooth))
|
||||
|
||||
# Create separate plots for LOESS and Quadratic models
|
||||
cat("\nCreating LOESS plot with confidence intervals...\n")
|
||||
|
||||
# LOESS plot
|
||||
p_loess <- ggplot(day_30_data, aes(x = DOY, y = ci)) +
|
||||
geom_point(color = "lightblue", size = 1.5, alpha = 0.4) +
|
||||
geom_point(data = aggregated_data, aes(x = DOY, y = mean_ci),
|
||||
color = "darkblue", size = 3, alpha = 0.8) +
|
||||
geom_ribbon(data = data.frame(DOY = x_smooth,
|
||||
lower = loess_ci$lower,
|
||||
upper = loess_ci$upper),
|
||||
aes(x = DOY, ymin = lower, ymax = upper),
|
||||
alpha = 0.3, fill = "purple", inherit.aes = FALSE) +
|
||||
geom_line(data = data.frame(DOY = x_smooth, loess = loess_pred),
|
||||
aes(x = DOY, y = loess),
|
||||
color = "purple", size = 1.5) +
|
||||
labs(
|
||||
title = "LOESS Model - CI Values Over Growing Season",
|
||||
x = "Day of Year (DOY)",
|
||||
y = "CI Value",
|
||||
subtitle = paste("LOESS R² =", round(loess_r2, 3), "| 50% Confidence Interval")
|
||||
) +
|
||||
theme_minimal() +
|
||||
theme(
|
||||
plot.title = element_text(size = 14, face = "bold"),
|
||||
axis.title = element_text(size = 12),
|
||||
axis.text = element_text(size = 10)
|
||||
)
|
||||
|
||||
# Find optimal point for LOESS
|
||||
loess_max_idx <- which.max(loess_pred)
|
||||
loess_optimal_doy <- x_smooth[loess_max_idx]
|
||||
loess_optimal_ci <- loess_pred[loess_max_idx]
|
||||
|
||||
p_loess <- p_loess +
|
||||
geom_point(aes(x = loess_optimal_doy, y = loess_optimal_ci),
|
||||
color = "red", size = 5, shape = 8) +
|
||||
annotate("text",
|
||||
x = loess_optimal_doy + 30,
|
||||
y = loess_optimal_ci,
|
||||
label = paste("Optimal: DOY", round(loess_optimal_doy, 1), "\nCI =", round(loess_optimal_ci, 3)),
|
||||
color = "red", size = 4, fontface = "bold")
|
||||
|
||||
print(p_loess)
|
||||
|
||||
cat("\nCreating Quadratic plot with confidence intervals...\n")
|
||||
|
||||
# Quadratic plot
|
||||
p_quadratic <- ggplot(day_30_data, aes(x = DOY, y = ci)) +
|
||||
geom_point(color = "lightcoral", size = 1.5, alpha = 0.4) +
|
||||
geom_point(data = aggregated_data, aes(x = DOY, y = mean_ci),
|
||||
color = "darkred", size = 3, alpha = 0.8) +
|
||||
geom_ribbon(data = data.frame(DOY = x_smooth,
|
||||
lower = quad_ci$lower,
|
||||
upper = quad_ci$upper),
|
||||
aes(x = DOY, ymin = lower, ymax = upper),
|
||||
alpha = 0.3, fill = "red", inherit.aes = FALSE) +
|
||||
geom_line(data = data.frame(DOY = x_smooth, quadratic = quad_pred),
|
||||
aes(x = DOY, y = quadratic),
|
||||
color = "red", size = 1.5) +
|
||||
labs(
|
||||
title = "Quadratic Model - CI Values Over Growing Season",
|
||||
x = "Day of Year (DOY)",
|
||||
y = "CI Value",
|
||||
subtitle = paste("Quadratic R² =", round(quad_r2, 3), "| 50% Confidence Interval")
|
||||
) +
|
||||
theme_minimal() +
|
||||
theme(
|
||||
plot.title = element_text(size = 14, face = "bold"),
|
||||
axis.title = element_text(size = 12),
|
||||
axis.text = element_text(size = 10)
|
||||
)
|
||||
|
||||
# Find optimal point for Quadratic
|
||||
quad_coeffs <- coef(quad_model)
|
||||
a <- quad_coeffs[3]
|
||||
b <- quad_coeffs[2]
|
||||
|
||||
if(a != 0) {
|
||||
quad_optimal_doy <- -b / (2*a)
|
||||
doy_range <- range(aggregated_data$DOY)
|
||||
if(quad_optimal_doy >= doy_range[1] && quad_optimal_doy <= doy_range[2]) {
|
||||
quad_optimal_ci <- predict(quad_model, newdata = data.frame(DOY = quad_optimal_doy))
|
||||
} else {
|
||||
quad_optimal_doy <- x_smooth[which.max(quad_pred)]
|
||||
quad_optimal_ci <- max(quad_pred)
|
||||
}
|
||||
} else {
|
||||
quad_optimal_doy <- x_smooth[which.max(quad_pred)]
|
||||
quad_optimal_ci <- max(quad_pred)
|
||||
}
|
||||
|
||||
p_quadratic <- p_quadratic +
|
||||
geom_point(aes(x = quad_optimal_doy, y = quad_optimal_ci),
|
||||
color = "darkred", size = 5, shape = 8) +
|
||||
annotate("text",
|
||||
x = quad_optimal_doy + 30,
|
||||
y = quad_optimal_ci,
|
||||
label = paste("Optimal: DOY", round(quad_optimal_doy, 1), "\nCI =", round(quad_optimal_ci, 3)),
|
||||
color = "darkred", size = 4, fontface = "bold")
|
||||
|
||||
print(p_quadratic)
|
||||
print(p_loess)
|
||||
|
||||
# Print results summary
|
||||
cat("\n=== RESULTS SUMMARY ===\n")
|
||||
cat(paste("LOESS Model - R² =", round(loess_r2, 3), "\n"))
|
||||
cat(paste(" Optimal DOY:", round(loess_optimal_doy, 1), "\n"))
|
||||
cat(paste(" Optimal CI:", round(loess_optimal_ci, 4), "\n\n"))
|
||||
|
||||
cat(paste("Quadratic Model - R² =", round(quad_r2, 3), "\n"))
|
||||
cat(paste(" Optimal DOY:", round(quad_optimal_doy, 1), "\n"))
|
||||
cat(paste(" Optimal CI:", round(quad_optimal_ci, 4), "\n"))
|
||||
|
||||
|
||||
|
|
@ -1,15 +1,40 @@
|
|||
install.packages('CAST')
|
||||
install.packages("packages/CIprep_0.1.4.tar.gz",repos=NULL, type="source")
|
||||
install.packages('caret')
|
||||
install.packages('exactextractr')
|
||||
install.packages('googledrive')
|
||||
install.packages('here')
|
||||
install.packages('lubridate')
|
||||
install.packages('raster')
|
||||
install.packages('readxl')
|
||||
install.packages('rsample')
|
||||
install.packages('sf')
|
||||
install.packages('terra')
|
||||
install.packages('tidyverse')
|
||||
install.packages('tmap')
|
||||
install.packages('zoo')
|
||||
# Install required packages for SmartCane project
|
||||
# This script installs all packages needed to run the CI report dashboard
|
||||
|
||||
# List of required packages
|
||||
required_packages <- c(
|
||||
# Core packages
|
||||
"here", "tidyverse", "sf", "terra", "tmap", "lubridate",
|
||||
|
||||
# Additional data manipulation
|
||||
"zoo", "readxl", "knitr", "rmarkdown", "dplyr", "purrr", "stringr",
|
||||
|
||||
# Spatial analysis
|
||||
"exactextractr",
|
||||
|
||||
# Machine learning and statistics
|
||||
"rsample", "caret", "randomForest", "CAST"
|
||||
)
|
||||
|
||||
# Function to install missing packages
|
||||
install_if_missing <- function(pkg) {
|
||||
if (!requireNamespace(pkg, quietly = TRUE)) {
|
||||
message(paste("Installing package:", pkg))
|
||||
install.packages(pkg, repos = "https://cloud.r-project.org")
|
||||
} else {
|
||||
message(paste("Package already installed:", pkg))
|
||||
}
|
||||
}
|
||||
|
||||
# Install missing packages
|
||||
for (pkg in required_packages) {
|
||||
install_if_missing(pkg)
|
||||
}
|
||||
|
||||
# Load core packages to verify installation
|
||||
library(here)
|
||||
library(tidyverse)
|
||||
library(sf)
|
||||
library(terra)
|
||||
|
||||
message("All required packages have been installed!")
|
||||
|
|
|
|||
|
|
@ -32,15 +32,20 @@ main <- function() {
|
|||
message("No project_dir provided. Using default:", project_dir)
|
||||
}
|
||||
|
||||
# Make project_dir available globally so parameters_project.R can use it
|
||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||
|
||||
# Initialize project configuration and load utility functions
|
||||
tryCatch({
|
||||
source("parameters_project.R")
|
||||
source("ci_extraction_utils.R")
|
||||
source("growth_model_utils.R")
|
||||
}, error = function(e) {
|
||||
warning("Default source files not found. Attempting to source from 'r_app' directory.")
|
||||
tryCatch({
|
||||
source("r_app/parameters_project.R")
|
||||
source("r_app/ci_extraction_utils.R")
|
||||
source(here::here("r_app", "parameters_project.R"))
|
||||
source(here::here("r_app", "growth_model_utils.R"))
|
||||
warning(paste("Successfully sourced files from 'r_app' directory."))
|
||||
|
||||
}, error = function(e) {
|
||||
stop("Failed to source required files from both default and 'r_app' directories.")
|
||||
})
|
||||
|
|
@ -92,5 +97,6 @@ main <- function() {
|
|||
})
|
||||
}
|
||||
|
||||
# Run the main function if the script is executed directly
|
||||
main()
|
||||
if (sys.nframe() == 0) {
|
||||
main()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,19 +28,31 @@ main <- function() {
|
|||
# Capture command line arguments
|
||||
args <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
# Process project_dir argument with default
|
||||
if (length(args) >= 3 && !is.na(args[3])) {
|
||||
project_dir <- as.character(args[3])
|
||||
} else {
|
||||
# Default project directory
|
||||
project_dir <- "chemba"
|
||||
message("No project_dir provided. Using default:", project_dir)
|
||||
}
|
||||
|
||||
# Make project_dir available globally so parameters_project.R can use it
|
||||
assign("project_dir", project_dir, envir = .GlobalEnv)
|
||||
|
||||
# Process end_date argument with default
|
||||
if (length(args) >= 1 && !is.na(args[1])) {
|
||||
end_date <- as.Date(args[1])
|
||||
if (is.na(end_date)) {
|
||||
message("Invalid end_date provided. Using current date.")
|
||||
#end_date <- Sys.Date()
|
||||
end_date <- "2023-10-01" # Default date for testing
|
||||
end_date <- Sys.Date()
|
||||
#end_date <- "2024-08-25" # Default date for testing
|
||||
}
|
||||
} else {
|
||||
# Default to current date if no argument is provided
|
||||
#end_date <- Sys.Date()
|
||||
end_date <- "2023-10-01" # Default date for testing
|
||||
message("No end_date provided. Using current date:", format(end_date))
|
||||
end_date <- Sys.Date()
|
||||
#end_date <- "2024-08-25" # Default date for testing
|
||||
message("No end_date provided. Using current date: ", format(end_date))
|
||||
}
|
||||
|
||||
# Process offset argument with default
|
||||
|
|
@ -56,25 +68,20 @@ main <- function() {
|
|||
message("No offset provided. Using default:", offset, "days")
|
||||
}
|
||||
|
||||
# Process project_dir argument with default
|
||||
if (length(args) >= 3 && !is.na(args[3])) {
|
||||
project_dir <- as.character(args[3])
|
||||
} else {
|
||||
# Default project directory
|
||||
project_dir <- "chemba"
|
||||
message("No project_dir provided. Using default:", project_dir)
|
||||
}
|
||||
|
||||
|
||||
# 3. Initialize project configuration
|
||||
# --------------------------------
|
||||
tryCatch({
|
||||
source("parameters_project.R")
|
||||
source("ci_extraction_utils.R")
|
||||
source("mosaic_creation_utils.R")
|
||||
safe_log(paste("Successfully sourced files from default directory."))
|
||||
}, error = function(e) {
|
||||
warning("Default source files not found. Attempting to source from 'r_app' directory.")
|
||||
tryCatch({
|
||||
source("r_app/parameters_project.R")
|
||||
source("r_app/ci_extraction_utils.R")
|
||||
source(here::here("r_app", "parameters_project.R"))
|
||||
source(here::here("r_app", "mosaic_creation_utils.R"))
|
||||
warning(paste("Successfully sourced files from 'r_app' directory."))
|
||||
}, error = function(e) {
|
||||
stop("Failed to source required files from both default and 'r_app' directories.")
|
||||
})
|
||||
|
|
@ -83,7 +90,7 @@ main <- function() {
|
|||
# 4. Generate date range for processing
|
||||
# ---------------------------------
|
||||
dates <- date_list(end_date, offset)
|
||||
log_message(paste("Processing data for week", dates$week, "of", dates$year))
|
||||
safe_log(paste("Processing data for week", dates$week, "of", dates$year))
|
||||
|
||||
# Create output filename
|
||||
file_name_tif <- if (length(args) >= 4 && !is.na(args[4])) {
|
||||
|
|
@ -92,7 +99,7 @@ main <- function() {
|
|||
paste0("week_", sprintf("%02d", dates$week), "_", dates$year, ".tif")
|
||||
}
|
||||
|
||||
log_message(paste("Output will be saved as:", file_name_tif))
|
||||
safe_log(paste("Output will be saved as:", file_name_tif))
|
||||
|
||||
# 5. Create weekly mosaic using the function from utils
|
||||
# -------------------------------------------------
|
||||
|
|
@ -107,5 +114,6 @@ main <- function() {
|
|||
)
|
||||
}
|
||||
|
||||
# Run the main function if the script is executed directly
|
||||
main()
|
||||
if (sys.nframe() == 0) {
|
||||
main()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,8 +3,7 @@
|
|||
# Utility functions for creating weekly mosaics from daily satellite imagery.
|
||||
# These functions support cloud cover assessment, date handling, and mosaic creation.
|
||||
|
||||
#' Safe logging function that works whether log_message exists or not
|
||||
#'
|
||||
#' Safe logging function
|
||||
#' @param message The message to log
|
||||
#' @param level The log level (default: "INFO")
|
||||
#' @return NULL (used for side effects)
|
||||
|
|
@ -158,34 +157,46 @@ count_cloud_coverage <- function(vrt_list, field_boundaries) {
|
|||
|
||||
tryCatch({
|
||||
# Calculate total pixel area using the first VRT file
|
||||
total_pix_area <- terra::rast(vrt_list[1]) %>%
|
||||
terra::subset(1) %>%
|
||||
terra::setValues(1) %>%
|
||||
terra::crop(field_boundaries, mask = TRUE) %>%
|
||||
total_pix_area <- terra::rast(vrt_list[1]) |>
|
||||
terra::subset(1) |>
|
||||
terra::setValues(1) |>
|
||||
terra::crop(field_boundaries, mask = TRUE) |>
|
||||
terra::global(fun = "notNA")
|
||||
|
||||
# Extract layer 1 from all rasters (for cloud detection)
|
||||
layer_5_list <- purrr::map(vrt_list, function(file) {
|
||||
terra::rast(file) %>% terra::subset(1)
|
||||
}) %>% terra::rast()
|
||||
# Process each raster to detect clouds and shadows
|
||||
processed_rasters <- list()
|
||||
cloud_masks <- list()
|
||||
|
||||
# Calculate percentage of missing pixels (clouds)
|
||||
missing_pixels_count <- terra::global(layer_5_list, fun = "notNA") %>%
|
||||
dplyr::mutate(
|
||||
total_pixels = total_pix_area$notNA,
|
||||
missing_pixels_percentage = round(100 - ((notNA / total_pix_area$notNA) * 100)),
|
||||
thres_5perc = as.integer(missing_pixels_percentage < 5),
|
||||
thres_40perc = as.integer(missing_pixels_percentage < 45)
|
||||
# Create data frame for missing pixels count
|
||||
missing_pixels_df <- data.frame(
|
||||
filename = vrt_list,
|
||||
notNA = numeric(length(vrt_list)),
|
||||
total_pixels = numeric(length(vrt_list)),
|
||||
missing_pixels_percentage = numeric(length(vrt_list)),
|
||||
thres_5perc = numeric(length(vrt_list)),
|
||||
thres_40perc = numeric(length(vrt_list))
|
||||
)
|
||||
# Fill in the data frame with missing pixel statistics
|
||||
for (i in seq_along(processed_rasters)) {
|
||||
notna_count <- terra::global(processed_rasters[[i]][[1]], fun = "notNA")$notNA
|
||||
missing_pixels_df$notNA[i] <- notna_count
|
||||
missing_pixels_df$total_pixels[i] <- total_pix_area$notNA
|
||||
missing_pixels_df$missing_pixels_percentage[i] <- round(100 - ((notna_count / total_pix_area$notNA) * 100))
|
||||
missing_pixels_df$thres_5perc[i] <- as.integer(missing_pixels_df$missing_pixels_percentage[i] < 5)
|
||||
missing_pixels_df$thres_40perc[i] <- as.integer(missing_pixels_df$missing_pixels_percentage[i] < 45)
|
||||
}
|
||||
|
||||
# Store processed rasters and cloud masks as attributes
|
||||
attr(missing_pixels_df, "cloud_masks") <- cloud_masks
|
||||
attr(missing_pixels_df, "processed_rasters") <- processed_rasters
|
||||
|
||||
# Log results
|
||||
safe_log(paste(
|
||||
"Cloud cover assessment completed for", length(vrt_list), "files.",
|
||||
sum(missing_pixels_count$thres_5perc), "files with <5% cloud cover,",
|
||||
sum(missing_pixels_count$thres_40perc), "files with <45% cloud cover"
|
||||
sum(missing_pixels_df$thres_5perc), "files with <5% cloud cover,",
|
||||
sum(missing_pixels_df$thres_40perc), "files with <45% cloud cover"
|
||||
))
|
||||
|
||||
return(missing_pixels_count)
|
||||
return(missing_pixels_df)
|
||||
}, error = function(e) {
|
||||
warning("Error in cloud coverage calculation: ", e$message)
|
||||
return(NULL)
|
||||
|
|
@ -209,8 +220,8 @@ create_mosaic <- function(vrt_list, missing_pixels_count, field_boundaries = NUL
|
|||
|
||||
safe_log("No images available for this period, creating empty mosaic", "WARNING")
|
||||
|
||||
x <- terra::rast(raster_files_final[1]) %>%
|
||||
terra::setValues(0) %>%
|
||||
x <- terra::rast(raster_files_final[1]) |>
|
||||
terra::setValues(0) |>
|
||||
terra::crop(field_boundaries, mask = TRUE)
|
||||
|
||||
names(x) <- c("Red", "Green", "Blue", "NIR", "CI")
|
||||
|
|
@ -227,6 +238,76 @@ create_mosaic <- function(vrt_list, missing_pixels_count, field_boundaries = NUL
|
|||
return(x)
|
||||
}
|
||||
|
||||
# Check if we have processed rasters from cloud detection
|
||||
processed_rasters <- attr(missing_pixels_count, "processed_rasters")
|
||||
cloud_masks <- attr(missing_pixels_count, "cloud_masks")
|
||||
|
||||
if (!is.null(processed_rasters) && length(processed_rasters) > 0) {
|
||||
safe_log("Using cloud-masked rasters for mosaic creation")
|
||||
|
||||
# Determine best rasters to use based on cloud coverage
|
||||
index_5perc <- which(missing_pixels_count$thres_5perc == max(missing_pixels_count$thres_5perc))
|
||||
index_40perc <- which(missing_pixels_count$thres_40perc == max(missing_pixels_count$thres_40perc))
|
||||
|
||||
# Create mosaic based on available cloud-free images
|
||||
if (sum(missing_pixels_count$thres_5perc) > 1) {
|
||||
safe_log("Creating max composite from multiple cloud-free images (<5% clouds)")
|
||||
|
||||
# Use the cloud-masked rasters instead of original files
|
||||
cloudy_rasters_list <- processed_rasters[index_5perc]
|
||||
rsrc <- terra::sprc(cloudy_rasters_list)
|
||||
x <- terra::mosaic(rsrc, fun = "max")
|
||||
|
||||
# Also create a composite mask showing where data is valid
|
||||
mask_list <- cloud_masks[index_5perc]
|
||||
mask_rsrc <- terra::sprc(mask_list)
|
||||
mask_composite <- terra::mosaic(mask_rsrc, fun = "max")
|
||||
attr(x, "cloud_mask") <- mask_composite
|
||||
|
||||
} else if (sum(missing_pixels_count$thres_5perc) == 1) {
|
||||
safe_log("Using single cloud-free image (<5% clouds)")
|
||||
|
||||
# Use the cloud-masked raster
|
||||
x <- processed_rasters[[index_5perc[1]]]
|
||||
attr(x, "cloud_mask") <- cloud_masks[[index_5perc[1]]]
|
||||
|
||||
} else if (sum(missing_pixels_count$thres_40perc) > 1) {
|
||||
safe_log("Creating max composite from partially cloudy images (<40% clouds)", "WARNING")
|
||||
|
||||
# Use the cloud-masked rasters
|
||||
cloudy_rasters_list <- processed_rasters[index_40perc]
|
||||
rsrc <- terra::sprc(cloudy_rasters_list)
|
||||
x <- terra::mosaic(rsrc, fun = "max")
|
||||
|
||||
# Also create a composite mask
|
||||
mask_list <- cloud_masks[index_40perc]
|
||||
mask_rsrc <- terra::sprc(mask_list)
|
||||
mask_composite <- terra::mosaic(mask_rsrc, fun = "max")
|
||||
attr(x, "cloud_mask") <- mask_composite
|
||||
|
||||
} else if (sum(missing_pixels_count$thres_40perc) == 1) {
|
||||
safe_log("Using single partially cloudy image (<40% clouds)", "WARNING")
|
||||
|
||||
# Use the cloud-masked raster
|
||||
x <- processed_rasters[[index_40perc[1]]]
|
||||
attr(x, "cloud_mask") <- cloud_masks[[index_40perc[1]]]
|
||||
|
||||
} else {
|
||||
safe_log("No cloud-free images available, using all cloud-masked images", "WARNING")
|
||||
|
||||
# Use all cloud-masked rasters
|
||||
rsrc <- terra::sprc(processed_rasters)
|
||||
x <- terra::mosaic(rsrc, fun = "max")
|
||||
|
||||
# Also create a composite mask
|
||||
mask_rsrc <- terra::sprc(cloud_masks)
|
||||
mask_composite <- terra::mosaic(mask_rsrc, fun = "max")
|
||||
attr(x, "cloud_mask") <- mask_composite
|
||||
}
|
||||
} else {
|
||||
# Fall back to original behavior if no cloud-masked rasters available
|
||||
safe_log("No cloud-masked rasters available, using original images", "WARNING")
|
||||
|
||||
# Determine best rasters to use based on cloud coverage
|
||||
index_5perc <- which(missing_pixels_count$thres_5perc == max(missing_pixels_count$thres_5perc))
|
||||
index_40perc <- which(missing_pixels_count$thres_40perc == max(missing_pixels_count$thres_40perc))
|
||||
|
|
@ -262,6 +343,7 @@ create_mosaic <- function(vrt_list, missing_pixels_count, field_boundaries = NUL
|
|||
rsrc <- terra::sprc(vrt_list)
|
||||
x <- terra::mosaic(rsrc, fun = "max")
|
||||
}
|
||||
}
|
||||
|
||||
# Set consistent layer names
|
||||
names(x) <- c("Red", "Green", "Blue", "NIR", "CI")
|
||||
|
|
@ -288,18 +370,51 @@ save_mosaic <- function(mosaic_raster, output_dir, file_name, plot_result = FALS
|
|||
# Create full file path
|
||||
file_path <- here::here(output_dir, file_name)
|
||||
|
||||
# Get cloud mask if it exists
|
||||
cloud_mask <- attr(mosaic_raster, "cloud_mask")
|
||||
|
||||
# Save raster
|
||||
terra::writeRaster(mosaic_raster, file_path, overwrite = TRUE)
|
||||
|
||||
# Save cloud mask if available
|
||||
if (!is.null(cloud_mask)) {
|
||||
# Create mask filename by adding _mask before extension
|
||||
mask_file_name <- gsub("\\.(tif|TIF)$", "_mask.\\1", file_name)
|
||||
mask_file_path <- here::here(output_dir, mask_file_name)
|
||||
|
||||
# Save the mask
|
||||
terra::writeRaster(cloud_mask, mask_file_path, overwrite = TRUE)
|
||||
safe_log(paste("Cloud/shadow mask saved to:", mask_file_path))
|
||||
}
|
||||
|
||||
# Create plots if requested
|
||||
if (plot_result) {
|
||||
# Plot the CI band
|
||||
if ("CI" %in% names(mosaic_raster)) {
|
||||
terra::plot(mosaic_raster$CI, main = paste("CI map", file_name))
|
||||
}
|
||||
|
||||
# Plot RGB image
|
||||
if (all(c("Red", "Green", "Blue") %in% names(mosaic_raster))) {
|
||||
terra::plotRGB(mosaic_raster, main = paste("RGB map", file_name))
|
||||
}
|
||||
|
||||
# Plot cloud mask if available
|
||||
if (!is.null(cloud_mask)) {
|
||||
terra::plot(cloud_mask, main = paste("Cloud/shadow mask", file_name),
|
||||
col = c("red", "green"))
|
||||
}
|
||||
|
||||
# If we have both RGB and cloud mask, create a side-by-side comparison
|
||||
if (all(c("Red", "Green", "Blue") %in% names(mosaic_raster)) && !is.null(cloud_mask)) {
|
||||
old_par <- par(mfrow = c(1, 2))
|
||||
terra::plotRGB(mosaic_raster, main = "RGB Image")
|
||||
|
||||
# Create a colored mask for visualization (red = cloud/shadow, green = clear)
|
||||
mask_plot <- cloud_mask
|
||||
terra::plot(mask_plot, main = "Cloud/Shadow Mask", col = c("red", "green"))
|
||||
par(old_par)
|
||||
}
|
||||
}
|
||||
|
||||
# Log save completion
|
||||
|
|
|
|||
|
|
@ -55,10 +55,9 @@ subchunkify <- function(g, fig_height=7, fig_width=5) {
|
|||
#' @param week Week number to display in the title
|
||||
#' @param age Age of the crop in weeks
|
||||
#' @param borders Whether to display field borders (default: FALSE)
|
||||
#' @param use_breaks Whether to use breaks or continuous spectrum for the raster (default: TRUE)
|
||||
#' @return A tmap object with the CI map
|
||||
#'
|
||||
create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week, age, borders = FALSE, use_breaks = TRUE){
|
||||
create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week, age, borders = FALSE){
|
||||
# Input validation
|
||||
if (missing(pivot_raster) || is.null(pivot_raster)) {
|
||||
stop("pivot_raster is required")
|
||||
|
|
@ -75,34 +74,17 @@ create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend =
|
|||
if (missing(age) || is.null(age)) {
|
||||
stop("age parameter is required")
|
||||
}
|
||||
|
||||
# Create the base map
|
||||
map <- tm_shape(pivot_raster, unit = "m")
|
||||
|
||||
# Add raster with either breaks or continuous spectrum based on parameter
|
||||
if (use_breaks) {
|
||||
map <- map + tm_raster(breaks = c(0,0.5,1,2,3,4,5,6,7,Inf),
|
||||
palette = "RdYlGn",
|
||||
legend.is.portrait = legend_is_portrait,
|
||||
midpoint = NA,
|
||||
title = "CI")
|
||||
} else {
|
||||
map <- map + tm_raster(palette = "RdYlGn",
|
||||
legend.is.portrait = legend_is_portrait,
|
||||
style = "cont", # Use continuous spectrum
|
||||
title = "CI")
|
||||
}
|
||||
|
||||
map <- tm_shape(pivot_raster, unit = "m") # Add raster with continuous spectrum (fixed scale 1-8 for consistent comparison)
|
||||
map <- map + tm_raster(col.scale = tm_scale_continuous(values = "brewer.rd_yl_gn",
|
||||
limits = c(1, 8)),
|
||||
col.legend = tm_legend(title = "CI",
|
||||
orientation = if(legend_is_portrait) "portrait" else "landscape",
|
||||
show = show_legend,
|
||||
position = c("left", "bottom")))
|
||||
# Add layout elements
|
||||
map <- map + tm_layout(main.title = paste0("Max CI week ", week,"\n", age, " weeks old"),
|
||||
main.title.size = 0.7,
|
||||
legend.show = show_legend,
|
||||
legend.position = c("left", "bottom"),
|
||||
# legend.width = 0.5,
|
||||
# legend.height = 0.5,
|
||||
# legend.text.size = 0.8,
|
||||
# legend.title.size = 0.9,
|
||||
legend.outside = FALSE)
|
||||
main.title.size = 0.7)
|
||||
|
||||
# Add borders if requested
|
||||
if (borders) {
|
||||
|
|
@ -128,10 +110,9 @@ create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend =
|
|||
#' @param week_2 Second week number for comparison
|
||||
#' @param age Age of the crop in weeks
|
||||
#' @param borders Whether to display field borders (default: TRUE)
|
||||
#' @param use_breaks Whether to use breaks or continuous spectrum for the raster (default: TRUE)
|
||||
#' @return A tmap object with the CI difference map
|
||||
#'
|
||||
create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week_1, week_2, age, borders = TRUE, use_breaks = TRUE){
|
||||
create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week_1, week_2, age, borders = TRUE){
|
||||
# Input validation
|
||||
if (missing(pivot_raster) || is.null(pivot_raster)) {
|
||||
stop("pivot_raster is required")
|
||||
|
|
@ -148,35 +129,18 @@ create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_lege
|
|||
if (missing(age) || is.null(age)) {
|
||||
stop("age parameter is required")
|
||||
}
|
||||
|
||||
# Create the base map
|
||||
map <- tm_shape(pivot_raster, unit = "m")
|
||||
|
||||
# Add raster with either breaks or continuous spectrum based on parameter
|
||||
if (use_breaks) {
|
||||
map <- map + tm_raster(breaks = c(-3,-2,-1,0,1,2,3),
|
||||
palette = "RdYlGn",
|
||||
legend.is.portrait = legend_is_portrait,
|
||||
map <- tm_shape(pivot_raster, unit = "m") # Add raster with continuous spectrum (centered at 0 for difference maps, fixed scale)
|
||||
map <- map + tm_raster(col.scale = tm_scale_continuous(values = "brewer.rd_yl_gn",
|
||||
midpoint = 0,
|
||||
title = "CI difference")
|
||||
} else {
|
||||
map <- map + tm_raster(palette = "RdYlGn",
|
||||
legend.is.portrait = legend_is_portrait,
|
||||
style = "cont", # Use continuous spectrum
|
||||
midpoint = 0,
|
||||
title = "CI difference")
|
||||
}
|
||||
|
||||
limits = c(-3, 3)),
|
||||
col.legend = tm_legend(title = "CI difference",
|
||||
orientation = if(legend_is_portrait) "portrait" else "landscape",
|
||||
show = show_legend,
|
||||
position = c("left", "bottom")))
|
||||
# Add layout elements
|
||||
map <- map + tm_layout(main.title = paste0("CI change week ", week_1, " - week ", week_2, "\n", age, " weeks old"),
|
||||
main.title.size = 0.7,
|
||||
legend.show = show_legend,
|
||||
legend.position = c("left", "bottom"),
|
||||
# legend.width = 0.5,
|
||||
# legend.height = 0.5,
|
||||
# legend.text.size = 0.8,
|
||||
# legend.title.size = 0.9,
|
||||
legend.outside = FALSE)
|
||||
main.title.size = 0.7)
|
||||
|
||||
# Add borders if requested
|
||||
if (borders) {
|
||||
|
|
@ -205,7 +169,6 @@ create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_lege
|
|||
#' @param week_minus_1 Previous week number
|
||||
#' @param week_minus_2 Two weeks ago week number
|
||||
#' @param week_minus_3 Three weeks ago week number
|
||||
#' @param use_breaks Whether to use discrete breaks or continuous spectrum (default: TRUE)
|
||||
#' @param borders Whether to display field borders (default: TRUE)
|
||||
#' @return NULL (adds output directly to R Markdown document)
|
||||
#'
|
||||
|
|
@ -221,7 +184,6 @@ ci_plot <- function(pivotName,
|
|||
week_minus_1 = week_minus_1,
|
||||
week_minus_2 = week_minus_2,
|
||||
week_minus_3 = week_minus_3,
|
||||
use_breaks = TRUE,
|
||||
borders = TRUE){
|
||||
# Input validation
|
||||
if (missing(pivotName) || is.null(pivotName) || pivotName == "") {
|
||||
|
|
@ -282,35 +244,28 @@ ci_plot <- function(pivotName,
|
|||
|
||||
# Create spans for borders
|
||||
joined_spans2 <- field_boundaries %>%
|
||||
sf::st_transform(sf::st_crs(pivotShape)) %>%
|
||||
dplyr::filter(field %in% pivotName)
|
||||
sf::st_transform(sf::st_crs(pivotShape)) %>% dplyr::filter(field %in% pivotName)
|
||||
|
||||
# Create the maps for different timepoints
|
||||
CImap_m2 <- create_CI_map(singlePivot_m2, AllPivots2, joined_spans2,
|
||||
show_legend = TRUE, legend_is_portrait = TRUE,
|
||||
week = week_minus_2, age = age - 2, borders = borders,
|
||||
use_breaks = use_breaks)
|
||||
week = week_minus_2, age = age - 2, borders = borders)
|
||||
|
||||
CImap_m1 <- create_CI_map(singlePivot_m1, AllPivots2, joined_spans2,
|
||||
show_legend = FALSE, legend_is_portrait = FALSE,
|
||||
week = week_minus_1, age = age - 1, borders = borders,
|
||||
use_breaks = use_breaks)
|
||||
week = week_minus_1, age = age - 1, borders = borders)
|
||||
|
||||
CImap <- create_CI_map(singlePivot, AllPivots2, joined_spans2,
|
||||
show_legend = FALSE, legend_is_portrait = FALSE,
|
||||
week = week, age = age, borders = borders,
|
||||
use_breaks = use_breaks)
|
||||
|
||||
week = week, age = age, borders = borders)
|
||||
# Create difference maps - only show legend on the second one to avoid redundancy
|
||||
CI_max_abs_last_week <- create_CI_diff_map(abs_CI_last_week, AllPivots2, joined_spans2,
|
||||
show_legend = FALSE, legend_is_portrait = TRUE,
|
||||
week_1 = week, week_2 = week_minus_1, age = age, borders = borders,
|
||||
use_breaks = use_breaks)
|
||||
show_legend = FALSE, legend_is_portrait = FALSE,
|
||||
week_1 = week, week_2 = week_minus_1, age = age, borders = borders)
|
||||
|
||||
CI_max_abs_three_week <- create_CI_diff_map(abs_CI_three_week, AllPivots2, joined_spans2,
|
||||
show_legend = TRUE, legend_is_portrait = TRUE,
|
||||
week_1 = week, week_2 = week_minus_3, age = age, borders = borders,
|
||||
use_breaks = use_breaks)
|
||||
week_1 = week, week_2 = week_minus_3, age = age, borders = borders)
|
||||
|
||||
# Arrange the maps
|
||||
tst <- tmap_arrange(CImap_m2, CImap_m1, CImap, CI_max_abs_last_week, CI_max_abs_three_week, nrow = 1)
|
||||
|
|
|
|||
409
r_app/system_architecture.md
Normal file
409
r_app/system_architecture.md
Normal file
|
|
@ -0,0 +1,409 @@
|
|||
<!-- filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\system_architecture.md -->
|
||||
# SmartCane System Architecture
|
||||
|
||||
## Overview
|
||||
|
||||
The SmartCane system is a comprehensive agricultural intelligence platform that processes satellite imagery and farm data to provide agronomic insights for sugarcane farmers. The system architecture follows a modular, layered approach with clear separation of concerns between data acquisition, processing, and presentation.
|
||||
|
||||
## Architectural Layers
|
||||
|
||||
The SmartCane system follows a layered architecture pattern, which is a standard approach in software engineering for organizing complex systems. This architecture divides the system into distinct functional layers, each with specific responsibilities. While these layers aren't explicitly shown as separate visual elements in the diagrams, they help conceptualize how components are organized by their function:
|
||||
|
||||
|
||||
|
||||
### 1. Data Acquisition Layer
|
||||
- **Role**: Responsible for fetching raw data from external sources and user inputs
|
||||
- **Components**: Manual Sentinel Hub Requests, Python API Downloader, User Input Interface
|
||||
- **Functions**: Manual request setup on Sentinel Hub Requests Builder for specific client fields, connects to satellite data providers, downloads imagery, manages API credentials, performs preliminary data validation
|
||||
|
||||
### 2. Processing Layer (SmartCane Engine)
|
||||
- **Role**: Core analytical engine that transforms raw data into actionable insights
|
||||
- **Components**: Python API Downloader (pre-processing), R Processing Engine (analytics)
|
||||
- **Functions**: Image processing, cloud masking, crop index calculation, field boundary processing, statistical analysis, report generation
|
||||
|
||||
### 3. Presentation Layer
|
||||
- **Role**: Delivers insights to end users in accessible formats
|
||||
- **Components**: Laravel Web App, Email Delivery System
|
||||
- **Functions**: Interactive dashboards, visualization, report delivery, user management, project scheduling
|
||||
|
||||
### 4. Data Storage Layer
|
||||
- **Role**: Persists system data across processing cycles
|
||||
- **Components**: File System, Database
|
||||
- **Functions**: Stores raw imagery, processed rasters, analytical results, user data, configuration
|
||||
|
||||
## Key Subsystems
|
||||
|
||||
### 1. Python API Downloader
|
||||
- **Role**: Acquires and pre-processes satellite imagery
|
||||
- **Inputs**: API credentials, field boundaries, date parameters, evaluation scripts
|
||||
- **Outputs**: Raw satellite images, merged GeoTIFFs, virtual rasters
|
||||
- **Interfaces**: External satellite APIs (Planet via Sentinel Hub), file system
|
||||
- **Orchestration**: Triggered by shell scripts from the Laravel application
|
||||
|
||||
### 2. R Processing Engine
|
||||
- **Role**: Performs advanced analytics and generates insights
|
||||
- **Inputs**: Processed satellite imagery, field boundaries, harvest data, project parameters
|
||||
- **Outputs**: Crop indices, mosaics, RDS data files, agronomic reports
|
||||
- **Interfaces**: File system, report templates
|
||||
- **Orchestration**: Triggered by shell scripts from the Laravel application
|
||||
|
||||
### 3. Laravel Web Application
|
||||
- **Role**: Provides operator interface and orchestrates the overall system
|
||||
- **Inputs**: User data, configuration settings
|
||||
- **Outputs**: Web interface, scheduling, report delivery
|
||||
- **Interfaces**: Users, database, file system
|
||||
- **Orchestration**: Controls execution of the SmartCane Engine via shell scripts
|
||||
|
||||
### 4. Shell Script Orchestration
|
||||
- **Role**: Bridges between web application and processing components
|
||||
- **Functions**: Triggers processing workflows, manages execution environment, handles errors
|
||||
- **Examples**: runcane.sh, runpython.sh, build_mosaic.sh, build_report.sh
|
||||
|
||||
## Data Flow
|
||||
|
||||
1. **Input Stage**:
|
||||
- Operators (internal team) manually prepare and submit requests on Sentinel Hub Requests Builder for the specific fields of a client.
|
||||
- Operators (internal team) provide farm data (field boundaries, harvest data) via the Laravel Web App.
|
||||
- System schedules data acquisition for specific dates/regions
|
||||
|
||||
2. **Acquisition Stage**:
|
||||
- Laravel triggers Python API Downloader via shell scripts
|
||||
- Python connects to satellite data providers and downloads raw imagery
|
||||
- Downloaded data is stored in the file system
|
||||
|
||||
3. **Processing Stage**:
|
||||
- Laravel triggers R Processing Engine via shell scripts
|
||||
- R scripts read satellite imagery and farm data
|
||||
- Processing produces crop indices, analytics, and reports
|
||||
- Results are stored in the file system
|
||||
|
||||
4. **Output Stage**:
|
||||
- Laravel Web App accesses processed results
|
||||
- Reports are delivered to users via email
|
||||
|
||||
## System Integration Points
|
||||
|
||||
- **Python-R Integration**: Data handover via file system (GeoTIFF, virtual rasters)
|
||||
- **Engine-Laravel Integration**: Orchestration via shell scripts, data exchange via file system and database
|
||||
- **User-System Integration**: Web interface, file uploads, email notifications
|
||||
|
||||
## Developed/Customized Elements
|
||||
|
||||
- **Custom Cloud Masking Algorithm**: Specialized for agricultural applications in tropical regions
|
||||
- **Crop Index Extraction Pipeline**: Tailored to sugarcane spectral characteristics
|
||||
- **Reporting Templates**: Designed for agronomic decision support
|
||||
- **Shell Script Orchestration**: Custom workflow management for the system's components
|
||||
|
||||
## Strategic Role of Satellite Data
|
||||
|
||||
Satellite data is central to the SmartCane system, providing:
|
||||
- Regular, non-invasive field monitoring
|
||||
- Detection of spatial patterns not visible from ground level
|
||||
- Historical analysis of crop performance
|
||||
- Early warning of crop stress or disease
|
||||
- Quantification of field variability for precision agriculture
|
||||
|
||||
## Pilot Utilization Sites
|
||||
|
||||
The SmartCane system is currently operational in Mozambique, Kenya, and Tanzania. Future pilot deployments and expansions are planned for Uganda, Colombia, Mexico, Guatemala, South Africa, and Zambia.
|
||||
|
||||
---
|
||||
|
||||
## System Architecture Diagrams
|
||||
|
||||
Below are diagrams illustrating the system architecture from different perspectives.
|
||||
|
||||
### Overall System Architecture
|
||||
|
||||
This diagram provides a high-level overview of the complete SmartCane system, showing how major components interact. It focuses on the system boundaries and main data flows between the Python API Downloader, R Processing Engine, Laravel Web App, and data storage components. This view helps understand how the system works as a whole.
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
A["fa:fa-satellite External Satellite Data Providers API"] --> PyDL["fa:fa-download Python API Downloader"];
|
||||
C["fa:fa-users Users: Farm Data Input e.g., GeoJSON, Excel"] --> D{"fa:fa-laptop-code Laravel Web App"};
|
||||
|
||||
subgraph SmartCane System
|
||||
PyDL --> G["fa:fa-folder-open File System: Raw Satellite Imagery, Rasters, RDS, Reports, Boundaries"];
|
||||
E["fa:fa-cogs R Processing Engine"] -- Reads --> G;
|
||||
E -- Writes --> G;
|
||||
|
||||
D -- Manages/Triggers --> F["fa:fa-terminal Shell Script Orchestration"];
|
||||
F -- Executes --> PyDL;
|
||||
F -- Executes --> E;
|
||||
|
||||
D -- Manages/Accesses --> G;
|
||||
D -- Reads/Writes --> H["fa:fa-database Database: Project Metadata, Users, Schedules"];
|
||||
|
||||
E -- Generates --> I["fa:fa-file-alt Agronomic Reports: DOCX, HTML"];
|
||||
D -- Accesses/Delivers --> I;
|
||||
end
|
||||
|
||||
D --> J["fa:fa-desktop Users: Web Interface (future)"];
|
||||
I -- Via Email (SMTP) --> K["fa:fa-envelope Users: Email Reports"];
|
||||
|
||||
style E fill:#f9f,stroke:#333,stroke-width:2px
|
||||
style D fill:#bbf,stroke:#333,stroke-width:2px
|
||||
style PyDL fill:#ffdd57,stroke:#333,stroke-width:2px
|
||||
```
|
||||
|
||||
### R Processing Engine Detail
|
||||
|
||||
This diagram zooms in on the R Processing Engine subsystem, detailing the internal components and data flow. It shows how raw satellite imagery and field data progress through various R scripts to produce crop indices and reports. The diagram highlights the data transformation pipeline within this analytical core of the SmartCane system.
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
subgraph R Processing Engine
|
||||
|
||||
direction TB
|
||||
|
||||
subgraph Inputs
|
||||
SatelliteImages["fa:fa-image Raw Satellite Imagery"]
|
||||
FieldBoundaries["fa:fa-map-marker-alt Field Boundaries .geojson"]
|
||||
HarvestData["fa:fa-file-excel Harvest Data .xlsx"]
|
||||
ProjectParams["fa:fa-file-code Project Parameters .R"]
|
||||
end
|
||||
|
||||
subgraph Core R Scripts & Processes
|
||||
ParamConfig("fa:fa-cogs parameters_project.R")
|
||||
MosaicScript("fa:fa-images mosaic_creation.R")
|
||||
CIExtractionScript("fa:fa-microscope ci_extraction.R")
|
||||
ReportUtils("fa:fa-tools executive_report_utils.R")
|
||||
DashboardRmd("fa:fa-tachometer-alt CI_report_dashboard_planet_enhanced.Rmd")
|
||||
SummaryRmd("fa:fa-list-alt CI_report_executive_summary.Rmd")
|
||||
end
|
||||
|
||||
subgraph Outputs
|
||||
WeeklyMosaics["fa:fa-file-image Weekly Mosaics .tif"]
|
||||
CIDataRDS["fa:fa-database CI Data .rds"]
|
||||
CIRasters["fa:fa-layer-group CI Rasters .tif"]
|
||||
DashboardReport["fa:fa-chart-bar Dashboard Report .docx/.html"]
|
||||
SummaryReport["fa:fa-file-invoice Executive Summary .docx/.html"]
|
||||
end
|
||||
|
||||
%% Data Flow
|
||||
ProjectParams --> ParamConfig;
|
||||
|
||||
SatelliteImages --> MosaicScript;
|
||||
FieldBoundaries --> MosaicScript;
|
||||
ParamConfig --> MosaicScript;
|
||||
MosaicScript --> WeeklyMosaics;
|
||||
|
||||
WeeklyMosaics --> CIExtractionScript;
|
||||
FieldBoundaries --> CIExtractionScript;
|
||||
ParamConfig --> CIExtractionScript;
|
||||
CIExtractionScript --> CIDataRDS;
|
||||
CIExtractionScript --> CIRasters;
|
||||
|
||||
CIDataRDS --> ReportUtils;
|
||||
CIRasters --> ReportUtils;
|
||||
HarvestData --> ReportUtils;
|
||||
ParamConfig --> ReportUtils;
|
||||
|
||||
ReportUtils --> DashboardRmd;
|
||||
ReportUtils --> SummaryRmd;
|
||||
ParamConfig --> DashboardRmd;
|
||||
ParamConfig --> SummaryRmd;
|
||||
|
||||
DashboardRmd --> DashboardReport;
|
||||
SummaryRmd --> SummaryReport;
|
||||
|
||||
end
|
||||
|
||||
ShellOrchestration["fa:fa-terminal Shell Scripts e.g., build_mosaic.sh, build_report.sh"] -->|Triggers| R_Processing_Engine["fa:fa-cogs R Processing Engine"]
|
||||
|
||||
style R_Processing_Engine fill:#f9f,stroke:#333,stroke-width:2px
|
||||
style Inputs fill:#ccf,stroke:#333,stroke-width:1px
|
||||
style Outputs fill:#cfc,stroke:#333,stroke-width:1px
|
||||
style Core_R_Scripts_Processes fill:#ffc,stroke:#333,stroke-width:1px
|
||||
```
|
||||
|
||||
### Python API Downloader Detail
|
||||
|
||||
This diagram focuses on the Python API Downloader subsystem, showing its internal components and workflow. It illustrates how API credentials, field boundaries, and other inputs are processed through various Python functions to download, process, and prepare satellite imagery. This view reveals the technical implementation details of the data acquisition layer.
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
subgraph Python API Downloader
|
||||
|
||||
direction TB
|
||||
|
||||
subgraph Inputs_Py [Inputs]
|
||||
APICreds["fa:fa-key API Credentials (SH_CLIENT_ID, SH_CLIENT_SECRET)"]
|
||||
DateRangeParams["fa:fa-calendar-alt Date Range Parameters (days_needed, specific_date)"]
|
||||
GeoJSONInput["fa:fa-map-marker-alt Field Boundaries (pivot.geojson)"]
|
||||
ProjectConfig["fa:fa-cogs Project Configuration (project_name, paths)"]
|
||||
EvalScripts["fa:fa-file-code Evalscripts (JS for cloud masking & band selection)"]
|
||||
end
|
||||
|
||||
subgraph Core_Python_Logic_Py [Core Python Logic & Libraries]
|
||||
SetupConfig["fa:fa-cog SentinelHubConfig & BYOC Definition"]
|
||||
DateSlotGen["fa:fa-calendar-check Date Slot Generation (slots)"]
|
||||
GeoProcessing["fa:fa-map GeoJSON Parsing & BBox Splitting (geopandas, BBoxSplitter)"]
|
||||
AvailabilityCheck["fa:fa-search-location Image Availability Check (SentinelHubCatalog)"]
|
||||
RequestHandler["fa:fa-paper-plane Request Generation (SentinelHubRequest, get_true_color_request_day)"]
|
||||
DownloadClient["fa:fa-cloud-download-alt Image Download (SentinelHubDownloadClient, download_function)"]
|
||||
MergeUtility["fa:fa-object-group Tile Merging (gdal.BuildVRT, gdal.Translate, merge_files)"]
|
||||
CleanupUtility["fa:fa-trash-alt Intermediate File Cleanup (empty_folders)"]
|
||||
end
|
||||
|
||||
subgraph Outputs_Py [Outputs]
|
||||
RawSatImages["fa:fa-file-image Raw Downloaded Satellite Imagery Tiles (response.tiff in dated subfolders)"]
|
||||
MergedTifs["fa:fa-images Merged TIFs (merged_tif/{slot}.tif)"]
|
||||
VirtualRasters["fa:fa-layer-group Virtual Rasters (merged_virtual/merged{slot}.vrt)"]
|
||||
DownloadLogs["fa:fa-file-alt Console Output Logs (print statements)"]
|
||||
end
|
||||
|
||||
ExternalSatAPI["fa:fa-satellite External Satellite Data Providers API (Planet via Sentinel Hub)"]
|
||||
|
||||
%% Data Flow for Python Downloader
|
||||
APICreds --> SetupConfig;
|
||||
DateRangeParams --> DateSlotGen;
|
||||
GeoJSONInput --> GeoProcessing;
|
||||
ProjectConfig --> SetupConfig;
|
||||
ProjectConfig --> GeoProcessing;
|
||||
ProjectConfig --> MergeUtility;
|
||||
ProjectConfig --> CleanupUtility;
|
||||
EvalScripts --> RequestHandler;
|
||||
|
||||
DateSlotGen -- Available Slots --> AvailabilityCheck;
|
||||
GeoProcessing -- BBox List --> AvailabilityCheck;
|
||||
SetupConfig --> AvailabilityCheck;
|
||||
AvailabilityCheck -- Filtered Slots & BBoxes --> RequestHandler;
|
||||
|
||||
RequestHandler -- Download Requests --> DownloadClient;
|
||||
SetupConfig --> DownloadClient;
|
||||
DownloadClient -- Downloads Data From --> ExternalSatAPI;
|
||||
ExternalSatAPI -- Returns Image Data --> DownloadClient;
|
||||
DownloadClient -- Writes --> RawSatImages;
|
||||
DownloadClient -- Generates --> DownloadLogs;
|
||||
|
||||
RawSatImages --> MergeUtility;
|
||||
MergeUtility -- Writes --> MergedTifs;
|
||||
MergeUtility -- Writes --> VirtualRasters;
|
||||
|
||||
end
|
||||
|
||||
ShellOrchestratorPy["fa:fa-terminal Shell Scripts (e.g., runpython.sh triggering planet_download.ipynb)"] -->|Triggers| Python_API_Downloader["fa:fa-download Python API Downloader"];
|
||||
|
||||
style Python_API_Downloader fill:#ffdd57,stroke:#333,stroke-width:2px
|
||||
style Inputs_Py fill:#cdeeff,stroke:#333,stroke-width:1px
|
||||
style Outputs_Py fill:#d4efdf,stroke:#333,stroke-width:1px
|
||||
style Core_Python_Logic_Py fill:#fff5cc,stroke:#333,stroke-width:1px
|
||||
style ExternalSatAPI fill:#f5b7b1,stroke:#333,stroke-width:2px
|
||||
```
|
||||
|
||||
### SmartCane Engine Integration Diagram
|
||||
|
||||
This diagram illustrates the integration of Python and R components within the SmartCane Engine. Unlike the first diagram that shows the overall system, this one specifically focuses on how the two processing components interact with each other and the rest of the system. It emphasizes the orchestration layer and data flows between the core processing components and external systems.
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
%% External Systems & Users
|
||||
Users_DataInput["fa:fa-user Users: Farm Data Input (GeoJSON, Excel, etc.)"] --> Laravel_WebApp;
|
||||
ExternalSatAPI["fa:fa-satellite External Satellite Data Providers API"];
|
||||
|
||||
%% Main Application Components
|
||||
Laravel_WebApp["fa:fa-globe Laravel Web App (Frontend & Control Plane)"];
|
||||
Shell_Orchestration["fa:fa-terminal Shell Script Orchestration (e.g., runcane.sh, runpython.sh, build_mosaic.sh)"]; subgraph SmartCane_Engine ["SmartCane Engine (Data Processing Core)"]
|
||||
direction TB
|
||||
Python_Downloader["fa:fa-download Python API Downloader"];
|
||||
R_Engine["fa:fa-chart-line R Processing Engine"];
|
||||
end
|
||||
%% Data Storage
|
||||
FileSystem["fa:fa-folder File System (Raw Imagery, Rasters, RDS, Reports, Boundaries)"];
|
||||
Database["fa:fa-database Database (Project Metadata, Users, Schedules)"];
|
||||
|
||||
%% User Outputs
|
||||
Users_WebView["fa:fa-desktop Users: Web Interface (future)"];
|
||||
Users_EmailReports["fa:fa-envelope Users: Email Reports (Agronomic Reports)"];
|
||||
AgronomicReports["fa:fa-file-alt Agronomic Reports (DOCX, HTML)"];
|
||||
|
||||
%% --- Data Flows & Interactions ---
|
||||
|
||||
%% Laravel to Orchestration & Engine
|
||||
Laravel_WebApp -- Manages/Triggers --> Shell_Orchestration;
|
||||
Shell_Orchestration -- Executes --> Python_Downloader;
|
||||
Shell_Orchestration -- Executes --> R_Engine;
|
||||
|
||||
%% Python Downloader within Engine
|
||||
ExternalSatAPI -- Satellite Data --> Python_Downloader;
|
||||
Python_Downloader -- Writes Raw Data --> FileSystem;
|
||||
%% Inputs to Python (simplified for this view - details in Python-specific diagram)
|
||||
%% Laravel_WebApp -- Provides Config/Boundaries --> Python_Downloader;
|
||||
|
||||
|
||||
%% R Engine within Engine
|
||||
%% Inputs to R (simplified - details in R-specific diagram)
|
||||
%% Laravel_WebApp -- Provides Config/Boundaries --> R_Engine;
|
||||
R_Engine -- Reads Processed Data/Imagery --> FileSystem;
|
||||
R_Engine -- Writes Derived Products --> FileSystem;
|
||||
R_Engine -- Generates --> AgronomicReports;
|
||||
|
||||
%% Laravel interaction with Data Storage
|
||||
Laravel_WebApp -- Manages/Accesses --> FileSystem;
|
||||
Laravel_WebApp -- Reads/Writes --> Database;
|
||||
|
||||
%% Output Delivery
|
||||
Laravel_WebApp --> Users_WebView;
|
||||
AgronomicReports --> Users_EmailReports;
|
||||
%% Assuming a mechanism like SMTP, potentially triggered by Laravel or R-Engine completion
|
||||
Laravel_WebApp -- Delivers/Displays --> AgronomicReports;
|
||||
|
||||
|
||||
%% Styling
|
||||
style SmartCane_Engine fill:#e6ffe6,stroke:#333,stroke-width:2px
|
||||
style Python_Downloader fill:#ffdd57,stroke:#333,stroke-width:2px
|
||||
style R_Engine fill:#f9f,stroke:#333,stroke-width:2px
|
||||
style Laravel_WebApp fill:#bbf,stroke:#333,stroke-width:2px
|
||||
style Shell_Orchestration fill:#f0ad4e,stroke:#333,stroke-width:2px
|
||||
style FileSystem fill:#d1e0e0,stroke:#333,stroke-width:1px
|
||||
style Database fill:#d1e0e0,stroke:#333,stroke-width:1px
|
||||
style ExternalSatAPI fill:#f5b7b1,stroke:#333,stroke-width:2px
|
||||
style AgronomicReports fill:#d4efdf,stroke:#333,stroke-width:1px
|
||||
```
|
||||
|
||||
## Future Directions
|
||||
|
||||
The SmartCane platform is poised for significant evolution, with several key enhancements and new capabilities planned to further empower users and expand its utility:
|
||||
|
||||
- **Advanced Management Dashboard**: Development of a more comprehensive and interactive management dashboard to provide users with deeper insights and greater control over their operations.
|
||||
- **Enhanced Yield Prediction Models**: Improving the accuracy and granularity of yield predictions by incorporating more variables and advanced machine learning techniques.
|
||||
- **Integrated Weather and Irrigation Advice**: Leveraging weather forecast data and soil moisture information (potentially from new data sources) to provide precise irrigation scheduling and weather-related agronomic advice.
|
||||
- **AI-Guided Agronomic Advice**: Implementing sophisticated AI algorithms to analyze integrated data (satellite, weather, soil, farm practices) and offer tailored, actionable agronomic recommendations.
|
||||
- **Automated Advice Generation**: Developing capabilities for the system to automatically generate and disseminate critical advice and alerts to users based on real-time data analysis.
|
||||
- **Expanded Data Source Integration**:
|
||||
- **Radar Data**: Incorporating radar satellite imagery (e.g., Sentinel-1) for all-weather monitoring capabilities, particularly useful during cloudy seasons for assessing crop structure, soil moisture, and biomass.
|
||||
- **IoT and Ground Sensors**: Integrating data from in-field IoT devices and soil sensors for highly localized and continuous monitoring of environmental and soil conditions.
|
||||
- **Client-Facing Portal**: Exploration and potential development of a client-facing portal to allow end-users direct access to their data, dashboards, and reports, complementing the current internal management interface.
|
||||
|
||||
These future developments aim to transform SmartCane into an even more powerful decision support system, fostering sustainable and efficient agricultural practices.
|
||||
|
||||
## Conclusion and Integration Summary
|
||||
|
||||
The SmartCane system architecture demonstrates a well-integrated solution that combines different technologies and subsystems to solve complex agricultural challenges. Here is a summary of how the key subsystems work together:
|
||||
|
||||
### Subsystem Integration
|
||||
|
||||
1. **Data Flow Sequence**
|
||||
- The Laravel Web App initiates the workflow and manages user interactions
|
||||
- Shell scripts orchestrate the execution sequence of the processing subsystems
|
||||
- The Python API Downloader acquires raw data from external sources
|
||||
- The R Processing Engine transforms this data into actionable insights
|
||||
- Results flow back to users through the web interface and email reports
|
||||
|
||||
2. **Technology Integration**
|
||||
- **Python + R**: Different programming languages are leveraged for their respective strengths—Python for API communication and data acquisition, R for statistical analysis and report generation
|
||||
- **Laravel + Processing Engine**: Clear separation between web presentation layer and computational backend
|
||||
- **File System + Database**: Hybrid data storage approach with file system for imagery and reports, database for metadata and user information
|
||||
|
||||
3. **Key Integration Mechanisms**
|
||||
- **File System Bridge**: The different subsystems primarily communicate through standardized file formats (GeoTIFF, GeoJSON, RDS, DOCX)
|
||||
- **Shell Script Orchestration**: Acts as the "glue" between subsystems, ensuring proper execution sequence and environment setup
|
||||
- **Standardized Data Formats**: Use of widely-accepted geospatial and data formats enables interoperability
|
||||
|
||||
4. **Extensibility and Scalability**
|
||||
- The modular architecture allows for replacement or enhancement of individual components
|
||||
- The clear subsystem boundaries enable parallel development and testing
|
||||
- Standard interfaces simplify integration of new data sources, algorithms, or output methods
|
||||
|
||||
The SmartCane architecture balances complexity with maintainability by using well-established technologies and clear boundaries between subsystems. The separation of concerns between data acquisition, processing, and presentation layers ensures that changes in one area minimally impact others, while the consistent data flow pathways ensure that information moves smoothly through the system.
|
||||
|
|
@ -1,68 +0,0 @@
|
|||
# test_date_functions.R
|
||||
#
|
||||
# Tests for date-related functions in ci_extraction_utils.R
|
||||
#
|
||||
|
||||
# Load the test framework
|
||||
source("tests/test_framework.R")
|
||||
|
||||
# Set up test environment
|
||||
env <- setup_test_env()
|
||||
|
||||
# Load the functions to test
|
||||
source("../ci_extraction_utils.R")
|
||||
|
||||
# Test the date_list function
|
||||
test_that("date_list creates correct date sequences", {
|
||||
# Test with a specific date and offset
|
||||
dates <- date_list(as.Date("2023-01-15"), 7)
|
||||
|
||||
# Check the structure
|
||||
expect_type(dates, "list")
|
||||
expect_equal(names(dates), c("week", "year", "days_filter", "start_date", "end_date"))
|
||||
|
||||
# Check the values
|
||||
expect_equal(dates$week, lubridate::week(as.Date("2023-01-09")))
|
||||
expect_equal(dates$year, 2023)
|
||||
expect_equal(dates$start_date, as.Date("2023-01-09"))
|
||||
expect_equal(dates$end_date, as.Date("2023-01-15"))
|
||||
expect_equal(length(dates$days_filter), 7)
|
||||
expect_equal(dates$days_filter[1], "2023-01-09")
|
||||
expect_equal(dates$days_filter[7], "2023-01-15")
|
||||
|
||||
# Test with a different offset
|
||||
dates_short <- date_list(as.Date("2023-01-15"), 3)
|
||||
expect_equal(length(dates_short$days_filter), 3)
|
||||
expect_equal(dates_short$days_filter, c("2023-01-13", "2023-01-14", "2023-01-15"))
|
||||
|
||||
# Test with string date
|
||||
dates_string <- date_list("2023-01-15", 5)
|
||||
expect_equal(dates_string$days_filter,
|
||||
c("2023-01-11", "2023-01-12", "2023-01-13", "2023-01-14", "2023-01-15"))
|
||||
|
||||
# Test error handling
|
||||
expect_error(date_list("invalid-date", 7),
|
||||
"Invalid end_date provided")
|
||||
expect_error(date_list("2023-01-15", -1),
|
||||
"Invalid offset provided")
|
||||
})
|
||||
|
||||
# Test the date_extract function
|
||||
test_that("date_extract correctly extracts dates from file paths", {
|
||||
# Test with various file path formats
|
||||
expect_equal(date_extract("/some/path/2023-01-15_image.tif"), "2023-01-15")
|
||||
expect_equal(date_extract("path/to/planet_2023-01-15.tif"), "2023-01-15")
|
||||
expect_equal(date_extract("c:\\path\\with\\windows\\2023-01-15_file.tif"), "2023-01-15")
|
||||
expect_equal(date_extract("2023-01-15.tif"), "2023-01-15")
|
||||
expect_equal(date_extract("prefix-2023-01-15-suffix.tif"), "2023-01-15")
|
||||
|
||||
# Test with invalid file paths
|
||||
expect_warning(result <- date_extract("no-date-here.tif"), "Could not extract date")
|
||||
expect_true(is.na(result))
|
||||
})
|
||||
|
||||
# Clean up
|
||||
teardown_test_env()
|
||||
|
||||
# Print success message
|
||||
cat("Date function tests completed successfully\n")
|
||||
|
|
@ -1,120 +0,0 @@
|
|||
# test_framework.R
|
||||
#
|
||||
# TEST FRAMEWORK FOR SMARTCANE
|
||||
# ===========================
|
||||
# This script provides a simple testing framework for the SmartCane project.
|
||||
# It includes utilities for setting up test environments and running tests.
|
||||
#
|
||||
|
||||
# Install required packages if not available
|
||||
if (!require("testthat", quietly = TRUE)) {
|
||||
install.packages("testthat", repos = "https://cran.rstudio.com/")
|
||||
}
|
||||
library(testthat)
|
||||
|
||||
# Define paths for testing
|
||||
test_root <- file.path(normalizePath(".."), "tests")
|
||||
test_data_dir <- file.path(test_root, "test_data")
|
||||
|
||||
# Create test directories if they don't exist
|
||||
dir.create(test_data_dir, recursive = TRUE, showWarnings = FALSE)
|
||||
|
||||
# Set up a test environment with all necessary data
|
||||
setup_test_env <- function() {
|
||||
# Add working directory to the path
|
||||
.libPaths(c(.libPaths(), normalizePath("..")))
|
||||
|
||||
# Source required files with minimal dependencies
|
||||
tryCatch({
|
||||
source(file.path(normalizePath(".."), "packages.R"))
|
||||
skip_package_loading <- TRUE
|
||||
|
||||
# Load minimal dependencies for tests
|
||||
required_packages <- c("lubridate", "stringr", "purrr", "dplyr", "testthat")
|
||||
for (pkg in required_packages) {
|
||||
if (!require(pkg, character.only = TRUE, quietly = TRUE)) {
|
||||
warning(paste("Package", pkg, "not available, some tests may fail"))
|
||||
}
|
||||
}
|
||||
}, error = function(e) {
|
||||
warning("Error loading dependencies: ", e$message)
|
||||
})
|
||||
|
||||
# Set up test logging
|
||||
assign("log_message", function(message, level = "INFO") {
|
||||
cat(paste0("[", level, "] ", message, "\n"))
|
||||
}, envir = .GlobalEnv)
|
||||
|
||||
# Create a mock project structure
|
||||
test_project <- list(
|
||||
project_dir = "test_project",
|
||||
data_dir = test_data_dir,
|
||||
daily_CI_vals_dir = file.path(test_data_dir, "extracted_ci", "daily_vals"),
|
||||
cumulative_CI_vals_dir = file.path(test_data_dir, "extracted_ci", "cumulative_vals"),
|
||||
merged_final = file.path(test_data_dir, "merged_final"),
|
||||
daily_vrt = file.path(test_data_dir, "daily_vrt")
|
||||
)
|
||||
|
||||
# Create the directories
|
||||
for (dir in test_project) {
|
||||
if (is.character(dir)) {
|
||||
dir.create(dir, recursive = TRUE, showWarnings = FALSE)
|
||||
}
|
||||
}
|
||||
|
||||
return(test_project)
|
||||
}
|
||||
|
||||
# Clean up test environment
|
||||
teardown_test_env <- function() {
|
||||
# Clean up only test-created files if needed
|
||||
# We'll leave the main directories for inspection
|
||||
}
|
||||
|
||||
# Run all tests in a directory
|
||||
run_tests <- function(pattern = "^test_.+\\.R$") {
|
||||
test_files <- list.files(
|
||||
path = test_root,
|
||||
pattern = pattern,
|
||||
full.names = TRUE
|
||||
)
|
||||
|
||||
# Exclude this file
|
||||
test_files <- test_files[!grepl("test_framework\\.R$", test_files)]
|
||||
|
||||
if (length(test_files) == 0) {
|
||||
cat("No test files found matching pattern:", pattern, "\n")
|
||||
return(FALSE)
|
||||
}
|
||||
|
||||
cat("Found", length(test_files), "test files:\n")
|
||||
cat(paste(" -", basename(test_files)), sep = "\n")
|
||||
cat("\n")
|
||||
|
||||
# Run each test file
|
||||
results <- lapply(test_files, function(file) {
|
||||
cat("Running tests in:", basename(file), "\n")
|
||||
tryCatch({
|
||||
source(file, local = TRUE)
|
||||
cat("✓ Tests completed\n\n")
|
||||
TRUE
|
||||
}, error = function(e) {
|
||||
cat("✗ Error:", e$message, "\n\n")
|
||||
FALSE
|
||||
})
|
||||
})
|
||||
|
||||
# Summary
|
||||
success_count <- sum(unlist(results))
|
||||
cat("\nTest Summary:", success_count, "of", length(test_files),
|
||||
"test files completed successfully\n")
|
||||
|
||||
return(all(unlist(results)))
|
||||
}
|
||||
|
||||
# If this script is run directly, run all tests
|
||||
if (!interactive() && (basename(sys.frame(1)$ofile) == "test_framework.R")) {
|
||||
setup_test_env()
|
||||
run_tests()
|
||||
teardown_test_env()
|
||||
}
|
||||
|
|
@ -1,280 +0,0 @@
|
|||
# test_report_utils.R
|
||||
#
|
||||
# Tests for visualization functions in report_utils.R
|
||||
#
|
||||
|
||||
# Load the test framework
|
||||
source("tests/test_framework.R")
|
||||
|
||||
# Set up test environment
|
||||
env <- setup_test_env()
|
||||
|
||||
# Required libraries for testing
|
||||
library(testthat)
|
||||
library(terra)
|
||||
library(sf)
|
||||
library(dplyr)
|
||||
library(ggplot2)
|
||||
|
||||
# Load the functions to test
|
||||
source("../report_utils.R")
|
||||
|
||||
# Create mock data for testing
|
||||
create_mock_data <- function() {
|
||||
# Create a simple raster for testing
|
||||
r <- terra::rast(nrows=10, ncols=10, xmin=0, xmax=10, ymin=0, ymax=10, vals=1:100)
|
||||
names(r) <- "CI"
|
||||
|
||||
# Create a simple field boundary
|
||||
field_boundaries <- sf::st_sf(
|
||||
field = c("Field1", "Field2"),
|
||||
sub_field = c("A", "B"),
|
||||
geometry = sf::st_sfc(
|
||||
sf::st_polygon(list(rbind(c(1,1), c(5,1), c(5,5), c(1,5), c(1,1)))),
|
||||
sf::st_polygon(list(rbind(c(6,6), c(9,6), c(9,9), c(6,9), c(6,6))))
|
||||
),
|
||||
crs = sf::st_crs(r)
|
||||
)
|
||||
|
||||
# Create mock harvest data
|
||||
harvesting_data <- data.frame(
|
||||
field = c("Field1", "Field2"),
|
||||
sub_field = c("A", "B"),
|
||||
age = c(100, 150),
|
||||
season_start = as.Date(c("2023-01-01", "2023-02-01")),
|
||||
year = c(2023, 2023)
|
||||
)
|
||||
|
||||
# Create mock CI quadrant data
|
||||
ci_quadrant <- data.frame(
|
||||
field = rep(c("Field1", "Field2"), each=10),
|
||||
sub_field = rep(c("A", "B"), each=10),
|
||||
Date = rep(seq(as.Date("2023-01-01"), by="week", length.out=10), 2),
|
||||
DOY = rep(1:10, 2),
|
||||
cumulative_CI = rep(cumsum(1:10), 2),
|
||||
value = rep(1:10, 2),
|
||||
season = rep(2023, 20),
|
||||
model = rep(c("northwest", "northeast", "southwest", "southeast"), 5)
|
||||
)
|
||||
|
||||
return(list(
|
||||
raster = r,
|
||||
field_boundaries = field_boundaries,
|
||||
harvesting_data = harvesting_data,
|
||||
ci_quadrant = ci_quadrant
|
||||
))
|
||||
}
|
||||
|
||||
# Test the create_CI_map function
|
||||
test_that("create_CI_map creates a valid tmap object", {
|
||||
mock_data <- create_mock_data()
|
||||
|
||||
# Test with all required parameters
|
||||
map <- create_CI_map(
|
||||
pivot_raster = mock_data$raster,
|
||||
pivot_shape = mock_data$field_boundaries[1,],
|
||||
pivot_spans = mock_data$field_boundaries[1,],
|
||||
week = "01",
|
||||
age = 10,
|
||||
borders = TRUE,
|
||||
use_breaks = TRUE
|
||||
)
|
||||
|
||||
# Check if it returned a tmap object
|
||||
expect_true("tmap" %in% class(map))
|
||||
|
||||
# Test with missing parameters
|
||||
expect_error(create_CI_map(pivot_shape = mock_data$field_boundaries[1,],
|
||||
pivot_spans = mock_data$field_boundaries[1,],
|
||||
week = "01", age = 10),
|
||||
"pivot_raster is required")
|
||||
|
||||
expect_error(create_CI_map(pivot_raster = mock_data$raster,
|
||||
pivot_spans = mock_data$field_boundaries[1,],
|
||||
week = "01", age = 10),
|
||||
"pivot_shape is required")
|
||||
})
|
||||
|
||||
# Test the create_CI_diff_map function
|
||||
test_that("create_CI_diff_map creates a valid tmap object", {
|
||||
mock_data <- create_mock_data()
|
||||
|
||||
# Test with all required parameters
|
||||
map <- create_CI_diff_map(
|
||||
pivot_raster = mock_data$raster,
|
||||
pivot_shape = mock_data$field_boundaries[1,],
|
||||
pivot_spans = mock_data$field_boundaries[1,],
|
||||
week_1 = "01",
|
||||
week_2 = "02",
|
||||
age = 10,
|
||||
borders = TRUE,
|
||||
use_breaks = TRUE
|
||||
)
|
||||
|
||||
# Check if it returned a tmap object
|
||||
expect_true("tmap" %in% class(map))
|
||||
|
||||
# Test with missing parameters
|
||||
expect_error(create_CI_diff_map(pivot_shape = mock_data$field_boundaries[1,],
|
||||
pivot_spans = mock_data$field_boundaries[1,],
|
||||
week_1 = "01", week_2 = "02", age = 10),
|
||||
"pivot_raster is required")
|
||||
|
||||
expect_error(create_CI_diff_map(pivot_raster = mock_data$raster,
|
||||
pivot_spans = mock_data$field_boundaries[1,],
|
||||
week_1 = "01", age = 10),
|
||||
"week_1 and week_2 parameters are required")
|
||||
})
|
||||
|
||||
# Test the ci_plot function
|
||||
test_that("ci_plot handles input parameters correctly", {
|
||||
mock_data <- create_mock_data()
|
||||
|
||||
# Capture output to avoid cluttering the test output
|
||||
temp_file <- tempfile()
|
||||
sink(temp_file)
|
||||
|
||||
# Test with all required parameters - should not throw an error
|
||||
expect_error(
|
||||
ci_plot(
|
||||
pivotName = "Field1",
|
||||
field_boundaries = mock_data$field_boundaries,
|
||||
current_ci = mock_data$raster,
|
||||
ci_minus_1 = mock_data$raster,
|
||||
ci_minus_2 = mock_data$raster,
|
||||
last_week_diff = mock_data$raster,
|
||||
three_week_diff = mock_data$raster,
|
||||
harvesting_data = mock_data$harvesting_data,
|
||||
week = "01",
|
||||
week_minus_1 = "52",
|
||||
week_minus_2 = "51",
|
||||
week_minus_3 = "50",
|
||||
use_breaks = TRUE,
|
||||
borders = TRUE
|
||||
),
|
||||
NA # Expect no error
|
||||
)
|
||||
|
||||
# Test with missing parameters
|
||||
expect_error(
|
||||
ci_plot(),
|
||||
"pivotName is required"
|
||||
)
|
||||
|
||||
# Test with invalid field name
|
||||
expect_error(
|
||||
ci_plot(
|
||||
pivotName = "NonExistentField",
|
||||
field_boundaries = mock_data$field_boundaries,
|
||||
current_ci = mock_data$raster,
|
||||
ci_minus_1 = mock_data$raster,
|
||||
ci_minus_2 = mock_data$raster,
|
||||
last_week_diff = mock_data$raster,
|
||||
three_week_diff = mock_data$raster,
|
||||
harvesting_data = mock_data$harvesting_data
|
||||
),
|
||||
regexp = NULL # We expect some error related to the field not being found
|
||||
)
|
||||
|
||||
# Reset output
|
||||
sink()
|
||||
unlink(temp_file)
|
||||
})
|
||||
|
||||
# Test the cum_ci_plot function
|
||||
test_that("cum_ci_plot handles input parameters correctly", {
|
||||
mock_data <- create_mock_data()
|
||||
|
||||
# Capture output to avoid cluttering the test output
|
||||
temp_file <- tempfile()
|
||||
sink(temp_file)
|
||||
|
||||
# Test with all required parameters - should not throw an error
|
||||
expect_error(
|
||||
cum_ci_plot(
|
||||
pivotName = "Field1",
|
||||
ci_quadrant_data = mock_data$ci_quadrant,
|
||||
plot_type = "value",
|
||||
facet_on = FALSE,
|
||||
x_unit = "days"
|
||||
),
|
||||
NA # Expect no error
|
||||
)
|
||||
|
||||
# Test with different plot types
|
||||
expect_error(
|
||||
cum_ci_plot(
|
||||
pivotName = "Field1",
|
||||
ci_quadrant_data = mock_data$ci_quadrant,
|
||||
plot_type = "CI_rate"
|
||||
),
|
||||
NA # Expect no error
|
||||
)
|
||||
|
||||
expect_error(
|
||||
cum_ci_plot(
|
||||
pivotName = "Field1",
|
||||
ci_quadrant_data = mock_data$ci_quadrant,
|
||||
plot_type = "cumulative_CI"
|
||||
),
|
||||
NA # Expect no error
|
||||
)
|
||||
|
||||
# Test with invalid plot type
|
||||
expect_error(
|
||||
cum_ci_plot(
|
||||
pivotName = "Field1",
|
||||
ci_quadrant_data = mock_data$ci_quadrant,
|
||||
plot_type = "invalid_type"
|
||||
),
|
||||
"plot_type must be one of: 'value', 'CI_rate', or 'cumulative_CI'"
|
||||
)
|
||||
|
||||
# Test with missing parameters
|
||||
expect_error(
|
||||
cum_ci_plot(),
|
||||
"pivotName is required"
|
||||
)
|
||||
|
||||
# Reset output
|
||||
sink()
|
||||
unlink(temp_file)
|
||||
})
|
||||
|
||||
# Test the get_week_path function
|
||||
test_that("get_week_path returns correct path", {
|
||||
# Test with valid inputs
|
||||
path <- get_week_path(
|
||||
mosaic_path = "ci_max_mosaics",
|
||||
input_date = "2023-01-15",
|
||||
week_offset = 0
|
||||
)
|
||||
|
||||
# Extract the week number and year from the path
|
||||
expect_match(path, "week_02_2023\\.tif$", all = FALSE) # Week 2 of 2023
|
||||
|
||||
# Test with offset
|
||||
path_minus_1 <- get_week_path(
|
||||
mosaic_path = "ci_max_mosaics",
|
||||
input_date = "2023-01-15",
|
||||
week_offset = -1
|
||||
)
|
||||
expect_match(path_minus_1, "week_01_2023\\.tif$", all = FALSE)
|
||||
|
||||
# Test with missing parameters
|
||||
expect_error(
|
||||
get_week_path(input_date = "2023-01-15", week_offset = 0),
|
||||
"mosaic_path is required"
|
||||
)
|
||||
|
||||
expect_error(
|
||||
get_week_path(mosaic_path = "ci_max_mosaics", week_offset = 0),
|
||||
"input_date is required"
|
||||
)
|
||||
})
|
||||
|
||||
# Clean up
|
||||
teardown_test_env()
|
||||
|
||||
# Print success message
|
||||
cat("Report utility function tests completed successfully\n")
|
||||
Loading…
Reference in a new issue