Fix CI report pipeline: update tmap v4 syntax, add continuous color scales, fix formatting

- Updated all CI maps to use tm_scale_continuous() for proper tmap v4 compatibility
- Added fixed color scale limits (1-8 for CI, -3 to +3 for differences) for consistent field comparison
- Fixed YAML header formatting issues in CI_report_dashboard_planet.Rmd
- Positioned RGB map before CI overview map as requested
- Removed all obsolete use_breaks parameter references
- Enhanced error handling and logging throughout the pipeline
- Added new experimental analysis scripts and improvements to mosaic creation
This commit is contained in:
Timon 2025-06-19 20:37:20 +02:00
parent bb2a599075
commit 6efcc8cfec
36 changed files with 7601 additions and 3273 deletions

513
.Rhistory
View file

@ -1 +1,512 @@
# NOTE(review): this section comes from console history (.Rhistory). The stray
# "})" on the next line is the tail of a truncated earlier command and is not
# syntactically complete on its own.
install.packages("xfun") })
# Verify farm_health_data exists and has content
if (!exists("farm_health_data") || nrow(farm_health_data) == 0) {
  safe_log("farm_health_data not found or empty, generating default data", "WARNING")
  # Create minimal fallback data
  # tryCatch() evaluates this block in the calling (global) environment, so the
  # plain `<-` below does create the global fallback object.
  tryCatch({
    # Get fields from boundaries
    fields <- unique(AllPivots0$field)
    # Create basic data frame with just field names; all metrics start as
    # NA/"Unknown" so downstream rendering degrades gracefully.
    farm_health_data <- data.frame(
      field = fields,
      mean_ci = rep(NA, length(fields)),
      ci_change = rep(NA, length(fields)),
      ci_uniformity = rep(NA, length(fields)),
      status = rep("Unknown", length(fields)),
      anomaly_type = rep("Unknown", length(fields)),
      priority_level = rep(5, length(fields)), # Low priority
      age_weeks = rep(NA, length(fields)),
      harvest_readiness = rep("Unknown", length(fields)),
      stringsAsFactors = FALSE
    )
    safe_log("Created fallback farm_health_data with basic field information")
  }, error = function(e) {
    safe_log(paste("Error creating fallback farm_health_data:", e$message), "ERROR")
    # `<<-` is required here: inside the handler function a plain `<-` would
    # only bind a variable local to the handler, and the zero-row fallback
    # would be lost when the handler returns.
    farm_health_data <<- data.frame(
      field = character(),
      mean_ci = numeric(),
      ci_change = numeric(),
      ci_uniformity = numeric(),
      status = character(),
      anomaly_type = character(),
      priority_level = numeric(),
      age_weeks = numeric(),
      harvest_readiness = character(),
      stringsAsFactors = FALSE
    )
  })
}
# Calculate farm health summary metrics.
# On success, farm_health_data is replaced with the generated summary; on
# failure a zero-row data frame with the expected columns is installed so that
# downstream chunks relying on its schema do not crash.
tryCatch({
  # Generate farm health summary data
  farm_health_data <- generate_farm_health_summary(
    field_boundaries = AllPivots0,
    ci_current = CI,
    ci_previous = CI_m1,
    harvesting_data = harvesting_data
  )
  # Log the summary data
  safe_log(paste("Generated farm health summary with", nrow(farm_health_data), "fields"))
}, error = function(e) {
  safe_log(paste("Error in farm health calculation:", e$message), "ERROR")
  # Create empty dataframe if calculation failed.
  # BUGFIX: this must use `<<-`. The original used `<-`, which only bound a
  # variable local to this handler function, so the empty fallback was
  # silently discarded (the equivalent fallback handlers elsewhere in this
  # script already use `<<-` for exactly this reason).
  farm_health_data <<- data.frame(
    field = character(),
    mean_ci = numeric(),
    ci_change = numeric(),
    ci_uniformity = numeric(),
    status = character(),
    anomaly_type = character(),
    priority_level = numeric(),
    age_weeks = numeric(),
    harvest_readiness = character(),
    stringsAsFactors = FALSE
  )
})
# Render the velocity and acceleration indicators
tryCatch({
  # Create and display the indicators using the imported utility function
  # (defined in executive_report_utils.R).
  velocity_plot <- create_velocity_acceleration_indicator(
    health_data = farm_health_data,
    ci_current = CI,
    ci_prev1 = CI_m1,
    ci_prev2 = CI_m2,
    ci_prev3 = CI_m3,
    field_boundaries = AllPivots0
  )
  # Print the visualization
  print(velocity_plot)
  # Create a table of fields with significant velocity changes
  field_ci_metrics <- list()
  # Process each field to get metrics
  fields <- unique(AllPivots0$field)
  for (field_name in fields) {
    tryCatch({
      # Get field boundary
      field_shape <- AllPivots0 %>% dplyr::filter(field == field_name)
      # `next` escapes the tryCatch and advances the loop — this works because
      # the enclosing for-loop frame is still on the call stack.
      if (nrow(field_shape) == 0) next
      # Extract CI values (terra::extract returns a data frame with an ID
      # column plus one column per raster layer)
      ci_curr_values <- terra::extract(CI, field_shape)
      ci_prev1_values <- terra::extract(CI_m1, field_shape)
      # Calculate metrics. The `$CI` access assumes the raster layer is named
      # "CI" — TODO confirm against the mosaic files.
      mean_ci_curr <- mean(ci_curr_values$CI, na.rm = TRUE)
      mean_ci_prev1 <- mean(ci_prev1_values$CI, na.rm = TRUE)
      # Week-over-week change in mean CI ("velocity")
      velocity <- mean_ci_curr - mean_ci_prev1
      # Store in list
      field_ci_metrics[[field_name]] <- list(
        field = field_name,
        ci_current = mean_ci_curr,
        ci_prev1 = mean_ci_prev1,
        velocity = velocity
      )
    }, error = function(e) {
      safe_log(paste("Error processing field", field_name, "for velocity table:", e$message), "WARNING")
    })
  }
  # Convert list to data frame.
  # NOTE(review): when no field produced metrics, do.call(rbind, ...) yields
  # NULL and the dplyr::filter below errors; that error is caught by the outer
  # handler, which then suppresses the whole section — confirm this is the
  # intended degradation.
  velocity_df <- do.call(rbind, lapply(field_ci_metrics, function(x) {
    data.frame(
      field = x$field,
      ci_current = round(x$ci_current, 2),
      ci_prev1 = round(x$ci_prev1, 2),
      velocity = round(x$velocity, 2),
      direction = ifelse(x$velocity >= 0, "Improving", "Declining")
    )
  }))
  # Select top 5 positive and top 5 negative velocity fields
  top_positive <- velocity_df %>%
    dplyr::filter(velocity > 0) %>%
    dplyr::arrange(desc(velocity)) %>%
    dplyr::slice_head(n = 5)
  top_negative <- velocity_df %>%
    dplyr::filter(velocity < 0) %>%
    dplyr::arrange(velocity) %>%
    dplyr::slice_head(n = 5)
  # Display the tables if we have data (raw HTML headings require
  # results='asis' in the hosting knitr chunk — presumably set there)
  if (nrow(top_positive) > 0) {
    cat("<h4>Fields with Fastest Improvement</h4>")
    knitr::kable(top_positive %>%
                   dplyr::select(Field = field,
                                 `Current CI` = ci_current,
                                 `Previous CI` = ci_prev1,
                                 `Weekly Change` = velocity))
  }
  if (nrow(top_negative) > 0) {
    cat("<h4>Fields with Fastest Decline</h4>")
    knitr::kable(top_negative %>%
                   dplyr::select(Field = field,
                                 `Current CI` = ci_current,
                                 `Previous CI` = ci_prev1,
                                 `Weekly Change` = velocity))
  }
}, error = function(e) {
  safe_log(paste("Error rendering velocity visualization:", e$message), "ERROR")
  cat("<div class='alert alert-danger'>Error generating velocity visualization.</div>")
})
# Build and render the anomaly timeline; on failure, log the error and emit an
# HTML alert so the report still renders.
tryCatch(
  {
    # create_anomaly_timeline() is imported from executive_report_utils.R
    timeline_plot <- create_anomaly_timeline(
      field_boundaries = AllPivots0,
      ci_data = CI_quadrant,
      days_to_include = 90 # restrict the view to the most recent 90 days
    )
    print(timeline_plot)
  },
  error = function(e) {
    safe_log(paste("Error generating anomaly timeline:", e$message), "ERROR")
    cat("<div class='alert alert-danger'>Error generating anomaly timeline visualization.</div>")
  }
)
# NOTE(review): exact duplicate of the fallback block earlier in this history —
# expected, since .Rhistory records every executed command. The exists()/nrow()
# guard makes re-running it idempotent.
# Verify farm_health_data exists and has content
if (!exists("farm_health_data") || nrow(farm_health_data) == 0) {
  safe_log("farm_health_data not found or empty, generating default data", "WARNING")
  # Create minimal fallback data
  tryCatch({
    # Get fields from boundaries
    fields <- unique(AllPivots0$field)
    # Create basic data frame with just field names
    farm_health_data <- data.frame(
      field = fields,
      mean_ci = rep(NA, length(fields)),
      ci_change = rep(NA, length(fields)),
      ci_uniformity = rep(NA, length(fields)),
      status = rep("Unknown", length(fields)),
      anomaly_type = rep("Unknown", length(fields)),
      priority_level = rep(5, length(fields)), # Low priority
      age_weeks = rep(NA, length(fields)),
      harvest_readiness = rep("Unknown", length(fields)),
      stringsAsFactors = FALSE
    )
    safe_log("Created fallback farm_health_data with basic field information")
  }, error = function(e) {
    safe_log(paste("Error creating fallback farm_health_data:", e$message), "ERROR")
    # `<<-` writes the empty fallback to the global environment; a plain `<-`
    # would be discarded with the handler's frame.
    farm_health_data <<- data.frame(
      field = character(),
      mean_ci = numeric(),
      ci_change = numeric(),
      ci_uniformity = numeric(),
      status = character(),
      anomaly_type = character(),
      priority_level = numeric(),
      age_weeks = numeric(),
      harvest_readiness = character(),
      stringsAsFactors = FALSE
    )
  })
}
# ADVANCED ANALYTICS FUNCTIONS
# Note: These functions are now imported from executive_report_utils.R
# The utility file contains functions for velocity/acceleration indicators,
# anomaly timeline creation, age cohort mapping, and cohort performance charts
safe_log("Using analytics functions from executive_report_utils.R")
# Chunk 1: setup_parameters
# Set up basic report parameters from input values.
# `params` is supplied by the hosting R Markdown document's YAML header.
report_date <- params$report_date
mail_day <- params$mail_day
borders <- params$borders
# NOTE(review): the commit message says use_breaks was removed from the report;
# this history entry predates that cleanup.
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
# Environment setup notes (commented out)
# # Activeer de renv omgeving  (Dutch: "Activate the renv environment")
# renv::activate()
# renv::deactivate()
# # Optioneel: Herstel de omgeving als dat nodig is
# # Je kunt dit commentaar geven als je het normaal niet wilt uitvoeren
# # (Dutch: "Optional: restore the environment if needed; comment this out
# # if you don't normally want to run it")
# renv::restore()
# Chunk 2: load_libraries
# Configure knitr options
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Path management
library(here)
# Spatial data libraries
library(sf)
library(terra)
library(exactextractr)
# library(raster) - Removed as it's no longer maintained
# Data manipulation and visualization
library(tidyverse) # Includes dplyr, ggplot2, etc.
library(tmap)
library(lubridate)
library(zoo)
# Machine learning
library(rsample)
library(caret)
library(randomForest)
library(CAST)
# Load custom utility functions.
# Two-level fallback: first try the working directory, then the r_app/
# project path; only if both fail does the script stop. Note the inner
# handler's `e` shadows the outer one, so the stop() message reports the
# second (here::here) failure.
tryCatch({
  source("report_utils.R")
}, error = function(e) {
  message(paste("Error loading report_utils.R:", e$message))
  # Try alternative path if the first one fails
  tryCatch({
    source(here::here("r_app", "report_utils.R"))
  }, error = function(e) {
    stop("Could not load report_utils.R from either location: ", e$message)
  })
})
# Load executive report utilities (same two-level fallback as above)
tryCatch({
  source("executive_report_utils.R")
}, error = function(e) {
  message(paste("Error loading executive_report_utils.R:", e$message))
  # Try alternative path if the first one fails
  tryCatch({
    source(here::here("r_app", "executive_report_utils.R"))
  }, error = function(e) {
    stop("Could not load executive_report_utils.R from either location: ", e$message)
  })
})
# NOTE(review): second replay of the setup_parameters + load_libraries chunks —
# this is console history, so each re-run of the Rmd setup is recorded again.
# Re-executing it is harmless: assignments and library() calls are idempotent.
# Chunk 1: setup_parameters
# Set up basic report parameters from input values
report_date <- params$report_date
mail_day <- params$mail_day
borders <- params$borders
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
# Environment setup notes (commented out)
# # Activeer de renv omgeving
# renv::activate()
# renv::deactivate()
# # Optioneel: Herstel de omgeving als dat nodig is
# # Je kunt dit commentaar geven als je het normaal niet wilt uitvoeren
# renv::restore()
# Chunk 2: load_libraries
# Configure knitr options
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Path management
library(here)
# Spatial data libraries
library(sf)
library(terra)
library(exactextractr)
# library(raster) - Removed as it's no longer maintained
# Data manipulation and visualization
library(tidyverse) # Includes dplyr, ggplot2, etc.
library(tmap)
library(lubridate)
library(zoo)
# Machine learning
library(rsample)
library(caret)
library(randomForest)
library(CAST)
# Load custom utility functions (two-level path fallback, as in the first replay)
tryCatch({
  source("report_utils.R")
}, error = function(e) {
  message(paste("Error loading report_utils.R:", e$message))
  # Try alternative path if the first one fails
  tryCatch({
    source(here::here("r_app", "report_utils.R"))
  }, error = function(e) {
    stop("Could not load report_utils.R from either location: ", e$message)
  })
})
# Load executive report utilities
tryCatch({
  source("executive_report_utils.R")
}, error = function(e) {
  message(paste("Error loading executive_report_utils.R:", e$message))
  # Try alternative path if the first one fails
  tryCatch({
    source(here::here("r_app", "executive_report_utils.R"))
  }, error = function(e) {
    stop("Could not load executive_report_utils.R from either location: ", e$message)
  })
})
# NOTE(review): third and fourth replays of the setup chunks. Here the
# tryCatch wrappers around source() have been commented out, so a failing
# source() call now stops the session immediately with no fallback path —
# evidently the utility-file location was being debugged interactively.
# Chunk 1: setup_parameters
# Set up basic report parameters from input values
report_date <- params$report_date
mail_day <- params$mail_day
borders <- params$borders
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
# Environment setup notes (commented out)
# # Activeer de renv omgeving
# renv::activate()
# renv::deactivate()
# # Optioneel: Herstel de omgeving als dat nodig is
# # Je kunt dit commentaar geven als je het normaal niet wilt uitvoeren
# renv::restore()
# Chunk 2: load_libraries
# Configure knitr options
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Path management
library(here)
# Spatial data libraries
library(sf)
library(terra)
library(exactextractr)
# library(raster) - Removed as it's no longer maintained
# Data manipulation and visualization
library(tidyverse) # Includes dplyr, ggplot2, etc.
library(tmap)
library(lubridate)
library(zoo)
# Machine learning
library(rsample)
library(caret)
library(randomForest)
library(CAST)
# Load custom utility functions
# tryCatch({
# source("report_utils.R")
# }, error = function(e) {
# message(paste("Error loading report_utils.R:", e$message))
# # Try alternative path if the first one fails
# tryCatch({
# Only the project-path variant is executed (no fallback, no error handling):
source(here::here("r_app", "report_utils.R"))
# }, error = function(e) {
# stop("Could not load report_utils.R from either location: ", e$message)
# })
# })
# Load executive report utilities
# tryCatch({
# source("executive_report_utils.R")
# }, error = function(e) {
# message(paste("Error loading executive_report_utils.R:", e$message))
# # Try alternative path if the first one fails
# tryCatch({
source(here::here("r_app", "executive_report_utils.R"))
# Chunk 1: setup_parameters
# Set up basic report parameters from input values
report_date <- params$report_date
mail_day <- params$mail_day
borders <- params$borders
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
# Environment setup notes (commented out)
# # Activeer de renv omgeving
# renv::activate()
# renv::deactivate()
# # Optioneel: Herstel de omgeving als dat nodig is
# # Je kunt dit commentaar geven als je het normaal niet wilt uitvoeren
# renv::restore()
# Chunk 2: load_libraries
# Configure knitr options
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Path management
library(here)
# Spatial data libraries
library(sf)
library(terra)
library(exactextractr)
# library(raster) - Removed as it's no longer maintained
# Data manipulation and visualization
library(tidyverse) # Includes dplyr, ggplot2, etc.
library(tmap)
library(lubridate)
library(zoo)
# Machine learning
library(rsample)
library(caret)
library(randomForest)
library(CAST)
# Load custom utility functions
# tryCatch({
# source("report_utils.R")
# }, error = function(e) {
# message(paste("Error loading report_utils.R:", e$message))
# # Try alternative path if the first one fails
# tryCatch({
source(here::here("r_app", "report_utils.R"))
# }, error = function(e) {
# stop("Could not load report_utils.R from either location: ", e$message)
# })
# })
# Load executive report utilities
# tryCatch({
# source("executive_report_utils.R")
# }, error = function(e) {
# message(paste("Error loading executive_report_utils.R:", e$message))
# # Try alternative path if the first one fails
# tryCatch({
# NOTE(review): this replay sources from the exec_dashboard/ subdirectory —
# a different location than the earlier replays; confirm which path is the
# one the committed Rmd actually uses.
source(here::here("r_app", "exec_dashboard", "executive_report_utils.R"))
# }, error = function(e) {
# stop("Could not load executive_report_utils.R from either location: ", e$message)
# })
# })
# Chunk 3: initialize_project_config
# Set the project directory from parameters
project_dir <- params$data_dir
# Source project parameters with error handling
# (the tryCatch wrapper is commented out here, so a failure stops immediately)
# tryCatch({
source(here::here("r_app", "parameters_project.R"))
# }, error = function(e) {
# stop("Error loading parameters_project.R: ", e$message)
# })
# Log initial configuration
safe_log("Starting the R Markdown script")
safe_log(paste("mail_day params:", params$mail_day))
safe_log(paste("report_date params:", params$report_date))
safe_log(paste("mail_day variable:", mail_day))
# Set locale for consistent date formatting (English weekday names are
# required for the days_of_week matching below)
Sys.setlocale("LC_TIME", "C")
# Initialize date variables from parameters
today <- as.character(report_date)
mail_day_as_character <- as.character(mail_day)
# Calculate week days
report_date_as_week_day <- weekdays(lubridate::ymd(today))
days_of_week <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
# Calculate initial week number
week <- lubridate::week(today)
safe_log(paste("Initial week calculation:", week, "today:", today))
# Calculate previous dates for comparisons (weekly cadence: -7/-14/-21 days)
today_minus_1 <- as.character(lubridate::ymd(today) - 7)
today_minus_2 <- as.character(lubridate::ymd(today) - 14)
today_minus_3 <- as.character(lubridate::ymd(today) - 21)
# Log the weekday calculations for debugging
safe_log(paste("Report date weekday:", report_date_as_week_day))
safe_log(paste("Weekday index:", which(days_of_week == report_date_as_week_day)))
safe_log(paste("Mail day:", mail_day_as_character))
safe_log(paste("Mail day index:", which(days_of_week == mail_day_as_character)))
# Adjust week calculation based on mail day: if the report date already fell
# past this week's mail day, shift the comparison window one week forward
if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_day_as_character)) {
  safe_log("Adjusting weeks because of mail day")
  week <- lubridate::week(today) + 1
  today_minus_1 <- as.character(lubridate::ymd(today))
  today_minus_2 <- as.character(lubridate::ymd(today) - 7)
  today_minus_3 <- as.character(lubridate::ymd(today) - 14)
}
# Generate subtitle for report
subtitle_var <- paste("Report generated on", Sys.Date())
# Calculate week numbers for previous weeks.
# NOTE: this must stay BEFORE the sprintf() below — once `week` is formatted
# it becomes a character and the arithmetic would fail. The minus-week values
# intentionally remain unpadded integers.
week_minus_1 <- week - 1
week_minus_2 <- week - 2
week_minus_3 <- week - 3
# Format current week with leading zeros
week <- sprintf("%02d", week)
# Get years for each date
year <- lubridate::year(today)
year_1 <- lubridate::year(today_minus_1)
year_2 <- lubridate::year(today_minus_2)
year_3 <- lubridate::year(today_minus_3)
# Load CI index data with error handling
tryCatch({
  CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
  safe_log("Successfully loaded CI quadrant data")
}, error = function(e) {
  stop("Error loading CI quadrant data: ", e$message)
})
# Get file paths for different weeks using the utility function
tryCatch({
  path_to_week_current = get_week_path(weekly_CI_mosaic, today, 0)
  path_to_week_minus_1 = get_week_path(weekly_CI_mosaic, today, -1)
  path_to_week_minus_2 = get_week_path(weekly_CI_mosaic, today, -2)
  path_to_week_minus_3 = get_week_path(weekly_CI_mosaic, today, -3)
  # Log the calculated paths
  safe_log("Required mosaic paths:")
  safe_log(paste("Path to current week:", path_to_week_current))
  safe_log(paste("Path to week minus 1:", path_to_week_minus_1))
  safe_log(paste("Path to week minus 2:", path_to_week_minus_2))
  safe_log(paste("Path to week minus 3:", path_to_week_minus_3))
  # Validate that files exist — missing files only warn here; the rast()
  # calls below will still be attempted and error into this tryCatch
  if (!file.exists(path_to_week_current)) warning("Current week mosaic file does not exist: ", path_to_week_current)
  if (!file.exists(path_to_week_minus_1)) warning("Week minus 1 mosaic file does not exist: ", path_to_week_minus_1)
  if (!file.exists(path_to_week_minus_2)) warning("Week minus 2 mosaic file does not exist: ", path_to_week_minus_2)
  if (!file.exists(path_to_week_minus_3)) warning("Week minus 3 mosaic file does not exist: ", path_to_week_minus_3)
  # Load raster data with terra functions; `$CI` selects the layer named "CI"
  # from each multi-layer mosaic
  CI <- terra::rast(path_to_week_current)$CI
  CI_m1 <- terra::rast(path_to_week_minus_1)$CI
  CI_m2 <- terra::rast(path_to_week_minus_2)$CI
  CI_m3 <- terra::rast(path_to_week_minus_3)$CI
}, error = function(e) {
  stop("Error loading raster data: ", e$message)
})

3
.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,3 @@
{
"python.REPL.enableREPLSmartSend": false
}

Binary file not shown.

BIN
figure/sub_chunk_8433-1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.9 KiB

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,319 @@
import os
import argparse
import numpy as np
from pathlib import Path
from osgeo import gdal
import rasterio as rio
from rasterio.enums import Resampling
from rasterio.warp import reproject
from osgeo import osr
# Attempt to import OmniCloudMask and set a flag.
# OCM is treated as an optional dependency: when it is missing the module
# still imports, and run_ocm_on_image() / the __main__ guard check HAS_OCM
# and degrade gracefully instead of crashing.
try:
    from omnicloudmask import predict_from_array, load_multiband
    HAS_OCM = True
except ImportError:
    HAS_OCM = False
def calculate_utm_zone_and_hemisphere(longitude, latitude):
    """
    Calculate the UTM zone and hemisphere based on longitude and latitude.

    Parameters
    ----------
    longitude : float
        Longitude in decimal degrees. Wrapped into [0, 360) internally, so
        longitude == 180.0 maps to zone 1 (the naive ``int((lon+180)/6)+1``
        formula would produce the invalid zone 61) and out-of-range inputs
        are handled consistently.
    latitude : float
        Latitude in decimal degrees.

    Returns
    -------
    tuple (int, bool)
        (utm_zone, is_southern): utm_zone in 1..60, and True when the
        latitude lies south of the equator.
    """
    # Python's % always returns a non-negative result for a positive modulus,
    # so the wrapped value is in [0, 360) and truncation equals floor here.
    utm_zone = int(((longitude + 180.0) % 360.0) / 6.0) + 1
    is_southern = latitude < 0
    return utm_zone, is_southern
def reproject_to_projected_crs(input_path, output_path):
    """
    Reprojects a raster to a projected coordinate system (e.g., UTM).

    The target UTM zone and hemisphere are derived from the geographic
    coordinates of the raster's center pixel.

    Parameters
    ----------
    input_path : str or Path
        Source raster readable by GDAL.
    output_path : str or Path
        Destination GeoTIFF path.

    Returns
    -------
    The output path on success; raises ValueError if the input cannot be
    opened.
    """
    input_ds = gdal.Open(str(input_path))
    if not input_ds:
        raise ValueError(f"Failed to open input raster: {input_path}")
    # Get the source spatial reference
    source_srs = osr.SpatialReference()
    source_srs.ImportFromWkt(input_ds.GetProjection())
    # Get the geographic coordinates of the image's center
    geo_transform = input_ds.GetGeoTransform()
    width = input_ds.RasterXSize
    height = input_ds.RasterYSize
    center_x = geo_transform[0] + (width / 2) * geo_transform[1]
    center_y = geo_transform[3] + (height / 2) * geo_transform[5]
    # Calculate the UTM zone and hemisphere dynamically
    utm_zone, is_southern = calculate_utm_zone_and_hemisphere(center_x, center_y)
    # Define the target spatial reference
    target_srs = osr.SpatialReference()
    target_srs.SetWellKnownGeogCS("WGS84")
    # BUGFIX: SetUTM's second argument is a *north* flag (bNorth in the GDAL
    # API). The original passed is_southern directly, which selected the
    # northern UTM variant for southern-hemisphere scenes and vice versa.
    target_srs.SetUTM(utm_zone, int(not is_southern))
    # Create the warp options
    warp_options = gdal.WarpOptions(
        dstSRS=target_srs.ExportToWkt(),
        format="GTiff"
    )
    # Perform the reprojection
    gdal.Warp(str(output_path), input_ds, options=warp_options)
    input_ds = None  # Close the dataset
    print(f"Reprojected raster saved to: {output_path}")
    return output_path
def resample_image(input_path, output_path, resolution=(10, 10), resample_alg="bilinear"):
    """
    Resamples a raster to a specified resolution using gdal.Translate.

    The input is first reprojected to a projected CRS (UTM) so the requested
    resolution is expressed in meters; the intermediate "*_reprojected.tif"
    file is written next to the output and left in place.
    """
    print(f"Resampling {input_path} to {resolution}m resolution -> {output_path}")

    # Step 1: reproject so the pixel sizes below are in meters, not degrees.
    reprojected_path = str(
        Path(output_path).with_name(f"{Path(output_path).stem}_reprojected.tif")
    )
    reproject_to_projected_crs(input_path, reprojected_path)

    # Step 2: resample the reprojected raster to the target pixel size.
    src_ds = gdal.Open(reprojected_path)
    if not src_ds:
        raise ValueError(f"Failed to open reprojected raster: {reprojected_path}")
    translated = gdal.Translate(
        str(output_path),
        src_ds,
        xRes=resolution[0],
        yRes=resolution[1],
        resampleAlg=resample_alg,
    )
    src_ds = None  # release the GDAL dataset handle
    if translated is None:
        raise ValueError(f"Failed to resample image to {output_path}")

    print(f"Successfully resampled image saved to: {output_path}")
    return output_path
def run_ocm_on_image(image_path_10m, ocm_output_dir, save_mask=True):
    """
    Processes a 10m resolution image with OmniCloudMask.
    Adapted from process_with_ocm in the notebook.

    Returns (mask_path, profile) on success, or (None, None) when OCM is
    unavailable, the image is too small, or prediction fails.

    NOTE(review): when save_mask=False the mask path is still returned even
    though no file was written — confirm callers expect that.
    """
    if not HAS_OCM:
        print("OmniCloudMask not available. Please install with: pip install omnicloudmask")
        return None, None
    image_path_10m = Path(image_path_10m)
    ocm_output_dir = Path(ocm_output_dir)
    ocm_output_dir.mkdir(exist_ok=True, parents=True)
    mask_10m_path = ocm_output_dir / f"{image_path_10m.stem}_ocm_mask_10m.tif"
    try:
        # Open the image only to check dimensions; the handle is released
        # before OCM loads the data itself.
        with rio.open(image_path_10m) as src:
            width, height = src.width, src.height
        # Check if the image is too small for OmniCloudMask
        if width < 50 or height < 50:
            print(f"Warning: Image {image_path_10m} is too small for OmniCloudMask (width: {width}, height: {height}). Skipping.")
            return None, None
        # PlanetScope 4-band images are typically [B,G,R,NIR]
        # OCM expects [R,G,NIR] for its default model.
        # Band numbers for load_multiband are 1-based.
        # If original is B(1),G(2),R(3),NIR(4), then R=3, G=2, NIR=4
        band_order = [3, 2, 4]
        print(f"Loading 10m image for OCM: {image_path_10m}")
        # resample_res=10 explicitly pins the working resolution for OCM;
        # image_path_10m is already 10m so this should be a no-op resample.
        rgn_data, profile = load_multiband(
            input_path=str(image_path_10m),
            resample_res=10, # Explicitly set target resolution for OCM
            band_order=band_order
        )
        print("Applying OmniCloudMask...")
        prediction = predict_from_array(rgn_data)
        if save_mask:
            # Single-band uint8 mask carrying the OCM class codes
            profile.update(count=1, dtype='uint8')
            with rio.open(mask_10m_path, 'w', **profile) as dst:
                dst.write(prediction.astype('uint8'), 1)
            print(f"Saved 10m OCM mask to: {mask_10m_path}")
        # Summary (optional, can be removed for cleaner script output)
        # OCM class codes: 0=clear, 1=thick cloud, 2=thin cloud, 3=shadow
        n_total = prediction.size
        n_clear = np.sum(prediction == 0)
        n_thick = np.sum(prediction == 1)
        n_thin = np.sum(prediction == 2)
        n_shadow = np.sum(prediction == 3)
        print(f"  OCM: Clear: {100*n_clear/n_total:.1f}%, Thick: {100*n_thick/n_total:.1f}%, Thin: {100*n_thin/n_total:.1f}%, Shadow: {100*n_shadow/n_total:.1f}%")
        return str(mask_10m_path), profile
    except Exception as e:
        print(f"Error processing 10m image with OmniCloudMask: {str(e)}")
        return None, None
def upsample_mask_to_3m(mask_10m_path, target_3m_image_path, output_3m_mask_path):
    """
    Upsamples a 10m OCM mask to match the 3m target image.
    Adapted from upsample_mask_to_highres in the notebook.

    Nearest-neighbor resampling is used so the categorical class codes are
    preserved exactly.
    """
    print(f"Upsampling 10m mask {mask_10m_path} to 3m, referencing {target_3m_image_path}")
    with rio.open(mask_10m_path) as src_mask, rio.open(target_3m_image_path) as src_img_3m:
        coarse_mask = src_mask.read(1)
        fine_shape = (src_img_3m.height, src_img_3m.width)

        # Allocate the destination grid on the 3m raster's footprint, keeping
        # the mask's dtype so class codes round-trip unchanged.
        fine_mask = np.zeros(fine_shape, dtype=coarse_mask.dtype)
        reproject(
            source=coarse_mask,
            destination=fine_mask,
            src_transform=src_mask.transform,
            src_crs=src_mask.crs,
            dst_transform=src_img_3m.transform,
            dst_crs=src_img_3m.crs,
            resampling=Resampling.nearest
        )

        # Write the upsampled mask with the 3m image's georeferencing.
        out_profile = src_img_3m.profile.copy()
        out_profile.update({
            'count': 1,
            'dtype': fine_mask.dtype
        })
        with rio.open(output_3m_mask_path, 'w', **out_profile) as dst:
            dst.write(fine_mask, 1)

    print(f"Upsampled 3m OCM mask saved to: {output_3m_mask_path}")
    return str(output_3m_mask_path)
def apply_3m_mask_to_3m_image(image_3m_path, mask_3m_path, final_masked_output_path):
    """
    Applies an upsampled 3m OCM mask to the original 3m image.
    Adapted from apply_upsampled_mask_to_highres in the notebook.

    OCM classes: 0=clear, 1=thick cloud, 2=thin cloud, 3=shadow. All
    non-clear pixels (class > 0) are set to the image's nodata value.

    Returns the output path as a string, or None on failure.
    """
    print(f"Applying 3m mask {mask_3m_path} to 3m image {image_3m_path}")
    image_3m_path = Path(image_3m_path)
    mask_3m_path = Path(mask_3m_path)
    final_masked_output_path = Path(final_masked_output_path)
    final_masked_output_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        with rio.open(image_3m_path) as src_img_3m, rio.open(mask_3m_path) as src_mask_3m:
            img_data_3m = src_img_3m.read()
            img_profile_3m = src_img_3m.profile.copy()
            mask_data_3m = src_mask_3m.read(1)
        if img_data_3m.shape[1:] != mask_data_3m.shape:
            print(f"Warning: 3m image shape {img_data_3m.shape[1:]} and 3m mask shape {mask_data_3m.shape} do not match.")
            # This should ideally not happen if upsampling was correct.
        # OCM: 0=clear, 1=thick cloud, 2=thin cloud, 3=shadow
        # We want to mask out (set to nodata) pixels where OCM is > 0
        binary_mask = np.ones_like(mask_data_3m, dtype=np.uint8)
        binary_mask[mask_data_3m > 0] = 0  # 0 for cloud/shadow, 1 for clear
        masked_img_data_3m = img_data_3m.copy()
        # BUGFIX: dict.get('nodata', 0) returns None when the profile stores
        # an explicit 'nodata': None (the usual rasterio case for rasters
        # without a nodata value), and writing None into a numpy array then
        # fails. Fall back to 0 only when the value is absent or None.
        nodata_val = img_profile_3m.get('nodata')
        if nodata_val is None:
            nodata_val = 0
        for i in range(img_profile_3m['count']):
            masked_img_data_3m[i][binary_mask == 0] = nodata_val
        # Ensure dtype of profile matches data to be written
        # If original image was float, but nodata is int (0), rasterio might complain
        # It's safer to use the original image's dtype for the output.
        img_profile_3m.update(dtype=img_data_3m.dtype)
        with rio.open(final_masked_output_path, 'w', **img_profile_3m) as dst:
            dst.write(masked_img_data_3m)
        print(f"Final masked 3m image saved to: {final_masked_output_path}")
        return str(final_masked_output_path)
    except Exception as e:
        print(f"Error applying 3m mask to 3m image: {str(e)}")
        return None
def main():
    """
    CLI entry point: run the full OCM cloud-masking pipeline on one image.

    Pipeline: (1) resample the 3m input to 10m, (2) run OmniCloudMask on the
    10m image, (3) upsample the 10m mask back to 3m, (4) apply the 3m mask
    to the original image. Steps that fail print an error and return early
    (exit code stays 0 — intentional? confirm with callers/CI).
    """
    parser = argparse.ArgumentParser(description="Process PlanetScope 3m imagery with OmniCloudMask.")
    parser.add_argument("input_3m_image", type=str, help="Path to the input merged 3m PlanetScope GeoTIFF image.")
    parser.add_argument("output_dir", type=str, help="Directory to save processed files (10m image, masks, final 3m masked image).")
    args = parser.parse_args()
    try:
        # Resolve paths to absolute paths immediately; strict=True raises
        # FileNotFoundError when either path does not exist.
        input_3m_path = Path(args.input_3m_image).resolve(strict=True)
        # output_base_dir is the directory where outputs will be saved.
        # It should exist when the script is called (created by the notebook).
        output_base_dir = Path(args.output_dir).resolve(strict=True)
    except FileNotFoundError as e:
        print(f"Error: Path resolution failed. Input image or output base directory may not exist or is not accessible: {e}")
        return
    except Exception as e:
        print(f"Error resolving paths: {e}")
        return
    # The check for input_3m_path.exists() is now covered by resolve(strict=True)
    # Define intermediate and final file paths using absolute base paths
    intermediate_dir = output_base_dir / "intermediate_ocm_files"
    intermediate_dir.mkdir(parents=True, exist_ok=True)
    image_10m_path = intermediate_dir / f"{input_3m_path.stem}_10m.tif"
    # OCM mask (10m) will be saved inside run_ocm_on_image, in a subdir of intermediate_dir
    ocm_mask_output_dir = intermediate_dir / "ocm_10m_mask_output"
    # Upsampled OCM mask (3m)
    mask_3m_upsampled_path = intermediate_dir / f"{input_3m_path.stem}_ocm_mask_3m_upsampled.tif"
    # Final masked image (3m) — the only artifact written directly to output_base_dir
    final_masked_3m_path = output_base_dir / f"{input_3m_path.stem}_ocm_masked_3m.tif"
    print(f"--- Starting OCM processing for {input_3m_path.name} ---")
    print(f"Input 3m image (absolute): {input_3m_path}")
    print(f"Output base directory (absolute): {output_base_dir}")
    print(f"Intermediate 10m image path: {image_10m_path}")
    # 1. Resample 3m input to 10m for OCM
    try:
        resample_image(input_3m_path, image_10m_path, resolution=(10, 10))
    except Exception as e:
        print(f"Failed to resample to 10m: {e}")
        return
    # 2. Run OCM on the 10m image
    mask_10m_generated_path, _ = run_ocm_on_image(image_10m_path, ocm_mask_output_dir)
    if not mask_10m_generated_path:
        print("OCM processing failed. Exiting.")
        return
    # 3. Upsample the 10m OCM mask to 3m
    try:
        upsample_mask_to_3m(mask_10m_generated_path, input_3m_path, mask_3m_upsampled_path)
    except Exception as e:
        print(f"Failed to upsample 10m OCM mask to 3m: {e}")
        return
    # 4. Apply the 3m upsampled mask to the original 3m image
    try:
        apply_3m_mask_to_3m_image(input_3m_path, mask_3m_upsampled_path, final_masked_3m_path)
    except Exception as e:
        print(f"Failed to apply 3m mask to 3m image: {e}")
        return
    print(f"--- Successfully completed OCM processing for {input_3m_path.name} ---")
    print(f"Final 3m masked output: {final_masked_3m_path}")
if __name__ == "__main__":
    # Run the pipeline only when the optional OCM dependency is present;
    # otherwise tell the user how to install it.
    if HAS_OCM:
        main()
    else:
        print("OmniCloudMask library is not installed. Please install it to run this script.")
        print("You can typically install it using: pip install omnicloudmask")

View file

@ -2,18 +2,17 @@
params: params:
ref: "word-styles-reference-var1.docx" ref: "word-styles-reference-var1.docx"
output_file: CI_report.docx output_file: CI_report.docx
report_date: "2024-08-28" report_date: "2024-07-18"
data_dir: "Chemba" data_dir: "chemba"
mail_day: "Wednesday" mail_day: "Wednesday"
borders: TRUE borders: TRUE
use_breaks: FALSE
output: output:
# html_document: # html_document:
# toc: yes # toc: yes
# df_print: paged # df_print: paged
word_document: word_document:
reference_docx: !expr file.path("word-styles-reference-var1.docx") reference_docx: !expr file.path("word-styles-reference-var1.docx")
toc: yes toc: no
editor_options: editor_options:
chunk_output_type: console chunk_output_type: console
--- ---
@ -22,8 +21,7 @@ editor_options:
# Set up basic report parameters from input values # Set up basic report parameters from input values
report_date <- params$report_date report_date <- params$report_date
mail_day <- params$mail_day mail_day <- params$mail_day
borders <- params$borders borders <- params$borders
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
# Environment setup notes (commented out) # Environment setup notes (commented out)
# # Activeer de renv omgeving # # Activeer de renv omgeving
@ -38,26 +36,21 @@ use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum
# Configure knitr options # Configure knitr options
knitr::opts_chunk$set(warning = FALSE, message = FALSE) knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Path management # Load all packages at once with suppressPackageStartupMessages
library(here) suppressPackageStartupMessages({
library(here)
# Spatial data libraries library(sf)
library(sf) library(terra)
library(terra) library(exactextractr)
library(exactextractr) library(tidyverse)
# library(raster) - Removed as it's no longer maintained library(tmap)
library(lubridate)
# Data manipulation and visualization library(zoo)
library(tidyverse) # Includes dplyr, ggplot2, etc. library(rsample)
library(tmap) library(caret)
library(lubridate) library(randomForest)
library(zoo) library(CAST)
})
# Machine learning
library(rsample)
library(caret)
library(randomForest)
library(CAST)
# Load custom utility functions # Load custom utility functions
tryCatch({ tryCatch({
@ -127,9 +120,6 @@ if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_
today_minus_3 <- as.character(lubridate::ymd(today) - 14) today_minus_3 <- as.character(lubridate::ymd(today) - 14)
} }
# Generate subtitle for report
subtitle_var <- paste("Report generated on", Sys.Date())
# Calculate week numbers for previous weeks # Calculate week numbers for previous weeks
week_minus_1 <- week - 1 week_minus_1 <- week - 1
week_minus_2 <- week - 2 week_minus_2 <- week - 2
@ -217,7 +207,95 @@ tryCatch({
}) })
``` ```
`r subtitle_var` ```{r create_front_page_variables, include=FALSE}
# Create variables for the front page
farm_name <- stringr::str_to_title(gsub("_", " ", project_dir))
# Format dates for display
report_date_formatted <- format(as.Date(report_date), "%B %d, %Y")
current_year <- format(Sys.Date(), "%Y")
# Get total field count and area if available
tryCatch({
total_fields <- length(unique(AllPivots0$field))
total_area_ha <- round(sum(sf::st_area(AllPivots0)) / 10000, 1) # Convert to hectares
}, error = function(e) {
total_fields <- "N/A"
total_area_ha <- "N/A"
})
```
---
title: ""
---
```{=openxml}
<w:p>
<w:pPr>
<w:jc w:val="center"/>
<w:spacing w:after="720"/>
</w:pPr>
<w:r>
<w:rPr>
<w:sz w:val="48"/>
<w:b/>
</w:rPr>
<w:t>SUGARCANE CROP MONITORING REPORT</w:t>
</w:r>
</w:p>
```
<div style="text-align: center; margin-top: 2cm; margin-bottom: 2cm;">
**`r farm_name`**
**Chlorophyll Index Analysis**
Report Date: **`r report_date_formatted`**
---
</div>
<div style="margin-top: 3cm; margin-bottom: 2cm;">
## Report Summary
**Farm Location:** `r farm_name`
**Report Period:** Week `r week` of `r current_year`
**Data Source:** Planet Labs Satellite Imagery
**Analysis Type:** Chlorophyll Index (CI) Monitoring
**Field Coverage:**
- Total Fields Monitored: `r total_fields`
- Total Area: `r total_area_ha` hectares
**Report Generated:** `r format(Sys.Date(), "%B %d, %Y at %H:%M")`
---
## About This Report
This automated report provides weekly analysis of sugarcane crop health using satellite-derived Chlorophyll Index (CI) measurements. The analysis helps identify:
- Field-level crop health variations
- Weekly changes in crop vigor
- Areas requiring agricultural attention
- Growth patterns across different field sections
**Key Features:**
- High-resolution satellite imagery analysis
- Week-over-week change detection
- Individual field performance metrics
- Actionable insights for crop management
</div>
\pagebreak
<!-- Original content starts here -->
\pagebreak \pagebreak
# Explanation of the Report # Explanation of the Report
@ -253,34 +331,104 @@ CI values typically range from 0 (bare soil or severely stressed vegetation) to
Use these insights to identify areas that may need irrigation, fertilization, or other interventions, and to track the effectiveness of your management practices over time. Use these insights to identify areas that may need irrigation, fertilization, or other interventions, and to track the effectiveness of your management practices over time.
\pagebreak \pagebreak
# Chlorophyll Index (CI) Overview Map - Current Week # RGB Satellite Image - Current Week (if available)
```{r render_ci_overview_map, echo=FALSE, fig.height=6.8, fig.width=9, message=FALSE, warning=FALSE} ```{r render_rgb_map, echo=FALSE, fig.height=6.9, fig.width=9, message=FALSE, warning=FALSE}
# Create overview chlorophyll index map # Check if RGB bands are available and create RGB map
tryCatch({ tryCatch({
# Base shape # Load the full raster to check available bands
map <- tmap::tm_shape(CI, unit = "m") full_raster <- terra::rast(path_to_week_current)
available_bands <- names(full_raster)
# Add raster layer with either breaks or continuous spectrum based on parameter # Check if RGB bands are available (look for red, green, blue or similar naming)
if (use_breaks) { rgb_bands_available <- any(grepl("red|Red|RED", available_bands, ignore.case = TRUE)) &&
map <- map + tmap::tm_raster(breaks = c(0,0.5,1,2,3,4,5,6,7,Inf), any(grepl("green|Green|GREEN", available_bands, ignore.case = TRUE)) &&
palette = "RdYlGn", any(grepl("blue|Blue|BLUE", available_bands, ignore.case = TRUE))
midpoint = NA,
legend.is.portrait = FALSE, # Alternative check for numbered bands that might be RGB (e.g., band_1, band_2, band_3)
title = "Chlorophyll Index (CI)") if (!rgb_bands_available && length(available_bands) >= 3) {
} else { # Check if we have at least 3 bands that could potentially be RGB
map <- map + tmap::tm_raster(palette = "RdYlGn", potential_rgb_bands <- grep("band_[1-3]|B[1-3]|[1-3]", available_bands, ignore.case = TRUE)
style = "cont", rgb_bands_available <- length(potential_rgb_bands) >= 3
midpoint = NA,
legend.is.portrait = FALSE,
title = "Chlorophyll Index (CI)")
} }
if (rgb_bands_available) {
safe_log("RGB bands detected - creating RGB visualization")
# Try to extract RGB bands (prioritize named bands first)
red_band <- NULL
green_band <- NULL
blue_band <- NULL
# Look for named RGB bands first
red_candidates <- grep("red|Red|RED", available_bands, ignore.case = TRUE, value = TRUE)
green_candidates <- grep("green|Green|GREEN", available_bands, ignore.case = TRUE, value = TRUE)
blue_candidates <- grep("blue|Blue|BLUE", available_bands, ignore.case = TRUE, value = TRUE)
if (length(red_candidates) > 0) red_band <- red_candidates[1]
if (length(green_candidates) > 0) green_band <- green_candidates[1]
if (length(blue_candidates) > 0) blue_band <- blue_candidates[1]
# Fallback to numbered bands if named bands not found
if (is.null(red_band) || is.null(green_band) || is.null(blue_band)) {
if (length(available_bands) >= 3) {
# Assume first 3 bands are RGB (common convention)
red_band <- available_bands[1]
green_band <- available_bands[2]
blue_band <- available_bands[3]
}
}
if (!is.null(red_band) && !is.null(green_band) && !is.null(blue_band)) {
# Extract RGB bands
rgb_raster <- c(full_raster[[red_band]], full_raster[[green_band]], full_raster[[blue_band]])
names(rgb_raster) <- c("red", "green", "blue")
# Create RGB map
map <- tmap::tm_shape(rgb_raster, unit = "m") +
tmap::tm_rgb() +
tmap::tm_scalebar(position = c("right", "bottom"), text.color = "white") +
tmap::tm_compass(position = c("right", "bottom"), text.color = "white") +
tmap::tm_shape(AllPivots0) +
tmap::tm_borders(col = "white", lwd = 2) +
tmap::tm_text("sub_field", size = 0.6, col = "white") +
tmap::tm_layout(main.title = paste0("RGB Satellite Image - Week ", week),
main.title.size = 0.8,
main.title.color = "black")
# Print the map
print(map)
safe_log("RGB map created successfully")
} else {
safe_log("Could not identify RGB bands despite detection", "WARNING")
cat("RGB bands detected but could not be properly identified. Skipping RGB visualization.\n")
}
} else {
safe_log("No RGB bands available in the current week mosaic")
cat("**Note:** RGB satellite imagery is not available for this week. Only spectral index data is available.\n\n")
}
}, error = function(e) {
safe_log(paste("Error creating RGB map:", e$message), "ERROR")
cat("**Note:** Could not create RGB visualization for this week.\n\n")
})
```
\pagebreak
# Chlorophyll Index (CI) Overview Map - Current Week
```{r render_ci_overview_map, echo=FALSE, fig.height=6.9, fig.width=9, message=FALSE, warning=FALSE}
# Create overview chlorophyll index map
tryCatch({ # Base shape
map <- tmap::tm_shape(CI, unit = "m") # Add raster layer with continuous spectrum (fixed scale 1-8 for consistent comparison)
map <- map + tmap::tm_raster(col.scale = tm_scale_continuous(values = "brewer.rd_yl_gn",
limits = c(1, 8)),
col.legend = tm_legend(title = "Chlorophyll Index (CI)",
orientation = "landscape",
position = tm_pos_out("center", "bottom")))
# Complete the map with layout and other elements # Complete the map with layout and other elements
map <- map + tmap::tm_layout(legend.outside = TRUE, map <- map +
legend.outside.position = "bottom", tmap::tm_scalebar(position = c("right", "bottom"), text.color = "black") +
legend.show = TRUE) + tmap::tm_compass(position = c("right", "bottom"), text.color = "black") +
tmap::tm_scale_bar(position = tm_pos_out("right", "bottom"), text.color = "black") +
tmap::tm_compass(position = tm_pos_out("right", "bottom"), text.color = "black") +
tmap::tm_shape(AllPivots0) + tmap::tm_shape(AllPivots0) +
tmap::tm_borders(col = "black") + tmap::tm_borders(col = "black") +
tmap::tm_text("sub_field", size = 0.6, col = "black") tmap::tm_text("sub_field", size = 0.6, col = "black")
@ -293,36 +441,24 @@ tryCatch({
text(1, 1, "Error creating CI overview map", cex=1.5) text(1, 1, "Error creating CI overview map", cex=1.5)
}) })
``` ```
\newpage \pagebreak
# Weekly Chlorophyll Index Difference Map # Weekly Chlorophyll Index Difference Map
```{r render_ci_difference_map, echo=FALSE, fig.height=6.8, fig.width=9, message=FALSE, warning=FALSE} ```{r render_ci_difference_map, echo=FALSE, fig.height=6.9, fig.width=9, message=FALSE, warning=FALSE}
# Create chlorophyll index difference map # Create chlorophyll index difference map
tryCatch({ tryCatch({ # Base shape
# Base shape map <- tmap::tm_shape(last_week_dif_raster_abs, unit = "m") # Add raster layer with continuous spectrum (centered at 0 for difference maps, fixed scale)
map <- tmap::tm_shape(last_week_dif_raster_abs, unit = "m") map <- map + tmap::tm_raster(col.scale = tm_scale_continuous(values = "brewer.rd_yl_gn",
midpoint = 0,
# Add raster layer with either breaks or continuous spectrum based on parameter limits = c(-3, 3)),
if (use_breaks) { col.legend = tm_legend(title = "Chlorophyll Index (CI) Change",
map <- map + tmap::tm_raster(breaks = c(-3,-2,-1,0,1,2,3), orientation = "landscape",
palette = "RdYlGn", position = tm_pos_out("center", "bottom")))
midpoint = 0,
legend.is.portrait = FALSE,
title = "Chlorophyll Index (CI) Change")
} else {
map <- map + tmap::tm_raster(palette = "RdYlGn",
style = "cont",
midpoint = 0,
legend.is.portrait = FALSE,
title = "Chlorophyll Index (CI) Change")
}
# Complete the map with layout and other elements # Complete the map with layout and other elements
map <- map + tmap::tm_layout(legend.outside = TRUE, map <- map +
legend.outside.position = "bottom", tmap::tm_scalebar(position = c("right", "bottom"), text.color = "black") +
legend.show = TRUE) + tmap::tm_compass(position = c("right", "bottom"), text.color = "black") +
tmap::tm_scale_bar(position = tm_pos_out("right", "bottom"), text.color = "black") +
tmap::tm_compass(position = tm_pos_out("right", "bottom"), text.color = "black") +
tmap::tm_shape(AllPivots0) + tmap::tm_shape(AllPivots0) +
tmap::tm_borders(col = "black") + tmap::tm_borders(col = "black") +
tmap::tm_text("sub_field", size = 0.6, col = "black") tmap::tm_text("sub_field", size = 0.6, col = "black")
@ -335,8 +471,8 @@ tryCatch({
text(1, 1, "Error creating CI difference map", cex=1.5) text(1, 1, "Error creating CI difference map", cex=1.5)
}) })
``` ```
\newpage \pagebreak
\newpage
```{r generate_field_visualizations, eval=TRUE, fig.height=3.8, fig.width=10, message=FALSE,echo=FALSE, warning=FALSE, include=TRUE, results='asis'} ```{r generate_field_visualizations, eval=TRUE, fig.height=3.8, fig.width=10, message=FALSE,echo=FALSE, warning=FALSE, include=TRUE, results='asis'}
# Generate detailed visualizations for each field # Generate detailed visualizations for each field
@ -347,7 +483,7 @@ tryCatch({
dplyr::summarise(.groups = 'drop') dplyr::summarise(.groups = 'drop')
# Generate plots for each field # Generate plots for each field
purrr::walk(AllPivots_merged$field, function(field_name) { purrr::walk(AllPivots_merged$field[1:5], function(field_name) {
tryCatch({ tryCatch({
cat("\n") # Add an empty line for better spacing cat("\n") # Add an empty line for better spacing
@ -358,14 +494,12 @@ tryCatch({
current_ci = CI, current_ci = CI,
ci_minus_1 = CI_m1, ci_minus_1 = CI_m1,
ci_minus_2 = CI_m2, ci_minus_2 = CI_m2,
last_week_diff = last_week_dif_raster_abs, last_week_diff = last_week_dif_raster_abs, three_week_diff = three_week_dif_raster_abs,
three_week_diff = three_week_dif_raster_abs,
harvesting_data = harvesting_data, harvesting_data = harvesting_data,
week = week, week = week,
week_minus_1 = week_minus_1, week_minus_1 = week_minus_1,
week_minus_2 = week_minus_2, week_minus_2 = week_minus_2,
week_minus_3 = week_minus_3, week_minus_3 = week_minus_3,
use_breaks = use_breaks,
borders = borders borders = borders
) )
@ -513,7 +647,7 @@ tryCatch({
# Predict yields for the current season (focus on mature fields over 300 days) # Predict yields for the current season (focus on mature fields over 300 days)
pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>% pred_rf_current_season <- prepare_predictions(stats::predict(model_ffs_rf, newdata = prediction_yields), prediction_yields) %>%
dplyr::filter(Age_days > 300) %>% dplyr::filter(Age_days > 1) %>%
dplyr::mutate(CI_per_day = round(total_CI / Age_days, 1)) dplyr::mutate(CI_per_day = round(total_CI / Age_days, 1))
safe_log("Successfully completed yield prediction calculations") safe_log("Successfully completed yield prediction calculations")
@ -565,3 +699,7 @@ tryCatch({
}) })
``` ```
\pagebreak

Binary file not shown.

Before

Width:  |  Height:  |  Size: 152 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 180 KiB

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -32,12 +32,12 @@ main <- function() {
end_date <- as.Date(args[1]) end_date <- as.Date(args[1])
if (is.na(end_date)) { if (is.na(end_date)) {
warning("Invalid end_date provided. Using default (current date).") warning("Invalid end_date provided. Using default (current date).")
#end_date <- Sys.Date() end_date <- Sys.Date()
end_date <- "2023-10-01" #end_date <- "2023-10-01"
} }
} else { } else {
#end_date <- Sys.Date() end_date <- Sys.Date()
end_date <- "2023-10-01" #end_date <- "2023-10-01"
} }
# Process offset argument # Process offset argument
@ -58,6 +58,10 @@ main <- function() {
project_dir <- "chemba" project_dir <- "chemba"
} }
# Make project_dir available globally so parameters_project.R can use it
assign("project_dir", project_dir, envir = .GlobalEnv)
# 3. Initialize project configuration # 3. Initialize project configuration
# -------------------------------- # --------------------------------
new_project_question <- FALSE new_project_question <- FALSE
@ -68,8 +72,10 @@ main <- function() {
}, error = function(e) { }, error = function(e) {
warning("Default source files not found. Attempting to source from 'r_app' directory.") warning("Default source files not found. Attempting to source from 'r_app' directory.")
tryCatch({ tryCatch({
source("r_app/parameters_project.R") source(here::here("r_app", "parameters_project.R"))
source("r_app/ci_extraction_utils.R") source(here::here("r_app", "ci_extraction_utils.R"))
warning(paste("Successfully sourced files from 'r_app' directory."))
}, error = function(e) { }, error = function(e) {
stop("Failed to source required files from both default and 'r_app' directories.") stop("Failed to source required files from both default and 'r_app' directories.")
}) })
@ -106,6 +112,6 @@ main <- function() {
}) })
} }
# Run the main function if the script is executed directly if (sys.nframe() == 0) {
main() main()
}

View file

@ -0,0 +1,572 @@
# CI_EXTRACTION_AND_YIELD_PREDICTION.R
# =====================================
#
# This standalone script demonstrates:
# 1. How Chlorophyll Index (CI) is extracted from satellite imagery
# 2. How yield prediction is performed based on CI values
#
# Created for sharing with colleagues to illustrate the core functionality
# of the SmartCane monitoring system.
#
# -----------------------------
# PART 1: LIBRARY DEPENDENCIES
# -----------------------------
suppressPackageStartupMessages({
# Spatial data processing
library(sf)
library(terra)
library(exactextractr)
# Data manipulation
library(tidyverse)
library(lubridate)
library(here)
# Machine learning for yield prediction
library(rsample)
library(caret)
library(randomForest)
library(CAST)
})
# ----------------------------------
# PART 2: LOGGING & UTILITY FUNCTIONS
# ----------------------------------
#' Emit a timestamped log line through R's condition system
#'
#' @param message The message to log
#' @param level The log level (default: "INFO")
#' @return NULL (used for side effects)
#'
safe_log <- function(message, level = "INFO") {
  stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
  line <- paste0("[", stamp, "][", level, "] ", message)
  # ERROR/WARNING are surfaced as R warnings; everything else as a message
  if (!level %in% c("ERROR", "WARNING")) {
    message(line)
  } else {
    warning(line)
  }
}
#' Build the date window used to filter imagery for one reporting period
#'
#' @param end_date The end date for the sequence (Date object)
#' @param offset Number of days to look back from end_date
#' @return A list containing week number, year, and a sequence of dates for filtering
#'
date_list <- function(end_date, offset) {
  # Coerce/validate the end date
  if (!lubridate::is.Date(end_date)) {
    end_date <- as.Date(end_date)
    if (is.na(end_date)) {
      stop("Invalid end_date provided. Expected a Date object or a string convertible to Date.")
    }
  }
  # Validate the window length
  offset <- as.numeric(offset)
  if (is.na(offset) || offset < 1) {
    stop("Invalid offset provided. Expected a positive number.")
  }
  # The window includes end_date itself, so step back offset - 1 days
  start_date <- end_date - lubridate::days(offset - 1)
  # Dates are formatted YYYY-MM-DD for consistent filename filtering
  days_filter <- format(seq(from = start_date, to = end_date, by = "day"), "%Y-%m-%d")
  safe_log(paste("Date range generated from", start_date, "to", end_date))
  list(
    "week" = lubridate::week(start_date),
    "year" = lubridate::year(start_date),
    "days_filter" = days_filter,
    "start_date" = start_date,
    "end_date" = end_date
  )
}
# -----------------------------
# PART 3: CI EXTRACTION PROCESS
# -----------------------------
#' Find satellite imagery files within a specific date range
#'
#' @param image_folder Path to the folder containing satellite images
#' @param date_filter Vector of dates to filter by (in YYYY-MM-DD format)
#' @return Vector of file paths matching the date filter (possibly empty)
#'
find_satellite_images <- function(image_folder, date_filter) {
  # Validate inputs
  if (!dir.exists(image_folder)) {
    stop(paste("Image folder not found:", image_folder))
  }
  # List all TIF files anywhere under the folder
  all_files <- list.files(image_folder, pattern = "\\.tif$", full.names = TRUE, recursive = TRUE)
  if (length(all_files) == 0) {
    safe_log("No TIF files found in the specified directory", "WARNING")
    return(character(0))
  }
  # Filenames embed dates without dashes (YYYYMMDD), so match on that form.
  # lapply + unlist avoids growing a vector inside the loop (quadratic copies),
  # and fixed = TRUE makes the intent (literal substring match) explicit.
  per_date <- lapply(date_filter, function(d) {
    date_pattern <- gsub("-", "", d)
    matching_files <- all_files[grepl(date_pattern, all_files, fixed = TRUE)]
    if (length(matching_files) > 0) {
      safe_log(paste("Found", length(matching_files), "files for date", d))
    }
    matching_files
  })
  matches <- unlist(per_date, use.names = FALSE)
  # unlist() of an empty list yields NULL; callers expect a character vector
  if (is.null(matches)) character(0) else matches
}
#' Compute a Chlorophyll Index (CI) layer from a 4-band raster
#'
#' @param raster_obj A SpatRaster object with Red, Green, Blue, and NIR bands
#' @return A single-layer SpatRaster named "CI", clamped to [0, 10]
#'
calculate_ci <- function(raster_obj) {
  # All four bands must be present before indexing them
  if (terra::nlyr(raster_obj) < 4) {
    stop("Raster must have at least 4 bands (Red, Green, Blue, NIR)")
  }
  # Band order convention in this pipeline: 1 = Blue, 2 = Green, 3 = Red, 4 = NIR
  red <- raster_obj[[3]]
  nir <- raster_obj[[4]]
  # CI = (NIR / Red) - 1 highlights chlorophyll content in vegetation
  ci <- (nir / red) - 1
  # Clamp extremes produced by the ratio (e.g. near-zero red values)
  ci[ci > 10] <- 10
  ci[ci < 0] <- 0
  names(ci) <- "CI"
  ci
}
#' Build a binary clear-sky mask from simple band thresholds
#'
#' @param raster_obj A SpatRaster object with multiple bands
#' @return A binary mask where 1=clear pixel, 0=cloudy or shadow pixel
#'
create_cloud_mask <- function(raster_obj) {
  # Band order convention: 1 = Blue, 2 = Green, 3 = Red, 4 = NIR
  b <- raster_obj[[1]]
  g <- raster_obj[[2]]
  r <- raster_obj[[3]]
  n <- raster_obj[[4]]
  # Start from an all-valid mask with the same geometry as the input
  clear <- b * 0 + 1
  # Derived indices shared by both detectors
  ndvi <- (n - r) / (n + r)
  brightness <- (b + g + r) / 3
  # --- Cloud criteria ---
  # Clouds are bright in every visible band ...
  is_bright <- (b > 0.3) & (g > 0.3) & (r > 0.3)
  # ... and additionally either blue-dominant (snow / thick cloud) or low-NDVI
  is_cloud <- is_bright & ((b > (r * 1.2)) | (ndvi < 0.1))
  # --- Shadow criteria ---
  # Shadows are dark overall and either NIR-poor or blue-enhanced
  blue_vs_nir <- b / (n + 0.01)  # small constant avoids division by zero
  is_shadow <- (brightness < 0.1) & ((n < 0.15) | (blue_vs_nir > 0.8))
  # Flag clouds and shadows as invalid (0); everything else stays 1
  clear[is_cloud | is_shadow] <- 0
  clear
}
#' Derive a cloud-masked CI raster from one satellite scene
#'
#' @param file Path to the satellite image file
#' @param field_boundaries Field boundaries vector object
#' @param output_dir Directory to save the processed raster
#' @return Path to the processed raster file, or NULL if processing failed
#'
process_satellite_image <- function(file, field_boundaries, output_dir) {
  # Fail fast on bad inputs
  if (!file.exists(file)) {
    stop(paste("File not found:", file))
  }
  if (is.null(field_boundaries)) {
    stop("Field boundaries are required but were not provided")
  }
  # Output path: <input basename>_CI.tif inside output_dir
  out_name <- paste0(tools::file_path_sans_ext(basename(file)), "_CI.tif")
  output_file <- here::here(output_dir, out_name)
  tryCatch({
    scene <- terra::rast(file)
    # Compute CI and zero-out cloudy/shadowed pixels in one step
    ci_clean <- calculate_ci(scene) * create_cloud_mask(scene)
    # Crop to the field extent to keep the output small
    ci_cropped <- terra::crop(ci_clean, terra::ext(field_boundaries))
    terra::writeRaster(ci_cropped, output_file, overwrite = TRUE)
    safe_log(paste("Successfully processed", basename(file)))
    output_file
  }, error = function(e) {
    safe_log(paste("Error processing", basename(file), ":", e$message), "ERROR")
    NULL
  })
}
#' Summarise CI values per field polygon
#'
#' @param ci_raster A SpatRaster with CI values
#' @param field_boundaries An sf object with field polygons
#' @return A data frame with per-field CI statistics, stamped with today's date
#'
extract_ci_by_field <- function(ci_raster, field_boundaries) {
  # Fail fast on missing inputs
  if (is.null(ci_raster)) {
    stop("CI raster is required but was NULL")
  }
  if (is.null(field_boundaries) || nrow(field_boundaries) == 0) {
    stop("Field boundaries are required but were empty")
  }
  # Coverage-weighted zonal statistics per polygon
  stats_df <- exactextractr::exact_extract(
    ci_raster,
    field_boundaries,
    fun = c("mean", "median", "min", "max", "stdev", "count"),
    progress = FALSE
  )
  # Attach identifiers; fall back to the field name when no sub_field exists
  stats_df$field <- field_boundaries$field
  stats_df$sub_field <- if ("sub_field" %in% names(field_boundaries)) {
    field_boundaries$sub_field
  } else {
    field_boundaries$field
  }
  # Stamp each row with the extraction date
  stats_df$date <- Sys.Date()
  # Strip the "CI." prefix exact_extract puts on multi-stat column names
  names(stats_df) <- gsub("CI\\.", "", names(stats_df))
  stats_df
}
# -----------------------------------------
# PART 4: YIELD PREDICTION IMPLEMENTATION
# -----------------------------------------
#' Join CI accumulations to harvest records and split train/predict sets
#'
#' @param ci_data Data frame with cumulative CI values
#' @param harvest_data Data frame with harvest information
#' @return A list with `train` (rows with known yield) and `predict` (rows without)
#'
prepare_yield_prediction_data <- function(ci_data, harvest_data) {
  # One row per sub_field/season: keep the latest observation (max DOY)
  joined <- dplyr::left_join(ci_data, harvest_data, by = c("field", "sub_field", "season")) %>%
    dplyr::group_by(sub_field, season) %>%
    dplyr::slice(which.max(DOY)) %>%
    dplyr::select(field, sub_field, tonnage_ha, cumulative_CI, DOY, season, sub_area) %>%
    dplyr::mutate(CI_per_day = cumulative_CI / DOY)
  joined <- as.data.frame(joined)
  # Known yields train the model; rows with missing yields are predicted later
  list(
    train = dplyr::filter(joined, !is.na(tonnage_ha)),
    predict = dplyr::filter(joined, is.na(tonnage_ha))
  )
}
#' Fit a random-forest yield model with forward feature selection
#'
#' @param training_data Data frame with training data
#' @param predictors Vector of predictor variable names
#' @param response Name of the response variable
#' @return Trained model
#'
train_yield_model <- function(training_data,
                              predictors = c("cumulative_CI", "DOY", "CI_per_day"),
                              response = "tonnage_ha") {
  # 5-fold CV, keeping per-fold predictions for later inspection
  cv_ctrl <- caret::trainControl(
    method = "cv",
    savePredictions = TRUE,
    allowParallel = TRUE,
    number = 5,
    verboseIter = TRUE
  )
  # Fixed seed so feature selection and CV splits are reproducible
  set.seed(202)
  # Forward feature selection (CAST::ffs) wrapped around a random forest
  CAST::ffs(
    training_data[, predictors],
    training_data[, response],
    method = "rf",
    trControl = cv_ctrl,
    importance = TRUE,
    withinSE = TRUE,
    tuneLength = 5,
    na.rm = TRUE
  )
}
#' Format predictions into a clean data frame
#'
#' @param predictions Raw prediction results
#' @param newdata Original data frame with field information
#' @return Formatted predictions data frame
#'
prepare_predictions <- function(predictions, newdata) {
  # NOTE(review): the rename below relies on the magrittr pipe deparsing the
  # placeholder "." as the column name produced by as.data.frame(). Do not
  # refactor this to a plain as.data.frame(predictions) call without also
  # updating the rename target.
  return(predictions %>%
    as.data.frame() %>%
    dplyr::rename(predicted_Tcha = ".") %>%
    dplyr::mutate(
      # Carry identifiers and covariates over from newdata; row order is
      # assumed to match the prediction vector - TODO confirm with callers
      sub_field = newdata$sub_field,
      field = newdata$field,
      Age_days = newdata$DOY,
      total_CI = round(newdata$cumulative_CI, 0),
      predicted_Tcha = round(predicted_Tcha, 0),
      season = newdata$season
    ) %>%
    dplyr::select(field, sub_field, Age_days, total_CI, predicted_Tcha, season) %>%
    # Re-attach the remaining newdata columns (e.g. cumulative_CI, DOY)
    dplyr::left_join(., newdata, by = c("field", "sub_field", "season"))
  )
}
#' Predict yields for mature fields
#'
#' @param model Trained model
#' @param prediction_data Data frame with fields to predict
#' @param min_age Minimum age in days to qualify as mature (default: 300)
#' @return Data frame with yield predictions for fields older than min_age
#'
predict_yields <- function(model, prediction_data, min_age = 300) {
  # Raw model output, one value per row of prediction_data
  raw <- stats::predict(model, newdata = prediction_data)
  # Tidy the output, keep only mature fields, and add a per-day CI rate
  formatted <- prepare_predictions(raw, prediction_data)
  mature <- dplyr::filter(formatted, Age_days > min_age)
  dplyr::mutate(mature, CI_per_day = round(total_CI / Age_days, 1))
}
# ------------------------------
# PART 5: DEMONSTRATION WORKFLOW
# ------------------------------
#' Demonstration workflow showing how to use the functions
#'
#' Runs the full pipeline end to end: build the date window, load field
#' boundaries, find and process imagery, extract per-field CI statistics,
#' and (when harvest data is available) train a model and predict yields.
#'
#' @param end_date The end date for processing satellite images
#' @param offset Number of days to look back
#' @param image_folder Path to the folder containing satellite images
#' @param field_boundaries_path Path to field boundaries shapefile
#' @param output_dir Path to save processed outputs
#' @param harvest_data_path Path to historical harvest data
#' @return A list with `ci_stats`, plus `yield_predictions` and `model` when
#'   training data was available
#'
demo_workflow <- function(end_date = Sys.Date(), offset = 7,
                          image_folder = "path/to/satellite/images",
                          field_boundaries_path = "path/to/field_boundaries.shp",
                          output_dir = "path/to/output",
                          harvest_data_path = "path/to/harvest_data.csv") {
  # Step 1: Generate date list for processing
  dates <- date_list(end_date, offset)
  safe_log(paste("Processing data for week", dates$week, "of", dates$year))
  # Step 2: Load field boundaries
  field_boundaries <- sf::read_sf(field_boundaries_path)
  safe_log(paste("Loaded", nrow(field_boundaries), "field boundaries"))
  # Step 3: Find satellite images for the specified date range
  image_files <- find_satellite_images(image_folder, dates$days_filter)
  safe_log(paste("Found", length(image_files), "satellite images for processing"))
  # Step 4: Process each satellite image and calculate CI
  # (failed scenes return NULL and are silently skipped)
  ci_files <- list()
  for (file in image_files) {
    ci_file <- process_satellite_image(file, field_boundaries, output_dir)
    if (!is.null(ci_file)) {
      ci_files <- c(ci_files, ci_file)
    }
  }
  # Step 5: Extract CI statistics for each field
  ci_stats_list <- list()
  for (ci_file in ci_files) {
    ci_raster <- terra::rast(ci_file)
    ci_stats <- extract_ci_by_field(ci_raster, field_boundaries)
    ci_stats_list[[basename(ci_file)]] <- ci_stats
  }
  # Combine all stats
  all_ci_stats <- dplyr::bind_rows(ci_stats_list)
  safe_log(paste("Extracted CI statistics for", nrow(all_ci_stats), "field-date combinations"))
  # Step 6: Prepare for yield prediction
  if (file.exists(harvest_data_path)) {
    # Load harvest data
    harvest_data <- read.csv(harvest_data_path)
    safe_log("Loaded harvest data for yield prediction")
    # Make up cumulative_CI data for demonstration purposes
    # In a real scenario, this would come from accumulating CI values over time
    # NOTE(review): DOY here is the observation count, not a true day-of-year;
    # confirm downstream code treats it the same way.
    ci_data <- all_ci_stats %>%
      dplyr::group_by(field, sub_field) %>%
      dplyr::summarise(
        cumulative_CI = sum(mean, na.rm = TRUE),
        DOY = n(), # Days of year as the count of observations
        season = lubridate::year(max(date, na.rm = TRUE)),
        .groups = "drop"
      )
    # Prepare data for modeling
    modeling_data <- prepare_yield_prediction_data(ci_data, harvest_data)
    if (nrow(modeling_data$train) > 0) {
      # Train yield prediction model
      yield_model <- train_yield_model(modeling_data$train)
      safe_log("Trained yield prediction model")
      # Predict yields for mature fields
      yield_predictions <- predict_yields(yield_model, modeling_data$predict)
      safe_log(paste("Generated yield predictions for", nrow(yield_predictions), "fields"))
      # Return results
      return(list(
        ci_stats = all_ci_stats,
        yield_predictions = yield_predictions,
        model = yield_model
      ))
    } else {
      safe_log("No training data available for yield prediction", "WARNING")
      return(list(ci_stats = all_ci_stats))
    }
  } else {
    safe_log("Harvest data not found, skipping yield prediction", "WARNING")
    return(list(ci_stats = all_ci_stats))
  }
}
# ------------------------------
# PART 6: USAGE EXAMPLE
# ------------------------------
# Uncomment and modify paths to run the demo workflow
# results <- demo_workflow(
# end_date = "2023-10-01",
# offset = 7,
# image_folder = "data/satellite_images",
# field_boundaries_path = "data/field_boundaries.shp",
# output_dir = "output/processed",
# harvest_data_path = "data/harvest_history.csv"
# )
#
# # Access results
# ci_stats <- results$ci_stats
# yield_predictions <- results$yield_predictions
#
# # Example: Plot CI distribution by field
# if (require(ggplot2)) {
# ggplot(ci_stats, aes(x = field, y = mean, fill = field)) +
# geom_boxplot() +
# labs(title = "CI Distribution by Field",
# x = "Field",
# y = "Mean CI") +
# theme_minimal() +
# theme(axis.text.x = element_text(angle = 45, hjust = 1))
# }
#
# # Example: Plot predicted yield vs age
# if (exists("yield_predictions") && require(ggplot2)) {
# ggplot(yield_predictions, aes(x = Age_days, y = predicted_Tcha, color = field)) +
# geom_point(size = 3) +
# geom_text(aes(label = field), hjust = -0.2, vjust = -0.2) +
# labs(title = "Predicted Yield by Field Age",
# x = "Age (Days)",
# y = "Predicted Yield (Tonnes/ha)") +
# theme_minimal()
# }

View file

@ -0,0 +1,556 @@
# Cloud and Shadow Detection Analysis
# This script analyzes cloud and shadow detection parameters using the diagnostic GeoTIFF files
# and polygon-based classification to help optimize the detection algorithms
# Load required packages
library(terra)
library(sf)
library(dplyr)
library(ggplot2)
library(reshape2)
library(exactextractr) # For accurate polygon extraction
# Define diagnostic directory
diagnostic_dir <- "C:/Users/timon/Resilience BV/4020 SCane ESA DEMO - Documenten/General/4020 SCDEMO Team/4020 TechnicalData/WP3/smartcane/cloud_mask_diagnostics_20250515-164357"
# Simple logging function for this standalone script
# Write a single log line to the console.
#
# @param message Text to log.
# @param level Severity label printed in brackets (default "INFO").
# @return NULL, invisibly (the return value of cat()).
safe_log <- function(message, level = "INFO") {
  cat(sprintf("[%s] %s\n", level, message))
}
safe_log("Starting cloud detection analysis on diagnostic rasters")
# Load all diagnostic rasters
safe_log("Loading diagnostic raster files...")
# Load original bands
red_band <- terra::rast(file.path(diagnostic_dir, "diagnostic_red_band.tif"))
green_band <- terra::rast(file.path(diagnostic_dir, "diagnostic_green_band.tif"))
blue_band <- terra::rast(file.path(diagnostic_dir, "diagnostic_blue_band.tif"))
nir_band <- terra::rast(file.path(diagnostic_dir, "diagnostic_nir_band.tif"))
# Load derived indices
brightness <- terra::rast(file.path(diagnostic_dir, "diagnostic_brightness.tif"))
ndvi <- terra::rast(file.path(diagnostic_dir, "diagnostic_ndvi.tif"))
blue_ratio <- terra::rast(file.path(diagnostic_dir, "diagnostic_blue_ratio.tif"))
green_nir_ratio <- terra::rast(file.path(diagnostic_dir, "diagnostic_green_nir_ratio.tif"))
ndwi <- terra::rast(file.path(diagnostic_dir, "diagnostic_ndwi.tif"))
# Load cloud detection parameters
bright_pixels <- terra::rast(file.path(diagnostic_dir, "param_bright_pixels.tif"))
very_bright_pixels <- terra::rast(file.path(diagnostic_dir, "param_very_bright_pixels.tif"))
blue_dominant <- terra::rast(file.path(diagnostic_dir, "param_blue_dominant.tif"))
low_ndvi <- terra::rast(file.path(diagnostic_dir, "param_low_ndvi.tif"))
green_dominant_nir <- terra::rast(file.path(diagnostic_dir, "param_green_dominant_nir.tif"))
high_ndwi <- terra::rast(file.path(diagnostic_dir, "param_high_ndwi.tif"))
# Load shadow detection parameters
dark_pixels <- terra::rast(file.path(diagnostic_dir, "param_dark_pixels.tif"))
very_dark_pixels <- terra::rast(file.path(diagnostic_dir, "param_very_dark_pixels.tif"))
low_nir <- terra::rast(file.path(diagnostic_dir, "param_low_nir.tif"))
shadow_ndvi <- terra::rast(file.path(diagnostic_dir, "param_shadow_ndvi.tif"))
low_red_to_blue <- terra::rast(file.path(diagnostic_dir, "param_low_red_to_blue.tif"))
high_blue_to_nir_ratio <- terra::rast(file.path(diagnostic_dir, "param_high_blue_to_nir_ratio.tif"))
blue_nir_ratio_raw <- terra::rast(file.path(diagnostic_dir, "param_blue_nir_ratio_raw.tif"))
red_blue_ratio_raw <- terra::rast(file.path(diagnostic_dir, "param_red_blue_ratio_raw.tif"))
# Load edge detection parameters
brightness_focal_sd <- terra::rast(file.path(diagnostic_dir, "param_brightness_focal_sd.tif"))
edge_pixels <- terra::rast(file.path(diagnostic_dir, "param_edge_pixels.tif"))
# Load final masks
cloud_mask <- terra::rast(file.path(diagnostic_dir, "mask_cloud.tif"))
shadow_mask <- terra::rast(file.path(diagnostic_dir, "mask_shadow.tif"))
combined_mask <- terra::rast(file.path(diagnostic_dir, "mask_combined.tif"))
dilated_mask <- terra::rast(file.path(diagnostic_dir, "mask_dilated.tif"))
safe_log("Raster data loaded successfully")
# ------------------------------------------------------------------
# Polygon-based spectral analysis of the diagnostic rasters.
# NOTE(review): the original text of this section was corrupted by a
# merge (two versions interleaved line-by-line; it referenced an
# undefined `valid_class_ids` and would not parse). The logic below is
# reconstructed from the intact upper portion and the cleaned
# standalone copy of this script.
# ------------------------------------------------------------------
tryCatch({
  # Prefer a classes.geojson next to the rasters; otherwise fall back
  # to the newest sibling cloud_mask_diagnostics_* folder that has one.
  classes_file <- file.path(diagnostic_dir, "classes.geojson")
  if (!file.exists(classes_file)) {
    potential_dirs <- list.dirs(path = dirname(diagnostic_dir),
                                full.names = TRUE,
                                recursive = FALSE)
    diagnostic_dirs <- potential_dirs[grepl("cloud_mask_diagnostics_", potential_dirs)]
    for (dir in rev(sort(diagnostic_dirs))) { # reverse sort: newest first
      potential_file <- file.path(dir, "classes.geojson")
      if (file.exists(potential_file)) {
        classes_file <- potential_file
        break
      }
    }
  }

  if (file.exists(classes_file)) {
    safe_log(paste("Using classification polygons from:", classes_file))
    # Hand-drawn training polygons; the 'type' attribute holds the
    # land-cover class label, renamed to 'class' for consistency.
    classifications <- sf::st_read(classes_file, quiet = TRUE) %>% rename(class = type)
    classifications <- classifications[!sf::st_is_empty(classifications), ]

    # Layers to summarise per polygon: raw bands, derived indices,
    # boolean detection parameters (coerced to 0/1), raw ratios, and
    # the final masks.
    extraction_rasters <- list(
      red = red_band,
      green = green_band,
      blue = blue_band,
      nir = nir_band,
      brightness = brightness,
      ndvi = ndvi,
      blue_ratio = blue_ratio,
      green_nir_ratio = green_nir_ratio,
      ndwi = ndwi,
      bright_pixels = terra::ifel(bright_pixels, 1, 0),
      very_bright_pixels = terra::ifel(very_bright_pixels, 1, 0),
      blue_dominant = terra::ifel(blue_dominant, 1, 0),
      low_ndvi = terra::ifel(low_ndvi, 1, 0),
      green_dominant_nir = terra::ifel(green_dominant_nir, 1, 0),
      high_ndwi = terra::ifel(high_ndwi, 1, 0),
      dark_pixels = terra::ifel(dark_pixels, 1, 0),
      very_dark_pixels = terra::ifel(very_dark_pixels, 1, 0),
      low_nir = terra::ifel(low_nir, 1, 0),
      shadow_ndvi = terra::ifel(shadow_ndvi, 1, 0),
      low_red_to_blue = terra::ifel(low_red_to_blue, 1, 0),
      high_blue_to_nir_ratio = terra::ifel(high_blue_to_nir_ratio, 1, 0),
      blue_nir_ratio_raw = (blue_band / (nir_band + 0.01)),
      red_blue_ratio_raw = (red_band / (blue_band + 0.01)),
      brightness_focal_sd = brightness_focal_sd,
      edge_pixels = terra::ifel(edge_pixels, 1, 0),
      cloud_mask = terra::ifel(cloud_mask, 1, 0),
      shadow_mask = terra::ifel(shadow_mask, 1, 0),
      combined_mask = terra::ifel(combined_mask, 1, 0),
      dilated_mask = terra::ifel(dilated_mask, 1, 0)
    )
    extraction_stack <- terra::rast(extraction_rasters)

    # One row per polygon; exact_extract() names columns "mean.<layer>".
    all_stats <- cbind(
      classifications,
      round(exactextractr::exact_extract(extraction_stack, classifications,
                                         fun = "mean", progress = FALSE), 2)
    ) %>%
      sf::st_drop_geometry()

    # Derive class_name from class only when it is genuinely missing.
    if (!("class_name" %in% colnames(all_stats)) && ("class" %in% colnames(all_stats))) {
      all_stats$class_name <- all_stats$class
    }

    if (nrow(all_stats) > 0) {
      stats_file <- file.path(diagnostic_dir, "class_spectral_stats_mean.csv")
      write.csv(all_stats, stats_file, row.names = FALSE)
      safe_log(paste("Saved MEAN spectral statistics by class to:", stats_file))
    } else {
      safe_log("No statistics were generated to save.", "WARNING")
    }

    # --- Optimal separation thresholds per class pair ---------------
    # For each (target, reference) pair and each mean.* parameter, scan
    # 20 evenly spaced cut points and keep the most accurate one.
    class_pairs <- list(
      c("cloud", "crop"),
      c("cloud", "bare_soil_dry"),
      c("cloud", "bare_soil_wet"),
      c("shadow_over_crop", "crop"),
      c("shadow_over_bare_soil", "bare_soil_dry"),
      c("shadow_over_bare_soil", "bare_soil_wet")
    )
    params_to_check <- grep("^mean\\.", colnames(all_stats), value = TRUE)

    threshold_rows <- list()  # collect rows; bind once (no rbind-in-loop)
    for (pair in class_pairs) {
      target_class <- pair[1]
      vs_class <- pair[2]
      for (param in params_to_check) {
        # which() keeps NA class_name rows out of the subset
        target_values <- all_stats[which(all_stats$class_name == target_class), param]
        vs_values <- all_stats[which(all_stats$class_name == vs_class), param]
        target_values <- target_values[!is.na(target_values)]
        vs_values <- vs_values[!is.na(vs_values)]
        if (length(target_values) == 0 || length(vs_values) == 0) next

        # Higher values indicate the target class when its mean is larger.
        direction <- if (mean(target_values) > mean(vs_values)) ">" else "<"
        rng <- range(c(target_values, vs_values))
        if (!all(is.finite(rng)) || rng[1] == rng[2]) next
        potential_thresholds <- seq(rng[1], rng[2], length.out = 20)

        best_accuracy <- -1
        best_threshold <- potential_thresholds[1]
        for (threshold in potential_thresholds) {
          if (direction == ">") {
            n_correct <- sum(target_values > threshold) + sum(vs_values <= threshold)
          } else {
            n_correct <- sum(target_values < threshold) + sum(vs_values >= threshold)
          }
          accuracy <- n_correct / (length(target_values) + length(vs_values))
          if (accuracy > best_accuracy) {
            best_accuracy <- accuracy
            best_threshold <- threshold
          }
        }
        threshold_rows[[length(threshold_rows) + 1]] <- data.frame(
          parameter = sub("^mean\\.", "", param),
          best_threshold = best_threshold,
          direction = direction,
          target_class = target_class,
          vs_class = vs_class,
          accuracy = best_accuracy,
          stringsAsFactors = FALSE
        )
      }
    }
    if (length(threshold_rows) > 0) {
      threshold_results <- do.call(rbind, threshold_rows)
      thresholds_file <- file.path(diagnostic_dir, "optimal_thresholds.csv")
      write.csv(threshold_results, thresholds_file, row.names = FALSE)
      safe_log(paste("Saved optimal threshold recommendations to:", thresholds_file))
    } else {
      safe_log("No thresholds could be derived (insufficient class data).", "WARNING")
    }

    # --- Box plots of key parameters by class -----------------------
    if (requireNamespace("ggplot2", quietly = TRUE) && nrow(all_stats) > 0) {
      mean_cols <- grep("^mean\\.", colnames(all_stats), value = TRUE)
      if (length(mean_cols) > 0) {
        plot_data <- reshape2::melt(all_stats,
                                    id.vars = c("class", "class_name"),
                                    measure.vars = mean_cols,
                                    variable.name = "parameter",
                                    value.name = "value")
        # Strip the "mean." prefix for readable axis/facet labels.
        plot_data$parameter <- sub("^mean\\.", "", plot_data$parameter)

        plots_dir <- file.path(diagnostic_dir, "class_plots")
        dir.create(plots_dir, showWarnings = FALSE, recursive = TRUE)

        key_params_plot <- intersect(
          c("brightness", "ndvi", "blue_ratio", "ndwi",
            "blue_nir_ratio_raw", "red_blue_ratio_raw"),
          unique(plot_data$parameter)
        )
        for (param in key_params_plot) {
          param_data <- plot_data[plot_data$parameter == param, ]
          if (nrow(param_data) == 0) next
          p <- ggplot2::ggplot(param_data,
                               ggplot2::aes(x = class_name, y = value, fill = class_name)) +
            ggplot2::geom_boxplot() +
            ggplot2::theme_minimal() +
            ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
            ggplot2::labs(
              title = paste("Distribution of", param, "by Land Cover Class"),
              x = "Class",
              y = param,
              fill = "Class"
            )
          ggplot2::ggsave(file.path(plots_dir, paste0("boxplot_", param, ".png")),
                          p, width = 10, height = 6, dpi = 150)
        }

        # Faceted summary of the four most discriminative parameters.
        summary_data <- plot_data[plot_data$parameter %in%
                                    c("brightness", "ndvi",
                                      "blue_nir_ratio_raw", "red_blue_ratio_raw"), ]
        if (nrow(summary_data) > 0) {
          p <- ggplot2::ggplot(summary_data,
                               ggplot2::aes(x = class_name, y = value, fill = class_name)) +
            ggplot2::geom_boxplot() +
            ggplot2::facet_wrap(~parameter, scales = "free_y") +
            ggplot2::theme_minimal() +
            ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
            ggplot2::labs(
              title = "Key Spectral Parameters by Land Cover Class",
              x = "Class",
              y = "Value",
              fill = "Class"
            )
          ggplot2::ggsave(file.path(plots_dir, "spectral_parameters_summary.png"),
                          p, width = 12, height = 8, dpi = 150)
        }
        safe_log(paste("Generated spectral parameter plots in:", plots_dir))
      }
    }
  } else {
    safe_log("No classification polygons file (classes.geojson) found. Skipping spectral analysis.", "WARNING")
  }
}, error = function(e) {
  safe_log(paste("Error in classification polygon processing or spectral analysis:", e$message), "ERROR")
})
safe_log("Cloud detection analysis script finished.")
# NOTE(review): the original ended with rm(list = ls()); that clears the
# caller's entire workspace when this script is sourced, so it was dropped.

View file

@ -0,0 +1,191 @@
```r
# Cloud detection analysis script
# Load necessary libraries
library(terra)
library(exactextractr)
library(sf)
library(dplyr)
library(ggplot2)
library(tidyr)
library(reshape2)
# Define file paths (these should be set to your actual file locations)
classes_file <- "path/to/classes.geojson"
rasters_dir <- "path/to/rasters"
diagnostic_dir <- "path/to/diagnostics"
# Helper function for logging
# Emit one timestamped log line to stdout.
#
# @param message Text to log.
# @param level Severity label printed in brackets (default "INFO").
# @return NULL, invisibly (the return value of cat()).
safe_log <- function(message, level = "INFO") {
  stamp <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
  cat(sprintf("[%s] [%s] %s\n", stamp, level, message))
}
# Main processing block
# Load the hand-labelled classification polygons.
safe_log(paste("Loading classification polygons from:", classes_file))
classifications <- sf::st_read(classes_file, quiet = TRUE)
# Only assign a CRS when none is present. The previous code set EPSG:4326
# unconditionally, which silently mislabels data that already carries a
# (possibly projected) CRS.
if (is.na(sf::st_crs(classifications))) {
  safe_log("No CRS found for the classifications. Setting to WGS84 (EPSG:4326).", "WARNING")
  sf::st_crs(classifications) <- 4326
}
# List all raster files in the directory
raster_files <- list.files(rasters_dir, pattern = "\\.tif$", full.names = TRUE)
# Name each layer after its file base name so extracted columns are readable
extraction_rasters <- setNames(raster_files, tools::file_path_sans_ext(basename(raster_files)))
# Create a stack of all rasters
extraction_stack <- terra::rast(extraction_rasters)
# Mean of every layer within every polygon (one row per polygon);
# exact_extract() names the resulting columns "mean.<layer>".
safe_log("Extracting mean statistics per polygon using exactextractr...")
all_stats <- cbind(
  classifications,
  round(exactextractr::exact_extract(extraction_stack, classifications, fun = "mean", progress = FALSE), 2)
) %>%
  sf::st_drop_geometry() # Ensures all_stats is a data frame
# Derive 'class_name' from 'class' only when it is genuinely missing.
# The previous unconditional assignment clobbered any existing column,
# contradicting its own "if not" comment.
if (!("class_name" %in% colnames(all_stats)) && ("class" %in% colnames(all_stats))) {
  all_stats$class_name <- all_stats$class
}
# Save the extracted statistics to a CSV file
stats_file <- file.path(diagnostic_dir, "polygon_mean_spectral_stats.csv")
write.csv(all_stats, stats_file, row.names = FALSE)
safe_log(paste("Saved mean spectral statistics per polygon to:", stats_file))
# ------------------------------------------------------------------
# Derive candidate cloud/shadow detection thresholds.
# For each (target, reference) class pair and each spectral parameter,
# scan 20 evenly spaced cut points between the pooled min and max and
# keep the one that best separates the two classes.
# Fixes vs previous version: rows are collected in a list and bound
# once (no O(n^2) rbind-in-loop), the dead target_sd/vs_sd computation
# is removed, scalar ifelse() is replaced by if/else, and the duplicate
# "mean.brightness" entry no longer produces duplicate result rows.
# ------------------------------------------------------------------
class_pairs <- list(
  c("cloud", "crop"),
  c("cloud", "bare_soil_dry"),
  c("cloud", "bare_soil_wet"),
  c("shadow_over_crop", "crop"),
  c("shadow_over_bare_soil", "bare_soil_dry"),
  c("shadow_over_bare_soil", "bare_soil_wet")
)
# Keep only parameters that actually exist as columns in all_stats.
cloud_detection_params_for_threshold <- intersect(
  c("mean.brightness", "mean.very_bright_pixels", "mean.blue_dominant", "mean.low_ndvi", "mean.green_dominant_nir", "mean.high_ndwi", "mean.blue_ratio", "mean.ndvi"),
  colnames(all_stats)
)
shadow_detection_params_for_threshold <- intersect(
  c("mean.brightness", "mean.dark_pixels", "mean.very_dark_pixels", "mean.low_nir", "mean.shadow_ndvi", "mean.low_red_to_blue", "mean.high_blue_to_nir_ratio", "mean.blue_nir_ratio_raw", "mean.red_blue_ratio_raw"),
  colnames(all_stats)
)
# Hoisted out of the pair loop (it never changes); unique() drops the
# "mean.brightness" overlap between the two lists.
params_to_check <- unique(c(cloud_detection_params_for_threshold,
                            shadow_detection_params_for_threshold))

threshold_rows <- list()
for (pair in class_pairs) {
  target_class <- pair[1]
  vs_class <- pair[2]
  for (param in params_to_check) {
    target_values <- all_stats[all_stats$class_name == target_class, param]
    vs_values <- all_stats[all_stats$class_name == vs_class, param]
    target_values <- target_values[!is.na(target_values)]
    vs_values <- vs_values[!is.na(vs_values)]
    # Only proceed if both groups have at least one value
    if (length(target_values) == 0 || length(vs_values) == 0) next

    # Higher values indicate the target class when its mean is larger.
    direction <- if (mean(target_values) > mean(vs_values)) ">" else "<"
    all_values <- c(target_values, vs_values)
    min_val <- min(all_values)
    max_val <- max(all_values)
    # Only scan when the pooled range is finite and non-degenerate.
    if (!is.finite(min_val) || !is.finite(max_val) || min_val == max_val) next

    potential_thresholds <- seq(min_val, max_val, length.out = 20)
    best_accuracy <- -1
    best_threshold <- if (direction == ">") min(potential_thresholds) else max(potential_thresholds)
    for (threshold in potential_thresholds) {
      if (direction == ">") {
        correct_target <- sum(target_values > threshold)
        correct_vs <- sum(vs_values <= threshold)
      } else {
        correct_target <- sum(target_values < threshold)
        correct_vs <- sum(vs_values >= threshold)
      }
      accuracy <- (correct_target + correct_vs) / (length(target_values) + length(vs_values))
      if (accuracy > best_accuracy) {
        best_accuracy <- accuracy
        best_threshold <- threshold
      }
    }
    threshold_rows[[length(threshold_rows) + 1]] <- data.frame(
      parameter = param,
      best_threshold = best_threshold,
      direction = direction,
      target_class = target_class,
      vs_class = vs_class,
      accuracy = best_accuracy,
      stringsAsFactors = FALSE
    )
  }
}
# Bind once; preserve the empty-frame schema when nothing was found.
threshold_results <- if (length(threshold_rows) > 0) {
  do.call(rbind, threshold_rows)
} else {
  data.frame(
    parameter = character(),
    best_threshold = numeric(),
    direction = character(),
    target_class = character(),
    vs_class = character(),
    accuracy = numeric(),
    stringsAsFactors = FALSE
  )
}
thresholds_file <- file.path(diagnostic_dir, "optimal_thresholds.csv")
write.csv(threshold_results, thresholds_file, row.names = FALSE)
safe_log(paste("Saved optimal threshold recommendations to:", thresholds_file))
# Fix: get plot_measure_cols by matching raster base names to all_stats columns with 'mean.' prefix
plot_measure_cols <- intersect(names(extraction_rasters), gsub('^mean\\.', '', colnames(all_stats)))
# Long format: one row per (polygon, parameter) for ggplot faceting/boxplots.
plot_data <- reshape2::melt(
all_stats,
id.vars = c("class", "class_name"),
measure.vars = paste0("mean.", plot_measure_cols),
variable.name = "parameter",
value.name = "value"
)
# Remove 'mean.' prefix from parameter column for clarity
plot_data$parameter <- sub("^mean\\.", "", plot_data$parameter)
plots_dir <- file.path(diagnostic_dir, "class_plots")
dir.create(plots_dir, showWarnings = FALSE, recursive = TRUE)
# Per-parameter box plots: only parameters that were actually extracted.
key_params_for_plot_list <- c("brightness", "ndvi", "blue_ratio", "ndwi",
"blue_nir_ratio_raw", "red_blue_ratio_raw")
key_params_to_plot <- intersect(key_params_for_plot_list, plot_measure_cols)
for (param_to_plot in key_params_to_plot) {
param_data_subset <- plot_data[plot_data$parameter == param_to_plot, ]
# One box per land-cover class, saved as boxplot_<param>.png.
p <- ggplot2::ggplot(param_data_subset, ggplot2::aes(x = class_name, y = value, fill = class_name)) +
ggplot2::geom_boxplot() +
ggplot2::theme_minimal() +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
ggplot2::labs(
title = paste("Distribution of", param_to_plot, "by Land Cover Class"),
x = "Class",
y = param_to_plot,
fill = "Class"
)
plot_file <- file.path(plots_dir, paste0("boxplot_", param_to_plot, ".png"))
ggplot2::ggsave(plot_file, p, width = 10, height = 6, dpi = 150)
}
# Faceted summary figure combining the most discriminative parameters.
summary_params_for_plot_list <- c("brightness", "ndvi",
"blue_nir_ratio_raw", "red_blue_ratio_raw")
summary_params_to_plot <- intersect(summary_params_for_plot_list, plot_measure_cols)
summary_data_subset <- plot_data[plot_data$parameter %in% summary_params_to_plot,]
p_summary <- ggplot2::ggplot(summary_data_subset, ggplot2::aes(x = class_name, y = value, fill = class_name)) +
ggplot2::geom_boxplot() +
ggplot2::facet_wrap(~parameter, scales = "free_y") +
ggplot2::theme_minimal() +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
strip.text = ggplot2::element_text(size = 8)) +
ggplot2::labs(
title = "Summary of Key Spectral Parameters by Land Cover Class",
x = "Class",
y = "Value",
fill = "Class"
)
summary_file <- file.path(plots_dir, "spectral_parameters_summary.png")
ggplot2::ggsave(summary_file, p_summary, width = 12, height = 8, dpi = 150)
safe_log(paste("Generated spectral parameter plots in:", plots_dir))
safe_log("Cloud detection analysis script finished.")
```

View file

@ -0,0 +1,718 @@
---
params:
ref: "word-styles-reference-var1.docx"
output_file: CI_report.docx
report_date: "2024-08-28"
data_dir: "Chemba"
mail_day: "Wednesday"
borders: TRUE
use_breaks: FALSE
output:
# html_document:
# toc: yes
# df_print: paged
word_document:
reference_docx: !expr file.path("word-styles-reference-var1.docx")
toc: yes
editor_options:
chunk_output_type: console
---
```{r setup_parameters, include=FALSE}
# Set up basic report parameters from input values
report_date <- params$report_date
mail_day <- params$mail_day
borders <- params$borders
# NOTE(review): `use_breaks` looks obsolete now that the maps use continuous
# scales -- confirm no downstream chunk reads it before removing it and the
# matching YAML entry.
use_breaks <- params$use_breaks # Whether to use breaks or continuous spectrum in visualizations
# Environment setup notes (commented out)
# # Activate the renv environment
# renv::activate()
# renv::deactivate()
# # Optional: restore the environment if needed
# # You can comment this out if you normally do not want to run it
# renv::restore()
```
```{r load_libraries, message=FALSE, warning=FALSE, include=FALSE}
# Load all packages and project utility scripts used by the report.
# Configure knitr options
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Path management
library(here)
# Spatial data libraries
library(sf)
library(terra)
library(exactextractr)
# library(raster) - Removed as it's no longer maintained
# Data manipulation and visualization
library(tidyverse) # Includes dplyr, ggplot2, etc.
library(tmap)
library(lubridate)
library(zoo)
# Machine learning
library(rsample)
library(caret)
library(randomForest)
library(CAST)
# Load custom utility functions
# (presumably this is where safe_log() and get_week_path() are defined --
# verify against report_utils.R)
tryCatch({
source("report_utils.R")
}, error = function(e) {
message(paste("Error loading report_utils.R:", e$message))
# Try alternative path if the first one fails
tryCatch({
source(here::here("r_app", "report_utils.R"))
}, error = function(e) {
stop("Could not load report_utils.R from either location: ", e$message)
})
})
# Load executive report utilities
tryCatch({
source("executive_report_utils.R")
}, error = function(e) {
message(paste("Error loading executive_report_utils.R:", e$message))
# Try alternative path if the first one fails
tryCatch({
source(here::here("r_app", "executive_report_utils.R"))
}, error = function(e) {
stop("Could not load executive_report_utils.R from either location: ", e$message)
})
})
safe_log("Successfully loaded utility functions")
```
```{r initialize_project_config, message=FALSE, warning=FALSE, include=FALSE}
# Set the project directory from parameters
project_dir <- params$data_dir
# Source project parameters with error handling
# (parameters_project.R presumably reads `project_dir` and defines paths such
# as cumulative_CI_vals_dir / weekly_CI_mosaic used below -- TODO confirm)
tryCatch({
source(here::here("r_app", "parameters_project.R"))
}, error = function(e) {
stop("Error loading parameters_project.R: ", e$message)
})
# Log initial configuration
safe_log("Starting the R Markdown script")
safe_log(paste("mail_day params:", params$mail_day))
safe_log(paste("report_date params:", params$report_date))
safe_log(paste("mail_day variable:", mail_day))
```
```{r calculate_dates_and_weeks, message=FALSE, warning=FALSE, include=FALSE}
# Set locale so weekday names are always produced in English
Sys.setlocale("LC_TIME", "C")

# Initialize date variables from parameters
today <- as.character(report_date)
mail_day_as_character <- as.character(mail_day)

# Weekday of the report date, used to decide whether this report belongs to
# the current or the next mailing week
report_date_as_week_day <- weekdays(lubridate::ymd(today))
days_of_week <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")

# Calculate initial week number
week <- lubridate::week(today)
safe_log(paste("Initial week calculation:", week, "today:", today))

# Dates of the three preceding report weeks, used for comparisons
today_minus_1 <- as.character(lubridate::ymd(today) - 7)
today_minus_2 <- as.character(lubridate::ymd(today) - 14)
today_minus_3 <- as.character(lubridate::ymd(today) - 21)

# Log the weekday calculations for debugging
safe_log(paste("Report date weekday:", report_date_as_week_day))
safe_log(paste("Weekday index:", which(days_of_week == report_date_as_week_day)))
safe_log(paste("Mail day:", mail_day_as_character))
safe_log(paste("Mail day index:", which(days_of_week == mail_day_as_character)))

# If the report date already lies past this week's mail day, the report
# counts towards the next week and the comparison dates shift by one week.
if (which(days_of_week == report_date_as_week_day) > which(days_of_week == mail_day_as_character)) {
  safe_log("Adjusting weeks because of mail day")
  week <- lubridate::week(today) + 1
  today_minus_1 <- as.character(lubridate::ymd(today))
  today_minus_2 <- as.character(lubridate::ymd(today) - 7)
  today_minus_3 <- as.character(lubridate::ymd(today) - 14)
}

# Generate subtitle for report
subtitle_var <- paste("Report generated on", Sys.Date())

# Derive the previous week numbers from the actual comparison dates instead
# of plain subtraction: `week - 1` yields 0 (or negative values) at the start
# of a year, whereas lubridate::week() of the shifted date rolls over to week
# 52/53 of the previous year, matching the per-date years computed below.
week_minus_1 <- lubridate::week(today_minus_1)
week_minus_2 <- lubridate::week(today_minus_2)
week_minus_3 <- lubridate::week(today_minus_3)

# Format current week with leading zeros (e.g. "07") for labels/file names
week <- sprintf("%02d", week)

# Calendar year of each comparison date (these may differ around New Year)
year <- lubridate::year(today)
year_1 <- lubridate::year(today_minus_1)
year_2 <- lubridate::year(today_minus_2)
year_3 <- lubridate::year(today_minus_3)
```
```{r data, message=TRUE, warning=TRUE, include=FALSE}
# Load CI index data with error handling
# (tryCatch evaluates its expression in this chunk's environment, so the
# objects assigned below persist after the call)
tryCatch({
CI_quadrant <- readRDS(here::here(cumulative_CI_vals_dir, "All_pivots_Cumulative_CI_quadrant_year_v2.rds"))
safe_log("Successfully loaded CI quadrant data")
}, error = function(e) {
stop("Error loading CI quadrant data: ", e$message)
})
# Get file paths for different weeks using the utility function
tryCatch({
path_to_week_current = get_week_path(weekly_CI_mosaic, today, 0)
path_to_week_minus_1 = get_week_path(weekly_CI_mosaic, today, -1)
path_to_week_minus_2 = get_week_path(weekly_CI_mosaic, today, -2)
path_to_week_minus_3 = get_week_path(weekly_CI_mosaic, today, -3)
# Log the calculated paths
safe_log("Required mosaic paths:")
safe_log(paste("Path to current week:", path_to_week_current))
safe_log(paste("Path to week minus 1:", path_to_week_minus_1))
safe_log(paste("Path to week minus 2:", path_to_week_minus_2))
safe_log(paste("Path to week minus 3:", path_to_week_minus_3))
# Validate that files exist
# (a missing file only warns here; the terra::rast() calls below are what
# actually fail and trigger the stop() in the error handler)
if (!file.exists(path_to_week_current)) warning("Current week mosaic file does not exist: ", path_to_week_current)
if (!file.exists(path_to_week_minus_1)) warning("Week minus 1 mosaic file does not exist: ", path_to_week_minus_1)
if (!file.exists(path_to_week_minus_2)) warning("Week minus 2 mosaic file does not exist: ", path_to_week_minus_2)
if (!file.exists(path_to_week_minus_3)) warning("Week minus 3 mosaic file does not exist: ", path_to_week_minus_3)
# Load raster data with terra functions; only the "CI" layer is kept
CI <- terra::rast(path_to_week_current)$CI
CI_m1 <- terra::rast(path_to_week_minus_1)$CI
CI_m2 <- terra::rast(path_to_week_minus_2)$CI
CI_m3 <- terra::rast(path_to_week_minus_3)$CI
}, error = function(e) {
stop("Error loading raster data: ", e$message)
})
```
```{r calculate_difference_rasters, message=TRUE, warning=TRUE, include=FALSE}
# Calculate difference rasters for the weekly and three-week comparisons.
# On failure, zero-valued placeholders with the geometry of CI are installed
# so downstream mapping chunks still have objects to work with.
tryCatch({
  # Calculate weekly difference
  last_week_dif_raster_abs <- (CI - CI_m1)
  safe_log("Calculated weekly difference raster")
  # Calculate three-week difference
  three_week_dif_raster_abs <- (CI - CI_m3)
  safe_log("Calculated three-week difference raster")
}, error = function(e) {
  safe_log(paste("Error calculating difference rasters:", e$message), "ERROR")
  # `<<-` is required here: a plain `<-` inside the error handler only
  # creates variables local to the handler function, so the placeholders
  # would be silently discarded when the handler returns.
  if (!exists("last_week_dif_raster_abs")) {
    last_week_dif_raster_abs <<- CI * 0
  }
  if (!exists("three_week_dif_raster_abs")) {
    three_week_dif_raster_abs <<- CI * 0
  }
})
```
```{r load_field_boundaries, message=TRUE, warning=TRUE, include=FALSE}
# Load field boundaries from parameters
# `field_boundaries_sf` is expected to be defined by parameters_project.R
# (sourced earlier); the alias AllPivots0 is used throughout the report.
tryCatch({
AllPivots0 <- field_boundaries_sf
safe_log("Successfully loaded field boundaries")
}, error = function(e) {
stop("Error loading field boundaries: ", e$message)
})
```
```{r create_farm_health_data, message=FALSE, warning=FALSE, include=FALSE}
# Create farm health summary data from scratch
# Builds one row per field with mean CI, week-over-week CI change, CI
# uniformity, a status label, an anomaly classification, a priority level
# (1 = most urgent, 5 = none) and a harvest readiness stage. The result is
# `farm_health_data`, sorted by priority then field name.
tryCatch({
# Ensure we have the required data
if (!exists("AllPivots0") || !exists("CI") || !exists("CI_m1") || !exists("harvesting_data")) {
stop("Required input data (field boundaries, CI data, or harvesting data) not available")
}
safe_log("Starting to calculate farm health data")
# Get unique field names
fields <- unique(AllPivots0$field)
safe_log(paste("Found", length(fields), "unique fields"))
# Initialize result dataframe
farm_health_data <- data.frame(
field = character(),
mean_ci = numeric(),
ci_change = numeric(),
ci_uniformity = numeric(),
status = character(),
anomaly_type = character(),
priority_level = numeric(),
age_weeks = numeric(),
harvest_readiness = character(),
stringsAsFactors = FALSE
)
# Process each field with robust error handling
# NOTE(review): rows are appended with rbind() inside the loop, which is
# quadratic; fine for dozens of fields, but consider accumulating in a list
# and binding once if the field count grows.
for (field_name in fields) {
tryCatch({
safe_log(paste("Processing field:", field_name))
# Get field boundary
field_shape <- AllPivots0 %>% dplyr::filter(field == field_name)
# Skip if field shape is empty
if (nrow(field_shape) == 0) {
safe_log(paste("Empty field shape for", field_name), "WARNING")
next
}
# Get field age from harvesting data - use direct filtering to avoid dplyr errors
# (assumes harvesting_data has columns field, season_start, age -- TODO confirm)
field_age_data <- NULL
if (exists("harvesting_data") && !is.null(harvesting_data) && nrow(harvesting_data) > 0) {
field_age_data <- harvesting_data[harvesting_data$field == field_name, ]
if (nrow(field_age_data) > 0) {
# Keep only the most recent season for this field
field_age_data <- field_age_data[order(field_age_data$season_start, decreasing = TRUE), ][1, ]
}
}
# Default age if not available
field_age_weeks <- if (!is.null(field_age_data) && nrow(field_age_data) > 0 && !is.na(field_age_data$age)) {
field_age_data$age
} else {
10 # Default age
}
# Extract CI values using terra's extract function which is more robust
ci_values <- terra::extract(CI, field_shape)
ci_prev_values <- terra::extract(CI_m1, field_shape)
# Check if we got valid data
if (nrow(ci_values) == 0 || nrow(ci_prev_values) == 0) {
safe_log(paste("No CI data extracted for field", field_name), "WARNING")
# Add a placeholder row with Unknown status
farm_health_data <- rbind(farm_health_data, data.frame(
field = field_name,
mean_ci = NA,
ci_change = NA,
ci_uniformity = NA,
status = "Unknown",
anomaly_type = "Unknown",
priority_level = 5, # Low priority
age_weeks = field_age_weeks,
harvest_readiness = "Unknown",
stringsAsFactors = FALSE
))
next
}
# Calculate metrics - Handle NA values properly
# NOTE(review): the fallback picks the first extracted column, which for
# terra::extract() is typically the ID column rather than a value column --
# verify whether this branch can ever trigger and if colnames(...)[2] is meant.
ci_column <- if ("CI" %in% names(ci_values)) "CI" else colnames(ci_values)[1]
ci_prev_column <- if ("CI" %in% names(ci_prev_values)) "CI" else colnames(ci_prev_values)[1]
mean_ci <- mean(ci_values[[ci_column]], na.rm=TRUE)
mean_ci_prev <- mean(ci_prev_values[[ci_prev_column]], na.rm=TRUE)
ci_change <- mean_ci - mean_ci_prev
ci_sd <- sd(ci_values[[ci_column]], na.rm=TRUE)
ci_uniformity <- ci_sd / max(0.1, mean_ci) # Avoid division by zero
# Handle NaN or Inf results
if (is.na(mean_ci) || is.na(ci_change) || is.na(ci_uniformity) ||
is.nan(mean_ci) || is.nan(ci_change) || is.nan(ci_uniformity) ||
is.infinite(mean_ci) || is.infinite(ci_change) || is.infinite(ci_uniformity)) {
safe_log(paste("Invalid calculation results for field", field_name), "WARNING")
# Add a placeholder row with Unknown status
farm_health_data <- rbind(farm_health_data, data.frame(
field = field_name,
mean_ci = NA,
ci_change = NA,
ci_uniformity = NA,
status = "Unknown",
anomaly_type = "Unknown",
priority_level = 5, # Low priority
age_weeks = field_age_weeks,
harvest_readiness = "Unknown",
stringsAsFactors = FALSE
))
next
}
# Determine field status
status <- dplyr::case_when(
mean_ci >= 5 ~ "Excellent",
mean_ci >= 3.5 ~ "Good",
mean_ci >= 2 ~ "Fair",
mean_ci >= 1 ~ "Poor",
TRUE ~ "Critical"
)
# Determine anomaly type
anomaly_type <- dplyr::case_when(
ci_change > 2 ~ "Potential Weed Growth",
ci_change < -2 ~ "Potential Weeding/Harvesting",
ci_uniformity > 0.5 ~ "High Variability",
mean_ci < 1 ~ "Low Vigor",
TRUE ~ "None"
)
# Calculate priority level (1-5, with 1 being highest priority)
priority_score <- dplyr::case_when(
mean_ci < 1 ~ 1, # Critical - highest priority
anomaly_type == "Potential Weed Growth" ~ 2,
anomaly_type == "High Variability" ~ 3,
ci_change < -1 ~ 4,
TRUE ~ 5 # No urgent issues
)
# Determine harvest readiness
harvest_readiness <- dplyr::case_when(
field_age_weeks >= 52 & mean_ci >= 4 ~ "Ready for harvest",
field_age_weeks >= 48 & mean_ci >= 3.5 ~ "Approaching harvest",
field_age_weeks >= 40 & mean_ci >= 3 ~ "Mid-maturity",
field_age_weeks >= 12 ~ "Growing",
TRUE ~ "Early stage"
)
# Add to summary data
farm_health_data <- rbind(farm_health_data, data.frame(
field = field_name,
mean_ci = round(mean_ci, 2),
ci_change = round(ci_change, 2),
ci_uniformity = round(ci_uniformity, 2),
status = status,
anomaly_type = anomaly_type,
priority_level = priority_score,
age_weeks = field_age_weeks,
harvest_readiness = harvest_readiness,
stringsAsFactors = FALSE
))
}, error = function(e) {
safe_log(paste("Error processing field", field_name, ":", e$message), "ERROR")
# Add a placeholder row with Error status
# `<<-` is required: the handler runs in its own function scope, so a plain
# `<-` would not update the data frame in the chunk environment.
farm_health_data <<- rbind(farm_health_data, data.frame(
field = field_name,
mean_ci = NA,
ci_change = NA,
ci_uniformity = NA,
status = "Unknown",
anomaly_type = "Unknown",
priority_level = 5, # Low priority since we don't know the status
age_weeks = NA,
harvest_readiness = "Unknown",
stringsAsFactors = FALSE
))
})
}
# Make sure we have data for all fields
if (nrow(farm_health_data) == 0) {
safe_log("No farm health data was created", "ERROR")
stop("Failed to create farm health data")
}
# Sort by priority level
farm_health_data <- farm_health_data %>% dplyr::arrange(priority_level, field)
safe_log(paste("Successfully created farm health data for", nrow(farm_health_data), "fields"))
}, error = function(e) {
safe_log(paste("Error creating farm health data:", e$message), "ERROR")
# Create an empty dataframe that can be filled by the verification chunk
})
```
```{r verify_farm_health_data, message=FALSE, warning=FALSE, include=FALSE}
# Safety net: guarantee that `farm_health_data` exists and is non-empty
# before downstream chunks use it. If the main calculation produced nothing,
# fall back to a frame with one all-NA/"Unknown" row per field.
if (!exists("farm_health_data") || nrow(farm_health_data) == 0) {
  safe_log("farm_health_data not found or empty, generating default data", "WARNING")
  tryCatch({
    # Field names come straight from the boundary data
    field_names <- unique(AllPivots0$field)
    n_fields <- length(field_names)
    na_col <- rep(NA, n_fields)
    farm_health_data <- data.frame(
      field = field_names,
      mean_ci = na_col,
      ci_change = na_col,
      ci_uniformity = na_col,
      status = rep("Unknown", n_fields),
      anomaly_type = rep("Unknown", n_fields),
      priority_level = rep(5, n_fields), # Low priority
      age_weeks = na_col,
      harvest_readiness = rep("Unknown", n_fields),
      stringsAsFactors = FALSE
    )
    safe_log("Created fallback farm_health_data with basic field information")
  }, error = function(e) {
    safe_log(paste("Error creating fallback farm_health_data:", e$message), "ERROR")
    # Last resort: a correctly typed, zero-row frame (note the `<<-`, the
    # handler runs in its own scope)
    farm_health_data <<- data.frame(
      field = character(),
      mean_ci = numeric(),
      ci_change = numeric(),
      ci_uniformity = numeric(),
      status = character(),
      anomaly_type = character(),
      priority_level = numeric(),
      age_weeks = numeric(),
      harvest_readiness = character(),
      stringsAsFactors = FALSE
    )
  })
}
```
```{r calculate_farm_health, message=FALSE, warning=FALSE, include=FALSE}
# Calculate farm health summary metrics via the shared utility function.
# When it succeeds this intentionally replaces the frame built in the
# previous chunks; when it fails, any previously built data is kept.
tryCatch({
  # Generate farm health summary data
  farm_health_data <- generate_farm_health_summary(
    field_boundaries = AllPivots0,
    ci_current = CI,
    ci_previous = CI_m1,
    harvesting_data = harvesting_data
  )
  # Log the summary data
  safe_log(paste("Generated farm health summary with", nrow(farm_health_data), "fields"))
}, error = function(e) {
  safe_log(paste("Error in farm health calculation:", e$message), "ERROR")
  # `<<-` is required so the fallback persists outside this handler (a plain
  # `<-` would be handler-local and silently discarded). Only install the
  # empty frame when no earlier chunk produced data -- otherwise a failure
  # here would clobber the fallback built previously.
  if (!exists("farm_health_data")) {
    farm_health_data <<- data.frame(
      field = character(),
      mean_ci = numeric(),
      ci_change = numeric(),
      ci_uniformity = numeric(),
      status = character(),
      anomaly_type = character(),
      priority_level = numeric(),
      age_weeks = numeric(),
      harvest_readiness = character(),
      stringsAsFactors = FALSE
    )
  }
})
```
```{r advanced_analytics_functions, message=FALSE, warning=FALSE, include=FALSE}
# ADVANCED ANALYTICS FUNCTIONS
# Note: These functions are now imported from executive_report_utils.R
# (sourced in the load_libraries chunk), so this chunk only records that fact.
# The utility file contains functions for velocity/acceleration indicators,
# anomaly timeline creation, age cohort mapping, and cohort performance charts
safe_log("Using analytics functions from executive_report_utils.R")
```
\pagebreak
# Advanced Analytics
## Field Health Velocity and Acceleration
This visualization shows the rate of change in field health (velocity) and whether that change is speeding up or slowing down (acceleration). These metrics help identify if farm conditions are improving, stable, or deteriorating.
**How to interpret:**
- **Velocity gauge:** Shows the average weekly change in CI values across all fields
- Positive values (green/right side): Farm health improving week-to-week
- Negative values (red/left side): Farm health declining week-to-week
- **Acceleration gauge:** Shows whether the rate of change is increasing or decreasing
- Positive values (green/right side): Change is accelerating or improving faster
- Negative values (red/left side): Change is decelerating or slowing down
- **4-Week Trend:** Shows the overall CI value trajectory for the past month
```{r render_velocity_acceleration, echo=FALSE, fig.height=8, fig.width=10, message=FALSE, warning=FALSE}
# Render the farm-wide velocity/acceleration indicators and tables of the
# fields with the largest week-over-week CI changes.
tryCatch({
  # Create and display the indicators using the imported utility function
  velocity_plot <- create_velocity_acceleration_indicator(
    health_data = farm_health_data,
    ci_current = CI,
    ci_prev1 = CI_m1,
    ci_prev2 = CI_m2,
    ci_prev3 = CI_m3,
    field_boundaries = AllPivots0
  )
  # Print the visualization
  print(velocity_plot)

  # Per-field velocity metrics (current vs. previous week mean CI)
  field_ci_metrics <- list()
  fields <- unique(AllPivots0$field)
  for (field_name in fields) {
    tryCatch({
      # Get field boundary
      field_shape <- AllPivots0 %>% dplyr::filter(field == field_name)
      if (nrow(field_shape) == 0) next
      # Extract CI values
      ci_curr_values <- terra::extract(CI, field_shape)
      ci_prev1_values <- terra::extract(CI_m1, field_shape)
      # Resolve the value column by name with a fallback, consistent with
      # the extraction handling in the farm-health chunk.
      ci_col <- if ("CI" %in% names(ci_curr_values)) "CI" else colnames(ci_curr_values)[1]
      ci_prev_col <- if ("CI" %in% names(ci_prev1_values)) "CI" else colnames(ci_prev1_values)[1]
      mean_ci_curr <- mean(ci_curr_values[[ci_col]], na.rm = TRUE)
      mean_ci_prev1 <- mean(ci_prev1_values[[ci_prev_col]], na.rm = TRUE)
      velocity <- mean_ci_curr - mean_ci_prev1
      # Store in list
      field_ci_metrics[[field_name]] <- list(
        field = field_name,
        ci_current = mean_ci_curr,
        ci_prev1 = mean_ci_prev1,
        velocity = velocity
      )
    }, error = function(e) {
      safe_log(paste("Error processing field", field_name, "for velocity table:", e$message), "WARNING")
    })
  }

  # Convert list to data frame; guard against the empty case --
  # do.call(rbind, ...) over an empty list returns NULL, which would make
  # the dplyr calls below error and hide the already-printed figure.
  velocity_df <- do.call(rbind, lapply(field_ci_metrics, function(x) {
    data.frame(
      field = x$field,
      ci_current = round(x$ci_current, 2),
      ci_prev1 = round(x$ci_prev1, 2),
      velocity = round(x$velocity, 2),
      direction = ifelse(x$velocity >= 0, "Improving", "Declining")
    )
  }))
  if (!is.null(velocity_df) && nrow(velocity_df) > 0) {
    # Select top 5 positive and top 5 negative velocity fields
    top_positive <- velocity_df %>%
      dplyr::filter(velocity > 0) %>%
      dplyr::arrange(desc(velocity)) %>%
      dplyr::slice_head(n = 5)
    top_negative <- velocity_df %>%
      dplyr::filter(velocity < 0) %>%
      dplyr::arrange(velocity) %>%
      dplyr::slice_head(n = 5)
    # Display the tables if we have data
    if (nrow(top_positive) > 0) {
      cat("<h4>Fields with Fastest Improvement</h4>")
      knitr::kable(top_positive %>%
        dplyr::select(Field = field,
                      `Current CI` = ci_current,
                      `Previous CI` = ci_prev1,
                      `Weekly Change` = velocity))
    }
    if (nrow(top_negative) > 0) {
      cat("<h4>Fields with Fastest Decline</h4>")
      knitr::kable(top_negative %>%
        dplyr::select(Field = field,
                      `Current CI` = ci_current,
                      `Previous CI` = ci_prev1,
                      `Weekly Change` = velocity))
    }
  }
}, error = function(e) {
  safe_log(paste("Error rendering velocity visualization:", e$message), "ERROR")
  cat("<div class='alert alert-danger'>Error generating velocity visualization.</div>")
})
```
\pagebreak
## Field Anomaly Timeline
This visualization shows the history of detected anomalies in fields across the monitoring period. It helps identify persistent issues or improvements over time.
**How to interpret:**
- **X-axis**: Dates of satellite observations
- **Y-axis**: Fields grouped by similar characteristics
- **Colors**: Red indicates negative anomalies, green indicates positive anomalies
- **Size**: Larger markers indicate stronger anomalies
```{r anomaly_timeline, echo=FALSE, fig.height=8, fig.width=10, message=FALSE, warning=FALSE}
# Build and render the anomaly-history timeline; failures are logged and
# replaced with an inline HTML alert so the rest of the report still knits.
tryCatch(
  {
    # Helper imported from executive_report_utils.R; show 90 days of history
    anomaly_timeline <- create_anomaly_timeline(
      field_boundaries = AllPivots0,
      ci_data = CI_quadrant,
      days_to_include = 90
    )
    print(anomaly_timeline)
  },
  error = function(e) {
    safe_log(paste("Error generating anomaly timeline:", e$message), "ERROR")
    cat("<div class='alert alert-danger'>Error generating anomaly timeline visualization.</div>")
  }
)
```
\pagebreak
## Field Age Cohorts Map
This map shows fields grouped by their crop age (weeks since planting). Understanding the distribution of crop ages helps interpret performance metrics and plan harvest scheduling.
**How to interpret:**
- **Colors**: Different colors represent different age groups (in weeks since planting)
- **Labels**: Each field is labeled with its name for easy reference
- **Legend**: Shows the age ranges in weeks and their corresponding colors
```{r age_cohort_map, echo=FALSE, fig.height=8, fig.width=10, message=FALSE, warning=FALSE}
# Build and render the field age cohort map; failures degrade to an inline
# HTML alert instead of stopping the knit.
tryCatch(
  {
    # Helper imported from executive_report_utils.R
    age_cohort_map <- create_age_cohort_map(
      field_boundaries = AllPivots0,
      harvesting_data = harvesting_data
    )
    print(age_cohort_map)
  },
  error = function(e) {
    safe_log(paste("Error generating age cohort map:", e$message), "ERROR")
    cat("<div class='alert alert-danger'>Error generating age cohort map visualization.</div>")
  }
)
```
\pagebreak
## Cohort Performance Comparison
This visualization compares chlorophyll index (CI) performance across different age groups of fields. This helps identify if certain age groups are performing better or worse than expected.
**How to interpret:**
- **X-axis**: Field age groups in weeks since planting
- **Y-axis**: Average CI value for fields in that age group
- **Box plots**: Show the distribution of CI values within each age group
- **Line**: Shows the expected CI trajectory based on historical data
```{r cohort_performance_chart, echo=FALSE, fig.height=8, fig.width=10, message=FALSE, warning=FALSE}
# Build and render the cohort performance comparison; failures degrade to an
# inline HTML alert instead of stopping the knit.
tryCatch(
  {
    # Helper imported from executive_report_utils.R
    cohort_chart <- create_cohort_performance_chart(
      field_boundaries = AllPivots0,
      ci_current = CI,
      harvesting_data = harvesting_data
    )
    print(cohort_chart)
  },
  error = function(e) {
    safe_log(paste("Error generating cohort performance chart:", e$message), "ERROR")
    cat("<div class='alert alert-danger'>Error generating cohort performance visualization.</div>")
  }
)
```

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,437 @@
# EXECUTIVE REPORT UTILITIES
# This file contains functions for creating advanced visualizations for the executive summary report
#' Create a velocity and acceleration indicator for CI change
#'
#' Builds a three-panel figure: a velocity gauge (mean weekly CI change), an
#' acceleration gauge (change in that velocity), and a 4-week trend line of
#' the farm-wide average CI.
#'
#' @param health_data Current farm health data
#' @param ci_current Current CI raster
#' @param ci_prev1 CI raster from 1 week ago
#' @param ci_prev2 CI raster from 2 weeks ago
#' @param ci_prev3 CI raster from 3 weeks ago
#' @param field_boundaries Field boundaries spatial data (sf object) with a `field` column
#' @return A grid object (gridExtra::grid.arrange) with velocity and
#'   acceleration gauges plus the trend chart, or a ggplot error placeholder
#'   when construction fails
#'
create_velocity_acceleration_indicator <- function(health_data, ci_current, ci_prev1, ci_prev2, ci_prev3, field_boundaries) {
  tryCatch({
    # Per-field mean CI for the current week and the three preceding weeks
    field_ci_metrics <- data.frame(field = character(),
                                   week_current = numeric(),
                                   week_minus_1 = numeric(),
                                   week_minus_2 = numeric(),
                                   week_minus_3 = numeric(),
                                   stringsAsFactors = FALSE)
    # Process each field
    fields <- unique(field_boundaries$field)
    for (field_name in fields) {
      tryCatch({
        # Get field boundary
        field_shape <- field_boundaries %>% dplyr::filter(field == field_name)
        if (nrow(field_shape) == 0) next
        # Extract CI values for all weeks
        ci_curr_values <- terra::extract(ci_current, field_shape)
        ci_prev1_values <- terra::extract(ci_prev1, field_shape)
        ci_prev2_values <- terra::extract(ci_prev2, field_shape)
        ci_prev3_values <- terra::extract(ci_prev3, field_shape)
        # NOTE(review): assumes the extracted value column is named "CI" --
        # confirm all weekly mosaics carry that layer name.
        mean_ci_curr <- mean(ci_curr_values$CI, na.rm = TRUE)
        mean_ci_prev1 <- mean(ci_prev1_values$CI, na.rm = TRUE)
        mean_ci_prev2 <- mean(ci_prev2_values$CI, na.rm = TRUE)
        mean_ci_prev3 <- mean(ci_prev3_values$CI, na.rm = TRUE)
        # Add to metrics table
        field_ci_metrics <- rbind(field_ci_metrics, data.frame(
          field = field_name,
          week_current = mean_ci_curr,
          week_minus_1 = mean_ci_prev1,
          week_minus_2 = mean_ci_prev2,
          week_minus_3 = mean_ci_prev3,
          stringsAsFactors = FALSE
        ))
      }, error = function(e) {
        message(paste("Error processing field", field_name, "for velocity indicator:", e$message))
      })
    }

    # Farm-wide averages per week
    farm_avg <- colMeans(field_ci_metrics[, c("week_current", "week_minus_1", "week_minus_2", "week_minus_3")], na.rm = TRUE)
    # Velocity: current vs. last week; acceleration: change in velocity
    velocity <- farm_avg["week_current"] - farm_avg["week_minus_1"]
    prev_velocity <- farm_avg["week_minus_1"] - farm_avg["week_minus_2"]
    acceleration <- velocity - prev_velocity

    # Data frames backing the two gauges
    velocity_data <- data.frame(
      label = "Weekly CI Change",
      value = velocity
    )
    acceleration_data <- data.frame(
      label = "Change Acceleration",
      value = acceleration
    )
    # 4-week farm-average trajectory
    trend_data <- data.frame(
      week = c(-3, -2, -1, 0),
      ci_value = c(farm_avg["week_minus_3"], farm_avg["week_minus_2"],
                   farm_avg["week_minus_1"], farm_avg["week_current"])
    )

    # Velocity gauge. geom_arc_bar() is exported by ggforce, not ggplot2 --
    # qualifying it as ggplot2::geom_arc_bar() fails at run time.
    velocity_gauge <- ggplot2::ggplot(velocity_data, ggplot2::aes(x = 0, y = 0)) +
      ggforce::geom_arc_bar(ggplot2::aes(
        x0 = 0, y0 = 0,
        r0 = 0.5, r = 1,
        start = -pi/2, end = pi/2,
        fill = "background"
      ), fill = "#f0f0f0") +
      ggforce::geom_arc_bar(ggplot2::aes(
        x0 = 0, y0 = 0,
        r0 = 0.5, r = 1,
        start = -pi/2,
        # Scale to range -1 to +1; values beyond that overshoot the dial
        end = -pi/2 + (pi * (0.5 + (velocity / 2))),
        fill = "velocity"
      ), fill = ifelse(velocity >= 0, "#1a9850", "#d73027")) +
      ggplot2::geom_text(ggplot2::aes(label = sprintf("%.2f", velocity)),
                         size = 8, fontface = "bold") +
      ggplot2::geom_text(ggplot2::aes(label = "Velocity"), y = -0.3, size = 4) +
      ggplot2::coord_fixed() +
      ggplot2::theme_void() +
      ggplot2::scale_fill_manual(values = c("background" = "#f0f0f0", "velocity" = "steelblue"),
                                 guide = "none") +
      ggplot2::annotate("text", x = -0.85, y = 0, label = "Declining",
                        angle = 90, size = 3.5) +
      ggplot2::annotate("text", x = 0.85, y = 0, label = "Improving",
                        angle = -90, size = 3.5) +
      ggplot2::labs(title = "Farm Health Velocity",
                    subtitle = "CI change per week") +
      ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 14, face = "bold"),
                     plot.subtitle = ggplot2::element_text(hjust = 0.5, size = 12))

    # Acceleration gauge (same construction, scaled to -0.5 to +0.5)
    acceleration_gauge <- ggplot2::ggplot(acceleration_data, ggplot2::aes(x = 0, y = 0)) +
      ggforce::geom_arc_bar(ggplot2::aes(
        x0 = 0, y0 = 0,
        r0 = 0.5, r = 1,
        start = -pi/2, end = pi/2,
        fill = "background"
      ), fill = "#f0f0f0") +
      ggforce::geom_arc_bar(ggplot2::aes(
        x0 = 0, y0 = 0,
        r0 = 0.5, r = 1,
        start = -pi/2,
        end = -pi/2 + (pi * (0.5 + (acceleration / 1))),
        fill = "acceleration"
      ), fill = ifelse(acceleration >= 0, "#1a9850", "#d73027")) +
      ggplot2::geom_text(ggplot2::aes(label = sprintf("%.2f", acceleration)),
                         size = 8, fontface = "bold") +
      ggplot2::geom_text(ggplot2::aes(label = "Acceleration"), y = -0.3, size = 4) +
      ggplot2::coord_fixed() +
      ggplot2::theme_void() +
      ggplot2::scale_fill_manual(values = c("background" = "#f0f0f0", "acceleration" = "steelblue"),
                                 guide = "none") +
      ggplot2::annotate("text", x = -0.85, y = 0, label = "Slowing",
                        angle = 90, size = 3.5) +
      ggplot2::annotate("text", x = 0.85, y = 0, label = "Accelerating",
                        angle = -90, size = 3.5) +
      ggplot2::labs(title = "Change Acceleration",
                    subtitle = "Increasing or decreasing trend") +
      ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 14, face = "bold"),
                     plot.subtitle = ggplot2::element_text(hjust = 0.5, size = 12))

    # Trend chart with a dashed reference line at the oldest week's value
    trend_chart <- ggplot2::ggplot(trend_data, ggplot2::aes(x = week, y = ci_value)) +
      ggplot2::geom_line(size = 1.5, color = "steelblue") +
      ggplot2::geom_point(size = 3, color = "steelblue") +
      ggplot2::geom_hline(yintercept = trend_data$ci_value[1], linetype = "dashed", color = "gray50") +
      ggplot2::labs(
        title = "4-Week CI Trend",
        x = "Weeks from current",
        y = "Average CI Value"
      ) +
      ggplot2::theme_minimal() +
      ggplot2::scale_x_continuous(breaks = c(-3, -2, -1, 0))

    # Combine into a multi-panel figure: gauges on top, trend below.
    # (The per-field top-velocity table previously computed here was never
    # used in the figure and has been dropped.)
    main_plot <- gridExtra::grid.arrange(
      gridExtra::grid.arrange(velocity_gauge, acceleration_gauge, ncol = 2),
      trend_chart,
      heights = c(1.5, 1),
      nrow = 2
    )
    return(main_plot)
  }, error = function(e) {
    message(paste("Error in create_velocity_acceleration_indicator:", e$message))
    # Fall back to an empty plot carrying the error text
    return(ggplot2::ggplot() +
      ggplot2::annotate("text", x = 0, y = 0, label = paste("Error creating velocity indicator:", e$message)) +
      ggplot2::theme_void())
  })
}
#' Generate a field health score based on CI values and trends
#'
#' Combines the field's current mean CI (judged against an age-based
#' expectation), the mean CI change, and CI uniformity into a 0-10 score.
#'
#' @param ci_current Current CI raster
#' @param ci_change CI change raster
#' @param field_age_weeks Field age in weeks
#' @return List containing score, status, and component scores
#'
generate_field_health_score <- function(ci_current, ci_change, field_age_weeks) {
  # Clamp a scalar into [lo, hi]; NA inputs stay NA.
  clamp <- function(v, lo, hi) max(lo, min(hi, v))

  # Field-wide summary statistics from the rasters
  avg_ci <- terra::global(ci_current, "mean", na.rm = TRUE)[[1]]
  avg_change <- terra::global(ci_change, "mean", na.rm = TRUE)[[1]]
  ci_spread <- terra::global(ci_current, "sd", na.rm = TRUE)[[1]]

  # Coefficient of variation; defaults to 1 (worst) for non-positive means
  variability <- ifelse(avg_ci > 0, ci_spread / avg_ci, 1)

  # Expectations grow linearly with crop age, capped at CI 5
  target_ci <- min(5, field_age_weeks / 10)

  # Component scores: closeness to target (0-5), trend (0-3), evenness (0-2)
  score_ci <- clamp(5 - 2 * abs(avg_ci - target_ci), 0, 5)
  score_trend <- clamp(1 + avg_change, 0, 3)
  score_evenness <- clamp(2 * (1 - variability), 0, 2)
  overall <- score_ci + score_trend + score_evenness

  # Map the numeric total onto a human-readable status label
  status <- dplyr::case_when(
    overall >= 8 ~ "Excellent",
    overall >= 6 ~ "Good",
    overall >= 4 ~ "Fair",
    overall >= 2 ~ "Needs Attention",
    TRUE ~ "Critical"
  )

  list(
    score = round(overall, 1),
    status = status,
    components = list(
      ci = round(score_ci, 1),
      change = round(score_trend, 1),
      uniformity = round(score_evenness, 1)
    )
  )
}
#' Create an irrigation recommendation map
#'
#' Classifies every raster cell into one of four irrigation priority zones
#' based on the current CI level and its recent change, then renders the
#' zones with tmap (v4 syntax).
#'
#' @param ci_current Current CI raster
#' @param ci_change CI change raster
#' @param field_shape Field boundary shape
#' @param title Map title
#' @return A tmap object with irrigation recommendations
#'
create_irrigation_map <- function(ci_current, ci_change, field_shape, title = "Irrigation Priority Zones") {
  # Start from a raster with the same geometry as the CI layer
  irrigation_priority <- ci_current * 0

  # Extract cell values for classification
  ci_values <- terra::values(ci_current)
  change_values <- terra::values(ci_change)

  # Priority zones:
  # 3 = High priority (low CI, negative trend)
  # 2 = Medium priority (low CI but stable, or good CI with negative trend)
  # 1 = Low priority (watch, good CI with slight decline)
  # 0 = No action needed (good CI, stable/positive trend)
  priority_values <- rep(NA, length(ci_values))
  valid <- !is.na(ci_values) & !is.na(change_values)

  # High priority: Low CI (< 2) and negative change (< 0)
  priority_values[which(valid & ci_values < 2 & change_values < 0)] <- 3
  # Medium priority: low CI with stable/positive change, or moderate CI (2-4)
  # with significant negative change (< -1)
  priority_values[which(valid & ((ci_values < 2 & change_values >= 0) |
    (ci_values >= 2 & ci_values < 4 & change_values < -1)))] <- 2
  # Low priority (watch): CI >= 2 with mild negative change (-1 to 0)
  priority_values[which(valid & ci_values >= 2 & change_values < 0 & change_values >= -1)] <- 1
  # No action needed: CI >= 2 with stable/positive change (>= 0)
  priority_values[which(valid & ci_values >= 2 & change_values >= 0)] <- 0

  # Write the classification back into the raster
  terra::values(irrigation_priority) <- priority_values

  # Create the map. tmap v4 syntax: the visual variable takes a tm_scale_*
  # object plus a tm_legend object instead of the retired style/palette/title
  # arguments; the main title is set with tm_title().
  tm_shape(irrigation_priority) +
    tm_raster(
      col.scale = tm_scale_categorical(
        values = c("#1a9850", "#91cf60", "#fc8d59", "#d73027"),
        labels = c("No Action", "Watch", "Medium Priority", "High Priority")
      ),
      col.legend = tm_legend(title = "Irrigation Need",
                             position = tm_pos_in("left", "bottom"))
    ) +
    tm_shape(field_shape) +
    tm_borders(lwd = 2) +
    tm_title(title)
}
#' Simple mock function to get weather data for a field
#' In a real implementation, this would fetch data from a weather API
#'
#' @param start_date Start date for weather data
#' @param end_date End date for weather data
#' @param lat Latitude of the field center (unused by the mock)
#' @param lon Longitude of the field center (unused by the mock)
#' @return A data frame with columns date, rainfall_mm, temp_max_c,
#'   temp_min_c, temp_mean_c (one row per day)
#'
get_weather_data <- function(start_date, end_date, lat = -16.1, lon = 34.7) {
  # Mock implementation - replace with a real API call (OpenWeatherMap, NOAA, ...)
  # in production.
  dates <- seq.Date(from = as.Date(start_date), to = as.Date(end_date), by = "day")
  n_days <- length(dates)
  # Southern-hemisphere seasonality: rainy season Nov-Apr
  month_nums <- as.numeric(format(dates, "%m"))
  is_rainy_season <- month_nums %in% c(11, 12, 1, 2, 3, 4)
  # Baseline rainfall: wetter in the rainy season, near-dry otherwise
  rainfall <- numeric(n_days)
  rainfall[is_rainy_season] <- pmax(0, rnorm(sum(is_rainy_season), mean = 4, sd = 8))
  rainfall[!is_rainy_season] <- pmax(0, rnorm(sum(!is_rainy_season), mean = 0.5, sd = 2))
  # Occasional heavy rainfall on ~10% of rainy-season days.
  # FIX: the previous sample(which(is_rainy_season), ...) errored when the date
  # range had no rainy-season days, and sample() on a length-1 numeric x draws
  # from 1:x (possibly picking a dry day). Index explicitly via sample.int().
  rainy_idx <- which(is_rainy_season)
  if (length(rainy_idx) > 0) {
    n_heavy <- max(1, round(length(rainy_idx) * 0.1))
    heavy_rain_days <- rainy_idx[sample.int(length(rainy_idx), size = n_heavy)]
    rainfall[heavy_rain_days] <- rainfall[heavy_rain_days] + runif(n_heavy, 20, 50)
  }
  # Temperatures: seasonal sine cycle plus daily noise around a +/-5 C spread
  temp_mean <- 18 + 8 * sin((month_nums - 1) * pi/6)
  temp_max <- temp_mean + rnorm(n_days, mean = 5, sd = 1)
  temp_min <- temp_mean - rnorm(n_days, mean = 5, sd = 1)
  data.frame(
    date = dates,
    rainfall_mm = round(rainfall, 1),
    temp_max_c = round(temp_max, 1),
    temp_min_c = round(temp_min, 1),
    temp_mean_c = round((temp_max + temp_min) / 2, 1)
  )
}
#' Creates a weather summary visualization integrated with CI data
#'
#' @param pivotName Name of the pivot field
#' @param ci_data CI quadrant data
#' @param days_to_show Number of days of weather to show
#' @return ggplot object
#'
create_weather_ci_plot <- function(pivotName, ci_data = CI_quadrant, days_to_show = 30) {
  # Observations for the requested field, ordered in time, with missing values dropped
  field_obs <- ci_data |>
    dplyr::filter(field == pivotName) |>
    dplyr::arrange(Date) |>
    dplyr::filter(!is.na(value))
  # Nothing to plot: return an empty placeholder panel
  if (nrow(field_obs) == 0) {
    empty_panel <- ggplot() +
      annotate("text", x = 0, y = 0, label = "No data available") +
      theme_void()
    return(empty_panel)
  }
  # Restrict to the trailing window ending at the latest observation
  window_end <- max(field_obs$Date, na.rm = TRUE)
  window_start <- window_end - days_to_show
  recent_obs <- dplyr::filter(field_obs, Date >= window_start)
  # Mocked field-center coordinates (would come from geometry in production)
  lat <- -16.1
  lon <- 34.7
  # Weather series covering the same window
  weather_data <- get_weather_data(window_start, window_end, lat, lon)
  # Daily mean CI across subfields
  daily_ci <- recent_obs |>
    dplyr::group_by(Date) |>
    dplyr::summarize(mean_ci = mean(value, na.rm = TRUE))
  # Dual-axis chart: rainfall bars on the primary axis, CI (scaled x10) as a
  # line/points, and the daily temperature range as a translucent ribbon
  ggplot() +
    geom_col(data = weather_data, aes(x = date, y = rainfall_mm),
             fill = "#1565C0", alpha = 0.7, width = 0.7) +
    geom_line(data = daily_ci, aes(x = Date, y = mean_ci * 10),
              color = "#2E7D32", size = 1) +
    geom_point(data = daily_ci, aes(x = Date, y = mean_ci * 10),
               color = "#2E7D32", size = 2) +
    geom_ribbon(data = weather_data,
                aes(x = date, ymin = temp_min_c, ymax = temp_max_c),
                fill = "#FF9800", alpha = 0.2) +
    scale_y_continuous(
      name = "Rainfall (mm)",
      sec.axis = sec_axis(~./10, name = "Chlorophyll Index & Temperature (°C)")
    ) +
    labs(
      title = paste("Field", pivotName, "- Weather and CI Relationship"),
      subtitle = paste("Last", days_to_show, "days"),
      x = "Date"
    ) +
    theme_minimal() +
    theme(
      axis.title.y.left = element_text(color = "#1565C0"),
      axis.title.y.right = element_text(color = "#2E7D32"),
      legend.position = "bottom"
    )
}

View file

@ -0,0 +1,421 @@
# Optimal CI Analysis - Day 30 CI values with quadratic fitting
# Author: SmartCane Analysis Team
# Date: 2025-06-12
# Load required libraries
library(ggplot2)
library(dplyr)
library(readr)
# Set file path
# NOTE(review): absolute, user-specific Windows path - consider here::here()
# or a config file so the script is portable.
rds_file_path <- "C:/Users/timon/Resilience BV/4020 SCane ESA DEMO - Documenten/General/4020 SCDEMO Team/4020 TechnicalData/WP3/smartcane/laravel_app/storage/app/chemba/Data/extracted_ci/cumulative_vals/All_pivots_Cumulative_CI_quadrant_year_v2.rds"
# Check if file exists
if (!file.exists(rds_file_path)) {
  stop("RDS file not found at specified path: ", rds_file_path)
}
# Load the data
cat("Loading RDS file...\n")
ci_data <- readRDS(rds_file_path)
# Display structure of the data to understand it better
cat("Data structure:\n")
str(ci_data)
cat("\nFirst few rows:\n")
head(ci_data)
cat("\nColumn names:\n")
print(colnames(ci_data))
# Filter data based on requirements
cat("\nApplying data filters...\n")
# 1. Filter out models that don't reach at least DOY 300
#    (short series cannot support a full-season curve fit)
model_doy_max <- ci_data %>%
  group_by(model) %>%
  summarise(max_doy = max(DOY, na.rm = TRUE), .groups = 'drop')
valid_models <- model_doy_max %>%
  filter(max_doy >= 300) %>%
  pull(model)
cat(paste("Models before filtering:", n_distinct(ci_data$model), "\n"))
cat(paste("Models reaching at least DOY 300:", length(valid_models), "\n"))
ci_data <- ci_data %>%
  filter(model %in% valid_models)
# 2. Apply IQR filtering per DOY: drop FitData values outside the
#    [Q1 - 1.5*IQR, Q3 + 1.5*IQR] fences computed within each DOY group
cat("Applying IQR filtering per DOY...\n")
original_rows <- nrow(ci_data)
ci_data <- ci_data %>%
  group_by(DOY) %>%
  mutate(
    Q1 = quantile(FitData, 0.25, na.rm = TRUE),
    Q3 = quantile(FitData, 0.75, na.rm = TRUE),
    IQR = Q3 - Q1,
    lower_bound = Q1 - 1.5 * IQR,
    upper_bound = Q3 + 1.5 * IQR
  ) %>%
  filter(FitData >= lower_bound & FitData <= upper_bound) %>%
  select(-Q1, -Q3, -IQR, -lower_bound, -upper_bound) %>%
  ungroup()
filtered_rows <- nrow(ci_data)
cat(paste("Rows before IQR filtering:", original_rows, "\n"))
cat(paste("Rows after IQR filtering:", filtered_rows, "\n"))
cat(paste("Removed", original_rows - filtered_rows, "outliers (",
          round(100 * (original_rows - filtered_rows) / original_rows, 1), "%)\n"))
# Check what day values are available after filtering
if ("DOY" %in% colnames(ci_data)) {
  cat("\nUnique DOY values after filtering:\n")
  print(sort(unique(ci_data$DOY)))
} else {
  cat("\nNo 'DOY' column found. Available columns:\n")
  print(colnames(ci_data))
}
# Extract CI values at day 30 for each field
cat("\nExtracting day 30 CI values...\n")
# Set column names based on known structure; field_col may be overridden below
day_col <- "DOY"
ci_col <- "FitData"
field_col <- "model"
# Try different possible field column name combinations
if ("field" %in% colnames(ci_data)) {
  field_col <- "field"
} else if ("Field" %in% colnames(ci_data)) {
  field_col <- "Field"
} else if ("pivot" %in% colnames(ci_data)) {
  field_col <- "pivot"
} else if ("Pivot" %in% colnames(ci_data)) {
  field_col <- "Pivot"
} else if ("field_id" %in% colnames(ci_data)) {
  field_col <- "field_id"
} else if ("Field_ID" %in% colnames(ci_data)) {
  field_col <- "Field_ID"
}
# Check if we found the required columns
if (!("DOY" %in% colnames(ci_data))) {
  stop("DOY column not found in data")
}
if (!("FitData" %in% colnames(ci_data))) {
  stop("FitData column not found in data")
}
# NOTE(review): field_col is initialized to "model" above, so it is never NULL
# and this fallback branch is dead code.
if (is.null(field_col)) {
  cat("Could not automatically identify field column. Please check column names.\n")
  cat("Available columns: ", paste(colnames(ci_data), collapse = ", "), "\n")
  stop("Manual field column identification required")
}
cat(paste("Using columns - DOY:", day_col, "Field:", field_col, "FitData:", ci_col, "\n"))
# Extract day 30 data
# NOTE(review): despite the name, this keeps every DOY that is a multiple of
# 30 (30, 60, 90, ...), not only day 30; later code fits curves across the
# whole season using these points.
day_30_data <- ci_data %>%
  filter(DOY %% 30 == 0) %>%
  select(field = !!sym(field_col), ci = !!sym(ci_col), DOY) %>%
  na.omit() %>%
  arrange(field)
cat(paste("Found", nrow(day_30_data), "fields with day 30 CI values\n"))
if (nrow(day_30_data) == 0) {
  stop("No data found for day 30. Check if day 30 exists in the dataset.")
}
# Display summary of day 30 data
cat("\nSummary of day 30 CI values:\n")
print(summary(day_30_data))
# Add field index for plotting (assuming fields represent some spatial or sequential order)
#day_30_data$field_index <- 1:nrow(day_30_data)
# Scatter of all sampled CI values against DOY
p1 <- ggplot(day_30_data, aes(x = DOY, y = ci)) +
  geom_point(color = "blue", size = 3, alpha = 0.7) +
  labs(
    title = "CI Values at Day 30 for All Fields",
    x = "Day of Year (DOY)",
    y = "CI Value",
    subtitle = paste("Total fields:", nrow(day_30_data))
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  )
print(p1)
# Try multiple curve fitting approaches
cat("\nTrying different curve fitting approaches...\n")
# Aggregate data by DOY (take mean CI for each DOY to reduce noise)
aggregated_data <- day_30_data %>%
  group_by(DOY) %>%
  summarise(
    mean_ci = mean(ci, na.rm = TRUE),
    median_ci = median(ci, na.rm = TRUE),
    count = n(),
    .groups = 'drop'
  ) %>%
  filter(count >= 5) # Only keep DOY values with sufficient data points
cat(paste("Aggregated to", nrow(aggregated_data), "DOY points with sufficient data\n"))
# Create prediction data for smooth curves
x_smooth <- seq(min(aggregated_data$DOY), max(aggregated_data$DOY), length.out = 100)
# 1. Quadratic model (as requested)
cat("\n1. Fitting quadratic model...\n")
quad_model <- lm(mean_ci ~ poly(DOY, 2, raw = TRUE), data = aggregated_data)
quad_pred <- predict(quad_model, newdata = data.frame(DOY = x_smooth))
quad_r2 <- summary(quad_model)$r.squared
cat(paste("Quadratic R² =", round(quad_r2, 3), "\n"))
# 2. Logistic growth model: y = K / (1 + exp(-r*(x-x0))) - biologically realistic
# FIX: results are initialized BEFORE the tryCatch. Assignments made inside a
# tryCatch error handler are local to the handler function, so on a
# convergence failure these objects were previously left undefined.
logistic_model <- NULL
logistic_pred <- NULL
logistic_r2 <- NA
cat("\n2. Fitting logistic growth model...\n")
tryCatch({
  # Estimate starting values
  K_start <- max(aggregated_data$mean_ci) * 1.1 # Carrying capacity
  r_start <- 0.05 # Growth rate
  x0_start <- mean(aggregated_data$DOY) # Inflection point
  logistic_model <- nls(mean_ci ~ K / (1 + exp(-r * (DOY - x0))),
                        data = aggregated_data,
                        start = list(K = K_start, r = r_start, x0 = x0_start),
                        control = nls.control(maxiter = 1000))
  logistic_pred <- predict(logistic_model, newdata = data.frame(DOY = x_smooth))
  # Pseudo-R² for nls: 1 - SSE / SST
  logistic_r2 <- 1 - sum(residuals(logistic_model)^2) / sum((aggregated_data$mean_ci - mean(aggregated_data$mean_ci))^2)
  cat(paste("Logistic growth R² =", round(logistic_r2, 3), "\n"))
}, error = function(e) {
  cat("Logistic growth model failed to converge\n")
})
# 3. Beta function model: good for crop growth curves with clear peak
# (pre-initialized for the same tryCatch-handler-scope reason as above)
beta_model <- NULL
beta_pred <- NULL
beta_r2 <- NA
cat("\n3. Fitting Beta function model...\n")
tryCatch({
  # Normalize DOY to 0-1 range for Beta function
  doy_min <- min(aggregated_data$DOY)
  doy_max <- max(aggregated_data$DOY)
  aggregated_data$doy_norm <- (aggregated_data$DOY - doy_min) / (doy_max - doy_min)
  # Beta function: y = a * (x^(p-1)) * ((1-x)^(q-1)) + c
  beta_model <- nls(mean_ci ~ a * (doy_norm^(p-1)) * ((1-doy_norm)^(q-1)) + c,
                    data = aggregated_data,
                    start = list(a = max(aggregated_data$mean_ci) * 20, p = 2, q = 3, c = min(aggregated_data$mean_ci)),
                    control = nls.control(maxiter = 1000))
  # Predict on normalized scale then convert back
  x_smooth_norm <- (x_smooth - doy_min) / (doy_max - doy_min)
  beta_pred <- predict(beta_model, newdata = data.frame(doy_norm = x_smooth_norm))
  beta_r2 <- 1 - sum(residuals(beta_model)^2) / sum((aggregated_data$mean_ci - mean(aggregated_data$mean_ci))^2)
  cat(paste("Beta function R² =", round(beta_r2, 3), "\n"))
}, error = function(e) {
  cat("Beta function model failed to converge\n")
})
# 4. Gaussian (normal) curve: good for symmetric growth patterns
# (pre-initialized for the same tryCatch-handler-scope reason as above)
gaussian_model <- NULL
gaussian_pred <- NULL
gaussian_r2 <- NA
cat("\n4. Fitting Gaussian curve...\n")
tryCatch({
  # Gaussian: y = a * exp(-((x-mu)^2)/(2*sigma^2)) + c
  gaussian_model <- nls(mean_ci ~ a * exp(-((DOY - mu)^2)/(2 * sigma^2)) + c,
                        data = aggregated_data,
                        start = list(a = max(aggregated_data$mean_ci),
                                     mu = aggregated_data$DOY[which.max(aggregated_data$mean_ci)],
                                     sigma = 50,
                                     c = min(aggregated_data$mean_ci)),
                        control = nls.control(maxiter = 1000))
  gaussian_pred <- predict(gaussian_model, newdata = data.frame(DOY = x_smooth))
  gaussian_r2 <- 1 - sum(residuals(gaussian_model)^2) / sum((aggregated_data$mean_ci - mean(aggregated_data$mean_ci))^2)
  cat(paste("Gaussian R² =", round(gaussian_r2, 3), "\n"))
}, error = function(e) {
  cat("Gaussian model failed to converge\n")
})
# 5. LOESS (local regression) - for comparison
cat("\n5. Fitting LOESS smoothing...\n")
loess_model <- loess(mean_ci ~ DOY, data = aggregated_data, span = 0.5)
loess_pred <- predict(loess_model, newdata = data.frame(DOY = x_smooth))
loess_r2 <- 1 - sum(residuals(loess_model)^2) / sum((aggregated_data$mean_ci - mean(aggregated_data$mean_ci))^2)
cat(paste("LOESS R² =", round(loess_r2, 3), "\n"))
# Calculate confidence intervals for both models
cat("\nCalculating confidence intervals...\n")
#' Compute confidence-interval bounds for model predictions
#'
#' @param model A fitted model; "lm" objects get exact parametric intervals,
#'   "loess" (and any other class) gets a residual-based normal approximation.
#' @param data Unused; kept for call-site compatibility.
#' @param newdata Data frame of predictor values at which to predict.
#' @param alpha Two-sided miss probability; the default 0.5 yields a 50% CI.
#' @return A list with numeric vectors `lower` and `upper`.
calculate_ci <- function(model, data, newdata, alpha = 0.5) {
  # Point predictions at the requested locations
  pred_vals <- predict(model, newdata = newdata)
  # lm supports exact parametric confidence intervals directly
  if (inherits(model, "lm")) {
    pred_intervals <- predict(model, newdata = newdata, interval = "confidence", level = 1 - alpha)
    return(list(
      lower = pred_intervals[, "lwr"],
      upper = pred_intervals[, "upr"]
    ))
  }
  # LOESS: normal approximation around the fit using the residual standard
  # deviation; the half-width is the (1 - alpha/2) normal quantile times sd
  if (inherits(model, "loess")) {
    residual_sd <- sd(residuals(model), na.rm = TRUE)
    margin <- qnorm(1 - alpha/2) * residual_sd
    return(list(
      lower = pred_vals - margin,
      upper = pred_vals + margin
    ))
  }
  # Fallback for any other model class: same residual-based approximation
  residual_sd <- sd(residuals(model), na.rm = TRUE)
  margin <- qnorm(1 - alpha/2) * residual_sd
  list(
    lower = pred_vals - margin,
    upper = pred_vals + margin
  )
}
# Calculate CIs for quadratic model using aggregated data (same as model fitting)
quad_ci <- calculate_ci(quad_model, aggregated_data, data.frame(DOY = x_smooth))
# Calculate CIs for LOESS model using aggregated data (same as model fitting)
loess_ci <- calculate_ci(loess_model, aggregated_data, data.frame(DOY = x_smooth))
# Create separate plots for LOESS and Quadratic models
cat("\nCreating LOESS plot with confidence intervals...\n")
# LOESS plot: raw field-level points (light), per-DOY means (dark),
# 50% CI ribbon, and the fitted LOESS curve on top
p_loess <- ggplot(day_30_data, aes(x = DOY, y = ci)) +
  geom_point(color = "lightblue", size = 1.5, alpha = 0.4) +
  geom_point(data = aggregated_data, aes(x = DOY, y = mean_ci),
             color = "darkblue", size = 3, alpha = 0.8) +
  geom_ribbon(data = data.frame(DOY = x_smooth,
                                lower = loess_ci$lower,
                                upper = loess_ci$upper),
              aes(x = DOY, ymin = lower, ymax = upper),
              alpha = 0.3, fill = "purple", inherit.aes = FALSE) +
  geom_line(data = data.frame(DOY = x_smooth, loess = loess_pred),
            aes(x = DOY, y = loess),
            color = "purple", size = 1.5) +
  labs(
    title = "LOESS Model - CI Values Over Growing Season",
    x = "Day of Year (DOY)",
    y = "CI Value",
    subtitle = paste("LOESS R² =", round(loess_r2, 3), "| 50% Confidence Interval")
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  )
# Find optimal point for LOESS: maximum of the fitted curve on the x_smooth grid
loess_max_idx <- which.max(loess_pred)
loess_optimal_doy <- x_smooth[loess_max_idx]
loess_optimal_ci <- loess_pred[loess_max_idx]
# NOTE(review): this geom_point inherits day_30_data from the base ggplot call,
# so the star marker is drawn once per row (overplotted at one location);
# annotate() would draw it a single time.
p_loess <- p_loess +
  geom_point(aes(x = loess_optimal_doy, y = loess_optimal_ci),
             color = "red", size = 5, shape = 8) +
  annotate("text",
           x = loess_optimal_doy + 30,
           y = loess_optimal_ci,
           label = paste("Optimal: DOY", round(loess_optimal_doy, 1), "\nCI =", round(loess_optimal_ci, 3)),
           color = "red", size = 4, fontface = "bold")
print(p_loess)
cat("\nCreating Quadratic plot with confidence intervals...\n")
# Quadratic plot: same layering as the LOESS plot, red/coral palette
p_quadratic <- ggplot(day_30_data, aes(x = DOY, y = ci)) +
  geom_point(color = "lightcoral", size = 1.5, alpha = 0.4) +
  geom_point(data = aggregated_data, aes(x = DOY, y = mean_ci),
             color = "darkred", size = 3, alpha = 0.8) +
  geom_ribbon(data = data.frame(DOY = x_smooth,
                                lower = quad_ci$lower,
                                upper = quad_ci$upper),
              aes(x = DOY, ymin = lower, ymax = upper),
              alpha = 0.3, fill = "red", inherit.aes = FALSE) +
  geom_line(data = data.frame(DOY = x_smooth, quadratic = quad_pred),
            aes(x = DOY, y = quadratic),
            color = "red", size = 1.5) +
  labs(
    title = "Quadratic Model - CI Values Over Growing Season",
    x = "Day of Year (DOY)",
    y = "CI Value",
    subtitle = paste("Quadratic R² =", round(quad_r2, 3), "| 50% Confidence Interval")
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  )
# Find optimal point for Quadratic: vertex of y = c + b*x + a*x^2 at x = -b/(2a)
# (coef order from poly(..., raw = TRUE): intercept, linear, quadratic)
quad_coeffs <- coef(quad_model)
a <- quad_coeffs[3]
b <- quad_coeffs[2]
if(a != 0) {
  quad_optimal_doy <- -b / (2*a)
  doy_range <- range(aggregated_data$DOY)
  # Use the analytic vertex only if it falls inside the observed DOY range;
  # otherwise fall back to the maximum of the predicted curve
  if(quad_optimal_doy >= doy_range[1] && quad_optimal_doy <= doy_range[2]) {
    quad_optimal_ci <- predict(quad_model, newdata = data.frame(DOY = quad_optimal_doy))
  } else {
    quad_optimal_doy <- x_smooth[which.max(quad_pred)]
    quad_optimal_ci <- max(quad_pred)
  }
} else {
  # Degenerate (effectively linear) fit: take the maximum of the predicted curve
  quad_optimal_doy <- x_smooth[which.max(quad_pred)]
  quad_optimal_ci <- max(quad_pred)
}
# NOTE(review): as with the LOESS plot, this marker inherits day_30_data and is
# drawn once per row; annotate() would be cleaner.
p_quadratic <- p_quadratic +
  geom_point(aes(x = quad_optimal_doy, y = quad_optimal_ci),
             color = "darkred", size = 5, shape = 8) +
  annotate("text",
           x = quad_optimal_doy + 30,
           y = quad_optimal_ci,
           label = paste("Optimal: DOY", round(quad_optimal_doy, 1), "\nCI =", round(quad_optimal_ci, 3)),
           color = "darkred", size = 4, fontface = "bold")
print(p_quadratic)
# NOTE(review): p_loess was already printed earlier; this second print
# re-displays it so both model plots appear together at the end of the run.
print(p_loess)
# Print results summary
cat("\n=== RESULTS SUMMARY ===\n")
cat(paste("LOESS Model - R² =", round(loess_r2, 3), "\n"))
cat(paste(" Optimal DOY:", round(loess_optimal_doy, 1), "\n"))
cat(paste(" Optimal CI:", round(loess_optimal_ci, 4), "\n\n"))
cat(paste("Quadratic Model - R² =", round(quad_r2, 3), "\n"))
cat(paste(" Optimal DOY:", round(quad_optimal_doy, 1), "\n"))
cat(paste(" Optimal CI:", round(quad_optimal_ci, 4), "\n"))

View file

@ -1,15 +1,40 @@
install.packages('CAST') # Install required packages for SmartCane project
install.packages("packages/CIprep_0.1.4.tar.gz",repos=NULL, type="source") # This script installs all packages needed to run the CI report dashboard
install.packages('caret')
install.packages('exactextractr') # List of required packages
install.packages('googledrive') required_packages <- c(
install.packages('here') # Core packages
install.packages('lubridate') "here", "tidyverse", "sf", "terra", "tmap", "lubridate",
install.packages('raster')
install.packages('readxl') # Additional data manipulation
install.packages('rsample') "zoo", "readxl", "knitr", "rmarkdown", "dplyr", "purrr", "stringr",
install.packages('sf')
install.packages('terra') # Spatial analysis
install.packages('tidyverse') "exactextractr",
install.packages('tmap')
install.packages('zoo') # Machine learning and statistics
"rsample", "caret", "randomForest", "CAST"
)
# Install a CRAN package unless it is already available in the library
install_if_missing <- function(pkg) {
  already_present <- requireNamespace(pkg, quietly = TRUE)
  if (already_present) {
    message(paste("Package already installed:", pkg))
  } else {
    message(paste("Installing package:", pkg))
    install.packages(pkg, repos = "https://cloud.r-project.org")
  }
}
# Install missing packages
for (pkg in required_packages) {
install_if_missing(pkg)
}
# Load core packages to verify installation
library(here)
library(tidyverse)
library(sf)
library(terra)
message("All required packages have been installed!")

View file

@ -32,15 +32,20 @@ main <- function() {
message("No project_dir provided. Using default:", project_dir) message("No project_dir provided. Using default:", project_dir)
} }
# Make project_dir available globally so parameters_project.R can use it
assign("project_dir", project_dir, envir = .GlobalEnv)
# Initialize project configuration and load utility functions # Initialize project configuration and load utility functions
tryCatch({ tryCatch({
source("parameters_project.R") source("parameters_project.R")
source("ci_extraction_utils.R") source("growth_model_utils.R")
}, error = function(e) { }, error = function(e) {
warning("Default source files not found. Attempting to source from 'r_app' directory.") warning("Default source files not found. Attempting to source from 'r_app' directory.")
tryCatch({ tryCatch({
source("r_app/parameters_project.R") source(here::here("r_app", "parameters_project.R"))
source("r_app/ci_extraction_utils.R") source(here::here("r_app", "growth_model_utils.R"))
warning(paste("Successfully sourced files from 'r_app' directory."))
}, error = function(e) { }, error = function(e) {
stop("Failed to source required files from both default and 'r_app' directories.") stop("Failed to source required files from both default and 'r_app' directories.")
}) })
@ -92,5 +97,6 @@ main <- function() {
}) })
} }
# Run the main function if the script is executed directly if (sys.nframe() == 0) {
main() main()
}

View file

@ -28,19 +28,31 @@ main <- function() {
# Capture command line arguments # Capture command line arguments
args <- commandArgs(trailingOnly = TRUE) args <- commandArgs(trailingOnly = TRUE)
# Process project_dir argument with default
if (length(args) >= 3 && !is.na(args[3])) {
project_dir <- as.character(args[3])
} else {
# Default project directory
project_dir <- "chemba"
message("No project_dir provided. Using default:", project_dir)
}
# Make project_dir available globally so parameters_project.R can use it
assign("project_dir", project_dir, envir = .GlobalEnv)
# Process end_date argument with default # Process end_date argument with default
if (length(args) >= 1 && !is.na(args[1])) { if (length(args) >= 1 && !is.na(args[1])) {
end_date <- as.Date(args[1]) end_date <- as.Date(args[1])
if (is.na(end_date)) { if (is.na(end_date)) {
message("Invalid end_date provided. Using current date.") message("Invalid end_date provided. Using current date.")
#end_date <- Sys.Date() end_date <- Sys.Date()
end_date <- "2023-10-01" # Default date for testing #end_date <- "2024-08-25" # Default date for testing
} }
} else { } else {
# Default to current date if no argument is provided # Default to current date if no argument is provided
#end_date <- Sys.Date() end_date <- Sys.Date()
end_date <- "2023-10-01" # Default date for testing #end_date <- "2024-08-25" # Default date for testing
message("No end_date provided. Using current date:", format(end_date)) message("No end_date provided. Using current date: ", format(end_date))
} }
# Process offset argument with default # Process offset argument with default
@ -56,25 +68,20 @@ main <- function() {
message("No offset provided. Using default:", offset, "days") message("No offset provided. Using default:", offset, "days")
} }
# Process project_dir argument with default
if (length(args) >= 3 && !is.na(args[3])) {
project_dir <- as.character(args[3])
} else {
# Default project directory
project_dir <- "chemba"
message("No project_dir provided. Using default:", project_dir)
}
# 3. Initialize project configuration # 3. Initialize project configuration
# -------------------------------- # --------------------------------
tryCatch({ tryCatch({
source("parameters_project.R") source("parameters_project.R")
source("ci_extraction_utils.R") source("mosaic_creation_utils.R")
safe_log(paste("Successfully sourced files from default directory."))
}, error = function(e) { }, error = function(e) {
warning("Default source files not found. Attempting to source from 'r_app' directory.") warning("Default source files not found. Attempting to source from 'r_app' directory.")
tryCatch({ tryCatch({
source("r_app/parameters_project.R") source(here::here("r_app", "parameters_project.R"))
source("r_app/ci_extraction_utils.R") source(here::here("r_app", "mosaic_creation_utils.R"))
warning(paste("Successfully sourced files from 'r_app' directory."))
}, error = function(e) { }, error = function(e) {
stop("Failed to source required files from both default and 'r_app' directories.") stop("Failed to source required files from both default and 'r_app' directories.")
}) })
@ -82,8 +89,8 @@ main <- function() {
# 4. Generate date range for processing # 4. Generate date range for processing
# --------------------------------- # ---------------------------------
dates <- date_list(end_date, offset) dates <- date_list(end_date, offset)
log_message(paste("Processing data for week", dates$week, "of", dates$year)) safe_log(paste("Processing data for week", dates$week, "of", dates$year))
# Create output filename # Create output filename
file_name_tif <- if (length(args) >= 4 && !is.na(args[4])) { file_name_tif <- if (length(args) >= 4 && !is.na(args[4])) {
@ -92,7 +99,7 @@ main <- function() {
paste0("week_", sprintf("%02d", dates$week), "_", dates$year, ".tif") paste0("week_", sprintf("%02d", dates$week), "_", dates$year, ".tif")
} }
log_message(paste("Output will be saved as:", file_name_tif)) safe_log(paste("Output will be saved as:", file_name_tif))
# 5. Create weekly mosaic using the function from utils # 5. Create weekly mosaic using the function from utils
# ------------------------------------------------- # -------------------------------------------------
@ -107,5 +114,6 @@ main <- function() {
) )
} }
# Run the main function if the script is executed directly if (sys.nframe() == 0) {
main() main()
}

View file

@ -3,8 +3,7 @@
# Utility functions for creating weekly mosaics from daily satellite imagery. # Utility functions for creating weekly mosaics from daily satellite imagery.
# These functions support cloud cover assessment, date handling, and mosaic creation. # These functions support cloud cover assessment, date handling, and mosaic creation.
#' Safe logging function that works whether log_message exists or not #' Safe logging function
#'
#' @param message The message to log #' @param message The message to log
#' @param level The log level (default: "INFO") #' @param level The log level (default: "INFO")
#' @return NULL (used for side effects) #' @return NULL (used for side effects)
@ -158,34 +157,46 @@ count_cloud_coverage <- function(vrt_list, field_boundaries) {
tryCatch({ tryCatch({
# Calculate total pixel area using the first VRT file # Calculate total pixel area using the first VRT file
total_pix_area <- terra::rast(vrt_list[1]) %>% total_pix_area <- terra::rast(vrt_list[1]) |>
terra::subset(1) %>% terra::subset(1) |>
terra::setValues(1) %>% terra::setValues(1) |>
terra::crop(field_boundaries, mask = TRUE) %>% terra::crop(field_boundaries, mask = TRUE) |>
terra::global(fun = "notNA") terra::global(fun = "notNA")
# Extract layer 1 from all rasters (for cloud detection) # Process each raster to detect clouds and shadows
layer_5_list <- purrr::map(vrt_list, function(file) { processed_rasters <- list()
terra::rast(file) %>% terra::subset(1) cloud_masks <- list()
}) %>% terra::rast()
# Create data frame for missing pixels count
missing_pixels_df <- data.frame(
filename = vrt_list,
notNA = numeric(length(vrt_list)),
total_pixels = numeric(length(vrt_list)),
missing_pixels_percentage = numeric(length(vrt_list)),
thres_5perc = numeric(length(vrt_list)),
thres_40perc = numeric(length(vrt_list))
)
# Fill in the data frame with missing pixel statistics
for (i in seq_along(processed_rasters)) {
notna_count <- terra::global(processed_rasters[[i]][[1]], fun = "notNA")$notNA
missing_pixels_df$notNA[i] <- notna_count
missing_pixels_df$total_pixels[i] <- total_pix_area$notNA
missing_pixels_df$missing_pixels_percentage[i] <- round(100 - ((notna_count / total_pix_area$notNA) * 100))
missing_pixels_df$thres_5perc[i] <- as.integer(missing_pixels_df$missing_pixels_percentage[i] < 5)
missing_pixels_df$thres_40perc[i] <- as.integer(missing_pixels_df$missing_pixels_percentage[i] < 45)
}
# Calculate percentage of missing pixels (clouds) # Store processed rasters and cloud masks as attributes
missing_pixels_count <- terra::global(layer_5_list, fun = "notNA") %>% attr(missing_pixels_df, "cloud_masks") <- cloud_masks
dplyr::mutate( attr(missing_pixels_df, "processed_rasters") <- processed_rasters
total_pixels = total_pix_area$notNA,
missing_pixels_percentage = round(100 - ((notNA / total_pix_area$notNA) * 100)),
thres_5perc = as.integer(missing_pixels_percentage < 5),
thres_40perc = as.integer(missing_pixels_percentage < 45)
)
# Log results # Log results
safe_log(paste( safe_log(paste(
"Cloud cover assessment completed for", length(vrt_list), "files.", "Cloud cover assessment completed for", length(vrt_list), "files.",
sum(missing_pixels_count$thres_5perc), "files with <5% cloud cover,", sum(missing_pixels_df$thres_5perc), "files with <5% cloud cover,",
sum(missing_pixels_count$thres_40perc), "files with <45% cloud cover" sum(missing_pixels_df$thres_40perc), "files with <45% cloud cover"
)) ))
return(missing_pixels_df)
return(missing_pixels_count)
}, error = function(e) { }, error = function(e) {
warning("Error in cloud coverage calculation: ", e$message) warning("Error in cloud coverage calculation: ", e$message)
return(NULL) return(NULL)
@ -209,8 +220,8 @@ create_mosaic <- function(vrt_list, missing_pixels_count, field_boundaries = NUL
safe_log("No images available for this period, creating empty mosaic", "WARNING") safe_log("No images available for this period, creating empty mosaic", "WARNING")
x <- terra::rast(raster_files_final[1]) %>% x <- terra::rast(raster_files_final[1]) |>
terra::setValues(0) %>% terra::setValues(0) |>
terra::crop(field_boundaries, mask = TRUE) terra::crop(field_boundaries, mask = TRUE)
names(x) <- c("Red", "Green", "Blue", "NIR", "CI") names(x) <- c("Red", "Green", "Blue", "NIR", "CI")
@ -227,40 +238,111 @@ create_mosaic <- function(vrt_list, missing_pixels_count, field_boundaries = NUL
return(x) return(x)
} }
# Determine best rasters to use based on cloud coverage # Check if we have processed rasters from cloud detection
index_5perc <- which(missing_pixels_count$thres_5perc == max(missing_pixels_count$thres_5perc)) processed_rasters <- attr(missing_pixels_count, "processed_rasters")
index_40perc <- which(missing_pixels_count$thres_40perc == max(missing_pixels_count$thres_40perc)) cloud_masks <- attr(missing_pixels_count, "cloud_masks")
# Create mosaic based on available cloud-free images if (!is.null(processed_rasters) && length(processed_rasters) > 0) {
if (sum(missing_pixels_count$thres_5perc) > 1) { safe_log("Using cloud-masked rasters for mosaic creation")
safe_log("Creating max composite from multiple cloud-free images (<5% clouds)")
cloudy_rasters_list <- vrt_list[index_5perc] # Determine best rasters to use based on cloud coverage
rsrc <- terra::sprc(cloudy_rasters_list) index_5perc <- which(missing_pixels_count$thres_5perc == max(missing_pixels_count$thres_5perc))
x <- terra::mosaic(rsrc, fun = "max") index_40perc <- which(missing_pixels_count$thres_40perc == max(missing_pixels_count$thres_40perc))
} else if (sum(missing_pixels_count$thres_5perc) == 1) {
safe_log("Using single cloud-free image (<5% clouds)")
x <- terra::rast(vrt_list[index_5perc[1]])
} else if (sum(missing_pixels_count$thres_40perc) > 1) {
safe_log("Creating max composite from partially cloudy images (<40% clouds)", "WARNING")
cloudy_rasters_list <- vrt_list[index_40perc]
rsrc <- terra::sprc(cloudy_rasters_list)
x <- terra::mosaic(rsrc, fun = "max")
} else if (sum(missing_pixels_count$thres_40perc) == 1) {
safe_log("Using single partially cloudy image (<40% clouds)", "WARNING")
x <- terra::rast(vrt_list[index_40perc[1]])
# Create mosaic based on available cloud-free images
if (sum(missing_pixels_count$thres_5perc) > 1) {
safe_log("Creating max composite from multiple cloud-free images (<5% clouds)")
# Use the cloud-masked rasters instead of original files
cloudy_rasters_list <- processed_rasters[index_5perc]
rsrc <- terra::sprc(cloudy_rasters_list)
x <- terra::mosaic(rsrc, fun = "max")
# Also create a composite mask showing where data is valid
mask_list <- cloud_masks[index_5perc]
mask_rsrc <- terra::sprc(mask_list)
mask_composite <- terra::mosaic(mask_rsrc, fun = "max")
attr(x, "cloud_mask") <- mask_composite
} else if (sum(missing_pixels_count$thres_5perc) == 1) {
safe_log("Using single cloud-free image (<5% clouds)")
# Use the cloud-masked raster
x <- processed_rasters[[index_5perc[1]]]
attr(x, "cloud_mask") <- cloud_masks[[index_5perc[1]]]
} else if (sum(missing_pixels_count$thres_40perc) > 1) {
safe_log("Creating max composite from partially cloudy images (<40% clouds)", "WARNING")
# Use the cloud-masked rasters
cloudy_rasters_list <- processed_rasters[index_40perc]
rsrc <- terra::sprc(cloudy_rasters_list)
x <- terra::mosaic(rsrc, fun = "max")
# Also create a composite mask
mask_list <- cloud_masks[index_40perc]
mask_rsrc <- terra::sprc(mask_list)
mask_composite <- terra::mosaic(mask_rsrc, fun = "max")
attr(x, "cloud_mask") <- mask_composite
} else if (sum(missing_pixels_count$thres_40perc) == 1) {
safe_log("Using single partially cloudy image (<40% clouds)", "WARNING")
# Use the cloud-masked raster
x <- processed_rasters[[index_40perc[1]]]
attr(x, "cloud_mask") <- cloud_masks[[index_40perc[1]]]
} else {
safe_log("No cloud-free images available, using all cloud-masked images", "WARNING")
# Use all cloud-masked rasters
rsrc <- terra::sprc(processed_rasters)
x <- terra::mosaic(rsrc, fun = "max")
# Also create a composite mask
mask_rsrc <- terra::sprc(cloud_masks)
mask_composite <- terra::mosaic(mask_rsrc, fun = "max")
attr(x, "cloud_mask") <- mask_composite
}
} else { } else {
safe_log("No cloud-free images available, using all images", "WARNING") # Fall back to original behavior if no cloud-masked rasters available
safe_log("No cloud-masked rasters available, using original images", "WARNING")
rsrc <- terra::sprc(vrt_list) # Determine best rasters to use based on cloud coverage
x <- terra::mosaic(rsrc, fun = "max") index_5perc <- which(missing_pixels_count$thres_5perc == max(missing_pixels_count$thres_5perc))
index_40perc <- which(missing_pixels_count$thres_40perc == max(missing_pixels_count$thres_40perc))
# Create mosaic based on available cloud-free images
if (sum(missing_pixels_count$thres_5perc) > 1) {
safe_log("Creating max composite from multiple cloud-free images (<5% clouds)")
cloudy_rasters_list <- vrt_list[index_5perc]
rsrc <- terra::sprc(cloudy_rasters_list)
x <- terra::mosaic(rsrc, fun = "max")
} else if (sum(missing_pixels_count$thres_5perc) == 1) {
safe_log("Using single cloud-free image (<5% clouds)")
x <- terra::rast(vrt_list[index_5perc[1]])
} else if (sum(missing_pixels_count$thres_40perc) > 1) {
safe_log("Creating max composite from partially cloudy images (<40% clouds)", "WARNING")
cloudy_rasters_list <- vrt_list[index_40perc]
rsrc <- terra::sprc(cloudy_rasters_list)
x <- terra::mosaic(rsrc, fun = "max")
} else if (sum(missing_pixels_count$thres_40perc) == 1) {
safe_log("Using single partially cloudy image (<40% clouds)", "WARNING")
x <- terra::rast(vrt_list[index_40perc[1]])
} else {
safe_log("No cloud-free images available, using all images", "WARNING")
rsrc <- terra::sprc(vrt_list)
x <- terra::mosaic(rsrc, fun = "max")
}
} }
# Set consistent layer names # Set consistent layer names
@ -288,18 +370,51 @@ save_mosaic <- function(mosaic_raster, output_dir, file_name, plot_result = FALS
# Create full file path # Create full file path
file_path <- here::here(output_dir, file_name) file_path <- here::here(output_dir, file_name)
# Get cloud mask if it exists
cloud_mask <- attr(mosaic_raster, "cloud_mask")
# Save raster # Save raster
terra::writeRaster(mosaic_raster, file_path, overwrite = TRUE) terra::writeRaster(mosaic_raster, file_path, overwrite = TRUE)
# Save cloud mask if available
if (!is.null(cloud_mask)) {
# Create mask filename by adding _mask before extension
mask_file_name <- gsub("\\.(tif|TIF)$", "_mask.\\1", file_name)
mask_file_path <- here::here(output_dir, mask_file_name)
# Save the mask
terra::writeRaster(cloud_mask, mask_file_path, overwrite = TRUE)
safe_log(paste("Cloud/shadow mask saved to:", mask_file_path))
}
# Create plots if requested # Create plots if requested
if (plot_result) { if (plot_result) {
# Plot the CI band
if ("CI" %in% names(mosaic_raster)) { if ("CI" %in% names(mosaic_raster)) {
terra::plot(mosaic_raster$CI, main = paste("CI map", file_name)) terra::plot(mosaic_raster$CI, main = paste("CI map", file_name))
} }
# Plot RGB image
if (all(c("Red", "Green", "Blue") %in% names(mosaic_raster))) { if (all(c("Red", "Green", "Blue") %in% names(mosaic_raster))) {
terra::plotRGB(mosaic_raster, main = paste("RGB map", file_name)) terra::plotRGB(mosaic_raster, main = paste("RGB map", file_name))
} }
# Plot cloud mask if available
if (!is.null(cloud_mask)) {
terra::plot(cloud_mask, main = paste("Cloud/shadow mask", file_name),
col = c("red", "green"))
}
# If we have both RGB and cloud mask, create a side-by-side comparison
if (all(c("Red", "Green", "Blue") %in% names(mosaic_raster)) && !is.null(cloud_mask)) {
old_par <- par(mfrow = c(1, 2))
terra::plotRGB(mosaic_raster, main = "RGB Image")
# Create a colored mask for visualization (red = cloud/shadow, green = clear)
mask_plot <- cloud_mask
terra::plot(mask_plot, main = "Cloud/Shadow Mask", col = c("red", "green"))
par(old_par)
}
} }
# Log save completion # Log save completion

View file

@ -55,10 +55,9 @@ subchunkify <- function(g, fig_height=7, fig_width=5) {
#' @param week Week number to display in the title #' @param week Week number to display in the title
#' @param age Age of the crop in weeks #' @param age Age of the crop in weeks
#' @param borders Whether to display field borders (default: FALSE) #' @param borders Whether to display field borders (default: FALSE)
#' @param use_breaks Whether to use breaks or continuous spectrum for the raster (default: TRUE)
#' @return A tmap object with the CI map #' @return A tmap object with the CI map
#' #'
create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week, age, borders = FALSE, use_breaks = TRUE){ create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week, age, borders = FALSE){
# Input validation # Input validation
if (missing(pivot_raster) || is.null(pivot_raster)) { if (missing(pivot_raster) || is.null(pivot_raster)) {
stop("pivot_raster is required") stop("pivot_raster is required")
@ -75,34 +74,17 @@ create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend =
if (missing(age) || is.null(age)) { if (missing(age) || is.null(age)) {
stop("age parameter is required") stop("age parameter is required")
} }
# Create the base map
# Create the base map map <- tm_shape(pivot_raster, unit = "m") # Add raster with continuous spectrum (fixed scale 1-8 for consistent comparison)
map <- tm_shape(pivot_raster, unit = "m") map <- map + tm_raster(col.scale = tm_scale_continuous(values = "brewer.rd_yl_gn",
limits = c(1, 8)),
# Add raster with either breaks or continuous spectrum based on parameter col.legend = tm_legend(title = "CI",
if (use_breaks) { orientation = if(legend_is_portrait) "portrait" else "landscape",
map <- map + tm_raster(breaks = c(0,0.5,1,2,3,4,5,6,7,Inf), show = show_legend,
palette = "RdYlGn", position = c("left", "bottom")))
legend.is.portrait = legend_is_portrait, # Add layout elements
midpoint = NA,
title = "CI")
} else {
map <- map + tm_raster(palette = "RdYlGn",
legend.is.portrait = legend_is_portrait,
style = "cont", # Use continuous spectrum
title = "CI")
}
# Add layout elements
map <- map + tm_layout(main.title = paste0("Max CI week ", week,"\n", age, " weeks old"), map <- map + tm_layout(main.title = paste0("Max CI week ", week,"\n", age, " weeks old"),
main.title.size = 0.7, main.title.size = 0.7)
legend.show = show_legend,
legend.position = c("left", "bottom"),
# legend.width = 0.5,
# legend.height = 0.5,
# legend.text.size = 0.8,
# legend.title.size = 0.9,
legend.outside = FALSE)
# Add borders if requested # Add borders if requested
if (borders) { if (borders) {
@ -128,10 +110,9 @@ create_CI_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend =
#' @param week_2 Second week number for comparison #' @param week_2 Second week number for comparison
#' @param age Age of the crop in weeks #' @param age Age of the crop in weeks
#' @param borders Whether to display field borders (default: TRUE) #' @param borders Whether to display field borders (default: TRUE)
#' @param use_breaks Whether to use breaks or continuous spectrum for the raster (default: TRUE)
#' @return A tmap object with the CI difference map #' @return A tmap object with the CI difference map
#' #'
create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week_1, week_2, age, borders = TRUE, use_breaks = TRUE){ create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_legend = F, legend_is_portrait = F, week_1, week_2, age, borders = TRUE){
# Input validation # Input validation
if (missing(pivot_raster) || is.null(pivot_raster)) { if (missing(pivot_raster) || is.null(pivot_raster)) {
stop("pivot_raster is required") stop("pivot_raster is required")
@ -148,35 +129,18 @@ create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_lege
if (missing(age) || is.null(age)) { if (missing(age) || is.null(age)) {
stop("age parameter is required") stop("age parameter is required")
} }
# Create the base map
# Create the base map map <- tm_shape(pivot_raster, unit = "m") # Add raster with continuous spectrum (centered at 0 for difference maps, fixed scale)
map <- tm_shape(pivot_raster, unit = "m") map <- map + tm_raster(col.scale = tm_scale_continuous(values = "brewer.rd_yl_gn",
midpoint = 0,
# Add raster with either breaks or continuous spectrum based on parameter limits = c(-3, 3)),
if (use_breaks) { col.legend = tm_legend(title = "CI difference",
map <- map + tm_raster(breaks = c(-3,-2,-1,0,1,2,3), orientation = if(legend_is_portrait) "portrait" else "landscape",
palette = "RdYlGn", show = show_legend,
legend.is.portrait = legend_is_portrait, position = c("left", "bottom")))
midpoint = 0, # Add layout elements
title = "CI difference")
} else {
map <- map + tm_raster(palette = "RdYlGn",
legend.is.portrait = legend_is_portrait,
style = "cont", # Use continuous spectrum
midpoint = 0,
title = "CI difference")
}
# Add layout elements
map <- map + tm_layout(main.title = paste0("CI change week ", week_1, " - week ", week_2, "\n", age, " weeks old"), map <- map + tm_layout(main.title = paste0("CI change week ", week_1, " - week ", week_2, "\n", age, " weeks old"),
main.title.size = 0.7, main.title.size = 0.7)
legend.show = show_legend,
legend.position = c("left", "bottom"),
# legend.width = 0.5,
# legend.height = 0.5,
# legend.text.size = 0.8,
# legend.title.size = 0.9,
legend.outside = FALSE)
# Add borders if requested # Add borders if requested
if (borders) { if (borders) {
@ -205,7 +169,6 @@ create_CI_diff_map <- function(pivot_raster, pivot_shape, pivot_spans, show_lege
#' @param week_minus_1 Previous week number #' @param week_minus_1 Previous week number
#' @param week_minus_2 Two weeks ago week number #' @param week_minus_2 Two weeks ago week number
#' @param week_minus_3 Three weeks ago week number #' @param week_minus_3 Three weeks ago week number
#' @param use_breaks Whether to use discrete breaks or continuous spectrum (default: TRUE)
#' @param borders Whether to display field borders (default: TRUE) #' @param borders Whether to display field borders (default: TRUE)
#' @return NULL (adds output directly to R Markdown document) #' @return NULL (adds output directly to R Markdown document)
#' #'
@ -221,7 +184,6 @@ ci_plot <- function(pivotName,
week_minus_1 = week_minus_1, week_minus_1 = week_minus_1,
week_minus_2 = week_minus_2, week_minus_2 = week_minus_2,
week_minus_3 = week_minus_3, week_minus_3 = week_minus_3,
use_breaks = TRUE,
borders = TRUE){ borders = TRUE){
# Input validation # Input validation
if (missing(pivotName) || is.null(pivotName) || pivotName == "") { if (missing(pivotName) || is.null(pivotName) || pivotName == "") {
@ -282,35 +244,28 @@ ci_plot <- function(pivotName,
# Create spans for borders # Create spans for borders
joined_spans2 <- field_boundaries %>% joined_spans2 <- field_boundaries %>%
sf::st_transform(sf::st_crs(pivotShape)) %>% sf::st_transform(sf::st_crs(pivotShape)) %>% dplyr::filter(field %in% pivotName)
dplyr::filter(field %in% pivotName)
# Create the maps for different timepoints # Create the maps for different timepoints
CImap_m2 <- create_CI_map(singlePivot_m2, AllPivots2, joined_spans2, CImap_m2 <- create_CI_map(singlePivot_m2, AllPivots2, joined_spans2,
show_legend = TRUE, legend_is_portrait = TRUE, show_legend = TRUE, legend_is_portrait = TRUE,
week = week_minus_2, age = age - 2, borders = borders, week = week_minus_2, age = age - 2, borders = borders)
use_breaks = use_breaks)
CImap_m1 <- create_CI_map(singlePivot_m1, AllPivots2, joined_spans2, CImap_m1 <- create_CI_map(singlePivot_m1, AllPivots2, joined_spans2,
show_legend = FALSE, legend_is_portrait = FALSE, show_legend = FALSE, legend_is_portrait = FALSE,
week = week_minus_1, age = age - 1, borders = borders, week = week_minus_1, age = age - 1, borders = borders)
use_breaks = use_breaks)
CImap <- create_CI_map(singlePivot, AllPivots2, joined_spans2, CImap <- create_CI_map(singlePivot, AllPivots2, joined_spans2,
show_legend = FALSE, legend_is_portrait = FALSE, show_legend = FALSE, legend_is_portrait = FALSE,
week = week, age = age, borders = borders, week = week, age = age, borders = borders)
use_breaks = use_breaks) # Create difference maps - only show legend on the second one to avoid redundancy
# Create difference maps - only show legend on the second one to avoid redundancy
CI_max_abs_last_week <- create_CI_diff_map(abs_CI_last_week, AllPivots2, joined_spans2, CI_max_abs_last_week <- create_CI_diff_map(abs_CI_last_week, AllPivots2, joined_spans2,
show_legend = FALSE, legend_is_portrait = TRUE, show_legend = FALSE, legend_is_portrait = FALSE,
week_1 = week, week_2 = week_minus_1, age = age, borders = borders, week_1 = week, week_2 = week_minus_1, age = age, borders = borders)
use_breaks = use_breaks)
CI_max_abs_three_week <- create_CI_diff_map(abs_CI_three_week, AllPivots2, joined_spans2, CI_max_abs_three_week <- create_CI_diff_map(abs_CI_three_week, AllPivots2, joined_spans2,
show_legend = TRUE, legend_is_portrait = TRUE, show_legend = TRUE, legend_is_portrait = TRUE,
week_1 = week, week_2 = week_minus_3, age = age, borders = borders, week_1 = week, week_2 = week_minus_3, age = age, borders = borders)
use_breaks = use_breaks)
# Arrange the maps # Arrange the maps
tst <- tmap_arrange(CImap_m2, CImap_m1, CImap, CI_max_abs_last_week, CI_max_abs_three_week, nrow = 1) tst <- tmap_arrange(CImap_m2, CImap_m1, CImap, CI_max_abs_last_week, CI_max_abs_three_week, nrow = 1)

View file

@ -0,0 +1,409 @@
<!-- filepath: c:\Users\timon\Resilience BV\4020 SCane ESA DEMO - Documenten\General\4020 SCDEMO Team\4020 TechnicalData\WP3\smartcane\r_app\system_architecture.md -->
# SmartCane System Architecture
## Overview
The SmartCane system is a comprehensive agricultural intelligence platform that processes satellite imagery and farm data to provide agronomic insights for sugarcane farmers. The system architecture follows a modular, layered approach with clear separation of concerns between data acquisition, processing, and presentation.
## Architectural Layers
The SmartCane system follows a layered architecture pattern, which is a standard approach in software engineering for organizing complex systems. This architecture divides the system into distinct functional layers, each with specific responsibilities. While these layers aren't explicitly shown as separate visual elements in the diagrams, they help conceptualize how components are organized by their function:
### 1. Data Acquisition Layer
- **Role**: Responsible for fetching raw data from external sources and user inputs
- **Components**: Manual Sentinel Hub Requests, Python API Downloader, User Input Interface
- **Functions**: Sets up manual requests on the Sentinel Hub Requests Builder for specific client fields; connects to satellite data providers; downloads imagery; manages API credentials; and performs preliminary data validation
### 2. Processing Layer (SmartCane Engine)
- **Role**: Core analytical engine that transforms raw data into actionable insights
- **Components**: Python API Downloader (pre-processing), R Processing Engine (analytics)
- **Functions**: Image processing, cloud masking, crop index calculation, field boundary processing, statistical analysis, report generation
### 3. Presentation Layer
- **Role**: Delivers insights to end users in accessible formats
- **Components**: Laravel Web App, Email Delivery System
- **Functions**: Interactive dashboards, visualization, report delivery, user management, project scheduling
### 4. Data Storage Layer
- **Role**: Persists system data across processing cycles
- **Components**: File System, Database
- **Functions**: Stores raw imagery, processed rasters, analytical results, user data, configuration
## Key Subsystems
### 1. Python API Downloader
- **Role**: Acquires and pre-processes satellite imagery
- **Inputs**: API credentials, field boundaries, date parameters, evaluation scripts
- **Outputs**: Raw satellite images, merged GeoTIFFs, virtual rasters
- **Interfaces**: External satellite APIs (Planet via Sentinel Hub), file system
- **Orchestration**: Triggered by shell scripts from the Laravel application
### 2. R Processing Engine
- **Role**: Performs advanced analytics and generates insights
- **Inputs**: Processed satellite imagery, field boundaries, harvest data, project parameters
- **Outputs**: Crop indices, mosaics, RDS data files, agronomic reports
- **Interfaces**: File system, report templates
- **Orchestration**: Triggered by shell scripts from the Laravel application
### 3. Laravel Web Application
- **Role**: Provides operator interface and orchestrates the overall system
- **Inputs**: User data, configuration settings
- **Outputs**: Web interface, scheduling, report delivery
- **Interfaces**: Users, database, file system
- **Orchestration**: Controls execution of the SmartCane Engine via shell scripts
### 4. Shell Script Orchestration
- **Role**: Bridges between web application and processing components
- **Functions**: Triggers processing workflows, manages execution environment, handles errors
- **Examples**: runcane.sh, runpython.sh, build_mosaic.sh, build_report.sh
## Data Flow
1. **Input Stage**:
- Operators (internal team) manually prepare and submit requests on Sentinel Hub Requests Builder for the specific fields of a client.
- Operators (internal team) provide farm data (field boundaries, harvest data) via the Laravel Web App.
- The system schedules data acquisition for specific dates/regions.
2. **Acquisition Stage**:
- Laravel triggers Python API Downloader via shell scripts
- Python connects to satellite data providers and downloads raw imagery
- Downloaded data is stored in the file system
3. **Processing Stage**:
- Laravel triggers R Processing Engine via shell scripts
- R scripts read satellite imagery and farm data
- Processing produces crop indices, analytics, and reports
- Results are stored in the file system
4. **Output Stage**:
- Laravel Web App accesses processed results
- Reports are delivered to users via email
## System Integration Points
- **Python-R Integration**: Data handover via file system (GeoTIFF, virtual rasters)
- **Engine-Laravel Integration**: Orchestration via shell scripts, data exchange via file system and database
- **User-System Integration**: Web interface, file uploads, email notifications
## Developed/Customized Elements
- **Custom Cloud Masking Algorithm**: Specialized for agricultural applications in tropical regions
- **Crop Index Extraction Pipeline**: Tailored to sugarcane spectral characteristics
- **Reporting Templates**: Designed for agronomic decision support
- **Shell Script Orchestration**: Custom workflow management for the system's components
## Strategic Role of Satellite Data
Satellite data is central to the SmartCane system, providing:
- Regular, non-invasive field monitoring
- Detection of spatial patterns not visible from ground level
- Historical analysis of crop performance
- Early warning of crop stress or disease
- Quantification of field variability for precision agriculture
## Pilot Utilization Sites
The SmartCane system is currently operational in Mozambique, Kenya, and Tanzania. Future pilot deployments and expansions are planned for Uganda, Colombia, Mexico, Guatemala, South Africa, and Zambia.
---
## System Architecture Diagrams
Below are diagrams illustrating the system architecture from different perspectives.
### Overall System Architecture
This diagram provides a high-level overview of the complete SmartCane system, showing how major components interact. It focuses on the system boundaries and main data flows between the Python API Downloader, R Processing Engine, Laravel Web App, and data storage components. This view helps understand how the system works as a whole.
```mermaid
graph TD
A["fa:fa-satellite External Satellite Data Providers API"] --> PyDL["fa:fa-download Python API Downloader"];
C["fa:fa-users Users: Farm Data Input e.g., GeoJSON, Excel"] --> D{"fa:fa-laptop-code Laravel Web App"};
subgraph SmartCane System
PyDL --> G["fa:fa-folder-open File System: Raw Satellite Imagery, Rasters, RDS, Reports, Boundaries"];
E["fa:fa-cogs R Processing Engine"] -- Reads --> G;
E -- Writes --> G;
D -- Manages/Triggers --> F["fa:fa-terminal Shell Script Orchestration"];
F -- Executes --> PyDL;
F -- Executes --> E;
D -- Manages/Accesses --> G;
D -- Reads/Writes --> H["fa:fa-database Database: Project Metadata, Users, Schedules"];
E -- Generates --> I["fa:fa-file-alt Agronomic Reports: DOCX, HTML"];
D -- Accesses/Delivers --> I;
end
D --> J["fa:fa-desktop Users: Web Interface (future)"];
I -- Via Email (SMTP) --> K["fa:fa-envelope Users: Email Reports"];
style E fill:#f9f,stroke:#333,stroke-width:2px
style D fill:#bbf,stroke:#333,stroke-width:2px
style PyDL fill:#ffdd57,stroke:#333,stroke-width:2px
```
### R Processing Engine Detail
This diagram zooms in on the R Processing Engine subsystem, detailing the internal components and data flow. It shows how raw satellite imagery and field data progress through various R scripts to produce crop indices and reports. The diagram highlights the data transformation pipeline within this analytical core of the SmartCane system.
```mermaid
graph TD
subgraph R Processing Engine
direction TB
subgraph Inputs
SatelliteImages["fa:fa-image Raw Satellite Imagery"]
FieldBoundaries["fa:fa-map-marker-alt Field Boundaries .geojson"]
HarvestData["fa:fa-file-excel Harvest Data .xlsx"]
ProjectParams["fa:fa-file-code Project Parameters .R"]
end
subgraph Core R Scripts & Processes
ParamConfig("fa:fa-cogs parameters_project.R")
MosaicScript("fa:fa-images mosaic_creation.R")
CIExtractionScript("fa:fa-microscope ci_extraction.R")
ReportUtils("fa:fa-tools executive_report_utils.R")
DashboardRmd("fa:fa-tachometer-alt CI_report_dashboard_planet_enhanced.Rmd")
SummaryRmd("fa:fa-list-alt CI_report_executive_summary.Rmd")
end
subgraph Outputs
WeeklyMosaics["fa:fa-file-image Weekly Mosaics .tif"]
CIDataRDS["fa:fa-database CI Data .rds"]
CIRasters["fa:fa-layer-group CI Rasters .tif"]
DashboardReport["fa:fa-chart-bar Dashboard Report .docx/.html"]
SummaryReport["fa:fa-file-invoice Executive Summary .docx/.html"]
end
%% Data Flow
ProjectParams --> ParamConfig;
SatelliteImages --> MosaicScript;
FieldBoundaries --> MosaicScript;
ParamConfig --> MosaicScript;
MosaicScript --> WeeklyMosaics;
WeeklyMosaics --> CIExtractionScript;
FieldBoundaries --> CIExtractionScript;
ParamConfig --> CIExtractionScript;
CIExtractionScript --> CIDataRDS;
CIExtractionScript --> CIRasters;
CIDataRDS --> ReportUtils;
CIRasters --> ReportUtils;
HarvestData --> ReportUtils;
ParamConfig --> ReportUtils;
ReportUtils --> DashboardRmd;
ReportUtils --> SummaryRmd;
ParamConfig --> DashboardRmd;
ParamConfig --> SummaryRmd;
DashboardRmd --> DashboardReport;
SummaryRmd --> SummaryReport;
end
ShellOrchestration["fa:fa-terminal Shell Scripts e.g., build_mosaic.sh, build_report.sh"] -->|Triggers| R_Processing_Engine["fa:fa-cogs R Processing Engine"]
style R_Processing_Engine fill:#f9f,stroke:#333,stroke-width:2px
style Inputs fill:#ccf,stroke:#333,stroke-width:1px
style Outputs fill:#cfc,stroke:#333,stroke-width:1px
style Core_R_Scripts_Processes fill:#ffc,stroke:#333,stroke-width:1px
```
### Python API Downloader Detail
This diagram focuses on the Python API Downloader subsystem, showing its internal components and workflow. It illustrates how API credentials, field boundaries, and other inputs are processed through various Python functions to download, process, and prepare satellite imagery. This view reveals the technical implementation details of the data acquisition layer.
```mermaid
graph TD
subgraph Python API Downloader
direction TB
subgraph Inputs_Py [Inputs]
APICreds["fa:fa-key API Credentials (SH_CLIENT_ID, SH_CLIENT_SECRET)"]
DateRangeParams["fa:fa-calendar-alt Date Range Parameters (days_needed, specific_date)"]
GeoJSONInput["fa:fa-map-marker-alt Field Boundaries (pivot.geojson)"]
ProjectConfig["fa:fa-cogs Project Configuration (project_name, paths)"]
EvalScripts["fa:fa-file-code Evalscripts (JS for cloud masking & band selection)"]
end
subgraph Core_Python_Logic_Py [Core Python Logic & Libraries]
SetupConfig["fa:fa-cog SentinelHubConfig & BYOC Definition"]
DateSlotGen["fa:fa-calendar-check Date Slot Generation (slots)"]
GeoProcessing["fa:fa-map GeoJSON Parsing & BBox Splitting (geopandas, BBoxSplitter)"]
AvailabilityCheck["fa:fa-search-location Image Availability Check (SentinelHubCatalog)"]
RequestHandler["fa:fa-paper-plane Request Generation (SentinelHubRequest, get_true_color_request_day)"]
DownloadClient["fa:fa-cloud-download-alt Image Download (SentinelHubDownloadClient, download_function)"]
MergeUtility["fa:fa-object-group Tile Merging (gdal.BuildVRT, gdal.Translate, merge_files)"]
CleanupUtility["fa:fa-trash-alt Intermediate File Cleanup (empty_folders)"]
end
subgraph Outputs_Py [Outputs]
RawSatImages["fa:fa-file-image Raw Downloaded Satellite Imagery Tiles (response.tiff in dated subfolders)"]
MergedTifs["fa:fa-images Merged TIFs (merged_tif/{slot}.tif)"]
VirtualRasters["fa:fa-layer-group Virtual Rasters (merged_virtual/merged{slot}.vrt)"]
DownloadLogs["fa:fa-file-alt Console Output Logs (print statements)"]
end
ExternalSatAPI["fa:fa-satellite External Satellite Data Providers API (Planet via Sentinel Hub)"]
%% Data Flow for Python Downloader
APICreds --> SetupConfig;
DateRangeParams --> DateSlotGen;
GeoJSONInput --> GeoProcessing;
ProjectConfig --> SetupConfig;
ProjectConfig --> GeoProcessing;
ProjectConfig --> MergeUtility;
ProjectConfig --> CleanupUtility;
EvalScripts --> RequestHandler;
DateSlotGen -- Available Slots --> AvailabilityCheck;
GeoProcessing -- BBox List --> AvailabilityCheck;
SetupConfig --> AvailabilityCheck;
AvailabilityCheck -- Filtered Slots & BBoxes --> RequestHandler;
RequestHandler -- Download Requests --> DownloadClient;
SetupConfig --> DownloadClient;
DownloadClient -- Downloads Data From --> ExternalSatAPI;
ExternalSatAPI -- Returns Image Data --> DownloadClient;
DownloadClient -- Writes --> RawSatImages;
DownloadClient -- Generates --> DownloadLogs;
RawSatImages --> MergeUtility;
MergeUtility -- Writes --> MergedTifs;
MergeUtility -- Writes --> VirtualRasters;
end
ShellOrchestratorPy["fa:fa-terminal Shell Scripts (e.g., runpython.sh triggering planet_download.ipynb)"] -->|Triggers| Python_API_Downloader["fa:fa-download Python API Downloader"];
style Python_API_Downloader fill:#ffdd57,stroke:#333,stroke-width:2px
style Inputs_Py fill:#cdeeff,stroke:#333,stroke-width:1px
style Outputs_Py fill:#d4efdf,stroke:#333,stroke-width:1px
style Core_Python_Logic_Py fill:#fff5cc,stroke:#333,stroke-width:1px
style ExternalSatAPI fill:#f5b7b1,stroke:#333,stroke-width:2px
```
### SmartCane Engine Integration Diagram
This diagram illustrates the integration of Python and R components within the SmartCane Engine. Unlike the first diagram that shows the overall system, this one specifically focuses on how the two processing components interact with each other and the rest of the system. It emphasizes the orchestration layer and data flows between the core processing components and external systems.
```mermaid
graph TD
%% External Systems & Users
Users_DataInput["fa:fa-user Users: Farm Data Input (GeoJSON, Excel, etc.)"] --> Laravel_WebApp;
ExternalSatAPI["fa:fa-satellite External Satellite Data Providers API"];
%% Main Application Components
Laravel_WebApp["fa:fa-globe Laravel Web App (Frontend & Control Plane)"];
Shell_Orchestration["fa:fa-terminal Shell Script Orchestration (e.g., runcane.sh, runpython.sh, build_mosaic.sh)"];
subgraph SmartCane_Engine ["SmartCane Engine (Data Processing Core)"]
direction TB
Python_Downloader["fa:fa-download Python API Downloader"];
R_Engine["fa:fa-chart-line R Processing Engine"];
end
%% Data Storage
FileSystem["fa:fa-folder File System (Raw Imagery, Rasters, RDS, Reports, Boundaries)"];
Database["fa:fa-database Database (Project Metadata, Users, Schedules)"];
%% User Outputs
Users_WebView["fa:fa-desktop Users: Web Interface (future)"];
Users_EmailReports["fa:fa-envelope Users: Email Reports (Agronomic Reports)"];
AgronomicReports["fa:fa-file-alt Agronomic Reports (DOCX, HTML)"];
%% --- Data Flows & Interactions ---
%% Laravel to Orchestration & Engine
Laravel_WebApp -- Manages/Triggers --> Shell_Orchestration;
Shell_Orchestration -- Executes --> Python_Downloader;
Shell_Orchestration -- Executes --> R_Engine;
%% Python Downloader within Engine
ExternalSatAPI -- Satellite Data --> Python_Downloader;
Python_Downloader -- Writes Raw Data --> FileSystem;
%% Inputs to Python (simplified for this view - details in Python-specific diagram)
%% Laravel_WebApp -- Provides Config/Boundaries --> Python_Downloader;
%% R Engine within Engine
%% Inputs to R (simplified - details in R-specific diagram)
%% Laravel_WebApp -- Provides Config/Boundaries --> R_Engine;
R_Engine -- Reads Processed Data/Imagery --> FileSystem;
R_Engine -- Writes Derived Products --> FileSystem;
R_Engine -- Generates --> AgronomicReports;
%% Laravel interaction with Data Storage
Laravel_WebApp -- Manages/Accesses --> FileSystem;
Laravel_WebApp -- Reads/Writes --> Database;
%% Output Delivery
Laravel_WebApp --> Users_WebView;
AgronomicReports --> Users_EmailReports;
%% Assuming a mechanism like SMTP, potentially triggered by Laravel or R-Engine completion
Laravel_WebApp -- Delivers/Displays --> AgronomicReports;
%% Styling
style SmartCane_Engine fill:#e6ffe6,stroke:#333,stroke-width:2px
style Python_Downloader fill:#ffdd57,stroke:#333,stroke-width:2px
style R_Engine fill:#f9f,stroke:#333,stroke-width:2px
style Laravel_WebApp fill:#bbf,stroke:#333,stroke-width:2px
style Shell_Orchestration fill:#f0ad4e,stroke:#333,stroke-width:2px
style FileSystem fill:#d1e0e0,stroke:#333,stroke-width:1px
style Database fill:#d1e0e0,stroke:#333,stroke-width:1px
style ExternalSatAPI fill:#f5b7b1,stroke:#333,stroke-width:2px
style AgronomicReports fill:#d4efdf,stroke:#333,stroke-width:1px
```
## Future Directions
The SmartCane platform is poised for significant evolution, with several key enhancements and new capabilities planned to further empower users and expand its utility:
- **Advanced Management Dashboard**: Development of a more comprehensive and interactive management dashboard to provide users with deeper insights and greater control over their operations.
- **Enhanced Yield Prediction Models**: Improving the accuracy and granularity of yield predictions by incorporating more variables and advanced machine learning techniques.
- **Integrated Weather and Irrigation Advice**: Leveraging weather forecast data and soil moisture information (potentially from new data sources) to provide precise irrigation scheduling and weather-related agronomic advice.
- **AI-Guided Agronomic Advice**: Implementing sophisticated AI algorithms to analyze integrated data (satellite, weather, soil, farm practices) and offer tailored, actionable agronomic recommendations.
- **Automated Advice Generation**: Developing capabilities for the system to automatically generate and disseminate critical advice and alerts to users based on real-time data analysis.
- **Expanded Data Source Integration**:
- **Radar Data**: Incorporating radar satellite imagery (e.g., Sentinel-1) for all-weather monitoring capabilities, particularly useful during cloudy seasons for assessing crop structure, soil moisture, and biomass.
- **IoT and Ground Sensors**: Integrating data from in-field IoT devices and soil sensors for highly localized and continuous monitoring of environmental and soil conditions.
- **Client-Facing Portal**: Exploration and potential development of a client-facing portal to allow end-users direct access to their data, dashboards, and reports, complementing the current internal management interface.
These future developments aim to transform SmartCane into an even more powerful decision support system, fostering sustainable and efficient agricultural practices.
## Conclusion and Integration Summary
The SmartCane system architecture demonstrates a well-integrated solution that combines different technologies and subsystems to solve complex agricultural challenges. Here is a summary of how the key subsystems work together:
### Subsystem Integration
1. **Data Flow Sequence**
- The Laravel Web App initiates the workflow and manages user interactions
- Shell scripts orchestrate the execution sequence of the processing subsystems
- The Python API Downloader acquires raw data from external sources
- The R Processing Engine transforms this data into actionable insights
- Results flow back to users through the web interface and email reports
2. **Technology Integration**
- **Python + R**: Different programming languages are leveraged for their respective strengths—Python for API communication and data acquisition, R for statistical analysis and report generation
- **Laravel + Processing Engine**: Clear separation between web presentation layer and computational backend
- **File System + Database**: Hybrid data storage approach with file system for imagery and reports, database for metadata and user information
3. **Key Integration Mechanisms**
- **File System Bridge**: The different subsystems primarily communicate through standardized file formats (GeoTIFF, GeoJSON, RDS, DOCX)
- **Shell Script Orchestration**: Acts as the "glue" between subsystems, ensuring proper execution sequence and environment setup
- **Standardized Data Formats**: Use of widely-accepted geospatial and data formats enables interoperability
4. **Extensibility and Scalability**
- The modular architecture allows for replacement or enhancement of individual components
- The clear subsystem boundaries enable parallel development and testing
- Standard interfaces simplify integration of new data sources, algorithms, or output methods
The SmartCane architecture balances complexity with maintainability by using well-established technologies and clear boundaries between subsystems. The separation of concerns between data acquisition, processing, and presentation layers ensures that changes in one area minimally impact others, while the consistent data flow pathways ensure that information moves smoothly through the system.

View file

@ -1,68 +0,0 @@
# test_date_functions.R
#
# Tests for the date-related helpers in ci_extraction_utils.R
# (date_list() and date_extract()).
#
# Load the shared test framework.
# NOTE(review): this path is relative to the project root, while the
# source() call below is relative to the tests/ directory -- confirm
# which working directory these tests are expected to run from.
source("tests/test_framework.R")
# Prepare the mock project structure and minimal package set
env <- setup_test_env()
# Load the functions under test
source("../ci_extraction_utils.R")
# date_list(): structure and contents of generated date windows
test_that("date_list creates correct date sequences", {
  # A 7-day window ending on 2023-01-15
  window <- date_list(as.Date("2023-01-15"), 7)

  # Returned structure
  expect_type(window, "list")
  expect_equal(
    names(window),
    c("week", "year", "days_filter", "start_date", "end_date")
  )

  # Returned values
  expect_equal(window$week, lubridate::week(as.Date("2023-01-09")))
  expect_equal(window$year, 2023)
  expect_equal(window$start_date, as.Date("2023-01-09"))
  expect_equal(window$end_date, as.Date("2023-01-15"))
  expect_equal(length(window$days_filter), 7)
  expect_equal(window$days_filter[1], "2023-01-09")
  expect_equal(window$days_filter[7], "2023-01-15")

  # A shorter window
  short_window <- date_list(as.Date("2023-01-15"), 3)
  expect_equal(length(short_window$days_filter), 3)
  expect_equal(
    short_window$days_filter,
    c("2023-01-13", "2023-01-14", "2023-01-15")
  )

  # An end date supplied as a string is accepted as well
  string_window <- date_list("2023-01-15", 5)
  expect_equal(
    string_window$days_filter,
    c("2023-01-11", "2023-01-12", "2023-01-13", "2023-01-14", "2023-01-15")
  )

  # Invalid inputs are rejected with informative errors
  expect_error(date_list("invalid-date", 7), "Invalid end_date provided")
  expect_error(date_list("2023-01-15", -1), "Invalid offset provided")
})
# date_extract(): pulling an ISO date out of assorted file-path shapes
test_that("date_extract correctly extracts dates from file paths", {
  # Every one of these paths embeds the same date
  valid_paths <- c(
    "/some/path/2023-01-15_image.tif",
    "path/to/planet_2023-01-15.tif",
    "c:\\path\\with\\windows\\2023-01-15_file.tif",
    "2023-01-15.tif",
    "prefix-2023-01-15-suffix.tif"
  )
  for (path in valid_paths) {
    expect_equal(date_extract(path), "2023-01-15")
  }

  # A path without a date warns and yields NA
  expect_warning(extracted <- date_extract("no-date-here.tif"), "Could not extract date")
  expect_true(is.na(extracted))
})

# Tear down the test environment
teardown_test_env()

# Report completion
cat("Date function tests completed successfully\n")

View file

@ -1,120 +0,0 @@
# test_framework.R
#
# TEST FRAMEWORK FOR SMARTCANE
# ===========================
# This script provides a simple testing framework for the SmartCane project.
# It includes utilities for setting up test environments and running tests.
#
# Ensure testthat is installed, then attach it. requireNamespace() is the
# idiomatic availability check: unlike require(), it does not attach the
# package as a side effect (the explicit library() below does that).
if (!requireNamespace("testthat", quietly = TRUE)) {
  install.packages("testthat", repos = "https://cran.rstudio.com/")
}
library(testthat)

# Define paths for testing.
# NOTE(review): both paths hang off the parent of the current working
# directory, so the framework must be launched from one level below the
# project root -- confirm this matches how the test runner invokes it.
test_root <- file.path(normalizePath(".."), "tests")
test_data_dir <- file.path(test_root, "test_data")

# Create test directories if they don't exist
dir.create(test_data_dir, recursive = TRUE, showWarnings = FALSE)
# Set up a test environment with all necessary data.
#
# Sources the project package loader, attaches the minimal set of packages
# the tests need (warning, not failing, when one is missing), installs a
# lightweight log_message() stub in the global environment, and creates a
# mock project directory structure under test_data_dir.
#
# Returns: a named list of test-project paths; every character-valued
# entry has been created on disk.
setup_test_env <- function() {
  # Add the working directory's parent to the library search path.
  # NOTE(review): .libPaths() expects package-library directories; adding
  # the project root here looks suspicious -- confirm it is intentional.
  .libPaths(c(.libPaths(), normalizePath("..")))

  # Source required files with minimal dependencies. Failures here only
  # warn so that tests with no heavy dependencies can still run.
  tryCatch({
    source(file.path(normalizePath(".."), "packages.R"))
    # Load minimal dependencies for tests
    required_packages <- c("lubridate", "stringr", "purrr", "dplyr", "testthat")
    for (pkg in required_packages) {
      if (!require(pkg, character.only = TRUE, quietly = TRUE)) {
        warning(paste("Package", pkg, "not available, some tests may fail"))
      }
    }
  }, error = function(e) {
    warning("Error loading dependencies: ", conditionMessage(e))
  })

  # Provide a minimal logger so sourced project code that calls
  # log_message() works without the full application bootstrap.
  assign("log_message", function(message, level = "INFO") {
    cat(paste0("[", level, "] ", message, "\n"))
  }, envir = .GlobalEnv)

  # Mock project structure used by the tests
  test_project <- list(
    project_dir = "test_project",
    data_dir = test_data_dir,
    daily_CI_vals_dir = file.path(test_data_dir, "extracted_ci", "daily_vals"),
    cumulative_CI_vals_dir = file.path(test_data_dir, "extracted_ci", "cumulative_vals"),
    merged_final = file.path(test_data_dir, "merged_final"),
    daily_vrt = file.path(test_data_dir, "daily_vrt")
  )

  # Create every path-valued entry
  for (dir in test_project) {
    if (is.character(dir)) {
      dir.create(dir, recursive = TRUE, showWarnings = FALSE)
    }
  }

  return(test_project)
}
# Tear down the test environment.
# Intentionally a no-op: directories created by setup_test_env() are left
# on disk so failures can be inspected after a run.
teardown_test_env <- function() {
  # Clean up only test-created files if needed
  # We'll leave the main directories for inspection
}
# Discover and source every test file under test_root.
#
# pattern: regular expression a file name must match to be run.
# Returns TRUE when all matched files ran without error, FALSE otherwise
# (including when no files matched).
run_tests <- function(pattern = "^test_.+\\.R$") {
  candidates <- list.files(
    path = test_root,
    pattern = pattern,
    full.names = TRUE
  )
  # This framework file matches the pattern itself; skip it.
  candidates <- candidates[!grepl("test_framework\\.R$", candidates)]

  if (length(candidates) == 0) {
    cat("No test files found matching pattern:", pattern, "\n")
    return(FALSE)
  }

  cat("Found", length(candidates), "test files:\n")
  cat(paste(" -", basename(candidates)), sep = "\n")
  cat("\n")

  # Source one file, reporting success/failure on the console.
  run_one <- function(path) {
    cat("Running tests in:", basename(path), "\n")
    tryCatch({
      source(path, local = TRUE)
      cat("✓ Tests completed\n\n")
      TRUE
    }, error = function(e) {
      cat("✗ Error:", e$message, "\n\n")
      FALSE
    })
  }
  passed <- vapply(candidates, run_one, logical(1), USE.NAMES = FALSE)

  # Summary
  cat("\nTest Summary:", sum(passed), "of", length(candidates),
      "test files completed successfully\n")
  all(passed)
}
# If this script is run directly, run all tests.
#
# BUGFIX: sys.frame(1)$ofile only exists while the file is being
# source()d; when launched directly via `Rscript test_framework.R` there
# is no frame 1 and the unguarded access throws -- i.e. the original
# condition errored in exactly the non-interactive direct-run case it was
# meant to detect. Prefer the --file= command-line argument and fall back
# to the source() frame, swallowing the error when neither applies.
if (!interactive()) {
  file_arg <- grep("^--file=", commandArgs(FALSE), value = TRUE)
  script_name <- if (length(file_arg) > 0) {
    basename(sub("^--file=", "", file_arg[1]))
  } else {
    tryCatch(basename(sys.frame(1)$ofile), error = function(e) "")
  }
  if (identical(script_name, "test_framework.R")) {
    setup_test_env()
    run_tests()
    teardown_test_env()
  }
}

View file

@ -1,280 +0,0 @@
# test_report_utils.R
#
# Tests for the visualization functions in report_utils.R
# (create_CI_map, create_CI_diff_map, ci_plot, cum_ci_plot, get_week_path).
#
# Load the shared test framework.
# NOTE(review): this path is relative to the project root, while the
# source() call below is relative to the tests/ directory -- confirm the
# intended working directory.
source("tests/test_framework.R")
# Prepare the mock project structure and minimal package set
env <- setup_test_env()
# Required libraries for building spatial fixtures and plots
library(testthat)
library(terra)
library(sf)
library(dplyr)
library(ggplot2)
# Load the functions under test
source("../report_utils.R")
# Build the fixtures shared by the report_utils tests: a 10x10 CI raster,
# two square field polygons, harvest metadata, and weekly CI quadrant data.
# Returns a named list: raster, field_boundaries, harvesting_data,
# ci_quadrant.
create_mock_data <- function() {
  # 10x10 raster over [0, 10]^2 with cell values 1..100, named "CI"
  ci_raster <- terra::rast(
    nrows = 10, ncols = 10,
    xmin = 0, xmax = 10, ymin = 0, ymax = 10,
    vals = 1:100
  )
  names(ci_raster) <- "CI"

  # Axis-aligned square polygon from opposite corners
  square <- function(x0, y0, x1, y1) {
    sf::st_polygon(list(rbind(
      c(x0, y0), c(x1, y0), c(x1, y1), c(x0, y1), c(x0, y0)
    )))
  }

  # Two simple field boundaries sharing the raster's CRS
  boundaries <- sf::st_sf(
    field = c("Field1", "Field2"),
    sub_field = c("A", "B"),
    geometry = sf::st_sfc(
      square(1, 1, 5, 5),
      square(6, 6, 9, 9)
    ),
    crs = sf::st_crs(ci_raster)
  )

  # Harvest metadata for both fields
  harvest <- data.frame(
    field = c("Field1", "Field2"),
    sub_field = c("A", "B"),
    age = c(100, 150),
    season_start = as.Date(c("2023-01-01", "2023-02-01")),
    year = c(2023, 2023)
  )

  # Ten weekly CI observations per field
  quadrant <- data.frame(
    field = rep(c("Field1", "Field2"), each = 10),
    sub_field = rep(c("A", "B"), each = 10),
    Date = rep(seq(as.Date("2023-01-01"), by = "week", length.out = 10), 2),
    DOY = rep(1:10, 2),
    cumulative_CI = rep(cumsum(1:10), 2),
    value = rep(1:10, 2),
    season = rep(2023, 20),
    model = rep(c("northwest", "northeast", "southwest", "southeast"), 5)
  )

  list(
    raster = ci_raster,
    field_boundaries = boundaries,
    harvesting_data = harvest,
    ci_quadrant = quadrant
  )
}
# create_CI_map(): object construction and required-argument validation
test_that("create_CI_map creates a valid tmap object", {
  fixtures <- create_mock_data()
  field1 <- fixtures$field_boundaries[1, ]

  # A fully-specified call should yield a tmap object.
  # NOTE(review): use_breaks looks obsolete under the tmap v4 API --
  # confirm whether it is still accepted.
  map <- create_CI_map(
    pivot_raster = fixtures$raster,
    pivot_shape = field1,
    pivot_spans = field1,
    week = "01",
    age = 10,
    borders = TRUE,
    use_breaks = TRUE
  )
  expect_true(inherits(map, "tmap"))

  # Missing mandatory arguments must fail with clear messages
  expect_error(
    create_CI_map(
      pivot_shape = field1,
      pivot_spans = field1,
      week = "01", age = 10
    ),
    "pivot_raster is required"
  )
  expect_error(
    create_CI_map(
      pivot_raster = fixtures$raster,
      pivot_spans = field1,
      week = "01", age = 10
    ),
    "pivot_shape is required"
  )
})
# create_CI_diff_map(): object construction and required-argument validation
test_that("create_CI_diff_map creates a valid tmap object", {
  fixtures <- create_mock_data()
  field1 <- fixtures$field_boundaries[1, ]

  # A fully-specified call should yield a tmap object
  diff_map <- create_CI_diff_map(
    pivot_raster = fixtures$raster,
    pivot_shape = field1,
    pivot_spans = field1,
    week_1 = "01",
    week_2 = "02",
    age = 10,
    borders = TRUE,
    use_breaks = TRUE
  )
  expect_true(inherits(diff_map, "tmap"))

  # Missing mandatory arguments must fail with clear messages
  expect_error(
    create_CI_diff_map(
      pivot_shape = field1,
      pivot_spans = field1,
      week_1 = "01", week_2 = "02", age = 10
    ),
    "pivot_raster is required"
  )
  expect_error(
    create_CI_diff_map(
      pivot_raster = fixtures$raster,
      pivot_spans = field1,
      week_1 = "01", age = 10
    ),
    "week_1 and week_2 parameters are required"
  )
})
# ci_plot(): argument validation under silenced console output
test_that("ci_plot handles input parameters correctly", {
  fixtures <- create_mock_data()

  # Redirect console output so the test log stays readable
  sink_file <- tempfile()
  sink(sink_file)

  # A fully-specified call should not error
  expect_error(
    ci_plot(
      pivotName = "Field1",
      field_boundaries = fixtures$field_boundaries,
      current_ci = fixtures$raster,
      ci_minus_1 = fixtures$raster,
      ci_minus_2 = fixtures$raster,
      last_week_diff = fixtures$raster,
      three_week_diff = fixtures$raster,
      harvesting_data = fixtures$harvesting_data,
      week = "01",
      week_minus_1 = "52",
      week_minus_2 = "51",
      week_minus_3 = "50",
      use_breaks = TRUE,
      borders = TRUE
    ),
    NA # Expect no error
  )

  # Calling with nothing at all must fail on the first required argument
  expect_error(
    ci_plot(),
    "pivotName is required"
  )

  # A field absent from the boundaries should raise some error
  expect_error(
    ci_plot(
      pivotName = "NonExistentField",
      field_boundaries = fixtures$field_boundaries,
      current_ci = fixtures$raster,
      ci_minus_1 = fixtures$raster,
      ci_minus_2 = fixtures$raster,
      last_week_diff = fixtures$raster,
      three_week_diff = fixtures$raster,
      harvesting_data = fixtures$harvesting_data
    ),
    regexp = NULL # We expect some error related to the field not being found
  )

  # Restore console output
  sink()
  unlink(sink_file)
})
# cum_ci_plot(): plot-type handling under silenced console output
test_that("cum_ci_plot handles input parameters correctly", {
  fixtures <- create_mock_data()

  # Redirect console output so the test log stays readable
  sink_file <- tempfile()
  sink(sink_file)

  # A fully-specified call should not error
  expect_error(
    cum_ci_plot(
      pivotName = "Field1",
      ci_quadrant_data = fixtures$ci_quadrant,
      plot_type = "value",
      facet_on = FALSE,
      x_unit = "days"
    ),
    NA # Expect no error
  )

  # The remaining valid plot types should also work
  for (plot_type in c("CI_rate", "cumulative_CI")) {
    expect_error(
      cum_ci_plot(
        pivotName = "Field1",
        ci_quadrant_data = fixtures$ci_quadrant,
        plot_type = plot_type
      ),
      NA # Expect no error
    )
  }

  # An unknown plot type must be rejected with the documented message
  expect_error(
    cum_ci_plot(
      pivotName = "Field1",
      ci_quadrant_data = fixtures$ci_quadrant,
      plot_type = "invalid_type"
    ),
    "plot_type must be one of: 'value', 'CI_rate', or 'cumulative_CI'"
  )

  # Calling with nothing at all must fail on the first required argument
  expect_error(
    cum_ci_plot(),
    "pivotName is required"
  )

  # Restore console output
  sink()
  unlink(sink_file)
})
# get_week_path(): ISO-week path resolution and argument validation
test_that("get_week_path returns correct path", {
  # 2023-01-15 falls in week 2 of 2023
  base_path <- get_week_path(
    mosaic_path = "ci_max_mosaics",
    input_date = "2023-01-15",
    week_offset = 0
  )
  expect_match(base_path, "week_02_2023\\.tif$", all = FALSE)

  # A -1 offset steps back one week
  shifted_path <- get_week_path(
    mosaic_path = "ci_max_mosaics",
    input_date = "2023-01-15",
    week_offset = -1
  )
  expect_match(shifted_path, "week_01_2023\\.tif$", all = FALSE)

  # Missing mandatory arguments must fail with clear messages
  expect_error(
    get_week_path(input_date = "2023-01-15", week_offset = 0),
    "mosaic_path is required"
  )
  expect_error(
    get_week_path(mosaic_path = "ci_max_mosaics", week_offset = 0),
    "input_date is required"
  )
})

# Tear down the test environment
teardown_test_env()

# Report completion
cat("Report utility function tests completed successfully\n")